mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-09 06:52:46 -04:00
configurable behavior timeout
This commit is contained in:
parent
a1af18230c
commit
5d2591ca66
3 changed files with 9 additions and 3 deletions
|
@@ -106,6 +106,7 @@ def behavior_script(url, template_parameters=None, behaviors_dir=None):
|
||||||
Returns the javascript behavior string populated with template_parameters.
|
Returns the javascript behavior string populated with template_parameters.
|
||||||
'''
|
'''
|
||||||
import re, logging, json
|
import re, logging, json
|
||||||
|
behavior_timeout_custom = None
|
||||||
for behavior in behaviors(behaviors_dir=behaviors_dir):
|
for behavior in behaviors(behaviors_dir=behaviors_dir):
|
||||||
if re.match(behavior['url_regex'], url):
|
if re.match(behavior['url_regex'], url):
|
||||||
parameters = dict()
|
parameters = dict()
|
||||||
|
@@ -119,8 +120,10 @@ def behavior_script(url, template_parameters=None, behaviors_dir=None):
|
||||||
logging.info(
|
logging.info(
|
||||||
'using template=%r populated with parameters=%r for %r',
|
'using template=%r populated with parameters=%r for %r',
|
||||||
behavior['behavior_js_template'], json.dumps(parameters), url)
|
behavior['behavior_js_template'], json.dumps(parameters), url)
|
||||||
return script
|
if 'behavior_timeout_sec' in parameters:
|
||||||
return None
|
behavior_timeout_custom = int(parameters['behavior_timeout_sec'])
|
||||||
|
return script, behavior_timeout_custom
|
||||||
|
return None, behavior_timeout_custom
|
||||||
|
|
||||||
class ThreadExceptionGate:
|
class ThreadExceptionGate:
|
||||||
logger = logging.getLogger(__module__ + "." + __qualname__)
|
logger = logging.getLogger(__module__ + "." + __qualname__)
|
||||||
|
|
|
@@ -59,6 +59,7 @@
|
||||||
default_parameters:
|
default_parameters:
|
||||||
actions:
|
actions:
|
||||||
- selector: div.teaser, li.pager__item a
|
- selector: div.teaser, li.pager__item a
|
||||||
|
behavior_timeout_sec: 1800
|
||||||
- # https://webarchive.jira.com/browse/ARI-5389
|
- # https://webarchive.jira.com/browse/ARI-5389
|
||||||
url_regex: '^https?://pitchfork\.com/.*$'
|
url_regex: '^https?://pitchfork\.com/.*$'
|
||||||
behavior_js_template: umbraBehavior.js.j2
|
behavior_js_template: umbraBehavior.js.j2
|
||||||
|
|
|
@@ -462,9 +462,11 @@ class Browser:
|
||||||
self.navigate_to_page(page_url, timeout=page_timeout)
|
self.navigate_to_page(page_url, timeout=page_timeout)
|
||||||
if on_screenshot:
|
if on_screenshot:
|
||||||
self._try_screenshot(on_screenshot)
|
self._try_screenshot(on_screenshot)
|
||||||
behavior_script = brozzler.behavior_script(
|
behavior_script, behavior_timeout_custom = brozzler.behavior_script(
|
||||||
page_url, behavior_parameters,
|
page_url, behavior_parameters,
|
||||||
behaviors_dir=behaviors_dir)
|
behaviors_dir=behaviors_dir)
|
||||||
|
if behavior_timeout_custom:
|
||||||
|
behavior_timeout = behavior_timeout_custom
|
||||||
self.run_behavior(behavior_script, timeout=behavior_timeout)
|
self.run_behavior(behavior_script, timeout=behavior_timeout)
|
||||||
if skip_extract_outlinks:
|
if skip_extract_outlinks:
|
||||||
outlinks = []
|
outlinks = []
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue