mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-12-17 09:33:51 -05:00
configurable behavior timeout
This commit is contained in:
parent
c934759852
commit
70af801da1
3 changed files with 9 additions and 3 deletions
|
|
@ -88,6 +88,7 @@ def behavior_script(url, template_parameters=None, behaviors_dir=None):
|
||||||
Returns the javascript behavior string populated with template_parameters.
|
Returns the javascript behavior string populated with template_parameters.
|
||||||
'''
|
'''
|
||||||
import re, logging
|
import re, logging
|
||||||
|
timeout_from_behavior = None
|
||||||
for behavior in behaviors(behaviors_dir=behaviors_dir):
|
for behavior in behaviors(behaviors_dir=behaviors_dir):
|
||||||
if re.match(behavior['url_regex'], url):
|
if re.match(behavior['url_regex'], url):
|
||||||
parameters = dict()
|
parameters = dict()
|
||||||
|
|
@ -101,8 +102,10 @@ def behavior_script(url, template_parameters=None, behaviors_dir=None):
|
||||||
logging.info(
|
logging.info(
|
||||||
'using template=%r populated with parameters=%r for %r',
|
'using template=%r populated with parameters=%r for %r',
|
||||||
behavior['behavior_js_template'], parameters, url)
|
behavior['behavior_js_template'], parameters, url)
|
||||||
return script
|
if 'behavior_timeout_sec' in parameters:
|
||||||
return None
|
timeout_from_behavior = int(parameters['behavior_timeout_sec'])
|
||||||
|
return script, timeout_from_behavior
|
||||||
|
return None, timeout_from_behavior
|
||||||
|
|
||||||
class ThreadExceptionGate:
|
class ThreadExceptionGate:
|
||||||
logger = logging.getLogger(__module__ + "." + __qualname__)
|
logger = logging.getLogger(__module__ + "." + __qualname__)
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,7 @@
|
||||||
default_parameters:
|
default_parameters:
|
||||||
actions:
|
actions:
|
||||||
- selector: div.teaser, li.pager__item a
|
- selector: div.teaser, li.pager__item a
|
||||||
|
behavior_timeout_sec: 1800
|
||||||
- # https://webarchive.jira.com/browse/ARI-5389
|
- # https://webarchive.jira.com/browse/ARI-5389
|
||||||
url_regex: '^https?://pitchfork\.com/.*$'
|
url_regex: '^https?://pitchfork\.com/.*$'
|
||||||
behavior_js_template: umbraBehavior.js.j2
|
behavior_js_template: umbraBehavior.js.j2
|
||||||
|
|
|
||||||
|
|
@ -448,9 +448,11 @@ class Browser:
|
||||||
if on_screenshot:
|
if on_screenshot:
|
||||||
jpeg_bytes = self.screenshot()
|
jpeg_bytes = self.screenshot()
|
||||||
on_screenshot(jpeg_bytes)
|
on_screenshot(jpeg_bytes)
|
||||||
behavior_script = brozzler.behavior_script(
|
behavior_script, timeout_from_behavior = brozzler.behavior_script(
|
||||||
page_url, behavior_parameters,
|
page_url, behavior_parameters,
|
||||||
behaviors_dir=behaviors_dir)
|
behaviors_dir=behaviors_dir)
|
||||||
|
if timeout_from_behavior > behavior_timeout:
|
||||||
|
behavior_timeout = timeout_from_behavior
|
||||||
self.run_behavior(behavior_script, timeout=behavior_timeout)
|
self.run_behavior(behavior_script, timeout=behavior_timeout)
|
||||||
if skip_extract_outlinks:
|
if skip_extract_outlinks:
|
||||||
outlinks = []
|
outlinks = []
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue