mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-21 16:16:28 -04:00
Merge branch 'ARI-5379' into qa
This commit is contained in:
commit
d3088c6418
@ -88,6 +88,7 @@ def behavior_script(url, template_parameters=None, behaviors_dir=None):
|
||||
Returns the javascript behavior string populated with template_parameters.
|
||||
'''
|
||||
import re, logging
|
||||
timeout_from_behavior = None
|
||||
for behavior in behaviors(behaviors_dir=behaviors_dir):
|
||||
if re.match(behavior['url_regex'], url):
|
||||
parameters = dict()
|
||||
@ -101,8 +102,10 @@ def behavior_script(url, template_parameters=None, behaviors_dir=None):
|
||||
logging.info(
|
||||
'using template=%r populated with parameters=%r for %r',
|
||||
behavior['behavior_js_template'], parameters, url)
|
||||
return script
|
||||
return None
|
||||
if 'behavior_timeout_sec' in parameters:
|
||||
timeout_from_behavior = int(parameters['behavior_timeout_sec'])
|
||||
return script, timeout_from_behavior
|
||||
return None, timeout_from_behavior
|
||||
|
||||
class ThreadExceptionGate:
|
||||
logger = logging.getLogger(__module__ + "." + __qualname__)
|
||||
|
@ -43,8 +43,9 @@
|
||||
default_parameters:
|
||||
actions:
|
||||
- selector: div.teaser, li.pager__item a
|
||||
- # https://webarchive.jira.com/browse/ARI-5430
|
||||
url_regex: '^https?://www\.careers\.ox\.ac\.uk/.*$'
|
||||
behavior_timeout_sec: 1800
|
||||
- # https://webarchive.jira.com/browse/ARI-5389
|
||||
url_regex: '^https?://pitchfork\.com/.*$'
|
||||
behavior_js_template: umbraBehavior.js.j2
|
||||
default_parameters:
|
||||
actions:
|
||||
|
@ -453,9 +453,11 @@ class Browser:
|
||||
if on_screenshot:
|
||||
jpeg_bytes = self.screenshot()
|
||||
on_screenshot(jpeg_bytes)
|
||||
behavior_script = brozzler.behavior_script(
|
||||
behavior_script, timeout_from_behavior = brozzler.behavior_script(
|
||||
page_url, behavior_parameters,
|
||||
behaviors_dir=behaviors_dir)
|
||||
if timeout_from_behavior > behavior_timeout:
|
||||
behavior_timeout = timeout_from_behavior
|
||||
self.run_behavior(behavior_script, timeout=behavior_timeout)
|
||||
if skip_extract_outlinks:
|
||||
outlinks = []
|
||||
|
Loading…
x
Reference in New Issue
Block a user