mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 08:39:59 -05:00
Make behavior_timeout configurable
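``behavior_timeout`` was hardcoded to 900 seconds. This MR makes it configurable while keeping 900 seconds as the default: a new ``behavior_timeout`` parameter is added to both ``BrozzlerWorker`` and ``Browser``, and the worker passes its value through to the browser, which uses it as the timeout when running the behavior script.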
This commit is contained in:
parent
18a52f0b15
commit
bb93b04c23
@ -381,7 +381,7 @@ class Browser:
|
|||||||
on_request=None, on_response=None, on_screenshot=None,
|
on_request=None, on_response=None, on_screenshot=None,
|
||||||
username=None, password=None, hashtags=None,
|
username=None, password=None, hashtags=None,
|
||||||
skip_extract_outlinks=False, skip_visit_hashtags=False,
|
skip_extract_outlinks=False, skip_visit_hashtags=False,
|
||||||
page_timeout=300):
|
page_timeout=300, behavior_timeout=900):
|
||||||
'''
|
'''
|
||||||
Browses page in browser.
|
Browses page in browser.
|
||||||
|
|
||||||
@ -448,7 +448,7 @@ class Browser:
|
|||||||
on_screenshot(jpeg_bytes)
|
on_screenshot(jpeg_bytes)
|
||||||
behavior_script = brozzler.behavior_script(
|
behavior_script = brozzler.behavior_script(
|
||||||
page_url, behavior_parameters)
|
page_url, behavior_parameters)
|
||||||
self.run_behavior(behavior_script, timeout=900)
|
self.run_behavior(behavior_script, timeout=behavior_timeout)
|
||||||
if skip_extract_outlinks:
|
if skip_extract_outlinks:
|
||||||
outlinks = []
|
outlinks = []
|
||||||
else:
|
else:
|
||||||
|
@ -105,7 +105,7 @@ class BrozzlerWorker:
|
|||||||
self, frontier, service_registry=None, max_browsers=1,
|
self, frontier, service_registry=None, max_browsers=1,
|
||||||
chrome_exe="chromium-browser", warcprox_auto=False, proxy=None,
|
chrome_exe="chromium-browser", warcprox_auto=False, proxy=None,
|
||||||
skip_extract_outlinks=False, skip_visit_hashtags=False,
|
skip_extract_outlinks=False, skip_visit_hashtags=False,
|
||||||
page_timeout=300):
|
page_timeout=300, behavior_timeout=900):
|
||||||
self._frontier = frontier
|
self._frontier = frontier
|
||||||
self._service_registry = service_registry
|
self._service_registry = service_registry
|
||||||
self._max_browsers = max_browsers
|
self._max_browsers = max_browsers
|
||||||
@ -117,6 +117,7 @@ class BrozzlerWorker:
|
|||||||
self._skip_extract_outlinks = skip_extract_outlinks
|
self._skip_extract_outlinks = skip_extract_outlinks
|
||||||
self._skip_visit_hashtags = skip_visit_hashtags
|
self._skip_visit_hashtags = skip_visit_hashtags
|
||||||
self._page_timeout = page_timeout
|
self._page_timeout = page_timeout
|
||||||
|
self._behavior_timeout = behavior_timeout
|
||||||
|
|
||||||
self._browser_pool = brozzler.browser.BrowserPool(
|
self._browser_pool = brozzler.browser.BrowserPool(
|
||||||
max_browsers, chrome_exe=chrome_exe, ignore_cert_errors=True)
|
max_browsers, chrome_exe=chrome_exe, ignore_cert_errors=True)
|
||||||
@ -419,7 +420,8 @@ class BrozzlerWorker:
|
|||||||
on_request=on_request, hashtags=page.hashtags,
|
on_request=on_request, hashtags=page.hashtags,
|
||||||
skip_extract_outlinks=self._skip_extract_outlinks,
|
skip_extract_outlinks=self._skip_extract_outlinks,
|
||||||
skip_visit_hashtags=self._skip_visit_hashtags,
|
skip_visit_hashtags=self._skip_visit_hashtags,
|
||||||
page_timeout=self._page_timeout)
|
page_timeout=self._page_timeout,
|
||||||
|
behavior_timeout=self._behavior_timeout)
|
||||||
if final_page_url != page.url:
|
if final_page_url != page.url:
|
||||||
page.note_redirect(final_page_url)
|
page.note_redirect(final_page_url)
|
||||||
return outlinks
|
return outlinks
|
||||||
|
Loading…
x
Reference in New Issue
Block a user