Make behavior_timeout configurable

``behavior_timeout`` was hardcoded to 900s. With this MR we make it
configurable, keeping 900s as the default value. We add a new
``behavior_timeout`` parameter to ``BrozzlerWorker`` and ``Browser``;
the worker stores the value and passes it through to the browser.
This commit is contained in:
Vangelis Banos 2017-08-31 08:06:26 +00:00
parent 18a52f0b15
commit bb93b04c23
2 changed files with 6 additions and 4 deletions

View File

@ -381,7 +381,7 @@ class Browser:
on_request=None, on_response=None, on_screenshot=None, on_request=None, on_response=None, on_screenshot=None,
username=None, password=None, hashtags=None, username=None, password=None, hashtags=None,
skip_extract_outlinks=False, skip_visit_hashtags=False, skip_extract_outlinks=False, skip_visit_hashtags=False,
page_timeout=300): page_timeout=300, behavior_timeout=900):
''' '''
Browses page in browser. Browses page in browser.
@ -448,7 +448,7 @@ class Browser:
on_screenshot(jpeg_bytes) on_screenshot(jpeg_bytes)
behavior_script = brozzler.behavior_script( behavior_script = brozzler.behavior_script(
page_url, behavior_parameters) page_url, behavior_parameters)
self.run_behavior(behavior_script, timeout=900) self.run_behavior(behavior_script, timeout=behavior_timeout)
if skip_extract_outlinks: if skip_extract_outlinks:
outlinks = [] outlinks = []
else: else:

View File

@ -105,7 +105,7 @@ class BrozzlerWorker:
self, frontier, service_registry=None, max_browsers=1, self, frontier, service_registry=None, max_browsers=1,
chrome_exe="chromium-browser", warcprox_auto=False, proxy=None, chrome_exe="chromium-browser", warcprox_auto=False, proxy=None,
skip_extract_outlinks=False, skip_visit_hashtags=False, skip_extract_outlinks=False, skip_visit_hashtags=False,
page_timeout=300): page_timeout=300, behavior_timeout=900):
self._frontier = frontier self._frontier = frontier
self._service_registry = service_registry self._service_registry = service_registry
self._max_browsers = max_browsers self._max_browsers = max_browsers
@ -117,6 +117,7 @@ class BrozzlerWorker:
self._skip_extract_outlinks = skip_extract_outlinks self._skip_extract_outlinks = skip_extract_outlinks
self._skip_visit_hashtags = skip_visit_hashtags self._skip_visit_hashtags = skip_visit_hashtags
self._page_timeout = page_timeout self._page_timeout = page_timeout
self._behavior_timeout = behavior_timeout
self._browser_pool = brozzler.browser.BrowserPool( self._browser_pool = brozzler.browser.BrowserPool(
max_browsers, chrome_exe=chrome_exe, ignore_cert_errors=True) max_browsers, chrome_exe=chrome_exe, ignore_cert_errors=True)
@ -419,7 +420,8 @@ class BrozzlerWorker:
on_request=on_request, hashtags=page.hashtags, on_request=on_request, hashtags=page.hashtags,
skip_extract_outlinks=self._skip_extract_outlinks, skip_extract_outlinks=self._skip_extract_outlinks,
skip_visit_hashtags=self._skip_visit_hashtags, skip_visit_hashtags=self._skip_visit_hashtags,
page_timeout=self._page_timeout) page_timeout=self._page_timeout,
behavior_timeout=self._behavior_timeout)
if final_page_url != page.url: if final_page_url != page.url:
page.note_redirect(final_page_url) page.note_redirect(final_page_url)
return outlinks return outlinks