From bb93b04c23877f796cc48c758feb286b798e572a Mon Sep 17 00:00:00 2001 From: Vangelis Banos Date: Thu, 31 Aug 2017 08:06:26 +0000 Subject: [PATCH] Make behavior_timeout configurable ``behavior_timeout`` is hardcoded to 900s. With this MR we make it configurable with a default value of 900. We add a new variable to ``BrozzlerWorker`` and ``Browser``. --- brozzler/browser.py | 4 ++-- brozzler/worker.py | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/brozzler/browser.py b/brozzler/browser.py index 206c0c0..583b498 100644 --- a/brozzler/browser.py +++ b/brozzler/browser.py @@ -381,7 +381,7 @@ class Browser: on_request=None, on_response=None, on_screenshot=None, username=None, password=None, hashtags=None, skip_extract_outlinks=False, skip_visit_hashtags=False, - page_timeout=300): + page_timeout=300, behavior_timeout=900): ''' Browses page in browser. @@ -448,7 +448,7 @@ class Browser: on_screenshot(jpeg_bytes) behavior_script = brozzler.behavior_script( page_url, behavior_parameters) - self.run_behavior(behavior_script, timeout=900) + self.run_behavior(behavior_script, timeout=behavior_timeout) if skip_extract_outlinks: outlinks = [] else: diff --git a/brozzler/worker.py b/brozzler/worker.py index a28f3fe..6253b63 100644 --- a/brozzler/worker.py +++ b/brozzler/worker.py @@ -105,7 +105,7 @@ class BrozzlerWorker: self, frontier, service_registry=None, max_browsers=1, chrome_exe="chromium-browser", warcprox_auto=False, proxy=None, skip_extract_outlinks=False, skip_visit_hashtags=False, - page_timeout=300): + page_timeout=300, behavior_timeout=900): self._frontier = frontier self._service_registry = service_registry self._max_browsers = max_browsers @@ -117,6 +117,7 @@ class BrozzlerWorker: self._skip_extract_outlinks = skip_extract_outlinks self._skip_visit_hashtags = skip_visit_hashtags self._page_timeout = page_timeout + self._behavior_timeout = behavior_timeout self._browser_pool = brozzler.browser.BrowserPool( max_browsers, chrome_exe=chrome_exe, ignore_cert_errors=True) @@ -419,7 +420,8 @@ class BrozzlerWorker: on_request=on_request, hashtags=page.hashtags, skip_extract_outlinks=self._skip_extract_outlinks, skip_visit_hashtags=self._skip_visit_hashtags, - page_timeout=self._page_timeout) + page_timeout=self._page_timeout, + behavior_timeout=self._behavior_timeout) if final_page_url != page.url: page.note_redirect(final_page_url) return outlinks