diff --git a/README.rst b/README.rst index 2681945..6dee908 100644 --- a/README.rst +++ b/README.rst @@ -26,7 +26,8 @@ Requirements Worth noting is that the browser requires a graphical environment to run. You already have this on your laptop, but on a server it will probably require -deploying some additional infrastructure (typically X11). The vagrant +deploying some additional infrastructure (typically X11; note that Xvfb does +not support screenshots; Xvnc4, from package vnc4server, does). The vagrant configuration in the brozzler repository (still a work in progress) has an example setup. @@ -181,7 +182,7 @@ Headless Chrome (experimental) `Headless Chromium `_ is now available in stable Chrome releases for 64-bit Linux and may be -used to run the browser without a visibe window or X11 at all. +used to run the browser without a visible window or X11 at all. To try this out, create a wrapper script like ~/bin/chrome-headless.sh: diff --git a/brozzler/browser.py b/brozzler/browser.py index dd9e1b9..09131f1 100644 --- a/brozzler/browser.py +++ b/brozzler/browser.py @@ -381,7 +381,7 @@ class Browser: on_request=None, on_response=None, on_screenshot=None, username=None, password=None, hashtags=None, skip_extract_outlinks=False, skip_visit_hashtags=False, - page_timeout=300): + page_timeout=300, behavior_timeout=900): ''' Browses page in browser. @@ -448,7 +448,7 @@ class Browser: on_screenshot(jpeg_bytes) behavior_script = brozzler.behavior_script( page_url, behavior_parameters) - self.run_behavior(behavior_script, timeout=900) + self.run_behavior(behavior_script, timeout=behavior_timeout) if skip_extract_outlinks: outlinks = [] else: diff --git a/brozzler/worker.py b/brozzler/worker.py index a28f3fe..6253b63 100644 --- a/brozzler/worker.py +++ b/brozzler/worker.py @@ -105,7 +105,7 @@ class BrozzlerWorker: self, frontier, service_registry=None, max_browsers=1, chrome_exe="chromium-browser", warcprox_auto=False, proxy=None, skip_extract_outlinks=False, skip_visit_hashtags=False, - page_timeout=300): + page_timeout=300, behavior_timeout=900): self._frontier = frontier self._service_registry = service_registry self._max_browsers = max_browsers @@ -117,6 +117,7 @@ class BrozzlerWorker: self._skip_extract_outlinks = skip_extract_outlinks self._skip_visit_hashtags = skip_visit_hashtags self._page_timeout = page_timeout + self._behavior_timeout = behavior_timeout self._browser_pool = brozzler.browser.BrowserPool( max_browsers, chrome_exe=chrome_exe, ignore_cert_errors=True) @@ -419,7 +420,8 @@ class BrozzlerWorker: on_request=on_request, hashtags=page.hashtags, skip_extract_outlinks=self._skip_extract_outlinks, skip_visit_hashtags=self._skip_visit_hashtags, - page_timeout=self._page_timeout) + page_timeout=self._page_timeout, + behavior_timeout=self._behavior_timeout) if final_page_url != page.url: page.note_redirect(final_page_url) return outlinks