diff --git a/brozzler/browser.py b/brozzler/browser.py index 8a9626b..206c0c0 100644 --- a/brozzler/browser.py +++ b/brozzler/browser.py @@ -380,7 +380,8 @@ class Browser: user_agent=None, behavior_parameters=None, on_request=None, on_response=None, on_screenshot=None, username=None, password=None, hashtags=None, - skip_extract_outlinks=False, skip_visit_hashtags=False): + skip_extract_outlinks=False, skip_visit_hashtags=False, + page_timeout=300): ''' Browses page in browser. @@ -433,15 +434,15 @@ class Browser: self.configure_browser( extra_headers=extra_headers, user_agent=user_agent) - self.navigate_to_page(page_url, timeout=300) + self.navigate_to_page(page_url, timeout=page_timeout) if password: - self.try_login(username, password, timeout=300) + self.try_login(username, password, timeout=page_timeout) # if login redirected us, return to page_url if page_url != self.url().split('#')[0]: self.logger.debug( 'login navigated away from %s; returning!', page_url) - self.navigate_to_page(page_url, timeout=300) + self.navigate_to_page(page_url, timeout=page_timeout) if on_screenshot: jpeg_bytes = self.screenshot() on_screenshot(jpeg_bytes) diff --git a/brozzler/worker.py b/brozzler/worker.py index 6bbe399..a28f3fe 100644 --- a/brozzler/worker.py +++ b/brozzler/worker.py @@ -104,7 +104,8 @@ class BrozzlerWorker: def __init__( self, frontier, service_registry=None, max_browsers=1, chrome_exe="chromium-browser", warcprox_auto=False, proxy=None, - skip_extract_outlinks=False, skip_visit_hashtags=False): + skip_extract_outlinks=False, skip_visit_hashtags=False, + page_timeout=300): self._frontier = frontier self._service_registry = service_registry self._max_browsers = max_browsers @@ -115,6 +116,7 @@ class BrozzlerWorker: self._proxy_is_warcprox = None self._skip_extract_outlinks = skip_extract_outlinks self._skip_visit_hashtags = skip_visit_hashtags + self._page_timeout = page_timeout self._browser_pool = brozzler.browser.BrowserPool( max_browsers, chrome_exe=chrome_exe, ignore_cert_errors=True) @@ -416,7 +418,8 @@ class BrozzlerWorker: on_screenshot=_on_screenshot, on_response=_on_response, on_request=on_request, hashtags=page.hashtags, skip_extract_outlinks=self._skip_extract_outlinks, - skip_visit_hashtags=self._skip_visit_hashtags) + skip_visit_hashtags=self._skip_visit_hashtags, + page_timeout=self._page_timeout) if final_page_url != page.url: page.note_redirect(final_page_url) return outlinks