Configurable page timeout

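The page loading timeout was hard-coded to 300 seconds. This change makes it
configurable through a new `page_timeout` parameter (default: 300 seconds) on both `Browser` and `BrozzlerWorker`; it applies to page navigation and login.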
This commit is contained in:
Vangelis Banos 2017-08-23 08:05:14 +00:00
parent 12e02ae401
commit 00513af877
2 changed files with 10 additions and 6 deletions

View file

@ -380,7 +380,8 @@ class Browser:
user_agent=None, behavior_parameters=None, user_agent=None, behavior_parameters=None,
on_request=None, on_response=None, on_screenshot=None, on_request=None, on_response=None, on_screenshot=None,
username=None, password=None, hashtags=None, username=None, password=None, hashtags=None,
skip_extract_outlinks=False, skip_visit_hashtags=False): skip_extract_outlinks=False, skip_visit_hashtags=False,
page_timeout=300):
''' '''
Browses page in browser. Browses page in browser.
@ -433,15 +434,15 @@ class Browser:
self.configure_browser( self.configure_browser(
extra_headers=extra_headers, extra_headers=extra_headers,
user_agent=user_agent) user_agent=user_agent)
self.navigate_to_page(page_url, timeout=300) self.navigate_to_page(page_url, timeout=page_timeout)
if password: if password:
self.try_login(username, password, timeout=300) self.try_login(username, password, timeout=page_timeout)
# if login redirected us, return to page_url # if login redirected us, return to page_url
if page_url != self.url().split('#')[0]: if page_url != self.url().split('#')[0]:
self.logger.debug( self.logger.debug(
'login navigated away from %s; returning!', 'login navigated away from %s; returning!',
page_url) page_url)
self.navigate_to_page(page_url, timeout=300) self.navigate_to_page(page_url, timeout=page_timeout)
if on_screenshot: if on_screenshot:
jpeg_bytes = self.screenshot() jpeg_bytes = self.screenshot()
on_screenshot(jpeg_bytes) on_screenshot(jpeg_bytes)

View file

@ -104,7 +104,8 @@ class BrozzlerWorker:
def __init__( def __init__(
self, frontier, service_registry=None, max_browsers=1, self, frontier, service_registry=None, max_browsers=1,
chrome_exe="chromium-browser", warcprox_auto=False, proxy=None, chrome_exe="chromium-browser", warcprox_auto=False, proxy=None,
skip_extract_outlinks=False, skip_visit_hashtags=False): skip_extract_outlinks=False, skip_visit_hashtags=False,
page_timeout=300):
self._frontier = frontier self._frontier = frontier
self._service_registry = service_registry self._service_registry = service_registry
self._max_browsers = max_browsers self._max_browsers = max_browsers
@ -115,6 +116,7 @@ class BrozzlerWorker:
self._proxy_is_warcprox = None self._proxy_is_warcprox = None
self._skip_extract_outlinks = skip_extract_outlinks self._skip_extract_outlinks = skip_extract_outlinks
self._skip_visit_hashtags = skip_visit_hashtags self._skip_visit_hashtags = skip_visit_hashtags
self._page_timeout = page_timeout
self._browser_pool = brozzler.browser.BrowserPool( self._browser_pool = brozzler.browser.BrowserPool(
max_browsers, chrome_exe=chrome_exe, ignore_cert_errors=True) max_browsers, chrome_exe=chrome_exe, ignore_cert_errors=True)
@ -416,7 +418,8 @@ class BrozzlerWorker:
on_screenshot=_on_screenshot, on_response=_on_response, on_screenshot=_on_screenshot, on_response=_on_response,
on_request=on_request, hashtags=page.hashtags, on_request=on_request, hashtags=page.hashtags,
skip_extract_outlinks=self._skip_extract_outlinks, skip_extract_outlinks=self._skip_extract_outlinks,
skip_visit_hashtags=self._skip_visit_hashtags) skip_visit_hashtags=self._skip_visit_hashtags,
page_timeout=self._page_timeout)
if final_page_url != page.url: if final_page_url != page.url:
page.note_redirect(final_page_url) page.note_redirect(final_page_url)
return outlinks return outlinks