mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 00:29:53 -05:00
Merge pull request #63 from vbanos/configurable-page-timeout
Thank you, @vbanos!
This commit is contained in:
commit
f810603cdf
@@ -380,7 +380,8 @@ class Browser:
|
||||
user_agent=None, behavior_parameters=None,
|
||||
on_request=None, on_response=None, on_screenshot=None,
|
||||
username=None, password=None, hashtags=None,
|
||||
- skip_extract_outlinks=False, skip_visit_hashtags=False):
|
||||
+ skip_extract_outlinks=False, skip_visit_hashtags=False,
|
||||
+ page_timeout=300):
|
||||
'''
|
||||
Browses page in browser.
|
||||
|
||||
@@ -433,15 +434,15 @@ class Browser:
|
||||
self.configure_browser(
|
||||
extra_headers=extra_headers,
|
||||
user_agent=user_agent)
|
||||
- self.navigate_to_page(page_url, timeout=300)
|
||||
+ self.navigate_to_page(page_url, timeout=page_timeout)
|
||||
if password:
|
||||
- self.try_login(username, password, timeout=300)
|
||||
+ self.try_login(username, password, timeout=page_timeout)
|
||||
# if login redirected us, return to page_url
|
||||
if page_url != self.url().split('#')[0]:
|
||||
self.logger.debug(
|
||||
'login navigated away from %s; returning!',
|
||||
page_url)
|
||||
- self.navigate_to_page(page_url, timeout=300)
|
||||
+ self.navigate_to_page(page_url, timeout=page_timeout)
|
||||
if on_screenshot:
|
||||
jpeg_bytes = self.screenshot()
|
||||
on_screenshot(jpeg_bytes)
|
||||
|
@@ -104,7 +104,8 @@ class BrozzlerWorker:
|
||||
def __init__(
|
||||
self, frontier, service_registry=None, max_browsers=1,
|
||||
chrome_exe="chromium-browser", warcprox_auto=False, proxy=None,
|
||||
- skip_extract_outlinks=False, skip_visit_hashtags=False):
|
||||
+ skip_extract_outlinks=False, skip_visit_hashtags=False,
|
||||
+ page_timeout=300):
|
||||
self._frontier = frontier
|
||||
self._service_registry = service_registry
|
||||
self._max_browsers = max_browsers
|
||||
@@ -115,6 +116,7 @@ class BrozzlerWorker:
|
||||
self._proxy_is_warcprox = None
|
||||
self._skip_extract_outlinks = skip_extract_outlinks
|
||||
self._skip_visit_hashtags = skip_visit_hashtags
|
||||
+ self._page_timeout = page_timeout
|
||||
|
||||
self._browser_pool = brozzler.browser.BrowserPool(
|
||||
max_browsers, chrome_exe=chrome_exe, ignore_cert_errors=True)
|
||||
@@ -416,7 +418,8 @@ class BrozzlerWorker:
|
||||
on_screenshot=_on_screenshot, on_response=_on_response,
|
||||
on_request=on_request, hashtags=page.hashtags,
|
||||
skip_extract_outlinks=self._skip_extract_outlinks,
|
||||
- skip_visit_hashtags=self._skip_visit_hashtags)
|
||||
+ skip_visit_hashtags=self._skip_visit_hashtags,
|
||||
+ page_timeout=self._page_timeout)
|
||||
if final_page_url != page.url:
|
||||
page.note_redirect(final_page_url)
|
||||
return outlinks
|
||||
|
Loading…
x
Reference in New Issue
Block a user