mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 08:39:59 -05:00
updates per Noah's review
This commit is contained in:
parent
830b0eef89
commit
a0330d9716
@ -429,17 +429,18 @@ class Browser:
|
|||||||
self.websock_thread.on_response = on_response
|
self.websock_thread.on_response = on_response
|
||||||
try:
|
try:
|
||||||
with brozzler.thread_accept_exceptions():
|
with brozzler.thread_accept_exceptions():
|
||||||
self.navigate_to_page(
|
self.configure_browser(
|
||||||
page_url, extra_headers=extra_headers,
|
extra_headers=extra_headers,
|
||||||
user_agent=user_agent, timeout=300)
|
user_agent=user_agent)
|
||||||
|
self.navigate_to_page(page_url, timeout=300)
|
||||||
if password:
|
if password:
|
||||||
self.try_login(username, password, timeout=300)
|
self.try_login(username, password, timeout=300)
|
||||||
# if login redirected us, return to page_url
|
# if login redirected us, return to page_url
|
||||||
if page_url != self.url().split('#')[0]:
|
if page_url != self.url().split('#')[0]:
|
||||||
self.logger.info('login navigated to %s, away from %s; returning!', self.url(), page_url)
|
self.logger.debug(
|
||||||
self.navigate_to_page(
|
'login navigated away from %s; returning!',
|
||||||
page_url, extra_headers=extra_headers,
|
page_url)
|
||||||
user_agent=user_agent, timeout=300)
|
self.navigate_to_page(page_url, timeout=300)
|
||||||
if on_screenshot:
|
if on_screenshot:
|
||||||
jpeg_bytes = self.screenshot()
|
jpeg_bytes = self.screenshot()
|
||||||
on_screenshot(jpeg_bytes)
|
on_screenshot(jpeg_bytes)
|
||||||
@ -485,8 +486,7 @@ class Browser:
|
|||||||
# run behavior again with short timeout?
|
# run behavior again with short timeout?
|
||||||
# retrieve outlinks again and append to list?
|
# retrieve outlinks again and append to list?
|
||||||
|
|
||||||
def navigate_to_page(
|
def configure_browser(self, extra_headers=None, user_agent=None):
|
||||||
self, page_url, extra_headers=None, user_agent=None, timeout=300):
|
|
||||||
headers = extra_headers or {}
|
headers = extra_headers or {}
|
||||||
headers['Accept-Encoding'] = 'identity'
|
headers['Accept-Encoding'] = 'identity'
|
||||||
self.send_to_chrome(
|
self.send_to_chrome(
|
||||||
@ -498,7 +498,7 @@ class Browser:
|
|||||||
method='Network.setUserAgentOverride',
|
method='Network.setUserAgentOverride',
|
||||||
params={'userAgent': user_agent})
|
params={'userAgent': user_agent})
|
||||||
|
|
||||||
# navigate to the page!
|
def navigate_to_page(self, page_url, timeout=300):
|
||||||
self.logger.info('navigating to page %s', page_url)
|
self.logger.info('navigating to page %s', page_url)
|
||||||
self.websock_thread.got_page_load_event = None
|
self.websock_thread.got_page_load_event = None
|
||||||
self.send_to_chrome(method='Page.navigate', params={'url': page_url})
|
self.send_to_chrome(method='Page.navigate', params={'url': page_url})
|
||||||
|
Loading…
x
Reference in New Issue
Block a user