mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-06-20 04:44:12 -04:00
take screenshot before running behavior (but after login) - thanks danielbicho
This commit is contained in:
parent
e58f4b7c44
commit
1054e8e3cb
2 changed files with 4 additions and 16 deletions
|
@@ -426,13 +426,12 @@ class Browser:
|
||||||
user_agent=user_agent, timeout=300)
|
user_agent=user_agent, timeout=300)
|
||||||
if password:
|
if password:
|
||||||
self.try_login(username, password, timeout=300)
|
self.try_login(username, password, timeout=300)
|
||||||
|
if on_screenshot:
|
||||||
|
jpeg_bytes = self.screenshot()
|
||||||
|
on_screenshot(jpeg_bytes)
|
||||||
behavior_script = brozzler.behavior_script(
|
behavior_script = brozzler.behavior_script(
|
||||||
page_url, behavior_parameters)
|
page_url, behavior_parameters)
|
||||||
self.run_behavior(behavior_script, timeout=900)
|
self.run_behavior(behavior_script, timeout=900)
|
||||||
if on_screenshot:
|
|
||||||
self.scroll_to_top()
|
|
||||||
jpeg_bytes = self.screenshot()
|
|
||||||
on_screenshot(jpeg_bytes)
|
|
||||||
outlinks = self.extract_outlinks()
|
outlinks = self.extract_outlinks()
|
||||||
## for each hashtag not already visited:
|
## for each hashtag not already visited:
|
||||||
## navigate_to_hashtag (nothing to wait for so no timeout?)
|
## navigate_to_hashtag (nothing to wait for so no timeout?)
|
||||||
|
@@ -502,17 +501,6 @@ class Browser:
|
||||||
jpeg_bytes = base64.b64decode(message['result']['data'])
|
jpeg_bytes = base64.b64decode(message['result']['data'])
|
||||||
return jpeg_bytes
|
return jpeg_bytes
|
||||||
|
|
||||||
def scroll_to_top(self, timeout=30):
|
|
||||||
self.logger.info('scrolling to top')
|
|
||||||
self.websock_thread.expect_result(self._command_id.peek())
|
|
||||||
msg_id = self.send_to_chrome(
|
|
||||||
method='Runtime.evaluate',
|
|
||||||
params={'expression': 'window.scrollTo(0, 0);'})
|
|
||||||
self._wait_for(
|
|
||||||
lambda: self.websock_thread.received_result(msg_id),
|
|
||||||
timeout=timeout)
|
|
||||||
self.websock_thread.pop_result(msg_id)
|
|
||||||
|
|
||||||
def url(self, timeout=30):
|
def url(self, timeout=30):
|
||||||
'''
|
'''
|
||||||
Returns value of document.URL from the browser.
|
Returns value of document.URL from the browser.
|
||||||
|
|
2
setup.py
2
setup.py
|
@@ -32,7 +32,7 @@ def find_package_data(package):
|
||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.1b9.dev186',
|
version='1.1b9.dev187',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue