Merge branch 'master' into qa

* master:
  bump up timeout waiting for websocket connection
  try taking screenshot 3 times, proceed on failure
This commit is contained in:
Noah Levitt 2018-02-14 12:33:53 -08:00
commit 3d12daea06
3 changed files with 15 additions and 7 deletions

View File

@@ -1,7 +1,7 @@
'''
brozzler/browser.py - manages the browsers for brozzler
Copyright (C) 2014-2017 Internet Archive
Copyright (C) 2014-2018 Internet Archive
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -335,7 +335,7 @@ class Browser:
self.websock, name='WebsockThread:%s' % self.chrome.port)
self.websock_thread.start()
self._wait_for(lambda: self.websock_thread.is_open, timeout=10)
self._wait_for(lambda: self.websock_thread.is_open, timeout=30)
# tell browser to send us messages we're interested in
self.send_to_chrome(method='Network.enable')
@@ -460,8 +460,7 @@ class Browser:
page_url)
self.navigate_to_page(page_url, timeout=page_timeout)
if on_screenshot:
jpeg_bytes = self.screenshot()
on_screenshot(jpeg_bytes)
self._try_screenshot(on_screenshot)
behavior_script, behavior_timeout_custom = brozzler.behavior_script(
page_url, behavior_parameters,
behaviors_dir=behaviors_dir)
@@ -488,6 +487,15 @@ class Browser:
self.websock_thread.on_request = None
self.websock_thread.on_response = None
def _try_screenshot(self, on_screenshot):
    '''
    Takes a screenshot and passes it to `on_screenshot`, retrying on
    timeout.

    Up to three attempts are made. Each attempt calls `self.screenshot()`
    and hands the result to `on_screenshot`. If an attempt raises
    `BrowsingTimeout`, the failure is logged and the next attempt starts.
    After the final failed attempt the method returns normally, so the
    caller carries on without a screenshot. Any other exception
    propagates to the caller.

    Args:
        on_screenshot: callable invoked with the value returned by
            `self.screenshot()`
    '''
    # single source of truth for the retry count, used by both the loop
    # and the log message
    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        try:
            jpeg_bytes = self.screenshot()
            on_screenshot(jpeg_bytes)
            return
        except BrowsingTimeout as e:
            # log through the instance logger, like the rest of the
            # class, rather than through the root logger
            self.logger.error(
                    'attempt %s/%s: %s', attempt, max_attempts, e)
def visit_hashtags(self, page_url, hashtags, outlinks):
_hashtags = set(hashtags or [])
for outlink in outlinks:
@@ -558,7 +566,7 @@ class Browser:
'problem extracting outlinks, result message: %s', message)
return frozenset()
def screenshot(self, timeout=90):
def screenshot(self, timeout=45):
self.logger.info('taking screenshot')
self.websock_thread.expect_result(self._command_id.peek())
msg_id = self.send_to_chrome(method='Page.captureScreenshot')

View File

@@ -3,7 +3,7 @@ brozzler/worker.py - BrozzlerWorker brozzles pages from the frontier, meaning
it runs youtube-dl on them, browses them and runs behaviors if appropriate,
scopes and adds outlinks to the frontier
Copyright (C) 2014-2017 Internet Archive
Copyright (C) 2014-2018 Internet Archive
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup(
name='brozzler',
version='1.1b13.dev285',
version='1.1b13.dev286',
description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler',
author='Noah Levitt',