wait for ack from browser after setting request headers

Guessing this might fix the issue where some requests are missing the
warcprox-meta header, which results in them being written to the wrong
WARC.
This commit is contained in:
Noah Levitt 2017-12-27 14:43:26 -08:00
parent 1dea1f3f93
commit cc6297ef60
2 changed files with 7 additions and 4 deletions

View File

@ -495,12 +495,15 @@ class Browser:
def configure_browser(self, extra_headers=None, user_agent=None):
headers = extra_headers or {}
headers['Accept-Encoding'] = 'gzip' # avoid encodings br, sdch
self.send_to_chrome(
self.websock_thread.expect_result(self._command_id.peek())
msg_id = self.send_to_chrome(
method='Network.setExtraHTTPHeaders',
params={'headers': headers})
self._wait_for(
lambda: self.websock_thread.received_result(msg_id),
timeout=10)
if user_agent:
self.send_to_chrome(
msg_id = self.send_to_chrome(
method='Network.setUserAgentOverride',
params={'userAgent': user_agent})

View File

@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup(
name='brozzler',
version='1.1b12.dev274',
version='1.1b12.dev275',
description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler',
author='Noah Levitt',