mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-07 22:12:15 -04:00
wait for ack from browser after setting request headers
Guessing this might fix the issue where some requests are missing the Warcprox-Meta header, which results in them being written to the wrong WARC file.
This commit is contained in:
parent
1dea1f3f93
commit
cc6297ef60
2 changed files with 7 additions and 4 deletions
|
@@ -495,12 +495,15 @@ class Browser:
|
||||||
def configure_browser(self, extra_headers=None, user_agent=None):
|
def configure_browser(self, extra_headers=None, user_agent=None):
|
||||||
headers = extra_headers or {}
|
headers = extra_headers or {}
|
||||||
headers['Accept-Encoding'] = 'gzip' # avoid encodings br, sdch
|
headers['Accept-Encoding'] = 'gzip' # avoid encodings br, sdch
|
||||||
self.send_to_chrome(
|
self.websock_thread.expect_result(self._command_id.peek())
|
||||||
|
msg_id = self.send_to_chrome(
|
||||||
method='Network.setExtraHTTPHeaders',
|
method='Network.setExtraHTTPHeaders',
|
||||||
params={'headers': headers})
|
params={'headers': headers})
|
||||||
|
self._wait_for(
|
||||||
|
lambda: self.websock_thread.received_result(msg_id),
|
||||||
|
timeout=10)
|
||||||
if user_agent:
|
if user_agent:
|
||||||
self.send_to_chrome(
|
msg_id = self.send_to_chrome(
|
||||||
method='Network.setUserAgentOverride',
|
method='Network.setUserAgentOverride',
|
||||||
params={'userAgent': user_agent})
|
params={'userAgent': user_agent})
|
||||||
|
|
||||||
|
|
2
setup.py
2
setup.py
|
@@ -32,7 +32,7 @@ def find_package_data(package):
|
||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.1b12.dev274',
|
version='1.1b12.dev275',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue