mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-07 14:02:24 -04:00
wait for ack from browser after setting request headers
Guessing this might fix the issue where some requests are missing the Warcprox-Meta header, which results in them being written to the wrong WARC file.
This commit is contained in:
parent
1dea1f3f93
commit
cc6297ef60
2 changed files with 7 additions and 4 deletions
|
@ -495,12 +495,15 @@ class Browser:
|
|||
def configure_browser(self, extra_headers=None, user_agent=None):
|
||||
headers = extra_headers or {}
|
||||
headers['Accept-Encoding'] = 'gzip' # avoid encodings br, sdch
|
||||
self.send_to_chrome(
|
||||
self.websock_thread.expect_result(self._command_id.peek())
|
||||
msg_id = self.send_to_chrome(
|
||||
method='Network.setExtraHTTPHeaders',
|
||||
params={'headers': headers})
|
||||
|
||||
self._wait_for(
|
||||
lambda: self.websock_thread.received_result(msg_id),
|
||||
timeout=10)
|
||||
if user_agent:
|
||||
self.send_to_chrome(
|
||||
msg_id = self.send_to_chrome(
|
||||
method='Network.setUserAgentOverride',
|
||||
params={'userAgent': user_agent})
|
||||
|
||||
|
|
2
setup.py
2
setup.py
|
@ -32,7 +32,7 @@ def find_package_data(package):
|
|||
|
||||
setuptools.setup(
|
||||
name='brozzler',
|
||||
version='1.1b12.dev274',
|
||||
version='1.1b12.dev275',
|
||||
description='Distributed web crawling with browsers',
|
||||
url='https://github.com/internetarchive/brozzler',
|
||||
author='Noah Levitt',
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue