mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-23 08:09:48 -05:00
restore support for on_response and on_request, with an automated test for on_response
This commit is contained in:
parent
c90c73372e
commit
eabb0fb114
@ -120,6 +120,9 @@ class WebsockReceiverThread(threading.Thread):
|
||||
self.is_open = False
|
||||
self.got_page_load_event = None
|
||||
|
||||
self.on_request = None
|
||||
self.on_response = None
|
||||
|
||||
self._result_messages = {}
|
||||
|
||||
def expect_result(self, msg_id):
|
||||
@ -178,14 +181,32 @@ class WebsockReceiverThread(threading.Thread):
|
||||
# resume execution
|
||||
self.websock.send(json.dumps(dict(id=0, method='Debugger.resume')))
|
||||
|
||||
def _network_response_received(self, message):
|
||||
# if (not self._reached_limit
|
||||
# and message['params']['response']['status'] == 420
|
||||
# and 'Warcprox-Meta' in CaseInsensitiveDict(
|
||||
# message['params']['response']['headers'])):
|
||||
# warcprox_meta = json.loads(CaseInsensitiveDict(
|
||||
# message['params']['response']['headers'])['Warcprox-Meta'])
|
||||
# self._reached_limit = brozzler.ReachedLimit(
|
||||
# warcprox_meta=warcprox_meta)
|
||||
# self.logger.info('reached limit %s', self._reached_limit)
|
||||
if self.on_response:
|
||||
self.on_response(message)
|
||||
|
||||
def _handle_message(self, websock, json_message):
|
||||
message = json.loads(json_message)
|
||||
if 'method' in message:
|
||||
if message['method'] == 'Page.loadEventFired':
|
||||
self.got_page_load_event = datetime.datetime.utcnow()
|
||||
elif message['method'] == 'Network.responseReceived':
|
||||
self._network_response_received(message)
|
||||
elif message['method'] == 'Network.requestWillBeSent':
|
||||
if self.on_request:
|
||||
self.on_request(message)
|
||||
elif message['method'] == 'Debugger.paused':
|
||||
self._debugger_paused(message)
|
||||
elif message["method"] == "Inspector.targetCrashed":
|
||||
elif message['method'] == 'Inspector.targetCrashed':
|
||||
self.logger.error(
|
||||
'''chrome tab went "aw snap" or "he's dead jim"!''')
|
||||
brozzler.thread_raise(self.calling_thread, BrowsingException)
|
||||
@ -375,6 +396,10 @@ class Browser:
|
||||
if self.is_browsing:
|
||||
raise BrowsingException('browser is already busy browsing a page')
|
||||
self.is_browsing = True
|
||||
if on_request:
|
||||
self.websock_thread.on_request = on_request
|
||||
if on_response:
|
||||
self.websock_thread.on_response = on_response
|
||||
try:
|
||||
self.navigate_to_page(
|
||||
page_url, extra_headers=extra_headers,
|
||||
@ -402,6 +427,8 @@ class Browser:
|
||||
raise BrowsingException(e)
|
||||
finally:
|
||||
self.is_browsing = False
|
||||
self.websock_thread.on_request = None
|
||||
self.websock_thread.on_response = None
|
||||
|
||||
def navigate_to_page(
|
||||
self, page_url, extra_headers=None, user_agent=None, timeout=300):
|
||||
|
1
tests/htdocs/site3/brozzler.svg
Symbolic link
1
tests/htdocs/site3/brozzler.svg
Symbolic link
@ -0,0 +1 @@
|
||||
../../../brozzler/dashboard/static/brozzler.svg
|
9
tests/htdocs/site3/page.html
Normal file
9
tests/htdocs/site3/page.html
Normal file
@ -0,0 +1,9 @@
|
||||
<html>
|
||||
<head>
|
||||
<title> some simple html </title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>an image</h1>
|
||||
<img src='brozzler.svg'>
|
||||
</body>
|
||||
</html>
|
@ -19,9 +19,71 @@ limitations under the License.
|
||||
|
||||
import pytest
|
||||
import brozzler
|
||||
import logging
|
||||
import os
|
||||
import http.server
|
||||
import threading
|
||||
import argparse
|
||||
|
||||
# Hand brozzler's cli logging setup a minimal namespace that carries only
# the log level, so the tests in this module log at INFO.
args = argparse.Namespace(log_level=logging.INFO)
brozzler.cli._configure_logging(args)
|
||||
|
||||
@pytest.fixture(scope='module')
def httpd(request):
    '''
    Module-scoped fixture: starts an http server on an ephemeral localhost
    port, serving the `htdocs` directory that sits next to this file.

    The server runs in a background thread named 'httpd' and is shut down,
    closed and joined by a finalizer registered on `request`.

    Returns:
        the running `http.server.HTTPServer` (use `.server_port` to build
        urls)
    '''
    # SimpleHTTPRequestHandler always uses CWD so we have to chdir
    htdocs_dir = os.path.join(os.path.dirname(__file__), 'htdocs')
    os.chdir(htdocs_dir)

    # port 0 lets the OS pick any free port
    server = http.server.HTTPServer(
            ('localhost', 0), http.server.SimpleHTTPRequestHandler)
    server_thread = threading.Thread(
            name='httpd', target=server.serve_forever)
    server_thread.start()

    def stop_server():
        server.shutdown()
        server.server_close()
        server_thread.join()

    request.addfinalizer(stop_server)

    return server
|
||||
|
||||
def test_httpd(httpd):
    '''
    Tests that our http server is working as expected, and that two fetches
    of the same url return the same payload, proving it can be used to test
    deduplication.

    Args:
        httpd: the running test http server; only `server_port` is read
    '''
    # imported here because the module-level imports do not bring `urllib`
    # into scope
    import urllib.request

    payload1 = payload2 = None
    url = 'http://localhost:%s/site1/file1.txt' % httpd.server_port

    # first fetch
    with urllib.request.urlopen(url) as response:
        assert response.status == 200
        payload1 = response.read()
        assert payload1

    # second fetch of the very same url
    with urllib.request.urlopen(url) as response:
        assert response.status == 200
        payload2 = response.read()
        assert payload2

    assert payload1 == payload2
|
||||
|
||||
def test_aw_snap_hes_dead_jim():
    '''
    Browsing `chrome://crash` must raise `brozzler.BrowsingException`.
    '''
    exe = brozzler.suggest_default_chrome_exe()
    with brozzler.Browser(chrome_exe=exe) as crashing_browser:
        with pytest.raises(brozzler.BrowsingException):
            crashing_browser.browse_page('chrome://crash')
|
||||
|
||||
def test_on_response(httpd):
    '''
    Browses a page with an `on_response` callback and checks the urls the
    callback saw, in order: the page itself, the image it embeds, and the
    favicon. The page is then browsed a second time without a callback.

    Args:
        httpd: the running test http server; only `server_port` is read
    '''
    response_urls = []

    def on_response(msg):
        # record the url of every response the browser reports
        response_urls.append(msg['params']['response']['url'])

    port = httpd.server_port
    chrome_exe = brozzler.suggest_default_chrome_exe()
    url = 'http://localhost:%s/site3/page.html' % port

    expected_page = 'http://localhost:%s/site3/page.html' % port
    expected_image = 'http://localhost:%s/site3/brozzler.svg' % port
    expected_favicon = 'http://localhost:%s/favicon.ico' % port

    with brozzler.Browser(chrome_exe=chrome_exe) as browser:
        browser.browse_page(url, on_response=on_response)
        browser.browse_page(url)

    assert response_urls[0] == expected_page
    assert response_urls[1] == expected_image
    assert response_urls[2] == expected_favicon
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user