From eabb0fb114e5af634183db0a52a29f3caa9483b6 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Wed, 21 Dec 2016 18:35:55 -0800 Subject: [PATCH] restore support for on_response and on_request, with an automated test for on_response --- brozzler/browser.py | 29 ++++++++++++++- tests/htdocs/site3/brozzler.svg | 1 + tests/htdocs/site3/page.html | 9 +++++ tests/test_brozzling.py | 62 +++++++++++++++++++++++++++++++++ 4 files changed, 100 insertions(+), 1 deletion(-) create mode 120000 tests/htdocs/site3/brozzler.svg create mode 100644 tests/htdocs/site3/page.html diff --git a/brozzler/browser.py b/brozzler/browser.py index a947044..9765ab1 100644 --- a/brozzler/browser.py +++ b/brozzler/browser.py @@ -120,6 +120,9 @@ class WebsockReceiverThread(threading.Thread): self.is_open = False self.got_page_load_event = None + self.on_request = None + self.on_response = None + self._result_messages = {} def expect_result(self, msg_id): @@ -178,14 +181,32 @@ class WebsockReceiverThread(threading.Thread): # resume execution self.websock.send(json.dumps(dict(id=0, method='Debugger.resume'))) + def _network_response_received(self, message): + # if (not self._reached_limit + # and message['params']['response']['status'] == 420 + # and 'Warcprox-Meta' in CaseInsensitiveDict( + # message['params']['response']['headers'])): + # warcprox_meta = json.loads(CaseInsensitiveDict( + # message['params']['response']['headers'])['Warcprox-Meta']) + # self._reached_limit = brozzler.ReachedLimit( + # warcprox_meta=warcprox_meta) + # self.logger.info('reached limit %s', self._reached_limit) + if self.on_response: + self.on_response(message) + def _handle_message(self, websock, json_message): message = json.loads(json_message) if 'method' in message: if message['method'] == 'Page.loadEventFired': self.got_page_load_event = datetime.datetime.utcnow() + elif message['method'] == 'Network.responseReceived': + self._network_response_received(message) + elif message['method'] == 
'Network.requestWillBeSent': + if self.on_request: + self.on_request(message) elif message['method'] == 'Debugger.paused': self._debugger_paused(message) - elif message["method"] == "Inspector.targetCrashed": + elif message['method'] == 'Inspector.targetCrashed': self.logger.error( '''chrome tab went "aw snap" or "he's dead jim"!''') brozzler.thread_raise(self.calling_thread, BrowsingException) @@ -375,6 +396,10 @@ class Browser: if self.is_browsing: raise BrowsingException('browser is already busy browsing a page') self.is_browsing = True + if on_request: + self.websock_thread.on_request = on_request + if on_response: + self.websock_thread.on_response = on_response try: self.navigate_to_page( page_url, extra_headers=extra_headers, @@ -402,6 +427,8 @@ class Browser: raise BrowsingException(e) finally: self.is_browsing = False + self.websock_thread.on_request = None + self.websock_thread.on_response = None def navigate_to_page( self, page_url, extra_headers=None, user_agent=None, timeout=300): diff --git a/tests/htdocs/site3/brozzler.svg b/tests/htdocs/site3/brozzler.svg new file mode 120000 index 0000000..5069ef8 --- /dev/null +++ b/tests/htdocs/site3/brozzler.svg @@ -0,0 +1 @@ +../../../brozzler/dashboard/static/brozzler.svg \ No newline at end of file diff --git a/tests/htdocs/site3/page.html b/tests/htdocs/site3/page.html new file mode 100644 index 0000000..64d4e1a --- /dev/null +++ b/tests/htdocs/site3/page.html @@ -0,0 +1,9 @@ + + + some simple html + + +

an image

+ + + diff --git a/tests/test_brozzling.py b/tests/test_brozzling.py index b41278d..ef7b0d5 100644 --- a/tests/test_brozzling.py +++ b/tests/test_brozzling.py @@ -19,9 +19,71 @@ limitations under the License. import pytest import brozzler +import logging +import os +import http.server +import threading +import argparse + +args = argparse.Namespace() +args.log_level = logging.INFO +brozzler.cli._configure_logging(args) + +@pytest.fixture(scope='module') +def httpd(request): + # SimpleHTTPRequestHandler always uses CWD so we have to chdir + os.chdir(os.path.join(os.path.dirname(__file__), 'htdocs')) + + httpd = http.server.HTTPServer( + ('localhost', 0), http.server.SimpleHTTPRequestHandler) + httpd_thread = threading.Thread(name='httpd', target=httpd.serve_forever) + httpd_thread.start() + + def fin(): + httpd.shutdown() + httpd.server_close() + httpd_thread.join() + request.addfinalizer(fin) + + return httpd + +def test_httpd(httpd): + ''' + Tests that our http server is working as expected, and that two fetches + of the same url return the same payload, proving it can be used to test + deduplication. 
+ ''' + payload1 = payload2 = None + url = 'http://localhost:%s/site1/file1.txt' % httpd.server_port + with urllib.request.urlopen(url) as response: + assert response.status == 200 + payload1 = response.read() + assert payload1 + + with urllib.request.urlopen(url) as response: + assert response.status == 200 + payload2 = response.read() + assert payload2 + + assert payload1 == payload2 def test_aw_snap_hes_dead_jim(): chrome_exe = brozzler.suggest_default_chrome_exe() with brozzler.Browser(chrome_exe=chrome_exe) as browser: with pytest.raises(brozzler.BrowsingException): browser.browse_page('chrome://crash') + +def test_on_response(httpd): + response_urls = [] + def on_response(msg): + response_urls.append(msg['params']['response']['url']) + + chrome_exe = brozzler.suggest_default_chrome_exe() + url = 'http://localhost:%s/site3/page.html' % httpd.server_port + with brozzler.Browser(chrome_exe=chrome_exe) as browser: + browser.browse_page(url, on_response=on_response) + browser.browse_page(url) + assert response_urls[0] == 'http://localhost:%s/site3/page.html' % httpd.server_port + assert response_urls[1] == 'http://localhost:%s/site3/brozzler.svg' % httpd.server_port + assert response_urls[2] == 'http://localhost:%s/favicon.ico' % httpd.server_port +