diff --git a/bin/browse-url b/bin/browse-url index ebfa906..e979fda 100755 --- a/bin/browse-url +++ b/bin/browse-url @@ -6,6 +6,8 @@ import os import sys import logging import umbra +import re +import datetime arg_parser = argparse.ArgumentParser(prog=os.path.basename(__file__), description='browse-url - open urls in chrome/chromium and run behaviors', @@ -24,7 +26,15 @@ args = arg_parser.parse_args(args=sys.argv[1:]) logging.basicConfig(stream=sys.stdout, level=args.log_level, format='%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s') + with umbra.Browser(chrome_exe=args.chrome_exe) as browser: for url in args.urls: - browser.browse_page(url) + + def on_screenshot(screenshot_png): + filename = "{}-{:%Y%m%d%H%M%S}.png".format(re.sub(r"\W", "_", url), datetime.datetime.now()) + with open(filename, mode='wb') as png_out: + png_out.write(screenshot_png) + logging.info("wrote screenshot to {}".format(filename)) + + browser.browse_page(url, on_screenshot=on_screenshot) diff --git a/umbra/browser.py b/umbra/browser.py index c72bd3e..3c09166 100644 --- a/umbra/browser.py +++ b/umbra/browser.py @@ -13,6 +13,7 @@ import signal import tempfile import os import socket +import base64 from umbra.behaviors import Behavior class BrowserPool: @@ -94,7 +95,7 @@ class Browser: def abort_browse_page(self): self._abort_browse_page = True - def browse_page(self, url, on_request=None): + def browse_page(self, url, on_request=None, on_screenshot=None): """Synchronously browses a page and runs behaviors. Raises BrowsingException if browsing the page fails in a non-critical @@ -102,6 +103,8 @@ class Browser: """ self.url = url self.on_request = on_request + self.on_screenshot = on_screenshot + self._waiting_on_screenshot_msg_id = None self._websock = websocket.WebSocketApp(self._websocket_url, on_open=self._visit_page, on_message=self._handle_message) @@ -128,6 +131,8 @@ class Browser: elif self._abort_browse_page: raise BrowsingException("browsing page aborted") finally: + self.capture_screenshot() + if self._websock and self._websock.sock and self._websock.sock.connected: try: self._websock.close() @@ -144,6 +149,10 @@ class Browser: self._behavior = None + + def capture_screenshot(self): + time.sleep(10) + def send_to_chrome(self, suppress_logging=False, **kwargs): msg_id = next(self.command_id) kwargs['id'] = msg_id @@ -178,14 +187,8 @@ class Browser: elif self.on_request: self.on_request(message) elif "method" in message and message["method"] == "Page.loadEventFired": - if self._behavior is None: - self.logger.info("Page.loadEventFired, starting behaviors url={} message={}".format(self.url, message)) - self._behavior = Behavior(self.url, self) - self._behavior.start() - else: - self.logger.warn("Page.loadEventFired again, perhaps original url had a meta refresh, or behaviors accidentally navigated to another page? starting behaviors again url={} message={}".format(self.url, message)) - self._behavior = Behavior(self.url, self) - self._behavior.start() + self.logger.info("Page.loadEventFired, requesting screenshot url={} message={}".format(self.url, message)) + self._waiting_on_screenshot_msg_id = self.send_to_chrome(method="Page.captureScreenshot") elif "method" in message and message["method"] == "Console.messageAdded": self.logger.debug("{} console.{} {}".format(websock.url, message["params"]["message"]["level"], @@ -203,7 +206,15 @@ class Browser: # resume execution self.send_to_chrome(method="Debugger.resume") elif "result" in message: - if self._behavior and self._behavior.is_waiting_on_result(message['id']): + if message["id"] == self._waiting_on_screenshot_msg_id: + if self.on_screenshot: + self.on_screenshot(base64.b64decode(message["result"]["data"])) + self._waiting_on_screenshot_msg_id = None + + self.logger.info("got screenshot, moving on to starting behaviors url={}".format(self.url)) + self._behavior = Behavior(self.url, self) + self._behavior.start() + elif self._behavior and self._behavior.is_waiting_on_result(message["id"]): self._behavior.notify_of_result(message) # elif "method" in message and message["method"] in ("Network.dataReceived", "Network.responseReceived", "Network.loadingFinished"): # pass @@ -212,7 +223,6 @@ class Browser: # else: # self.logger.debug("[no-method] {}".format(message)) - class Chrome: logger = logging.getLogger(__module__ + "." + __qualname__)