experimenting with captureScreenshot

This commit is contained in:
Noah Levitt 2015-06-16 18:42:21 -07:00
parent f254e2eec1
commit d8a962b29e
2 changed files with 32 additions and 12 deletions

View File

@ -6,6 +6,8 @@ import os
import sys import sys
import logging import logging
import umbra import umbra
import re
import datetime
arg_parser = argparse.ArgumentParser(prog=os.path.basename(__file__), arg_parser = argparse.ArgumentParser(prog=os.path.basename(__file__),
description='browse-url - open urls in chrome/chromium and run behaviors', description='browse-url - open urls in chrome/chromium and run behaviors',
@ -24,7 +26,15 @@ args = arg_parser.parse_args(args=sys.argv[1:])
logging.basicConfig(stream=sys.stdout, level=args.log_level, logging.basicConfig(stream=sys.stdout, level=args.log_level,
format='%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s') format='%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s')
with umbra.Browser(chrome_exe=args.chrome_exe) as browser: with umbra.Browser(chrome_exe=args.chrome_exe) as browser:
for url in args.urls: for url in args.urls:
browser.browse_page(url)
def on_screenshot(screenshot_png):
    """Write one screenshot to a PNG file in the current working directory.

    The file name is the enclosing scope's ``url`` with every non-word
    character replaced by ``_``, followed by ``-YYYYmmddHHMMSS`` (local
    time from ``datetime.datetime.now()``) and the ``.png`` suffix.

    Args:
        screenshot_png: bytes-like image data; written to the file unmodified.

    Raises:
        OSError: if the file cannot be created or written.
    """
    # A long url (e.g. one with a big query string) would otherwise produce a
    # file name longer than the filesystem accepts for a single path
    # component (commonly 255 bytes), making open() fail. Cap the url-derived
    # part so the timestamp and suffix still fit.
    max_stem_len = 200
    stem = re.sub(r"\W", "_", url)[:max_stem_len]
    filename = "{}-{:%Y%m%d%H%M%S}.png".format(stem, datetime.datetime.now())
    with open(filename, mode='wb') as png_out:
        png_out.write(screenshot_png)
    logging.info("wrote screenshot to %s", filename)
browser.browse_page(url, on_screenshot=on_screenshot)

View File

@ -13,6 +13,7 @@ import signal
import tempfile import tempfile
import os import os
import socket import socket
import base64
from umbra.behaviors import Behavior from umbra.behaviors import Behavior
class BrowserPool: class BrowserPool:
@ -94,7 +95,7 @@ class Browser:
def abort_browse_page(self): def abort_browse_page(self):
self._abort_browse_page = True self._abort_browse_page = True
def browse_page(self, url, on_request=None): def browse_page(self, url, on_request=None, on_screenshot=None):
"""Synchronously browses a page and runs behaviors. """Synchronously browses a page and runs behaviors.
Raises BrowsingException if browsing the page fails in a non-critical Raises BrowsingException if browsing the page fails in a non-critical
@ -102,6 +103,8 @@ class Browser:
""" """
self.url = url self.url = url
self.on_request = on_request self.on_request = on_request
self.on_screenshot = on_screenshot
self._waiting_on_screenshot_msg_id = None
self._websock = websocket.WebSocketApp(self._websocket_url, self._websock = websocket.WebSocketApp(self._websocket_url,
on_open=self._visit_page, on_message=self._handle_message) on_open=self._visit_page, on_message=self._handle_message)
@ -128,6 +131,8 @@ class Browser:
elif self._abort_browse_page: elif self._abort_browse_page:
raise BrowsingException("browsing page aborted") raise BrowsingException("browsing page aborted")
finally: finally:
self.capture_screenshot()
if self._websock and self._websock.sock and self._websock.sock.connected: if self._websock and self._websock.sock and self._websock.sock.connected:
try: try:
self._websock.close() self._websock.close()
@ -144,6 +149,10 @@ class Browser:
self._behavior = None self._behavior = None
def capture_screenshot(self):
    """Block the calling thread for ten seconds.

    Despite its name, this method does not request or store a screenshot
    itself; it only sleeps.
    """
    # NOTE(review): presumably this pause gives an in-flight
    # Page.captureScreenshot reply time to arrive before the caller closes
    # the websocket -- confirm against the caller.
    pause_seconds = 10
    time.sleep(pause_seconds)
def send_to_chrome(self, suppress_logging=False, **kwargs): def send_to_chrome(self, suppress_logging=False, **kwargs):
msg_id = next(self.command_id) msg_id = next(self.command_id)
kwargs['id'] = msg_id kwargs['id'] = msg_id
@ -178,14 +187,8 @@ class Browser:
elif self.on_request: elif self.on_request:
self.on_request(message) self.on_request(message)
elif "method" in message and message["method"] == "Page.loadEventFired": elif "method" in message and message["method"] == "Page.loadEventFired":
if self._behavior is None: self.logger.info("Page.loadEventFired, requesting screenshot url={} message={}".format(self.url, message))
self.logger.info("Page.loadEventFired, starting behaviors url={} message={}".format(self.url, message)) self._waiting_on_screenshot_msg_id = self.send_to_chrome(method="Page.captureScreenshot")
self._behavior = Behavior(self.url, self)
self._behavior.start()
else:
self.logger.warn("Page.loadEventFired again, perhaps original url had a meta refresh, or behaviors accidentally navigated to another page? starting behaviors again url={} message={}".format(self.url, message))
self._behavior = Behavior(self.url, self)
self._behavior.start()
elif "method" in message and message["method"] == "Console.messageAdded": elif "method" in message and message["method"] == "Console.messageAdded":
self.logger.debug("{} console.{} {}".format(websock.url, self.logger.debug("{} console.{} {}".format(websock.url,
message["params"]["message"]["level"], message["params"]["message"]["level"],
@ -203,7 +206,15 @@ class Browser:
# resume execution # resume execution
self.send_to_chrome(method="Debugger.resume") self.send_to_chrome(method="Debugger.resume")
elif "result" in message: elif "result" in message:
if self._behavior and self._behavior.is_waiting_on_result(message['id']): if message["id"] == self._waiting_on_screenshot_msg_id:
if self.on_screenshot:
self.on_screenshot(base64.b64decode(message["result"]["data"]))
self._waiting_on_screenshot_msg_id = None
self.logger.info("got screenshot, moving on to starting behaviors url={}".format(self.url))
self._behavior = Behavior(self.url, self)
self._behavior.start()
elif self._behavior and self._behavior.is_waiting_on_result(message["id"]):
self._behavior.notify_of_result(message) self._behavior.notify_of_result(message)
# elif "method" in message and message["method"] in ("Network.dataReceived", "Network.responseReceived", "Network.loadingFinished"): # elif "method" in message and message["method"] in ("Network.dataReceived", "Network.responseReceived", "Network.loadingFinished"):
# pass # pass
@ -212,7 +223,6 @@ class Browser:
# else: # else:
# self.logger.debug("[no-method] {}".format(message)) # self.logger.debug("[no-method] {}".format(message))
class Chrome: class Chrome:
logger = logging.getLogger(__module__ + "." + __qualname__) logger = logging.getLogger(__module__ + "." + __qualname__)