mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 00:29:53 -05:00
experimenting with captureScreenshot
This commit is contained in:
parent
f254e2eec1
commit
d8a962b29e
@ -6,6 +6,8 @@ import os
|
|||||||
import sys
|
import sys
|
||||||
import logging
|
import logging
|
||||||
import umbra
|
import umbra
|
||||||
|
import re
|
||||||
|
import datetime
|
||||||
|
|
||||||
arg_parser = argparse.ArgumentParser(prog=os.path.basename(__file__),
|
arg_parser = argparse.ArgumentParser(prog=os.path.basename(__file__),
|
||||||
description='browse-url - open urls in chrome/chromium and run behaviors',
|
description='browse-url - open urls in chrome/chromium and run behaviors',
|
||||||
@ -24,7 +26,15 @@ args = arg_parser.parse_args(args=sys.argv[1:])
|
|||||||
logging.basicConfig(stream=sys.stdout, level=args.log_level,
|
logging.basicConfig(stream=sys.stdout, level=args.log_level,
|
||||||
format='%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s')
|
format='%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s')
|
||||||
|
|
||||||
|
|
||||||
with umbra.Browser(chrome_exe=args.chrome_exe) as browser:
|
with umbra.Browser(chrome_exe=args.chrome_exe) as browser:
|
||||||
for url in args.urls:
|
for url in args.urls:
|
||||||
browser.browse_page(url)
|
|
||||||
|
def on_screenshot(screenshot_png):
|
||||||
|
filename = "{}-{:%Y%m%d%H%M%S}.png".format(re.sub(r"\W", "_", url), datetime.datetime.now())
|
||||||
|
with open(filename, mode='wb') as png_out:
|
||||||
|
png_out.write(screenshot_png)
|
||||||
|
logging.info("wrote screenshot to {}".format(filename))
|
||||||
|
|
||||||
|
browser.browse_page(url, on_screenshot=on_screenshot)
|
||||||
|
|
||||||
|
@ -13,6 +13,7 @@ import signal
|
|||||||
import tempfile
|
import tempfile
|
||||||
import os
|
import os
|
||||||
import socket
|
import socket
|
||||||
|
import base64
|
||||||
from umbra.behaviors import Behavior
|
from umbra.behaviors import Behavior
|
||||||
|
|
||||||
class BrowserPool:
|
class BrowserPool:
|
||||||
@ -94,7 +95,7 @@ class Browser:
|
|||||||
def abort_browse_page(self):
|
def abort_browse_page(self):
|
||||||
self._abort_browse_page = True
|
self._abort_browse_page = True
|
||||||
|
|
||||||
def browse_page(self, url, on_request=None):
|
def browse_page(self, url, on_request=None, on_screenshot=None):
|
||||||
"""Synchronously browses a page and runs behaviors.
|
"""Synchronously browses a page and runs behaviors.
|
||||||
|
|
||||||
Raises BrowsingException if browsing the page fails in a non-critical
|
Raises BrowsingException if browsing the page fails in a non-critical
|
||||||
@ -102,6 +103,8 @@ class Browser:
|
|||||||
"""
|
"""
|
||||||
self.url = url
|
self.url = url
|
||||||
self.on_request = on_request
|
self.on_request = on_request
|
||||||
|
self.on_screenshot = on_screenshot
|
||||||
|
self._waiting_on_screenshot_msg_id = None
|
||||||
|
|
||||||
self._websock = websocket.WebSocketApp(self._websocket_url,
|
self._websock = websocket.WebSocketApp(self._websocket_url,
|
||||||
on_open=self._visit_page, on_message=self._handle_message)
|
on_open=self._visit_page, on_message=self._handle_message)
|
||||||
@ -128,6 +131,8 @@ class Browser:
|
|||||||
elif self._abort_browse_page:
|
elif self._abort_browse_page:
|
||||||
raise BrowsingException("browsing page aborted")
|
raise BrowsingException("browsing page aborted")
|
||||||
finally:
|
finally:
|
||||||
|
self.capture_screenshot()
|
||||||
|
|
||||||
if self._websock and self._websock.sock and self._websock.sock.connected:
|
if self._websock and self._websock.sock and self._websock.sock.connected:
|
||||||
try:
|
try:
|
||||||
self._websock.close()
|
self._websock.close()
|
||||||
@ -144,6 +149,10 @@ class Browser:
|
|||||||
|
|
||||||
self._behavior = None
|
self._behavior = None
|
||||||
|
|
||||||
|
|
||||||
|
def capture_screenshot(self):
|
||||||
|
time.sleep(10)
|
||||||
|
|
||||||
def send_to_chrome(self, suppress_logging=False, **kwargs):
|
def send_to_chrome(self, suppress_logging=False, **kwargs):
|
||||||
msg_id = next(self.command_id)
|
msg_id = next(self.command_id)
|
||||||
kwargs['id'] = msg_id
|
kwargs['id'] = msg_id
|
||||||
@ -178,14 +187,8 @@ class Browser:
|
|||||||
elif self.on_request:
|
elif self.on_request:
|
||||||
self.on_request(message)
|
self.on_request(message)
|
||||||
elif "method" in message and message["method"] == "Page.loadEventFired":
|
elif "method" in message and message["method"] == "Page.loadEventFired":
|
||||||
if self._behavior is None:
|
self.logger.info("Page.loadEventFired, requesting screenshot url={} message={}".format(self.url, message))
|
||||||
self.logger.info("Page.loadEventFired, starting behaviors url={} message={}".format(self.url, message))
|
self._waiting_on_screenshot_msg_id = self.send_to_chrome(method="Page.captureScreenshot")
|
||||||
self._behavior = Behavior(self.url, self)
|
|
||||||
self._behavior.start()
|
|
||||||
else:
|
|
||||||
self.logger.warn("Page.loadEventFired again, perhaps original url had a meta refresh, or behaviors accidentally navigated to another page? starting behaviors again url={} message={}".format(self.url, message))
|
|
||||||
self._behavior = Behavior(self.url, self)
|
|
||||||
self._behavior.start()
|
|
||||||
elif "method" in message and message["method"] == "Console.messageAdded":
|
elif "method" in message and message["method"] == "Console.messageAdded":
|
||||||
self.logger.debug("{} console.{} {}".format(websock.url,
|
self.logger.debug("{} console.{} {}".format(websock.url,
|
||||||
message["params"]["message"]["level"],
|
message["params"]["message"]["level"],
|
||||||
@ -203,7 +206,15 @@ class Browser:
|
|||||||
# resume execution
|
# resume execution
|
||||||
self.send_to_chrome(method="Debugger.resume")
|
self.send_to_chrome(method="Debugger.resume")
|
||||||
elif "result" in message:
|
elif "result" in message:
|
||||||
if self._behavior and self._behavior.is_waiting_on_result(message['id']):
|
if message["id"] == self._waiting_on_screenshot_msg_id:
|
||||||
|
if self.on_screenshot:
|
||||||
|
self.on_screenshot(base64.b64decode(message["result"]["data"]))
|
||||||
|
self._waiting_on_screenshot_msg_id = None
|
||||||
|
|
||||||
|
self.logger.info("got screenshot, moving on to starting behaviors url={}".format(self.url))
|
||||||
|
self._behavior = Behavior(self.url, self)
|
||||||
|
self._behavior.start()
|
||||||
|
elif self._behavior and self._behavior.is_waiting_on_result(message["id"]):
|
||||||
self._behavior.notify_of_result(message)
|
self._behavior.notify_of_result(message)
|
||||||
# elif "method" in message and message["method"] in ("Network.dataReceived", "Network.responseReceived", "Network.loadingFinished"):
|
# elif "method" in message and message["method"] in ("Network.dataReceived", "Network.responseReceived", "Network.loadingFinished"):
|
||||||
# pass
|
# pass
|
||||||
@ -212,7 +223,6 @@ class Browser:
|
|||||||
# else:
|
# else:
|
||||||
# self.logger.debug("[no-method] {}".format(message))
|
# self.logger.debug("[no-method] {}".format(message))
|
||||||
|
|
||||||
|
|
||||||
class Chrome:
|
class Chrome:
|
||||||
logger = logging.getLogger(__module__ + "." + __qualname__)
|
logger = logging.getLogger(__module__ + "." + __qualname__)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user