UmbraWorker.send_to_chrome() - central place to send message to chrome via websocket

This commit is contained in:
Noah Levitt 2014-05-05 12:26:39 -07:00
parent a62a07e6b7
commit 55fad80553
2 changed files with 43 additions and 60 deletions

View File

@ -1,7 +1,7 @@
# vim: set sw=4 et: # vim: set sw=4 et:
import json import json
from itertools import chain import itertools
import os import os
import re import re
import logging import logging
@ -18,7 +18,7 @@ class Behavior:
def behaviors(): def behaviors():
if Behavior._behaviors is None: if Behavior._behaviors is None:
behaviors_directory = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ['behaviors.d']) behaviors_directory = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ['behaviors.d'])
behavior_files = chain(*[[os.path.join(dir, file) for file in files if file.endswith('.js') and file != 'default.js'] for dir, dirs, files in os.walk(behaviors_directory)]) behavior_files = itertools.chain(*[[os.path.join(dir, file) for file in files if file.endswith('.js') and file != 'default.js'] for dir, dirs, files in os.walk(behaviors_directory)])
Behavior._behaviors = [] Behavior._behaviors = []
for file_name in behavior_files: for file_name in behavior_files:
Behavior.logger.debug("reading behavior file {}".format(file_name)) Behavior.logger.debug("reading behavior file {}".format(file_name))
@ -46,10 +46,9 @@ class Behavior:
Behavior._default_behavior = behavior Behavior._default_behavior = behavior
return Behavior._default_behavior return Behavior._default_behavior
def __init__(self, url, websock, command_id): def __init__(self, url, umbra_worker):
self.url = url self.url = url
self.websock = websock self.umbra_worker = umbra_worker
self.command_id = command_id
self.script_finished = False self.script_finished = False
self.waiting_result_msg_ids = [] self.waiting_result_msg_ids = []
@ -65,18 +64,12 @@ class Behavior:
if self.active_behavior is None: if self.active_behavior is None:
self.active_behavior = Behavior.default_behavior() self.active_behavior = Behavior.default_behavior()
msg = json.dumps(dict(method="Runtime.evaluate", params={"expression": self.active_behavior['script']}, id=next(self.command_id))) self.umbra_worker.send_to_chrome(method="Runtime.evaluate", params={"expression": self.active_behavior['script']})
self.logger.debug('sending message to {}: {}'.format(self.websock, msg))
self.websock.send(msg)
self.notify_of_activity() self.notify_of_activity()
def is_finished(self): def is_finished(self):
msg_id = next(self.command_id) msg_id = self.umbra_worker.send_to_chrome(method="Runtime.evaluate", params={"expression": "umbraBehaviorFinished()"})
self.waiting_result_msg_ids.append(msg_id) self.waiting_result_msg_ids.append(msg_id)
msg = json.dumps(dict(method="Runtime.evaluate", params={"expression": "umbraBehaviorFinished()"}, id=msg_id))
self.logger.debug('sending message to {}: {}'.format(self.websock, msg))
self.websock.send(msg)
request_idle_timeout_sec = 30 request_idle_timeout_sec = 30
if self.active_behavior and 'request_idle_timeout_sec' in self.active_behavior: if self.active_behavior and 'request_idle_timeout_sec' in self.active_behavior:

View File

@ -2,13 +2,15 @@
# vim: set sw=4 et: # vim: set sw=4 et:
import logging import logging
import os, sys, argparse import sys
# logging.basicConfig(stream=sys.stdout, level=logging.INFO, # logging.basicConfig(stream=sys.stdout, level=logging.INFO,
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, logging.basicConfig(stream=sys.stdout, level=logging.DEBUG,
format='%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s') format='%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s')
from json import dumps, loads import os
import argparse
import json
import urllib.request, urllib.error, urllib.parse import urllib.request, urllib.error, urllib.parse
import itertools import itertools
import websocket import websocket
@ -17,7 +19,7 @@ import uuid
import threading import threading
import subprocess import subprocess
import signal import signal
from kombu import Connection, Exchange, Queue import kombu
import tempfile import tempfile
from umbra.behaviors import Behavior from umbra.behaviors import Behavior
@ -39,6 +41,7 @@ class UmbraWorker:
self.chrome_wait = chrome_wait self.chrome_wait = chrome_wait
self.client_id = client_id self.client_id = client_id
self._behavior = None self._behavior = None
self.websock = None
def browse_page(self, url, url_metadata): def browse_page(self, url, url_metadata):
"""Synchronously browse a page and run behaviors.""" """Synchronously browse a page and run behaviors."""
@ -47,17 +50,17 @@ class UmbraWorker:
self.url_metadata = url_metadata self.url_metadata = url_metadata
with tempfile.TemporaryDirectory() as user_data_dir: with tempfile.TemporaryDirectory() as user_data_dir:
with Chrome(self.chrome_port, self.chrome_exe, self.chrome_wait, user_data_dir) as websocket_url: with Chrome(self.chrome_port, self.chrome_exe, self.chrome_wait, user_data_dir) as websocket_url:
websock = websocket.WebSocketApp(websocket_url, self.websock = websocket.WebSocketApp(websocket_url,
on_open=self._visit_page, on_open=self._visit_page,
on_message=self._handle_message) on_message=self._handle_message)
websock_thread = threading.Thread(target=websock.run_forever, kwargs={'ping_timeout':0.5}) websock_thread = threading.Thread(target=self.websock.run_forever, kwargs={'ping_timeout':0.5})
websock_thread.start() websock_thread.start()
start = time.time() start = time.time()
while True: while True:
time.sleep(0.5) time.sleep(0.5)
if not websock or not websock.sock or not websock.sock.connected: if not self.websock or not self.websock.sock or not self.websock.sock.connected:
self.logger.error("websocket closed, did chrome die??? {}".format(websock)) self.logger.error("websocket closed, did chrome die??? {}".format(self.websock))
break break
elif time.time() - start > UmbraWorker.HARD_TIMEOUT_SECONDS: elif time.time() - start > UmbraWorker.HARD_TIMEOUT_SECONDS:
self.logger.info("finished browsing page, reached hard timeout of {} seconds url={}".format(UmbraWorker.HARD_TIMEOUT_SECONDS, self.url)) self.logger.info("finished browsing page, reached hard timeout of {} seconds url={}".format(UmbraWorker.HARD_TIMEOUT_SECONDS, self.url))
@ -67,41 +70,32 @@ class UmbraWorker:
break break
try: try:
websock.close() self.websock.close()
except BaseException as e: except BaseException as e:
self.logger.error("exception closing websocket {} - {}".format(websock, e)) self.logger.error("exception closing websocket {} - {}".format(self.websock, e))
websock_thread.join() websock_thread.join()
def send_to_chrome(self, **kwargs):
    """Serialize a command as JSON and send it to chrome over the websocket.

    The keyword arguments form the message body; callers in this file pass
    ``method`` and, optionally, ``params``. The next value drawn from
    ``self.command_id`` is stored under the ``id`` key, replacing any ``id``
    the caller supplied.

    Returns:
        The id assigned to the message that was sent.
    """
    command_id = next(self.command_id)
    # Build the payload with the id last, matching insertion into kwargs.
    payload = dict(kwargs, id=command_id)
    encoded = json.dumps(payload)
    self.logger.debug('sending message to {}: {}'.format(self.websock, encoded))
    self.websock.send(encoded)
    return command_id
def _visit_page(self, websock): def _visit_page(self, websock):
msg = dumps(dict(method="Network.enable", id=next(self.command_id))) self.send_to_chrome(method="Network.enable")
self.logger.debug('sending message to {}: {}'.format(websock, msg)) self.send_to_chrome(method="Page.enable")
websock.send(msg) self.send_to_chrome(method="Console.enable")
self.send_to_chrome(method="Debugger.enable")
msg = dumps(dict(method="Page.enable", id=next(self.command_id))) self.send_to_chrome(method="Runtime.enable")
self.logger.debug('sending message to {}: {}'.format(websock, msg))
websock.send(msg)
msg = dumps(dict(method="Console.enable", id=next(self.command_id)))
self.logger.debug('sending message to {}: {}'.format(websock, msg))
websock.send(msg)
msg = dumps(dict(method="Debugger.enable", id=next(self.command_id)))
self.logger.debug('sending message to {}: {}'.format(websock, msg))
websock.send(msg)
msg = dumps(dict(method="Runtime.enable", id=next(self.command_id)))
self.logger.debug('sending message to {}: {}'.format(websock, msg))
websock.send(msg)
# disable google analytics, see _handle_message() where breakpoint is caught "Debugger.paused" # disable google analytics, see _handle_message() where breakpoint is caught "Debugger.paused"
msg = dumps(dict(method="Debugger.setBreakpointByUrl", id=next(self.command_id), params={"lineNumber": 1, "urlRegex":"https?://www.google-analytics.com/analytics.js"})) self.send_to_chrome(method="Debugger.setBreakpointByUrl", params={"lineNumber": 1, "urlRegex":"https?://www.google-analytics.com/analytics.js"})
self.logger.debug('sending message to {}: {}'.format(websock, msg))
websock.send(msg)
msg = dumps(dict(method="Page.navigate", id=next(self.command_id), params={"url": self.url})) # navigate to the page!
self.logger.debug('sending message to {}: {}'.format(websock, msg)) self.send_to_chrome(method="Page.navigate", params={"url": self.url})
websock.send(msg)
# XXX should this class know anything about amqp? or should it # XXX should this class know anything about amqp? or should it
# delegate this back up to the Umbra class? # delegate this back up to the Umbra class?
@ -118,7 +112,7 @@ class UmbraWorker:
def _handle_message(self, websock, message): def _handle_message(self, websock, message):
# self.logger.debug("message from {} - {}".format(websock.url, message[:95])) # self.logger.debug("message from {} - {}".format(websock.url, message[:95]))
# self.logger.debug("message from {} - {}".format(websock.url, message)) # self.logger.debug("message from {} - {}".format(websock.url, message))
message = loads(message) message = json.loads(message)
if "method" in message and message["method"] == "Network.requestWillBeSent": if "method" in message and message["method"] == "Network.requestWillBeSent":
if self._behavior: if self._behavior:
self._behavior.notify_of_activity() self._behavior.notify_of_activity()
@ -129,7 +123,7 @@ class UmbraWorker:
elif "method" in message and message["method"] == "Page.loadEventFired": elif "method" in message and message["method"] == "Page.loadEventFired":
if self._behavior is None: if self._behavior is None:
self.logger.info("Page.loadEventFired, starting behaviors url={} message={}".format(self.url, message)) self.logger.info("Page.loadEventFired, starting behaviors url={} message={}".format(self.url, message))
self._behavior = Behavior(self.url, websock, self.command_id) self._behavior = Behavior(self.url, self)
self._behavior.start() self._behavior.start()
else: else:
self.logger.warn("Page.loadEventFired but behaviors already running url={} message={}".format(self.url, message)) self.logger.warn("Page.loadEventFired but behaviors already running url={} message={}".format(self.url, message))
@ -145,14 +139,10 @@ class UmbraWorker:
scriptId = message['params']['callFrames'][0]['location']['scriptId'] scriptId = message['params']['callFrames'][0]['location']['scriptId']
# replace script # replace script
msg = dumps(dict(method="Debugger.setScriptSource", id=next(self.command_id), params={"scriptId": scriptId, "scriptSource":"console.log('google analytics is no more!');"})) self.send_to_chrome(method="Debugger.setScriptSource", params={"scriptId": scriptId, "scriptSource":"console.log('google analytics is no more!');"})
self.logger.debug('sending message to {}: {}'.format(websock, msg))
websock.send(msg)
# resume execution # resume execution
msg = dumps(dict(method="Debugger.resume", id=next(self.command_id))) self.send_to_chrome(method="Debugger.resume")
self.logger.debug('sending message to {}: {}'.format(websock, msg))
websock.send(msg)
elif "result" in message: elif "result" in message:
if self._behavior and self._behavior.is_waiting_on_result(message['id']): if self._behavior and self._behavior.is_waiting_on_result(message['id']):
self._behavior.notify_of_result(message) self._behavior.notify_of_result(message)
@ -217,10 +207,10 @@ class Umbra:
def _consume_amqp(self): def _consume_amqp(self):
while not self.amqp_stop.is_set(): while not self.amqp_stop.is_set():
try: try:
self.umbra_exchange = Exchange(name='umbra', type='direct', durable=True) self.umbra_exchange = kombu.Exchange(name='umbra', type='direct', durable=True)
url_queue = Queue('urls', routing_key='url', exchange=self.umbra_exchange) url_queue = kombu.Queue('urls', routing_key='url', exchange=self.umbra_exchange)
self.logger.info("connecting to amqp exchange={} at {}".format(self.umbra_exchange.name, self.amqp_url)) self.logger.info("connecting to amqp exchange={} at {}".format(self.umbra_exchange.name, self.amqp_url))
with Connection(self.amqp_url) as conn: with kombu.Connection(self.amqp_url) as conn:
if self.producer_lock is None: if self.producer_lock is None:
self.producer_lock = threading.Lock() self.producer_lock = threading.Lock()
with self.producer_lock: with self.producer_lock:
@ -273,7 +263,7 @@ class Chrome:
def fetch_debugging_json():
    """Fetch and parse the JSON served at chrome's debugging ``/json`` URL.

    Reads ``http://localhost:<port>/json``, decodes the response body as
    UTF-8 and returns the parsed JSON value.
    """
    # NOTE(review): the signature takes no ``self`` yet the body reads
    # ``self.port``; unless ``self`` comes from an enclosing scope that is
    # not visible here, calling this raises NameError -- confirm.
    raw_json = urllib.request.urlopen("http://localhost:%s/json" % self.port).read()
    # Keep the decoded text under its own name: binding it to ``json`` would
    # shadow the json module and make ``json.loads`` fail on a str.
    debugging_text = raw_json.decode('utf-8')
    return json.loads(debugging_text)
# returns websocket url to chrome window with about:blank loaded # returns websocket url to chrome window with about:blank loaded
def __enter__(self): def __enter__(self):
@ -294,7 +284,7 @@ class Chrome:
while True: while True:
try: try:
raw_json = urllib.request.urlopen(json_url).read() raw_json = urllib.request.urlopen(json_url).read()
all_debug_info = loads(raw_json.decode('utf-8')) all_debug_info = json.loads(raw_json.decode('utf-8'))
debug_info = [x for x in all_debug_info if x['url'] == 'about:blank'] debug_info = [x for x in all_debug_info if x['url'] == 'about:blank']
if debug_info and 'webSocketDebuggerUrl' in debug_info[0]: if debug_info and 'webSocketDebuggerUrl' in debug_info[0]: