diff --git a/umbra/behaviors.d/facebook.js b/umbra/behaviors.d/facebook.js index 12e57d0..385af7f 100644 --- a/umbra/behaviors.d/facebook.js +++ b/umbra/behaviors.d/facebook.js @@ -1,4 +1,5 @@ -//.*facebook.com.* +//^https?://(?:www\.)?facebook.com/.*$ + var isOnScreen = function(e) { var eTop = e.getBoundingClientRect().top; return eTop > window.scrollY && eTop < window.scrollY + window.innerHeight; diff --git a/umbra/behaviors.d/flickr.js b/umbra/behaviors.d/flickr.js index 8cddfd6..c5b6707 100644 --- a/umbra/behaviors.d/flickr.js +++ b/umbra/behaviors.d/flickr.js @@ -1,4 +1,5 @@ -//.*flickr.com.* +//^https?://(?:www\.)?flickr.com/.*$ + setInterval(function() { window.scrollBy(0,50); }, 100); setTimeout(function() { diff --git a/umbra/behaviors.d/vimeo.js b/umbra/behaviors.d/vimeo.js new file mode 100644 index 0000000..dc779c8 --- /dev/null +++ b/umbra/behaviors.d/vimeo.js @@ -0,0 +1,7 @@ +//^https?://(?:www\.)?vimeo.com/.*$ + +var videoElements = document.getElementsByTagName('video'); +for (var i = 0; i < videoElements.length; i++) { + videoElements[i].play(); +} + diff --git a/umbra/behaviors.py b/umbra/behaviors.py index e8d4bb7..4518f7f 100644 --- a/umbra/behaviors.py +++ b/umbra/behaviors.py @@ -6,21 +6,20 @@ from itertools import chain import os, re import logging +logger = logging.getLogger('behaviors') behaviors_directory = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ['behaviors.d']) -behavior_files = chain(*[[dir + os.path.sep + file for file in files] for dir, dirs, files in os.walk(behaviors_directory)]) +behavior_files = chain(*[[os.path.join(dir, file) for file in files if file.endswith('.js')] for dir, dirs, files in os.walk(behaviors_directory)]) behaviors = [] for file_name in behavior_files: + logger.debug("reading behavior file {}".format(file_name)) lines = open(file_name).readlines() pattern, script = lines[0][2:].strip(), ''.join(lines[1:]) - behaviors.append({'site' : pattern, 'script': script}) + behaviors.append({'url_regex': pattern, 'script': script, 'file': file_name}) + logger.info("will run behaviors from {} to urls matching {}".format(file_name, pattern)) -print(behaviors) def execute(url, websock, command_id): - logger = logging.getLogger('behaviors') - print(behaviors) for behavior in behaviors: - print("Comparing %s and %s" %(behavior['site'], url)) - if re.match(behavior['site'], url): + if re.match(behavior['url_regex'], url): msg = dumps(dict(method="Runtime.evaluate", params={"expression": behavior['script']}, id=next(command_id))) logger.debug('sending message to {}: {}'.format(websock, msg)) websock.send(msg) diff --git a/umbra/umbra.py b/umbra/umbra.py index a094834..0fdc168 100755 --- a/umbra/umbra.py +++ b/umbra/umbra.py @@ -1,13 +1,18 @@ #!/usr/bin/env python # vim: set sw=4 et: +import logging +import os, sys, argparse +# logging.basicConfig(stream=sys.stdout, level=logging.INFO, +logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, + format='%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s') + from json import dumps, loads +import urllib.request, urllib.error, urllib.parse from itertools import count -import os,sys,argparse, urllib.request, urllib.error, urllib.parse import websocket import time import uuid -import logging import threading import subprocess import signal @@ -53,9 +58,10 @@ class UmbraWorker: if self.idle_timer: self.idle_timer.cancel() self.idle_timer = threading.Timer(10, self.page_done.set) + self.idle_timer.start() if not self.hard_stop_timer: #10 minutes is as long as we should give 1 page self.hard_stop_timer = threading.Timer(600, self.page_done.set) - self.idle_timer.start() + self.hard_stop_timer.start() def visit_page(self, websock): msg = dumps(dict(method="Network.enable", id=next(self.command_id))) @@ -81,10 +87,10 @@ class UmbraWorker: routing_key=self.client_id) def handle_message(self, websock, message): - self.logger.debug("handling message from websocket {} - {}".format(websock, message[:95])) - self._reset_idle_timer() + # self.logger.debug("handling message from websocket {} - {}".format(websock, message[:95])) message = loads(message) if "method" in message.keys() and message["method"] == "Network.requestWillBeSent": + self._reset_idle_timer() self.send_request_to_amqp(message) elif "method" in message.keys() and message["method"] == "Page.loadEventFired": self.logger.debug("got Page.loadEventFired, starting behaviors for {}".format(self.url)) @@ -199,7 +205,6 @@ class Chrome: self.chrome_process.wait() def main(): - arg_parser = argparse.ArgumentParser(prog=os.path.basename(sys.argv[0]), description='umbra - Browser automation tool', formatter_class=argparse.ArgumentDefaultsHelpFormatter)