From f2f78d2ced740a5f63cd8083bba7958610142ef6 Mon Sep 17 00:00:00 2001 From: Eldon Date: Wed, 5 Mar 2014 23:19:09 -0500 Subject: [PATCH] Convert from one big json file, to js files with a regex as a comment at the top. --- umbra/behaviors.d/facebook.js | 52 +++++++++++++++++++++++++++++++++++ umbra/behaviors.d/flickr.js | 16 +++++++++++ umbra/behaviors.json | 14 ---------- umbra/behaviors.py | 26 ++++++++++++------ umbra/umbra.py | 10 +++---- 5 files changed, 90 insertions(+), 28 deletions(-) create mode 100644 umbra/behaviors.d/facebook.js create mode 100644 umbra/behaviors.d/flickr.js delete mode 100644 umbra/behaviors.json diff --git a/umbra/behaviors.d/facebook.js b/umbra/behaviors.d/facebook.js new file mode 100644 index 0000000..8eab7db --- /dev/null +++ b/umbra/behaviors.d/facebook.js @@ -0,0 +1,52 @@ +//.*facebook.com.* +var isOnScreen = function(e) { + var eTop = e.getBoundingClientRect().top; + return eTop > window.scrollY && eTop < window.scrollY + window.innerHeight; +}; + +var THINGS_TO_CLICK_SELECTOR = 'a.UFIPagerLink > span, a.UFIPagerLink, a[href^="/browse/likes"], span.UFIReplySocialSentenceLinkText'; +var alreadyClicked = {}; +var intervalId; + +var intervalFunc = function() { + var closeButton = document.querySelector('a[title="Close"]'); + if (closeButton) { + console.log("clicking close button " + closeButton); + closeButton.click(); + return; + } + + var thingsToClick = document.querySelectorAll(THINGS_TO_CLICK_SELECTOR); + var clickedSomething = false; + var somethingLeftToClick = false; + + for (var i = 0; i < thingsToClick.length; i++) { + var target = thingsToClick[i]; + if (!(target in alreadyClicked)) { + if (isOnScreen(target)) { + // var pos = target.getBoundingClientRect().top; + // window.scrollTo(0, target.getBoundingClientRect().top - 100); + console.log("clicking at " + target.getBoundingClientRect().top + " on " + target); + target.click(); + target.style.border = '1px solid #0a0'; + alreadyClicked[target] = true; + clickedSomething = true; + break; + } else { + somethingLeftToClick = true; + } + } + } + + if (!clickedSomething) { + if (somethingLeftToClick) { + console.log("scrolling because everything on this screen has been clicked but there's more below document.body.clientHeight=" + document.body.clientHeight); + window.scrollBy(0, 100); + } else if (window.scrollY + window.innerHeight + 10 < document.body.clientHeight) { + console.log("scrolling because we're not to the bottom yet document.body.clientHeight=" + document.body.clientHeight); + window.scrollBy(0, 100); + } + } +} + +var intervalId = setInterval(intervalFunc, 200); diff --git a/umbra/behaviors.d/flickr.js b/umbra/behaviors.d/flickr.js new file mode 100644 index 0000000..8cddfd6 --- /dev/null +++ b/umbra/behaviors.d/flickr.js @@ -0,0 +1,16 @@ +//.*flickr.com.* +setInterval(function() { window.scrollBy(0,50); }, 100); + +setTimeout(function() { + a = document.evaluate("//a[contains(@class, 'sn-ico-slideshow')]", document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null ); + f = a.iterateNext(); + f.click();}, +5000); + +setTimeout(function() { + a = document.evaluate("//a[contains(@data-track, 'photo-click')]", document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null ); + setInterval(function() { + f = a.iterateNext(); + f.click(); + }, 5000); +}, 5000); diff --git a/umbra/behaviors.json b/umbra/behaviors.json deleted file mode 100644 index 7774ec9..0000000 --- a/umbra/behaviors.json +++ /dev/null @@ -1,14 +0,0 @@ -[ - { - "scripts": [ - "setInterval(function() { window.scrollBy(0,500); }, 150);" - ], - "site": ".*facebook.com.*" - }, - { - "scripts": [ - "setInterval(function() { window.scrollBy(0,50); }, 50);" - ], - "site": ".*flickr.com.*" - } -] diff --git a/umbra/behaviors.py b/umbra/behaviors.py index c92b2f2..e8d4bb7 100644 --- a/umbra/behaviors.py +++ b/umbra/behaviors.py @@ -2,17 +2,25 @@ from json import dumps, load from time import sleep +from itertools import chain import os, re import logging -behaviors_file = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ['behaviors.json']) +behaviors_directory = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ['behaviors.d']) +behavior_files = chain(*[[dir + os.path.sep + file for file in files] for dir, dirs, files in os.walk(behaviors_directory)]) +behaviors = [] +for file_name in behavior_files: + lines = open(file_name).readlines() + pattern, script = lines[0][2:].strip(), ''.join(lines[1:]) + behaviors.append({'site' : pattern, 'script': script}) + +print(behaviors) def execute(url, websock, command_id): logger = logging.getLogger('behaviors') - with open(behaviors_file) as js: - behaviors = load(js) - for behavior in behaviors: - if re.match(behavior['site'], url): - for script in behavior['scripts']: - msg = dumps(dict(method="Runtime.evaluate", params={"expression": script}, id=next(command_id))) - logger.debug('sending message to {}: {}'.format(websock, msg)) - websock.send(msg) + print(behaviors) + for behavior in behaviors: + print("Comparing %s and %s" %(behavior['site'], url)) + if re.match(behavior['site'], url): + msg = dumps(dict(method="Runtime.evaluate", params={"expression": behavior['script']}, id=next(command_id))) + logger.debug('sending message to {}: {}'.format(websock, msg)) + websock.send(msg) diff --git a/umbra/umbra.py b/umbra/umbra.py index babc9cd..a094834 100755 --- a/umbra/umbra.py +++ b/umbra/umbra.py @@ -13,6 +13,7 @@ import subprocess import signal from kombu import Connection, Exchange, Queue import tempfile +from umbra import behaviors class UmbraWorker: logger = logging.getLogger('umbra.UmbraWorker') @@ -27,6 +28,7 @@ class UmbraWorker: self.client_id = client_id self.page_done = threading.Event() self.idle_timer = None + self.hard_stop_timer = None def browse_page(self, url, url_metadata): with self.lock: @@ -50,7 +52,9 @@ class UmbraWorker: def _reset_idle_timer(self): if self.idle_timer: self.idle_timer.cancel() - self.idle_timer = threading.Timer(60, self.page_done.set) + self.idle_timer = threading.Timer(10, self.page_done.set) + if not self.hard_stop_timer: #10 minutes is as long as we should give 1 page + self.hard_stop_timer = threading.Timer(600, self.page_done.set) self.idle_timer.start() def visit_page(self, websock): @@ -84,7 +88,6 @@ class UmbraWorker: self.send_request_to_amqp(message) elif "method" in message.keys() and message["method"] == "Page.loadEventFired": self.logger.debug("got Page.loadEventFired, starting behaviors for {}".format(self.url)) - from umbra import behaviors behaviors.execute(self.url, websock, self.command_id) class Umbra: @@ -196,9 +199,6 @@ class Chrome: self.chrome_process.wait() def main(): - # logging.basicConfig(stream=sys.stdout, level=logging.INFO, - logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, - format='%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s') arg_parser = argparse.ArgumentParser(prog=os.path.basename(sys.argv[0]), description='umbra - Browser automation tool',