Convert from one big JSON file to individual JS files, each with a regex as a comment at the top.

This commit is contained in:
Eldon 2014-03-05 23:19:09 -05:00
parent 4c22891093
commit f2f78d2ced
5 changed files with 90 additions and 28 deletions

View File

@ -0,0 +1,52 @@
//.*facebook.com.*
/**
 * Report whether the top edge of an element lies inside the visible viewport.
 *
 * @param {Element} e - element to test; must provide getBoundingClientRect().
 * @returns {boolean} true when the element's top edge is strictly below the
 *     top of the viewport and strictly above its bottom.
 */
var isOnScreen = function(e) {
    // getBoundingClientRect() reports coordinates relative to the viewport,
    // not the document, so the scroll offset must not be added to the bounds.
    var eTop = e.getBoundingClientRect().top;
    return eTop > 0 && eTop < window.innerHeight;
};
// Selector for the elements this behavior clicks (presumably Facebook's
// comment-pager, "likes" and reply-expander links -- verify against live markup).
var THINGS_TO_CLICK_SELECTOR = 'a.UFIPagerLink > span, a.UFIPagerLink, a[href^="/browse/likes"], span.UFIReplySocialSentenceLinkText';
// Elements that have already been clicked, stored as element references.
// An object keyed by element does not work here: property keys are coerced
// to strings, so distinct elements with the same string form would collide
// and only the first of them would ever be clicked.
var alreadyClicked = [];
var intervalId;

/**
 * One tick of the behavior.
 *
 * If a link titled "Close" is present it is clicked and the tick ends.
 * Otherwise the first not-yet-clicked, on-screen element matching
 * THINGS_TO_CLICK_SELECTOR is clicked (at most one per tick). When nothing
 * was clicked, the page is scrolled down by 100px if unclicked targets remain
 * off screen or the bottom of the document has not been reached yet.
 */
var intervalFunc = function() {
    var closeButton = document.querySelector('a[title="Close"]');
    if (closeButton) {
        console.log("clicking close button " + closeButton);
        closeButton.click();
        return;
    }

    var thingsToClick = document.querySelectorAll(THINGS_TO_CLICK_SELECTOR);
    var clickedSomething = false;
    var somethingLeftToClick = false;

    for (var i = 0; i < thingsToClick.length; i++) {
        var target = thingsToClick[i];
        if (alreadyClicked.indexOf(target) !== -1) {
            continue;
        }
        if (isOnScreen(target)) {
            console.log("clicking at " + target.getBoundingClientRect().top + " on " + target);
            target.click();
            // Visual marker so clicked elements can be spotted on the page.
            target.style.border = '1px solid #0a0';
            alreadyClicked.push(target);
            clickedSomething = true;
            // Only one click per tick; the next tick picks up the next target.
            break;
        }
        somethingLeftToClick = true;
    }

    if (!clickedSomething) {
        if (somethingLeftToClick) {
            console.log("scrolling because everything on this screen has been clicked but there's more below document.body.clientHeight=" + document.body.clientHeight);
            window.scrollBy(0, 100);
        } else if (window.scrollY + window.innerHeight + 10 < document.body.clientHeight) {
            console.log("scrolling because we're not to the bottom yet document.body.clientHeight=" + document.body.clientHeight);
            window.scrollBy(0, 100);
        }
    }
};

intervalId = setInterval(intervalFunc, 200);

View File

@ -0,0 +1,16 @@
//.*flickr.com.*
// Scroll down 50px every 100ms for as long as the page lives.
var scrollIntervalId = setInterval(function() { window.scrollBy(0, 50); }, 100);

/**
 * Click the first link whose class contains 'sn-ico-slideshow', if any.
 * Does nothing when the page has no such link.
 */
var clickSlideshowLink = function() {
    var result = document.evaluate("//a[contains(@class, 'sn-ico-slideshow')]", document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
    var link = result.singleNodeValue;
    if (link) {
        link.click();
    }
};

var photoClickIntervalId;

/**
 * Collect the links whose data-track attribute contains 'photo-click' and
 * click them one at a time, one every 5 seconds, stopping after the last one.
 */
var startClickingPhotos = function() {
    // A snapshot is used instead of a node iterator: an iterator result is
    // invalidated as soon as the document changes, and this page is being
    // scrolled (and clicked) while the links are walked.
    var photoLinks = document.evaluate("//a[contains(@data-track, 'photo-click')]", document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
    var nextIndex = 0;
    photoClickIntervalId = setInterval(function() {
        if (nextIndex >= photoLinks.snapshotLength) {
            // Every link has been clicked; stop instead of failing on null.
            clearInterval(photoClickIntervalId);
            return;
        }
        photoLinks.snapshotItem(nextIndex).click();
        nextIndex++;
    }, 5000);
};

// Both steps start 5 seconds after the script is injected, slideshow link first.
var slideshowTimeoutId = setTimeout(clickSlideshowLink, 5000);
var photosTimeoutId = setTimeout(startClickingPhotos, 5000);

View File

@ -1,14 +0,0 @@
[
{
"scripts": [
"setInterval(function() { window.scrollBy(0,500); }, 150);"
],
"site": ".*facebook.com.*"
},
{
"scripts": [
"setInterval(function() { window.scrollBy(0,50); }, 50);"
],
"site": ".*flickr.com.*"
}
]

View File

@ -2,17 +2,25 @@
from json import dumps, load from json import dumps, load
from time import sleep from time import sleep
from itertools import chain
import os, re import os, re
import logging import logging
behaviors_file = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ['behaviors.json']) behaviors_directory = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ['behaviors.d'])
behavior_files = chain(*[[dir + os.path.sep + file for file in files] for dir, dirs, files in os.walk(behaviors_directory)])
behaviors = []
for file_name in behavior_files:
lines = open(file_name).readlines()
pattern, script = lines[0][2:].strip(), ''.join(lines[1:])
behaviors.append({'site' : pattern, 'script': script})
print(behaviors)
def execute(url, websock, command_id): def execute(url, websock, command_id):
logger = logging.getLogger('behaviors') logger = logging.getLogger('behaviors')
with open(behaviors_file) as js: print(behaviors)
behaviors = load(js) for behavior in behaviors:
for behavior in behaviors: print("Comparing %s and %s" %(behavior['site'], url))
if re.match(behavior['site'], url): if re.match(behavior['site'], url):
for script in behavior['scripts']: msg = dumps(dict(method="Runtime.evaluate", params={"expression": behavior['script']}, id=next(command_id)))
msg = dumps(dict(method="Runtime.evaluate", params={"expression": script}, id=next(command_id))) logger.debug('sending message to {}: {}'.format(websock, msg))
logger.debug('sending message to {}: {}'.format(websock, msg)) websock.send(msg)
websock.send(msg)

View File

@ -13,6 +13,7 @@ import subprocess
import signal import signal
from kombu import Connection, Exchange, Queue from kombu import Connection, Exchange, Queue
import tempfile import tempfile
from umbra import behaviors
class UmbraWorker: class UmbraWorker:
logger = logging.getLogger('umbra.UmbraWorker') logger = logging.getLogger('umbra.UmbraWorker')
@ -27,6 +28,7 @@ class UmbraWorker:
self.client_id = client_id self.client_id = client_id
self.page_done = threading.Event() self.page_done = threading.Event()
self.idle_timer = None self.idle_timer = None
self.hard_stop_timer = None
def browse_page(self, url, url_metadata): def browse_page(self, url, url_metadata):
with self.lock: with self.lock:
@ -50,7 +52,9 @@ class UmbraWorker:
def _reset_idle_timer(self): def _reset_idle_timer(self):
if self.idle_timer: if self.idle_timer:
self.idle_timer.cancel() self.idle_timer.cancel()
self.idle_timer = threading.Timer(60, self.page_done.set) self.idle_timer = threading.Timer(10, self.page_done.set)
if not self.hard_stop_timer: #10 minutes is as long as we should give 1 page
self.hard_stop_timer = threading.Timer(600, self.page_done.set)
self.idle_timer.start() self.idle_timer.start()
def visit_page(self, websock): def visit_page(self, websock):
@ -84,7 +88,6 @@ class UmbraWorker:
self.send_request_to_amqp(message) self.send_request_to_amqp(message)
elif "method" in message.keys() and message["method"] == "Page.loadEventFired": elif "method" in message.keys() and message["method"] == "Page.loadEventFired":
self.logger.debug("got Page.loadEventFired, starting behaviors for {}".format(self.url)) self.logger.debug("got Page.loadEventFired, starting behaviors for {}".format(self.url))
from umbra import behaviors
behaviors.execute(self.url, websock, self.command_id) behaviors.execute(self.url, websock, self.command_id)
class Umbra: class Umbra:
@ -196,9 +199,6 @@ class Chrome:
self.chrome_process.wait() self.chrome_process.wait()
def main(): def main():
# logging.basicConfig(stream=sys.stdout, level=logging.INFO,
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG,
format='%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s')
arg_parser = argparse.ArgumentParser(prog=os.path.basename(sys.argv[0]), arg_parser = argparse.ArgumentParser(prog=os.path.basename(sys.argv[0]),
description='umbra - Browser automation tool', description='umbra - Browser automation tool',