Convert from one big JSON file to per-site JS files, each with its URL regex as a comment on the first line.

This commit is contained in:
Eldon 2014-03-05 23:19:09 -05:00
parent 4c22891093
commit f2f78d2ced
5 changed files with 90 additions and 28 deletions

View File

@ -0,0 +1,52 @@
//.*facebook.com.*
/**
 * Reports whether the top edge of an element lies inside the visible viewport.
 *
 * getBoundingClientRect() returns coordinates relative to the viewport, not
 * the document, so the top edge is compared against 0 and window.innerHeight
 * directly. Offsetting the bounds by window.scrollY (as this check used to)
 * mixes the two coordinate systems and reports visible elements as
 * off-screen as soon as the page has been scrolled.
 *
 * @param {Element} e - element to test
 * @returns {boolean} true when the element's top edge is strictly inside the viewport
 */
var isOnScreen = function(e) {
    var eTop = e.getBoundingClientRect().top;
    return eTop > 0 && eTop < window.innerHeight;
};
// Every element matching this selector is clicked once, as it scrolls into view.
var THINGS_TO_CLICK_SELECTOR = 'a.UFIPagerLink > span, a.UFIPagerLink, a[href^="/browse/likes"], span.UFIReplySocialSentenceLinkText';

// Elements that have already been clicked, stored by reference. A plain
// object cannot be used as the lookup table here: property keys are strings,
// so distinct elements that stringify identically (e.g. every <span> becomes
// "[object HTMLSpanElement]") would all be treated as the same element.
var alreadyClicked = [];

// Handle of the polling timer, kept so it can be cancelled with clearInterval().
var intervalId;

/**
 * One polling step of the behavior.
 *
 * In priority order:
 *   1. If an a[title="Close"] element exists, click it and stop.
 *   2. Otherwise click the first not-yet-clicked, on-screen element matching
 *      THINGS_TO_CLICK_SELECTOR (at most one click per step).
 *   3. If nothing was clicked, scroll down by 100px when unclicked elements
 *      remain off-screen, or when the bottom of the page has not been
 *      reached yet.
 */
var intervalFunc = function() {
    var closeButton = document.querySelector('a[title="Close"]');
    if (closeButton) {
        console.log("clicking close button " + closeButton);
        closeButton.click();
        return;
    }

    var thingsToClick = document.querySelectorAll(THINGS_TO_CLICK_SELECTOR);
    var clickedSomething = false;
    var somethingLeftToClick = false;

    for (var i = 0; i < thingsToClick.length; i++) {
        var target = thingsToClick[i];
        if (alreadyClicked.indexOf(target) !== -1) {
            continue;
        }
        if (isOnScreen(target)) {
            console.log("clicking at " + target.getBoundingClientRect().top + " on " + target);
            target.click();
            // Green border marks what has been clicked.
            target.style.border = '1px solid #0a0';
            alreadyClicked.push(target);
            clickedSomething = true;
            // One click per step.
            break;
        }
        somethingLeftToClick = true;
    }

    if (clickedSomething) {
        return;
    }

    if (somethingLeftToClick) {
        console.log("scrolling because everything on this screen has been clicked but there's more below document.body.clientHeight=" + document.body.clientHeight);
        window.scrollBy(0, 100);
    } else if (window.scrollY + window.innerHeight + 10 < document.body.clientHeight) {
        console.log("scrolling because we're not to the bottom yet document.body.clientHeight=" + document.body.clientHeight);
        window.scrollBy(0, 100);
    }
};

intervalId = setInterval(intervalFunc, 200);

View File

@ -0,0 +1,16 @@
//.*flickr.com.*
// Keep scrolling down in small steps: 50px every 100ms.
var scrollIntervalId = setInterval(function() { window.scrollBy(0, 50); }, 100);

/**
 * Evaluates an XPath expression against the document and returns the
 * matching nodes as a plain array.
 *
 * A snapshot result is requested rather than an iterator: an iterator result
 * becomes invalid as soon as the document is mutated, after which
 * iterateNext() throws, whereas a snapshot stays usable.
 *
 * @param {string} xpath - XPath expression to evaluate
 * @returns {Node[]} matching nodes (empty array when nothing matches)
 */
var findByXPath = function(xpath) {
    var snapshot = document.evaluate(xpath, document, null, XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, null);
    var nodes = [];
    for (var i = 0; i < snapshot.snapshotLength; i++) {
        nodes.push(snapshot.snapshotItem(i));
    }
    return nodes;
};

// After 5 seconds, click the first slideshow link, if the page has one.
var slideshowTimeoutId = setTimeout(function() {
    var slideshowLinks = findByXPath("//a[contains(@class, 'sn-ico-slideshow')]");
    if (slideshowLinks.length > 0) {
        slideshowLinks[0].click();
    }
}, 5000);

// After 5 seconds, collect the photo links and click one every 5 seconds,
// stopping once all of them have been clicked.
var photoClickIntervalId;
var photoTimeoutId = setTimeout(function() {
    var photoLinks = findByXPath("//a[contains(@data-track, 'photo-click')]");
    var nextIndex = 0;
    photoClickIntervalId = setInterval(function() {
        if (nextIndex >= photoLinks.length) {
            clearInterval(photoClickIntervalId);
            return;
        }
        photoLinks[nextIndex].click();
        nextIndex++;
    }, 5000);
}, 5000);

View File

@ -1,14 +0,0 @@
[
{
"scripts": [
"setInterval(function() { window.scrollBy(0,500); }, 150);"
],
"site": ".*facebook.com.*"
},
{
"scripts": [
"setInterval(function() { window.scrollBy(0,50); }, 50);"
],
"site": ".*flickr.com.*"
}
]

View File

@ -2,17 +2,25 @@
from json import dumps, load
from time import sleep
from itertools import chain
import os, re
import logging
# Module-level loading of behavior scripts; runs once, at import time.
# NOTE(review): this span is a rendered diff with old and new lines
# interleaved; the behaviors.json path below looks like the pre-change line
# and behaviors.d like its replacement -- confirm against the real file.
behaviors_file = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ['behaviors.json'])
# Path of the 'behaviors.d' directory located next to this module file.
behaviors_directory = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ['behaviors.d'])
# Flat iterable of the paths of every file found by walking behaviors.d,
# including files in subdirectories.
behavior_files = chain(*[[dir + os.path.sep + file for file in files] for dir, dirs, files in os.walk(behaviors_directory)])
# List of {'site': <regex string>, 'script': <script text>} dicts, one per file.
behaviors = []
for file_name in behavior_files:
# NOTE(review): the file object is never explicitly closed.
lines = open(file_name).readlines()
# The first line, minus its first two characters (the leading "//" in the
# behavior scripts) and surrounding whitespace, is the site regex; all
# remaining lines are the script body.
pattern, script = lines[0][2:].strip(), ''.join(lines[1:])
behaviors.append({'site' : pattern, 'script': script})
# Debug output of everything that was loaded.
print(behaviors)
# Sends the script of every behavior whose 'site' regex matches `url` over
# `websock` as a "Runtime.evaluate" message.
#   url        -- URL tested against each behavior's regex with re.match,
#                 which only matches at the start of the string
#   websock    -- object with a send() method that receives the JSON message
#   command_id -- iterator; next(command_id) supplies each message's "id"
# NOTE(review): this span is a rendered diff with old and new lines
# interleaved. The `with open(behaviors_file)` loop over behavior['scripts']
# looks like the pre-change body and the loop over behavior['script'] like
# its replacement -- confirm against the real file.
def execute(url, websock, command_id):
logger = logging.getLogger('behaviors')
with open(behaviors_file) as js:
behaviors = load(js)
for behavior in behaviors:
if re.match(behavior['site'], url):
for script in behavior['scripts']:
msg = dumps(dict(method="Runtime.evaluate", params={"expression": script}, id=next(command_id)))
logger.debug('sending message to {}: {}'.format(websock, msg))
websock.send(msg)
# Debug output of the loaded behaviors on every call.
print(behaviors)
for behavior in behaviors:
print("Comparing %s and %s" %(behavior['site'], url))
if re.match(behavior['site'], url):
msg = dumps(dict(method="Runtime.evaluate", params={"expression": behavior['script']}, id=next(command_id)))
logger.debug('sending message to {}: {}'.format(websock, msg))
websock.send(msg)

View File

@ -13,6 +13,7 @@ import subprocess
import signal
from kombu import Connection, Exchange, Queue
import tempfile
from umbra import behaviors
class UmbraWorker:
logger = logging.getLogger('umbra.UmbraWorker')
@ -27,6 +28,7 @@ class UmbraWorker:
self.client_id = client_id
self.page_done = threading.Event()
self.idle_timer = None
self.hard_stop_timer = None
def browse_page(self, url, url_metadata):
with self.lock:
@ -50,7 +52,9 @@ class UmbraWorker:
# Cancels any pending idle timer and starts a fresh one that sets
# self.page_done when it fires; on the first call it also creates the
# hard-stop timer.
# NOTE(review): this span is a rendered diff; the two consecutive idle_timer
# assignments (60 and 10 seconds) look like the old and new versions of the
# same line -- confirm against the real file.
def _reset_idle_timer(self):
if self.idle_timer:
self.idle_timer.cancel()
self.idle_timer = threading.Timer(60, self.page_done.set)
self.idle_timer = threading.Timer(10, self.page_done.set)
if not self.hard_stop_timer: #10 minutes is as long as we should give 1 page
# NOTE(review): no hard_stop_timer.start() call is visible in this hunk; a
# threading.Timer does nothing until started -- check that it is started
# (and reset between pages) elsewhere.
self.hard_stop_timer = threading.Timer(600, self.page_done.set)
self.idle_timer.start()
def visit_page(self, websock):
@ -84,7 +88,6 @@ class UmbraWorker:
self.send_request_to_amqp(message)
elif "method" in message.keys() and message["method"] == "Page.loadEventFired":
self.logger.debug("got Page.loadEventFired, starting behaviors for {}".format(self.url))
from umbra import behaviors
behaviors.execute(self.url, websock, self.command_id)
class Umbra:
@ -196,9 +199,6 @@ class Chrome:
self.chrome_process.wait()
def main():
# logging.basicConfig(stream=sys.stdout, level=logging.INFO,
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG,
format='%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s')
arg_parser = argparse.ArgumentParser(prog=os.path.basename(sys.argv[0]),
description='umbra - Browser automation tool',