Change the magic first line of behavior JS files to a commented-out JSON blob, which should include the fields 'url_regex' and 'request_idle_timeout_sec'. The behavior.is_finished() check now incorporates the custom idle timeout. Also rename variables in behavior scripts to use an umbra/UMBRA_ prefix, to loosely namespace them, and add "finished" logic to the facebook and vimeo behaviors (flickr needs more work to support it).

This commit is contained in:
Noah Levitt 2014-05-05 11:58:55 -07:00
parent 2a9633ad77
commit a62a07e6b7
5 changed files with 130 additions and 67 deletions

View file

@ -1,23 +1,13 @@
// {"request_idle_timeout_sec":10}
//
// vim:set sw=8 et: // vim:set sw=8 et:
//
// Scrolls to the bottom of the page. That's it at the moment.
//
// STATES = ['NASCENT', 'NEED_SCROLL', 'WAITING', 'FINISHED']
// var transition = prepareTransition(state);
// if (transition.callback) {
// newState.callback(state, newState);
// }
// state = newState;
// if (state.status === 'NASCENT') {
// } else if (state.status == 'NEED_SCROLL') {
// } else if (state.status == 'FINISHED') {
var UMBRA_FINISH_AFTER_IDLE_TIME = 10 * 1000; // ms
var umbraState = {'idleSince':null}; var umbraState = {'idleSince':null};
var umbraFinished = false; var umbraFinished = false;
var umbraIntervalFunc = function() { var umbraIntervalFunc = function() {
// var needToScroll = (window.scrollY + window.innerHeight + 10 < document.body.clientHeight);
// var needToScroll = (document.documentElement.scrollTop + document.documentElement.clientHeight < document.documentElement.scrollHeight);
var needToScroll = (window.scrollY + window.innerHeight < document.documentElement.scrollHeight); var needToScroll = (window.scrollY + window.innerHeight < document.documentElement.scrollHeight);
// console.log('intervalFunc umbraState.idleSince=' + umbraState.idleSince + ' needToScroll=' + needToScroll + ' window.scrollY=' + window.scrollY + ' window.innerHeight=' + window.innerHeight + ' document.documentElement.scrollHeight=' + document.documentElement.scrollHeight); // console.log('intervalFunc umbraState.idleSince=' + umbraState.idleSince + ' needToScroll=' + needToScroll + ' window.scrollY=' + window.scrollY + ' window.innerHeight=' + window.innerHeight + ' document.documentElement.scrollHeight=' + document.documentElement.scrollHeight);
@ -29,10 +19,15 @@ var umbraIntervalFunc = function() {
} }
} }
// If we haven't had anything to do (scrolled, clicked, etc) in this amount of
// time, then we consider ourselves finished with the page.
var UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC = 10;
// Called from outside of this script.
var umbraBehaviorFinished = function() { var umbraBehaviorFinished = function() {
if (umbraState.idleSince != null) { if (umbraState.idleSince != null) {
var idleTime = Date.now() - umbraState.idleSince; var idleTimeMs = Date.now() - umbraState.idleSince;
if (idleTime > UMBRA_FINISH_AFTER_IDLE_TIME) { if (idleTimeMs / 1000 > UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC) {
return true; return true;
} }
} }

View file

@ -1,7 +1,9 @@
//^https?://(?:www\.)?facebook\.com/.*$ // {"url_regex":"^https?://(?:www\\.)?facebook\\.com/.*$", "request_idle_timeout_sec":30}
//
// vim:set sw=8 et: // vim:set sw=8 et:
//
var aboveBelowOrOnScreen = function(e) { var umbraAboveBelowOrOnScreen = function(e) {
var eTop = e.getBoundingClientRect().top; var eTop = e.getBoundingClientRect().top;
if (eTop < window.scrollY) { if (eTop < window.scrollY) {
return -1; // above return -1; // above
@ -13,11 +15,11 @@ var aboveBelowOrOnScreen = function(e) {
} }
// comments - 'a.UFIPagerLink > span, a.UFIPagerLink, span.UFIReplySocialSentenceLinkText' // comments - 'a.UFIPagerLink > span, a.UFIPagerLink, span.UFIReplySocialSentenceLinkText'
var THINGS_TO_CLICK_SELECTOR = 'a[href^="/browse/likes"], *[rel="theater"]'; var UMBRA_THINGS_TO_CLICK_SELECTOR = 'a[href^="/browse/likes"], *[rel="theater"]';
var alreadyClicked = {}; var umbraAlreadyClicked = {};
var intervalId; var umbraState = {'idleSince':null};
var intervalFunc = function() { var umbraIntervalFunc = function() {
var closeButton = document.querySelector('a[title="Close"]'); var closeButton = document.querySelector('a[title="Close"]');
if (closeButton) { if (closeButton) {
console.log("clicking close button " + closeButton.outerHTML); console.log("clicking close button " + closeButton.outerHTML);
@ -31,15 +33,15 @@ var intervalFunc = function() {
return; return;
} }
var thingsToClick = document.querySelectorAll(THINGS_TO_CLICK_SELECTOR); var thingsToClick = document.querySelectorAll(UMBRA_THINGS_TO_CLICK_SELECTOR);
var clickedSomething = false; var clickedSomething = false;
var somethingLeftBelow = false; var somethingLeftBelow = false;
var missedAbove = 0; var missedAbove = 0;
for (var i = 0; i < thingsToClick.length; i++) { for (var i = 0; i < thingsToClick.length; i++) {
var target = thingsToClick[i]; var target = thingsToClick[i];
if (!(target in alreadyClicked)) { if (!(target in umbraAlreadyClicked)) {
var where = aboveBelowOrOnScreen(target); var where = umbraAboveBelowOrOnScreen(target);
if (where == 0) { // on screen if (where == 0) { // on screen
// var pos = target.getBoundingClientRect().top; // var pos = target.getBoundingClientRect().top;
// window.scrollTo(0, target.getBoundingClientRect().top - 100); // window.scrollTo(0, target.getBoundingClientRect().top - 100);
@ -48,8 +50,9 @@ var intervalFunc = function() {
target.click(); target.click();
} }
target.style.border = '1px solid #0a0'; target.style.border = '1px solid #0a0';
alreadyClicked[target] = true; umbraAlreadyClicked[target] = true;
clickedSomething = true; clickedSomething = true;
umbraState.idleSince = null;
break; break;
} else if (where > 0) { } else if (where > 0) {
somethingLeftBelow = true; somethingLeftBelow = true;
@ -67,11 +70,31 @@ var intervalFunc = function() {
if (somethingLeftBelow) { if (somethingLeftBelow) {
console.log("scrolling because everything on this screen has been clicked but there's more below document.body.clientHeight=" + document.body.clientHeight); console.log("scrolling because everything on this screen has been clicked but there's more below document.body.clientHeight=" + document.body.clientHeight);
window.scrollBy(0, 200); window.scrollBy(0, 200);
umbraState.idleSince = null;
} else if (window.scrollY + window.innerHeight + 10 < document.body.clientHeight) { } else if (window.scrollY + window.innerHeight + 10 < document.body.clientHeight) {
console.log("scrolling because we're not to the bottom yet document.body.clientHeight=" + document.body.clientHeight); console.log("scrolling because we're not to the bottom yet document.body.clientHeight=" + document.body.clientHeight);
window.scrollBy(0, 200); window.scrollBy(0, 200);
} umbraState.idleSince = null;
} else if (umbraState.idleSince == null) {
umbraState.idleSince = Date.now();
}
} }
} }
var intervalId = setInterval(intervalFunc, 200); // If we haven't had anything to do (scrolled, clicked, etc) in this amount of
// time, then we consider ourselves finished with the page.
var UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC = 10;
// Called from outside of this script.
//
// Returns true once umbraState.idleSince has been set and more than
// UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC seconds have passed since that
// timestamp; returns false otherwise.
var umbraBehaviorFinished = function() {
        // No idle timestamp recorded, so we cannot be finished yet.
        if (umbraState.idleSince == null) {
                return false;
        }
        var idleSec = (Date.now() - umbraState.idleSince) / 1000;
        return idleSec > UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC;
};
var umbraIntervalId = setInterval(umbraIntervalFunc, 200);

View file

@ -1,17 +1,20 @@
//^https?://(?:www\.)?flickr\.com/.*$ // {"url_regex":"^https?://(?:www\\.)?flickr\\.com/.*$", "request_idle_timeout_sec":10}
//
// vim:set sw=8 et:
//
setInterval(function() { window.scrollBy(0,50); }, 100); setInterval(function() { window.scrollBy(0,50); }, 100);
setTimeout(function() { setTimeout(function() {
a = document.evaluate("//a[contains(@class, 'sn-ico-slideshow')]", document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null ); a = document.evaluate("//a[contains(@class, 'sn-ico-slideshow')]", document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null );
f = a.iterateNext();
f.click();},
5000);
setTimeout(function() {
a = document.evaluate("//a[contains(@data-track, 'photo-click')]", document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null );
setInterval(function() {
f = a.iterateNext(); f = a.iterateNext();
f.click(); f.click();
}, 5000); }, 5000);
setTimeout(function() {
a = document.evaluate("//a[contains(@data-track, 'photo-click')]", document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null );
setInterval(function() {
f = a.iterateNext();
f.click();
}, 5000);
}, 5000); }, 5000);

View file

@ -1,7 +1,27 @@
//^https?://(?:www\.)?vimeo.com/.*$ // {"url_regex":"^https?://(?:www\\.)?vimeo\\.com/.*$", "request_idle_timeout_sec":10}
//
// vim:set sw=8 et:
//
var videoElements = document.getElementsByTagName('video'); var umbraState = {'idleSince':null};
for (var i = 0; i < videoElements.length; i++) { var umbraVideoElements = document.getElementsByTagName('video');
videoElements[i].play(); for (var i = 0; i < umbraVideoElements.length; i++) {
umbraVideoElements[i].play();
}
umbraState.idleSince = Date.now();
// If we haven't had anything to do (scrolled, clicked, etc) in this amount of
// time, then we consider ourselves finished with the page.
var UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC = 10;
// Called from outside of this script.
var umbraBehaviorFinished = function() {
if (umbraState.idleSince != null) {
var idleTimeMs = Date.now() - umbraState.idleSince;
if (idleTimeMs / 1000 > UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC) {
return true;
}
}
return false;
} }

View file

@ -1,16 +1,18 @@
# vim: set sw=4 et: # vim: set sw=4 et:
from json import dumps, load import json
from itertools import chain from itertools import chain
import os, re import os
import re
import logging import logging
import time import time
import sys
class Behavior: class Behavior:
logger = logging.getLogger('umbra.behaviors.Behavior') logger = logging.getLogger('umbra.behaviors.Behavior')
_behaviors = None _behaviors = None
_default_behavior_script = None _default_behavior = None
@staticmethod @staticmethod
def behaviors(): def behaviors():
@ -20,21 +22,29 @@ class Behavior:
Behavior._behaviors = [] Behavior._behaviors = []
for file_name in behavior_files: for file_name in behavior_files:
Behavior.logger.debug("reading behavior file {}".format(file_name)) Behavior.logger.debug("reading behavior file {}".format(file_name))
lines = open(file_name).readlines() script = open(file_name, encoding='utf-8').read()
pattern, script = lines[0][2:].strip(), ''.join(lines[1:]) first_line = script[:script.find('\n')]
Behavior._behaviors.append({'url_regex': pattern, 'script': script, 'file': file_name}) behavior = json.loads(first_line[2:].strip())
Behavior.logger.info("will run behaviors from {} to urls matching {}".format(file_name, pattern)) behavior['script'] = script
behavior['file'] = file_name
Behavior._behaviors.append(behavior)
Behavior.logger.info("will run behaviors from {} on urls matching {}".format(file_name, behavior['url_regex']))
return Behavior._behaviors return Behavior._behaviors
@staticmethod @staticmethod
def default_behavior_script(): def default_behavior():
if Behavior._default_behavior_script is None: if Behavior._default_behavior is None:
behaviors_directory = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ['behaviors.d']) behaviors_directory = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ['behaviors.d'])
file_name = os.path.join(behaviors_directory, 'default.js') file_name = os.path.join(behaviors_directory, 'default.js')
Behavior.logger.debug("reading default behavior file {}".format(file_name)) Behavior.logger.debug("reading default behavior file {}".format(file_name))
Behavior._default_behavior_script = open(file_name).read() script = open(file_name, encoding='utf-8').read()
return Behavior._default_behavior_script first_line = script[:script.find('\n')]
behavior = json.loads(first_line[2:].strip())
behavior['script'] = script
behavior['file'] = file_name
Behavior._default_behavior = behavior
return Behavior._default_behavior
def __init__(self, url, websock, command_id): def __init__(self, url, websock, command_id):
self.url = url self.url = url
@ -43,32 +53,37 @@ class Behavior:
self.script_finished = False self.script_finished = False
self.waiting_result_msg_ids = [] self.waiting_result_msg_ids = []
self.active_behavior = None
self.last_activity = time.time()
def start(self): def start(self):
self.notify_of_activity()
script_started = False
for behavior in Behavior.behaviors(): for behavior in Behavior.behaviors():
if re.match(behavior['url_regex'], self.url): if re.match(behavior['url_regex'], self.url):
msg = dumps(dict(method="Runtime.evaluate", params={"expression": behavior['script']}, id=next(self.command_id))) self.active_behavior = behavior
self.logger.debug('sending message to {}: {}'.format(self.websock, msg))
self.websock.send(msg)
script_started = True
break break
if not script_started: if self.active_behavior is None:
msg = dumps(dict(method="Runtime.evaluate", params={"expression": Behavior.default_behavior_script()}, id=next(self.command_id))) self.active_behavior = Behavior.default_behavior()
self.logger.debug('sending message to {}: {}'.format(self.websock, msg))
self.websock.send(msg) msg = json.dumps(dict(method="Runtime.evaluate", params={"expression": self.active_behavior['script']}, id=next(self.command_id)))
self.logger.debug('sending message to {}: {}'.format(self.websock, msg))
self.websock.send(msg)
self.notify_of_activity()
def is_finished(self): def is_finished(self):
msg_id = next(self.command_id) msg_id = next(self.command_id)
self.waiting_result_msg_ids.append(msg_id) self.waiting_result_msg_ids.append(msg_id)
msg = dumps(dict(method="Runtime.evaluate", params={"expression": "umbraBehaviorFinished()"}, id=msg_id)) msg = json.dumps(dict(method="Runtime.evaluate", params={"expression": "umbraBehaviorFinished()"}, id=msg_id))
self.logger.debug('sending message to {}: {}'.format(self.websock, msg)) self.logger.debug('sending message to {}: {}'.format(self.websock, msg))
self.websock.send(msg) self.websock.send(msg)
return self.script_finished # XXX and idle_time > behavior_specified_idle_timeout request_idle_timeout_sec = 30
if self.active_behavior and 'request_idle_timeout_sec' in self.active_behavior:
request_idle_timeout_sec = self.active_behavior['request_idle_timeout_sec']
idle_time = time.time() - self.last_activity
return self.script_finished and idle_time > request_idle_timeout_sec
def is_waiting_on_result(self, msg_id): def is_waiting_on_result(self, msg_id):
return msg_id in self.waiting_result_msg_ids return msg_id in self.waiting_result_msg_ids
@ -87,4 +102,11 @@ class Behavior:
def notify_of_activity(self): def notify_of_activity(self):
self.last_activity = time.time() self.last_activity = time.time()
# When this module is run directly as a script: send DEBUG-level logging to
# stdout, then log the custom behaviors and the default behavior returned by
# Behavior.behaviors() and Behavior.default_behavior().
if __name__ == "__main__":
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG,
format='%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s')
logger = logging.getLogger('umbra.behaviors')
logger.info("custom behaviors: {}".format(Behavior.behaviors()))
logger.info("default behavior: {}".format(Behavior.default_behavior()))