Change the magic first line of behavior JS files to a commented-out JSON blob, which should include the fields 'url_regex' and 'request_idle_timeout_sec'; behavior.is_finished() now incorporates the custom idle timeout into its check. Also rename variables in behavior scripts with an umbra/UMBRA_ prefix to loosely namespace them, and add "finished" logic to the facebook and vimeo behaviors (flickr needs work to support it).

This commit is contained in:
Noah Levitt 2014-05-05 11:58:55 -07:00
parent 2a9633ad77
commit a62a07e6b7
5 changed files with 130 additions and 67 deletions

View File

@ -1,23 +1,13 @@
// {"request_idle_timeout_sec":10}
//
// vim:set sw=8 et:
//
// Scrolls to the bottom of the page. That's it at the moment.
//
// STATES = ['NASCENT', 'NEED_SCROLL', 'WAITING', 'FINISHED']
// var transition = prepareTransition(state);
// if (transition.callback) {
// newState.callback(state, newState);
// }
// state = newState;
// if (state.status === 'NASCENT') {
// } else if (state.status == 'NEED_SCROLL') {
// } else if (state.status == 'FINISHED') {
var UMBRA_FINISH_AFTER_IDLE_TIME = 10 * 1000; // ms
var umbraState = {'idleSince':null};
var umbraFinished = false;
var umbraIntervalFunc = function() {
// var needToScroll = (window.scrollY + window.innerHeight + 10 < document.body.clientHeight);
// var needToScroll = (document.documentElement.scrollTop + document.documentElement.clientHeight < document.documentElement.scrollHeight);
var needToScroll = (window.scrollY + window.innerHeight < document.documentElement.scrollHeight);
// console.log('intervalFunc umbraState.idleSince=' + umbraState.idleSince + ' needToScroll=' + needToScroll + ' window.scrollY=' + window.scrollY + ' window.innerHeight=' + window.innerHeight + ' document.documentElement.scrollHeight=' + document.documentElement.scrollHeight);
@ -29,10 +19,15 @@ var umbraIntervalFunc = function() {
}
}
// If we haven't had anything to do (scrolled, clicked, etc) in this amount of
// time, then we consider ourselves finished with the page.
var UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC = 10;
// Called from outside of this script.
var umbraBehaviorFinished = function() {
if (umbraState.idleSince != null) {
var idleTime = Date.now() - umbraState.idleSince;
if (idleTime > UMBRA_FINISH_AFTER_IDLE_TIME) {
var idleTimeMs = Date.now() - umbraState.idleSince;
if (idleTimeMs / 1000 > UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC) {
return true;
}
}

View File

@ -1,7 +1,9 @@
//^https?://(?:www\.)?facebook\.com/.*$
// {"url_regex":"^https?://(?:www\\.)?facebook\\.com/.*$", "request_idle_timeout_sec":30}
//
// vim:set sw=8 et:
//
var aboveBelowOrOnScreen = function(e) {
var umbraAboveBelowOrOnScreen = function(e) {
var eTop = e.getBoundingClientRect().top;
if (eTop < window.scrollY) {
return -1; // above
@ -13,11 +15,11 @@ var aboveBelowOrOnScreen = function(e) {
}
// comments - 'a.UFIPagerLink > span, a.UFIPagerLink, span.UFIReplySocialSentenceLinkText'
var THINGS_TO_CLICK_SELECTOR = 'a[href^="/browse/likes"], *[rel="theater"]';
var alreadyClicked = {};
var intervalId;
var UMBRA_THINGS_TO_CLICK_SELECTOR = 'a[href^="/browse/likes"], *[rel="theater"]';
var umbraAlreadyClicked = {};
var umbraState = {'idleSince':null};
var intervalFunc = function() {
var umbraIntervalFunc = function() {
var closeButton = document.querySelector('a[title="Close"]');
if (closeButton) {
console.log("clicking close button " + closeButton.outerHTML);
@ -31,15 +33,15 @@ var intervalFunc = function() {
return;
}
var thingsToClick = document.querySelectorAll(THINGS_TO_CLICK_SELECTOR);
var thingsToClick = document.querySelectorAll(UMBRA_THINGS_TO_CLICK_SELECTOR);
var clickedSomething = false;
var somethingLeftBelow = false;
var missedAbove = 0;
for (var i = 0; i < thingsToClick.length; i++) {
var target = thingsToClick[i];
if (!(target in alreadyClicked)) {
var where = aboveBelowOrOnScreen(target);
if (!(target in umbraAlreadyClicked)) {
var where = umbraAboveBelowOrOnScreen(target);
if (where == 0) { // on screen
// var pos = target.getBoundingClientRect().top;
// window.scrollTo(0, target.getBoundingClientRect().top - 100);
@ -48,8 +50,9 @@ var intervalFunc = function() {
target.click();
}
target.style.border = '1px solid #0a0';
alreadyClicked[target] = true;
umbraAlreadyClicked[target] = true;
clickedSomething = true;
umbraState.idleSince = null;
break;
} else if (where > 0) {
somethingLeftBelow = true;
@ -67,11 +70,31 @@ var intervalFunc = function() {
if (somethingLeftBelow) {
console.log("scrolling because everything on this screen has been clicked but there's more below document.body.clientHeight=" + document.body.clientHeight);
window.scrollBy(0, 200);
umbraState.idleSince = null;
} else if (window.scrollY + window.innerHeight + 10 < document.body.clientHeight) {
console.log("scrolling because we're not to the bottom yet document.body.clientHeight=" + document.body.clientHeight);
window.scrollBy(0, 200);
}
umbraState.idleSince = null;
} else if (umbraState.idleSince == null) {
umbraState.idleSince = Date.now();
}
}
}
var intervalId = setInterval(intervalFunc, 200);
// Idle threshold, in seconds: once this long has passed with no scroll, click
// or other action by this script, the page is considered done.
var UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC = 10;

// Polled from outside of this script. Returns true only when an idle period
// is in progress (umbraState.idleSince is set) and has lasted longer than
// UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC; returns false otherwise.
var umbraBehaviorFinished = function() {
    var idleStart = umbraState.idleSince;
    if (idleStart == null) {
        // No idle period has started, so the script is still working.
        return false;
    }
    var idleSec = (Date.now() - idleStart) / 1000;
    return idleSec > UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC;
};
var umbraIntervalId = setInterval(umbraIntervalFunc, 200);

View File

@ -1,17 +1,20 @@
//^https?://(?:www\.)?flickr\.com/.*$
// {"url_regex":"^https?://(?:www\\.)?flickr\\.com/.*$", "request_idle_timeout_sec":10}
//
// vim:set sw=8 et:
//
setInterval(function() { window.scrollBy(0,50); }, 100);
setTimeout(function() {
a = document.evaluate("//a[contains(@class, 'sn-ico-slideshow')]", document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null );
f = a.iterateNext();
f.click();},
5000);
setTimeout(function() {
a = document.evaluate("//a[contains(@data-track, 'photo-click')]", document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null );
setInterval(function() {
a = document.evaluate("//a[contains(@class, 'sn-ico-slideshow')]", document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null );
f = a.iterateNext();
f.click();
}, 5000);
}, 5000);
setTimeout(function() {
a = document.evaluate("//a[contains(@data-track, 'photo-click')]", document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null );
setInterval(function() {
f = a.iterateNext();
f.click();
}, 5000);
}, 5000);

View File

@ -1,7 +1,27 @@
//^https?://(?:www\.)?vimeo.com/.*$
// {"url_regex":"^https?://(?:www\\.)?vimeo\\.com/.*$", "request_idle_timeout_sec":10}
//
// vim:set sw=8 et:
//
var videoElements = document.getElementsByTagName('video');
for (var i = 0; i < videoElements.length; i++) {
videoElements[i].play();
// Script-wide state. idleSince holds the Date.now() timestamp from which idle
// time is measured, or null when no idle period has started.
var umbraState = {'idleSince':null};
// Start playback on every <video> element present in the document right now.
var umbraVideoElements = document.getElementsByTagName('video');
for (var i = 0; i < umbraVideoElements.length; i++) {
umbraVideoElements[i].play();
}
// Starting playback is the only action this script takes, so the idle period
// begins immediately.
umbraState.idleSince = Date.now();
// Idle threshold, in seconds: once this long has passed with no action taken
// by this script, the page is considered done.
var UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC = 10;

// Polled from outside of this script. Returns true only when
// umbraState.idleSince is set and more than
// UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC seconds have elapsed since then;
// returns false otherwise.
var umbraBehaviorFinished = function() {
    var idleStart = umbraState.idleSince;
    if (idleStart == null) {
        // No idle period recorded yet.
        return false;
    }
    var idleSec = (Date.now() - idleStart) / 1000;
    return idleSec > UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC;
};

View File

@ -1,16 +1,18 @@
# vim: set sw=4 et:
from json import dumps, load
import json
from itertools import chain
import os, re
import os
import re
import logging
import time
import sys
class Behavior:
logger = logging.getLogger('umbra.behaviors.Behavior')
_behaviors = None
_default_behavior_script = None
_default_behavior = None
@staticmethod
def behaviors():
@ -20,21 +22,29 @@ class Behavior:
Behavior._behaviors = []
for file_name in behavior_files:
Behavior.logger.debug("reading behavior file {}".format(file_name))
lines = open(file_name).readlines()
pattern, script = lines[0][2:].strip(), ''.join(lines[1:])
Behavior._behaviors.append({'url_regex': pattern, 'script': script, 'file': file_name})
Behavior.logger.info("will run behaviors from {} to urls matching {}".format(file_name, pattern))
script = open(file_name, encoding='utf-8').read()
first_line = script[:script.find('\n')]
behavior = json.loads(first_line[2:].strip())
behavior['script'] = script
behavior['file'] = file_name
Behavior._behaviors.append(behavior)
Behavior.logger.info("will run behaviors from {} on urls matching {}".format(file_name, behavior['url_regex']))
return Behavior._behaviors
@staticmethod
def default_behavior_script():
if Behavior._default_behavior_script is None:
def default_behavior():
if Behavior._default_behavior is None:
behaviors_directory = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ['behaviors.d'])
file_name = os.path.join(behaviors_directory, 'default.js')
Behavior.logger.debug("reading default behavior file {}".format(file_name))
Behavior._default_behavior_script = open(file_name).read()
return Behavior._default_behavior_script
script = open(file_name, encoding='utf-8').read()
first_line = script[:script.find('\n')]
behavior = json.loads(first_line[2:].strip())
behavior['script'] = script
behavior['file'] = file_name
Behavior._default_behavior = behavior
return Behavior._default_behavior
def __init__(self, url, websock, command_id):
self.url = url
@ -43,32 +53,37 @@ class Behavior:
self.script_finished = False
self.waiting_result_msg_ids = []
self.active_behavior = None
self.last_activity = time.time()
def start(self):
self.notify_of_activity()
script_started = False
for behavior in Behavior.behaviors():
if re.match(behavior['url_regex'], self.url):
msg = dumps(dict(method="Runtime.evaluate", params={"expression": behavior['script']}, id=next(self.command_id)))
self.logger.debug('sending message to {}: {}'.format(self.websock, msg))
self.websock.send(msg)
script_started = True
self.active_behavior = behavior
break
if not script_started:
msg = dumps(dict(method="Runtime.evaluate", params={"expression": Behavior.default_behavior_script()}, id=next(self.command_id)))
self.logger.debug('sending message to {}: {}'.format(self.websock, msg))
self.websock.send(msg)
if self.active_behavior is None:
self.active_behavior = Behavior.default_behavior()
msg = json.dumps(dict(method="Runtime.evaluate", params={"expression": self.active_behavior['script']}, id=next(self.command_id)))
self.logger.debug('sending message to {}: {}'.format(self.websock, msg))
self.websock.send(msg)
self.notify_of_activity()
def is_finished(self):
msg_id = next(self.command_id)
self.waiting_result_msg_ids.append(msg_id)
msg = dumps(dict(method="Runtime.evaluate", params={"expression": "umbraBehaviorFinished()"}, id=msg_id))
msg = json.dumps(dict(method="Runtime.evaluate", params={"expression": "umbraBehaviorFinished()"}, id=msg_id))
self.logger.debug('sending message to {}: {}'.format(self.websock, msg))
self.websock.send(msg)
return self.script_finished # XXX and idle_time > behavior_specified_idle_timeout
request_idle_timeout_sec = 30
if self.active_behavior and 'request_idle_timeout_sec' in self.active_behavior:
request_idle_timeout_sec = self.active_behavior['request_idle_timeout_sec']
idle_time = time.time() - self.last_activity
return self.script_finished and idle_time > request_idle_timeout_sec
# Returns True when msg_id is one of the message ids recorded in
# self.waiting_result_msg_ids, i.e. a result for that message is still awaited.
def is_waiting_on_result(self, msg_id):
return msg_id in self.waiting_result_msg_ids
@ -87,4 +102,11 @@ class Behavior:
# Stamps self.last_activity with the current time.time() value, which resets
# the point from which idle time is measured.
def notify_of_activity(self):
self.last_activity = time.time()
# Manual smoke test: when this module is run directly, configure DEBUG-level
# logging to stdout, then log the loaded custom behaviors and the default
# behavior at INFO level.
if __name__ == "__main__":
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG,
format='%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s')
logger = logging.getLogger('umbra.behaviors')
logger.info("custom behaviors: {}".format(Behavior.behaviors()))
logger.info("default behavior: {}".format(Behavior.default_behavior()))