Merge pull request #39 from nlevitt/simple-behaviors

ARI-3775, ARI-3956 Simple behaviors
This commit is contained in:
Hunter 2015-04-16 15:01:49 -07:00
commit 903d2f3107
10 changed files with 110 additions and 54 deletions

View File

@ -9,7 +9,6 @@ def full_version_bytes():
import subprocess, time
try:
commit_bytes = subprocess.check_output(['git', 'log', '-1', '--pretty=format:%h'])
t_bytes = subprocess.check_output(['git', 'log', '-1', '--pretty=format:%ct'])
t = int(t_bytes.strip().decode('utf-8'))
tm = time.gmtime(t)
@ -32,8 +31,8 @@ setuptools.setup(name='umbra',
long_description=open('README.md').read(),
license='Apache License 2.0',
packages=['umbra'],
package_data={'umbra':['behaviors.d/*.js', 'version.txt']},
install_requires=['kombu', 'websocket-client-py3==0.13.1','argparse'],
package_data={'umbra':['behaviors.d/*.js*', 'behaviors.yaml', 'version.txt']},
install_requires=['kombu', 'websocket-client-py3==0.13.1', 'argparse', 'PyYAML'],
scripts=glob.glob('bin/*'),
zip_safe=False,
classifiers=[

View File

@ -1,9 +1,9 @@
// {"request_idle_timeout_sec":10}
//
// vim:set sw=8 et:
//
// Scrolls to the bottom of the page. That's it at the moment.
// Scrolls to the bottom of the page, and clicks on embedded soundcloud
// elements.
//
var umbraAboveBelowOrOnScreen = function(e) {
var eTop = e.getBoundingClientRect().top;
if (eTop < window.scrollY) {

View File

@ -1,7 +1,4 @@
// {"url_regex":"^https?://(?:www\\.)?facebook\\.com/.*$", "request_idle_timeout_sec":30}
//
// vim:set sw=8 et:
//
var umbraAboveBelowOrOnScreen = function(e) {
var eTop = e.getBoundingClientRect().top;

View File

@ -1,7 +1,4 @@
// {"url_regex":"^https?://(?:www\\.)?flickr\\.com/.*$", "request_idle_timeout_sec":10}
//
// vim:set sw=8 et:
//
setInterval(function() { window.scrollBy(0,50); }, 100);

View File

@ -1,5 +1,3 @@
// {"url_regex":"^https?://(?:www\\.)?instagram\\.com/.*$", "request_idle_timeout_sec":10}
//
// vim:set sw=8 et:
//

View File

@ -1,7 +1,4 @@
// {"url_regex":"^https?://(?:www\\.)?marquette\\.edu/.*$", "request_idle_timeout_sec":10}
//
// vim:set sw=8 et:
//
var umbraState = {'idleSince':null};
var umbraIntervalID = setInterval(umbraScrollInterval,50);

View File

@ -0,0 +1,44 @@
// Simple click behavior: on a timer, click each element matching a CSS
// selector once, and keep track of how long it has been since there was
// anything new to click.
//
// This file is a template. The selector field inside the querySelectorAll
// call is filled in by the Python loader (string.Template.substitute) before
// the script is sent to the browser, so do not add other dollar signs to
// this file.
var umbraSimpleClicksBehavior = {
    // Seconds without anything new to click before isFinished() reports true.
    IDLE_TIMEOUT_SEC: 10,

    // Timestamp (ms) of the first tick that found nothing new to click, or
    // null while there is still clicking going on.
    idleSince: null,

    // Set of elements already clicked, keyed by their outerHTML.
    alreadyClicked: {},

    // One polling tick: click the first matching element that has not been
    // clicked yet (at most one per tick); if there is none, start the idle
    // clock unless it is already running.
    intervalFunc: function() {
        var candidates = document.querySelectorAll("${click_css_selector}");
        var index = 0;
        while (index < candidates.length) {
            var element = candidates[index];
            var markup = element.outerHTML;
            if (!this.alreadyClicked[markup]) {
                console.log("clicking on " + markup);
                element.click();
                this.alreadyClicked[markup] = true;
                // Activity happened, so the idle clock is reset.
                this.idleSince = null;
                return;
            }
            index += 1;
        }

        if (!this.idleSince) {
            this.idleSince = Date.now();
        }
    },

    // Begin polling every 250 ms; the timer id is kept on intervalId.
    start: function() {
        var behavior = this;
        this.intervalId = setInterval(function() { behavior.intervalFunc(); }, 250);
    },

    // True once the idle clock has been running for more than
    // IDLE_TIMEOUT_SEC seconds; false otherwise (including while the idle
    // clock is not running).
    isFinished: function() {
        if (this.idleSince == null) {
            return false;
        }
        var idleSeconds = (Date.now() - this.idleSince) / 1000;
        return idleSeconds > this.IDLE_TIMEOUT_SEC;
    }
};
// Called from outside of this script.
// Reports whether the simple click behavior considers itself done, i.e.
// whatever umbraSimpleClicksBehavior.isFinished() returns at that moment.
// NOTE(review): presumably polled by the Python side via Runtime.evaluate;
// confirm against the caller.
var umbraBehaviorFinished = function() { return umbraSimpleClicksBehavior.isFinished() };
// Start polling as soon as this script is evaluated in the page.
umbraSimpleClicksBehavior.start();

View File

@ -1,7 +1,4 @@
// {"url_regex":"^https?://(?:www\\.)?vimeo\\.com/.*$", "request_idle_timeout_sec":10}
//
// vim:set sw=8 et:
//
var umbraState = {'idleSince':null};
var umbraVideoElements = document.getElementsByTagName('video');

View File

@ -7,45 +7,35 @@ import re
import logging
import time
import sys
import yaml
import string
class Behavior:
logger = logging.getLogger(__module__ + "." + __qualname__)
_behaviors = None
_default_behavior = None
@staticmethod
def behaviors():
if Behavior._behaviors is None:
behaviors_directory = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ['behaviors.d'])
behavior_files = itertools.chain(*[[os.path.join(dir, file) for file in files if file.endswith('.js') and file != 'default.js'] for dir, dirs, files in os.walk(behaviors_directory)])
Behavior._behaviors = []
for file_name in behavior_files:
Behavior.logger.debug("reading behavior file {}".format(file_name))
script = open(file_name, encoding='utf-8').read()
first_line = script[:script.find('\n')]
behavior = json.loads(first_line[2:].strip())
behavior['script'] = script
behavior['file'] = file_name
Behavior._behaviors.append(behavior)
Behavior.logger.info("will run behaviors from {} on urls matching {}".format(file_name, behavior['url_regex']))
behaviors_yaml = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ['behaviors.yaml'])
with open(behaviors_yaml) as fin:
conf = yaml.load(fin)
Behavior._behaviors = conf['behaviors']
simpleclicks_js_in = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ["behaviors.d"] + ["simpleclicks.js.in"])
with open(simpleclicks_js_in) as fin:
simpleclicks_js_template = string.Template(fin.read())
for behavior in Behavior._behaviors:
if "behavior_js" in behavior:
behavior_js = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ["behaviors.d"] + [behavior["behavior_js"]])
behavior["script"] = open(behavior_js, encoding="utf-8").read()
elif "click_css_selector" in behavior:
behavior["script"] = simpleclicks_js_template.substitute(click_css_selector=behavior["click_css_selector"])
return Behavior._behaviors
@staticmethod
def default_behavior():
if Behavior._default_behavior is None:
behaviors_directory = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ['behaviors.d'])
file_name = os.path.join(behaviors_directory, 'default.js')
Behavior.logger.debug("reading default behavior file {}".format(file_name))
script = open(file_name, encoding='utf-8').read()
first_line = script[:script.find('\n')]
behavior = json.loads(first_line[2:].strip())
behavior['script'] = script
behavior['file'] = file_name
Behavior._default_behavior = behavior
return Behavior._default_behavior
def __init__(self, url, umbra_worker):
self.url = url
self.umbra_worker = umbra_worker
@ -58,14 +48,18 @@ class Behavior:
def start(self):
for behavior in Behavior.behaviors():
if re.match(behavior['url_regex'], self.url):
if "behavior_js" in behavior:
self.logger.info("using {} behavior for {}".format(behavior["behavior_js"], self.url))
elif "click_css_selector" in behavior:
self.logger.info("using simple click behavior with css selector {} for {}".format(behavior["click_css_selector"], self.url))
self.active_behavior = behavior
break
self.umbra_worker.send_to_chrome(method="Runtime.evaluate",
suppress_logging=True, params={"expression": behavior["script"]})
self.notify_of_activity()
return
if self.active_behavior is None:
self.active_behavior = Behavior.default_behavior()
self.umbra_worker.send_to_chrome(method="Runtime.evaluate", params={"expression": self.active_behavior['script']})
self.notify_of_activity()
self.logger.warn("no behavior to run on {}".format(self.url))
def is_finished(self):
msg_id = self.umbra_worker.send_to_chrome(method="Runtime.evaluate",
@ -102,6 +96,5 @@ if __name__ == "__main__":
format='%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s')
logger = logging.getLogger('umbra.behaviors')
logger.info("custom behaviors: {}".format(Behavior.behaviors()))
logger.info("default behavior: {}".format(Behavior.default_behavior()))

34
umbra/behaviors.yaml Normal file
View File

@ -0,0 +1,34 @@
# first matched behavior is used, so order matters here
behaviors:
-
url_regex: '^https?://(?:www\.)?facebook\.com/.*$'
behavior_js: facebook.js
request_idle_timeout_sec: 30
-
url_regex: '^https?://(?:www\.)?flickr\.com/.*$'
behavior_js: flickr.js
request_idle_timeout_sec: 10
-
url_regex: '^https?://(?:www\.)?marquette\.edu/.*$'
behavior_js: marquette_edu.js
request_idle_timeout_sec: 10
-
url_regex: '^https?://(?:www\.)?vimeo\.com/.*$'
behavior_js: vimeo.js
request_idle_timeout_sec: 10
-
url_regex: '^https?://(?:www\.)?instagram\.com/.*$'
behavior_js: instagram.js
request_idle_timeout_sec: 10
- # acalog https://webarchive.jira.com/browse/ARI-3775
url_regex: '^https?://.*[?&]catoid=[^?]*$'
click_css_selector: a[onclick]
request_idle_timeout_sec: 10
- # https://webarchive.jira.com/browse/ARI-3956
url_regex: '^https?://(?:www\.)?usask.ca/.*$'
click_css_selector: a[id='feature-next']
request_idle_timeout_sec: 10
- # default fallback behavior
url_regex: '^.*$'
request_idle_timeout_sec: 10
behavior_js: default.js