mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 08:39:59 -05:00
behaviors.yaml to configure behaviors, in preparation for "simple" behavior support
This commit is contained in:
parent
ffd60d35e6
commit
0647df1ab9
5
setup.py
5
setup.py
@@ -9,7 +9,6 @@ def full_version_bytes():
|
|||||||
import subprocess, time
|
import subprocess, time
|
||||||
try:
|
try:
|
||||||
commit_bytes = subprocess.check_output(['git', 'log', '-1', '--pretty=format:%h'])
|
commit_bytes = subprocess.check_output(['git', 'log', '-1', '--pretty=format:%h'])
|
||||||
|
|
||||||
t_bytes = subprocess.check_output(['git', 'log', '-1', '--pretty=format:%ct'])
|
t_bytes = subprocess.check_output(['git', 'log', '-1', '--pretty=format:%ct'])
|
||||||
t = int(t_bytes.strip().decode('utf-8'))
|
t = int(t_bytes.strip().decode('utf-8'))
|
||||||
tm = time.gmtime(t)
|
tm = time.gmtime(t)
|
||||||
@@ -32,8 +31,8 @@ setuptools.setup(name='umbra',
|
|||||||
long_description=open('README.md').read(),
|
long_description=open('README.md').read(),
|
||||||
license='Apache License 2.0',
|
license='Apache License 2.0',
|
||||||
packages=['umbra'],
|
packages=['umbra'],
|
||||||
package_data={'umbra':['behaviors.d/*.js', 'version.txt']},
|
package_data={'umbra':['behaviors.d/*.js', 'behaviors.yaml', 'version.txt']},
|
||||||
install_requires=['kombu', 'websocket-client-py3==0.13.1','argparse'],
|
install_requires=['kombu', 'websocket-client-py3==0.13.1', 'argparse', 'PyYAML'],
|
||||||
scripts=glob.glob('bin/*'),
|
scripts=glob.glob('bin/*'),
|
||||||
zip_safe=False,
|
zip_safe=False,
|
||||||
classifiers=[
|
classifiers=[
|
||||||
|
@@ -1,5 +1,3 @@
|
|||||||
// {"request_idle_timeout_sec":10}
|
|
||||||
//
|
|
||||||
// vim:set sw=8 et:
|
// vim:set sw=8 et:
|
||||||
//
|
//
|
||||||
// Scrolls to the bottom of the page. That's it at the moment.
|
// Scrolls to the bottom of the page. That's it at the moment.
|
||||||
|
@@ -1,7 +1,4 @@
|
|||||||
// {"url_regex":"^https?://(?:www\\.)?facebook\\.com/.*$", "request_idle_timeout_sec":30}
|
|
||||||
//
|
|
||||||
// vim:set sw=8 et:
|
// vim:set sw=8 et:
|
||||||
//
|
|
||||||
|
|
||||||
var umbraAboveBelowOrOnScreen = function(e) {
|
var umbraAboveBelowOrOnScreen = function(e) {
|
||||||
var eTop = e.getBoundingClientRect().top;
|
var eTop = e.getBoundingClientRect().top;
|
||||||
|
@@ -1,7 +1,4 @@
|
|||||||
// {"url_regex":"^https?://(?:www\\.)?flickr\\.com/.*$", "request_idle_timeout_sec":10}
|
|
||||||
//
|
|
||||||
// vim:set sw=8 et:
|
// vim:set sw=8 et:
|
||||||
//
|
|
||||||
|
|
||||||
setInterval(function() { window.scrollBy(0,50); }, 100);
|
setInterval(function() { window.scrollBy(0,50); }, 100);
|
||||||
|
|
||||||
|
@@ -1,7 +1,4 @@
|
|||||||
// {"url_regex":"^https?://(?:www\\.)?marquette\\.edu/.*$", "request_idle_timeout_sec":10}
|
|
||||||
//
|
|
||||||
// vim:set sw=8 et:
|
// vim:set sw=8 et:
|
||||||
//
|
|
||||||
|
|
||||||
var umbraState = {'idleSince':null};
|
var umbraState = {'idleSince':null};
|
||||||
var umbraIntervalID = setInterval(umbraScrollInterval,50);
|
var umbraIntervalID = setInterval(umbraScrollInterval,50);
|
||||||
|
@@ -1,7 +1,4 @@
|
|||||||
// {"url_regex":"^https?://(?:www\\.)?vimeo\\.com/.*$", "request_idle_timeout_sec":10}
|
|
||||||
//
|
|
||||||
// vim:set sw=8 et:
|
// vim:set sw=8 et:
|
||||||
//
|
|
||||||
|
|
||||||
var umbraState = {'idleSince':null};
|
var umbraState = {'idleSince':null};
|
||||||
var umbraVideoElements = document.getElementsByTagName('video');
|
var umbraVideoElements = document.getElementsByTagName('video');
|
||||||
|
@@ -7,45 +7,28 @@ import re
|
|||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
import sys
|
import sys
|
||||||
|
import yaml
|
||||||
|
|
||||||
class Behavior:
|
class Behavior:
|
||||||
logger = logging.getLogger(__module__ + "." + __qualname__)
|
logger = logging.getLogger(__module__ + "." + __qualname__)
|
||||||
|
|
||||||
_behaviors = None
|
_behaviors = None
|
||||||
_default_behavior = None
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def behaviors():
|
def behaviors():
|
||||||
if Behavior._behaviors is None:
|
if Behavior._behaviors is None:
|
||||||
behaviors_directory = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ['behaviors.d'])
|
behaviors_yaml = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ['behaviors.yaml'])
|
||||||
behavior_files = itertools.chain(*[[os.path.join(dir, file) for file in files if file.endswith('.js') and file != 'default.js'] for dir, dirs, files in os.walk(behaviors_directory)])
|
with open(behaviors_yaml) as fin:
|
||||||
Behavior._behaviors = []
|
conf = yaml.load(fin)
|
||||||
for file_name in behavior_files:
|
Behavior._behaviors = conf['behaviors']
|
||||||
Behavior.logger.debug("reading behavior file {}".format(file_name))
|
|
||||||
script = open(file_name, encoding='utf-8').read()
|
for behavior in Behavior._behaviors:
|
||||||
first_line = script[:script.find('\n')]
|
if "behavior_js" in behavior:
|
||||||
behavior = json.loads(first_line[2:].strip())
|
behavior_js = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ["behaviors.d"] + [behavior["behavior_js"]])
|
||||||
behavior['script'] = script
|
behavior["script"] = open(behavior_js, encoding="utf-8").read()
|
||||||
behavior['file'] = file_name
|
|
||||||
Behavior._behaviors.append(behavior)
|
|
||||||
Behavior.logger.info("will run behaviors from {} on urls matching {}".format(file_name, behavior['url_regex']))
|
|
||||||
|
|
||||||
return Behavior._behaviors
|
return Behavior._behaviors
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def default_behavior():
|
|
||||||
if Behavior._default_behavior is None:
|
|
||||||
behaviors_directory = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ['behaviors.d'])
|
|
||||||
file_name = os.path.join(behaviors_directory, 'default.js')
|
|
||||||
Behavior.logger.debug("reading default behavior file {}".format(file_name))
|
|
||||||
script = open(file_name, encoding='utf-8').read()
|
|
||||||
first_line = script[:script.find('\n')]
|
|
||||||
behavior = json.loads(first_line[2:].strip())
|
|
||||||
behavior['script'] = script
|
|
||||||
behavior['file'] = file_name
|
|
||||||
Behavior._default_behavior = behavior
|
|
||||||
return Behavior._default_behavior
|
|
||||||
|
|
||||||
def __init__(self, url, umbra_worker):
|
def __init__(self, url, umbra_worker):
|
||||||
self.url = url
|
self.url = url
|
||||||
self.umbra_worker = umbra_worker
|
self.umbra_worker = umbra_worker
|
||||||
@@ -58,14 +41,14 @@ class Behavior:
|
|||||||
def start(self):
|
def start(self):
|
||||||
for behavior in Behavior.behaviors():
|
for behavior in Behavior.behaviors():
|
||||||
if re.match(behavior['url_regex'], self.url):
|
if re.match(behavior['url_regex'], self.url):
|
||||||
|
if "behavior_js" in behavior:
|
||||||
|
self.logger.info("using {} behavior for {}".format(behavior["behavior_js"], self.url))
|
||||||
self.active_behavior = behavior
|
self.active_behavior = behavior
|
||||||
break
|
self.umbra_worker.send_to_chrome(method="Runtime.evaluate",
|
||||||
|
suppress_logging=True, params={"expression": behavior["script"]})
|
||||||
if self.active_behavior is None:
|
|
||||||
self.active_behavior = Behavior.default_behavior()
|
|
||||||
|
|
||||||
self.umbra_worker.send_to_chrome(method="Runtime.evaluate", params={"expression": self.active_behavior['script']})
|
|
||||||
self.notify_of_activity()
|
self.notify_of_activity()
|
||||||
|
return
|
||||||
|
self.logger.warn("no behavior to run on {}".format(self.url))
|
||||||
|
|
||||||
def is_finished(self):
|
def is_finished(self):
|
||||||
msg_id = self.umbra_worker.send_to_chrome(method="Runtime.evaluate",
|
msg_id = self.umbra_worker.send_to_chrome(method="Runtime.evaluate",
|
||||||
|
22
umbra/behaviors.yaml
Normal file
22
umbra/behaviors.yaml
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
# first matched behavior is used, so order matters here
|
||||||
|
behaviors:
|
||||||
|
-
|
||||||
|
url_regex: '^https?://(?:www\.)?facebook\.com/.*$'
|
||||||
|
behavior_js: facebook.js
|
||||||
|
request_idle_timeout_sec: 30
|
||||||
|
-
|
||||||
|
url_regex: '^https?://(?:www\.)?flickr\.com/.*$'
|
||||||
|
behavior_js: flickr.js
|
||||||
|
request_idle_timeout_sec: 10
|
||||||
|
-
|
||||||
|
url_regex: '^https?://(?:www\.)?marquette\.edu/.*$'
|
||||||
|
behavior_js: marquette_edu.js
|
||||||
|
request_idle_timeout_sec: 10
|
||||||
|
-
|
||||||
|
url_regex: '^https?://(?:www\.)?vimeo\.com/.*$'
|
||||||
|
behavior_js: vimeo.js
|
||||||
|
request_idle_timeout_sec: 10
|
||||||
|
-
|
||||||
|
url_regex: '^.*$'
|
||||||
|
request_idle_timeout_sec: 10
|
||||||
|
behavior_js: default.js
|
Loading…
x
Reference in New Issue
Block a user