mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 00:29:53 -05:00
support for simple behavior that clicks on elements matching configured css selector; and one such behavior for acalog sites ARI-3775
This commit is contained in:
parent
0647df1ab9
commit
c5c642a990
2
setup.py
2
setup.py
@ -31,7 +31,7 @@ setuptools.setup(name='umbra',
|
|||||||
long_description=open('README.md').read(),
|
long_description=open('README.md').read(),
|
||||||
license='Apache License 2.0',
|
license='Apache License 2.0',
|
||||||
packages=['umbra'],
|
packages=['umbra'],
|
||||||
package_data={'umbra':['behaviors.d/*.js', 'behaviors.yaml', 'version.txt']},
|
package_data={'umbra':['behaviors.d/*.js*', 'behaviors.yaml', 'version.txt']},
|
||||||
install_requires=['kombu', 'websocket-client-py3==0.13.1', 'argparse', 'PyYAML'],
|
install_requires=['kombu', 'websocket-client-py3==0.13.1', 'argparse', 'PyYAML'],
|
||||||
scripts=glob.glob('bin/*'),
|
scripts=glob.glob('bin/*'),
|
||||||
zip_safe=False,
|
zip_safe=False,
|
||||||
|
38
umbra/behaviors.d/simpleclicks.js.in
Normal file
38
umbra/behaviors.d/simpleclicks.js.in
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
// Behavior that clicks, one element per timer tick, every element matching a
// configured css selector, and reports itself finished once it has had
// nothing left to click for IDLE_TIMEOUT_SEC seconds.
//
// This file is a template rather than plain javascript: the
// click_css_selector placeholder in start() is substituted before the script
// is evaluated in the browser, so no other dollar-sign sequences may appear
// anywhere in this file.
var umbraSimpleClicksBehavior = {
    // Seconds that must elapse after the last target was handled before
    // isFinished() returns true.
    IDLE_TIMEOUT_SEC: 10,
    // Date.now() timestamp taken when the targets ran out; null until then.
    idleSince: null,
    // Elements matched by the selector when start() ran.
    clickTargets: [],
    // Index into clickTargets of the next element to click.
    nextClickIndex: 0,
    // Handle returned by setInterval while the click timer is running.
    intervalId: null,

    // Cancels the click timer if one is running.
    stopTimer: function() {
        if (this.intervalId != null) {
            clearInterval(this.intervalId);
            this.intervalId = null;
        }
    },

    // Timer callback: clicks the next pending target, or, when none remain,
    // records the start of the idle period and stops the timer.
    intervalFunc: function() {
        if (this.nextClickIndex < this.clickTargets.length) {
            var target = this.clickTargets[this.nextClickIndex];

            // Advance before clicking, so that a target which throws cannot
            // be retried forever and keep the behavior from ever finishing.
            this.nextClickIndex++;
            this.idleSince = null;

            // Not every matched element has click() (it is defined on
            // HTMLElement, so e.g. svg anchors lack it); skip those.
            if (target && typeof target.click === "function") {
                target.click();
            }
        } else {
            if (this.idleSince == null) {
                this.idleSince = Date.now();
            }
            // Nothing left to do on later ticks, so release the timer.
            this.stopTimer();
        }
    },

    // Collects the click targets and starts clicking them every 50 ms.
    // Calling start() again restarts from the first target instead of
    // leaking the previous timer.
    start: function() {
        this.stopTimer();
        this.nextClickIndex = 0;
        this.idleSince = null;
        this.clickTargets = document.querySelectorAll("${click_css_selector}");

        var that = this;
        this.intervalId = setInterval(function() { that.intervalFunc(); }, 50);
    },

    // Returns true once all targets have been handled and more than
    // IDLE_TIMEOUT_SEC seconds have passed since then; false otherwise.
    isFinished: function() {
        if (this.idleSince == null) {
            return false;
        }
        var idleTimeMs = Date.now() - this.idleSince;
        return idleTimeMs / 1000 > this.IDLE_TIMEOUT_SEC;
    }
};
|
||||||
|
|
||||||
|
// Entry point called from outside of this script; reports whether the
// simple clicks behavior has finished.
var umbraBehaviorFinished = function() {
    return umbraSimpleClicksBehavior.isFinished();
};

// Begin clicking as soon as this script is evaluated.
umbraSimpleClicksBehavior.start();
|
@ -8,6 +8,7 @@ import logging
|
|||||||
import time
|
import time
|
||||||
import sys
|
import sys
|
||||||
import yaml
|
import yaml
|
||||||
|
import string
|
||||||
|
|
||||||
class Behavior:
|
class Behavior:
|
||||||
logger = logging.getLogger(__module__ + "." + __qualname__)
|
logger = logging.getLogger(__module__ + "." + __qualname__)
|
||||||
@ -22,10 +23,16 @@ class Behavior:
|
|||||||
conf = yaml.load(fin)
|
conf = yaml.load(fin)
|
||||||
Behavior._behaviors = conf['behaviors']
|
Behavior._behaviors = conf['behaviors']
|
||||||
|
|
||||||
|
simpleclicks_js_in = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ["behaviors.d"] + ["simpleclicks.js.in"])
|
||||||
|
with open(simpleclicks_js_in) as fin:
|
||||||
|
simpleclicks_js_template = string.Template(fin.read())
|
||||||
|
|
||||||
for behavior in Behavior._behaviors:
|
for behavior in Behavior._behaviors:
|
||||||
if "behavior_js" in behavior:
|
if "behavior_js" in behavior:
|
||||||
behavior_js = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ["behaviors.d"] + [behavior["behavior_js"]])
|
behavior_js = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ["behaviors.d"] + [behavior["behavior_js"]])
|
||||||
behavior["script"] = open(behavior_js, encoding="utf-8").read()
|
behavior["script"] = open(behavior_js, encoding="utf-8").read()
|
||||||
|
elif "click_css_selector" in behavior:
|
||||||
|
behavior["script"] = simpleclicks_js_template.substitute(click_css_selector=behavior["click_css_selector"])
|
||||||
|
|
||||||
return Behavior._behaviors
|
return Behavior._behaviors
|
||||||
|
|
||||||
@ -43,11 +50,15 @@ class Behavior:
|
|||||||
if re.match(behavior['url_regex'], self.url):
|
if re.match(behavior['url_regex'], self.url):
|
||||||
if "behavior_js" in behavior:
|
if "behavior_js" in behavior:
|
||||||
self.logger.info("using {} behavior for {}".format(behavior["behavior_js"], self.url))
|
self.logger.info("using {} behavior for {}".format(behavior["behavior_js"], self.url))
|
||||||
|
elif "click_css_selector" in behavior:
|
||||||
|
self.logger.info("using simple click behavior with css selector {} for {}".format(behavior["click_css_selector"], self.url))
|
||||||
|
|
||||||
self.active_behavior = behavior
|
self.active_behavior = behavior
|
||||||
self.umbra_worker.send_to_chrome(method="Runtime.evaluate",
|
self.umbra_worker.send_to_chrome(method="Runtime.evaluate",
|
||||||
suppress_logging=True, params={"expression": behavior["script"]})
|
suppress_logging=True, params={"expression": behavior["script"]})
|
||||||
self.notify_of_activity()
|
self.notify_of_activity()
|
||||||
return
|
return
|
||||||
|
|
||||||
self.logger.warn("no behavior to run on {}".format(self.url))
|
self.logger.warn("no behavior to run on {}".format(self.url))
|
||||||
|
|
||||||
def is_finished(self):
|
def is_finished(self):
|
||||||
@ -85,6 +96,5 @@ if __name__ == "__main__":
|
|||||||
format='%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s')
|
format='%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s')
|
||||||
logger = logging.getLogger('umbra.behaviors')
|
logger = logging.getLogger('umbra.behaviors')
|
||||||
logger.info("custom behaviors: {}".format(Behavior.behaviors()))
|
logger.info("custom behaviors: {}".format(Behavior.behaviors()))
|
||||||
logger.info("default behavior: {}".format(Behavior.default_behavior()))
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -16,6 +16,10 @@ behaviors:
|
|||||||
url_regex: '^https?://(?:www\.)?vimeo\.com/.*$'
|
url_regex: '^https?://(?:www\.)?vimeo\.com/.*$'
|
||||||
behavior_js: vimeo.js
|
behavior_js: vimeo.js
|
||||||
request_idle_timeout_sec: 10
|
request_idle_timeout_sec: 10
|
||||||
|
- # acalog https://webarchive.jira.com/browse/ARI-3775
|
||||||
|
url_regex: '^https?://.*[?&]catoid=[^?]*$'
|
||||||
|
click_css_selector: a[onclick]
|
||||||
|
request_idle_timeout_sec: 10
|
||||||
-
|
-
|
||||||
url_regex: '^.*$'
|
url_regex: '^.*$'
|
||||||
request_idle_timeout_sec: 10
|
request_idle_timeout_sec: 10
|
||||||
|
Loading…
x
Reference in New Issue
Block a user