mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-07-22 14:30:53 -04:00
simpledo first draft
This commit is contained in:
parent
76b658747e
commit
d861888864
2 changed files with 223 additions and 84 deletions
140
brozzler/behaviors.d/simpledo.js.template
Normal file
140
brozzler/behaviors.d/simpledo.js.template
Normal file
|
@ -0,0 +1,140 @@
|
||||||
|
/*
|
||||||
|
* brozzler/behaviors.d/simpledo.js.template - simpledo behavior template,
|
||||||
|
* acting on elements matching templatized css selector,
|
||||||
|
* based on simpleclicks.js.template and mouseovers.js.template
|
||||||
|
*
|
||||||
|
* Copyright (C) 2016 Internet Archive
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Behavior object that repeatedly looks for elements matching a templated
 * css selector (in the top document and in reachable iframes), fires a
 * mouseover on one on-screen match per tick (plus a click when the templated
 * action is "click"), scrolls when matches remain off screen, and records
 * when it last had something to do so that isFinished() can report idleness.
 *
 * NOTE(review): this file is a template whose placeholders are substituted
 * before the script runs, so avoid introducing new dollar signs in code or
 * comments -- confirm against the template renderer.
 */
var umbraBehavior = {
    // seconds without activity after which isFinished() returns true
    IDLE_TIMEOUT_SEC : 10,
    // timestamp (ms since epoch) of the start of the current idle period
    idleSince : null,

    /*
     * Returns an array holding the top-level document followed by the
     * document of every iframe that can be read from this page.
     *
     * Reading contentWindow.document of a cross-origin iframe throws; such
     * frames are skipped so that a single foreign iframe cannot abort every
     * tick of intervalFunc (which would leave idleSince unset forever).
     */
    accessibleDocuments : function() {
        var documents = [document];
        var iframes = document.querySelectorAll("iframe");

        for (var i = 0; i < iframes.length; i++) {
            try {
                var frameDocument = iframes[i].contentWindow.document;
                if (frameDocument) {
                    documents.push(frameDocument);
                }
            } catch (e) {
                // deliberately ignored: frame is not accessible from here
                // (e.g. cross-origin), so there is nothing we can act on
            }
        }

        return documents;
    },

    /*
     * One tick of the behavior, run periodically by start().
     *
     * For each accessible document, acts on the first on-screen target that
     * has not been done yet (or on any on-screen target when the templated
     * "until hard timeout" flag is "True"). If nothing was done in any
     * document, scrolls toward remaining targets or toward the bottom of the
     * page. Always leaves idleSince set to a timestamp when it returns.
     */
    intervalFunc : function() {
        var didSomething = false;
        var somethingLeftBelow = false;
        var somethingLeftAbove = false;
        var cssSelector = "${sdo_css_selector}";
        var doAction = "${sdo_action}"; // currently supports click, mouseover
        // handle Python to JavaScript boolean conversion: the rendered value
        // is the string "True" or "False"
        var doUntilTimeout = ("${sdo_until_hard_timeout}" === "True");

        var documents = this.accessibleDocuments();

        for (var j = 0; j < documents.length; j++) {

            var doTargets = documents[j].querySelectorAll(cssSelector);

            for (var i = 0; i < doTargets.length; i++) {
                if (doTargets[i].umbraDone && !doUntilTimeout) {
                    continue;
                }

                var where = this.aboveBelowOrOnScreen(doTargets[i]);

                if (where == 0) {
                    console.log("doing " + doAction + doTargets[i].outerHTML);
                    // do mouse over event on target
                    // since some urls are requested only on
                    // this event - see
                    // https://webarchive.jira.com/browse/AITFIVE-451
                    var mouseOverEvent = document.createEvent('Events');
                    mouseOverEvent.initEvent('mouseover', true, false);
                    doTargets[i].dispatchEvent(mouseOverEvent);

                    if (doAction == 'click') {
                        doTargets[i].click();
                    } // add new do's here!

                    didSomething = true;
                    this.idleSince = null;
                    doTargets[i].umbraDone = true;

                    break; // break from doTargets loop (not from iframe loop)
                } else if (where > 0) {
                    somethingLeftBelow = true;
                } else if (where < 0) {
                    somethingLeftAbove = true;
                }
            }
        }

        if (!didSomething) {
            if (somethingLeftAbove) {
                // everything on this screen has been done but we missed
                // something above
                window.scrollBy(0, -500);
                this.idleSince = null;
            } else if (somethingLeftBelow) {
                // everything on this screen has been done but there's more
                // below
                window.scrollBy(0, 200);
                this.idleSince = null;
            } else if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) {
                // not at the bottom of the page yet
                window.scrollBy(0, 200);
                this.idleSince = null;
            } else if (this.idleSince == null) {
                this.idleSince = Date.now();
            }
        }

        // any activity above reset idleSince to null; restart the idle clock
        // from now so isFinished() measures time since the last activity
        if (!this.idleSince) {
            this.idleSince = Date.now();
        }
    },

    /*
     * Starts the behavior: runs intervalFunc every 250 ms and remembers the
     * interval id in this.intervalId so isFinished() can cancel it.
     */
    start : function() {
        var that = this;
        this.intervalId = setInterval(function() {
            that.intervalFunc();
        }, 250);
    },

    /*
     * Returns true once the behavior has been idle for more than
     * IDLE_TIMEOUT_SEC seconds, cancelling the interval at that point;
     * returns false otherwise (including before the first tick).
     */
    isFinished : function() {
        if (this.idleSince != null) {
            var idleTimeMs = Date.now() - this.idleSince;
            if (idleTimeMs / 1000 > this.IDLE_TIMEOUT_SEC) {
                clearInterval(this.intervalId);
                return true;
            }
        }
        return false;
    },

    /*
     * Classifies element e by the position of its top edge relative to the
     * visible viewport: -1 if above, 1 if below, 0 if on screen.
     *
     * getBoundingClientRect() is already relative to the viewport, so the
     * top edge is compared with 0 and window.innerHeight directly; adding
     * window.scrollY would misclassify visible elements after any scroll.
     * NOTE(review): for elements inside an iframe the rectangle is relative
     * to that iframe's own viewport, so this is only an approximation there.
     */
    aboveBelowOrOnScreen : function(e) {
        var eTop = e.getBoundingClientRect().top;
        if (eTop < 0) {
            return -1; // above
        } else if (eTop > window.innerHeight) {
            return 1; // below
        } else {
            return 0; // on screen
        }
    },
};
|
||||||
|
|
||||||
|
/*
 * Hook invoked from outside of this script to ask whether the behavior is
 * done; simply forwards to umbraBehavior.isFinished().
 */
function umbraBehaviorFinished() {
    var finished = umbraBehavior.isFinished();
    return finished;
}
|
||||||
|
|
||||||
|
// Kick off the behavior as soon as the script is evaluated: start()
// schedules intervalFunc to run every 250 ms.
umbraBehavior.start();
|
|
@ -1,7 +1,7 @@
|
||||||
#
|
#
|
||||||
# brozzler/behaviors.yaml - behavior configuration
|
# brozzler/behaviors.yaml - behavior configuration
|
||||||
#
|
#
|
||||||
# Copyright (C) 2014-2016 Internet Archive
|
# Copyright (C) 2014-2017 Internet Archive
|
||||||
#
|
#
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
# you may not use this file except in compliance with the License.
|
# you may not use this file except in compliance with the License.
|
||||||
|
@ -17,86 +17,85 @@
|
||||||
#
|
#
|
||||||
|
|
||||||
# first matched behavior is used, so order matters here
|
# first matched behavior is used, so order matters here
|
||||||
-
|
behaviors:
|
||||||
|
-
|
||||||
url_regex: '^https?://(?:www\.)?facebook\.com/.*$'
|
url_regex: '^https?://(?:www\.)?facebook\.com/.*$'
|
||||||
behavior_js_template: facebook.js
|
behavior_js_template: facebook.js.template
|
||||||
|
# default_parameters:
|
||||||
|
# parameter_username: jdoe@example.com
|
||||||
|
# parameter_password: abcd1234
|
||||||
request_idle_timeout_sec: 30
|
request_idle_timeout_sec: 30
|
||||||
-
|
-
|
||||||
url_regex: '^https?://(?:www\.)?marquette\.edu/.*$'
|
url_regex: '^https?://(?:www\.)?marquette\.edu/.*$'
|
||||||
behavior_js_template: marquette_edu.js
|
behavior_js: marquette_edu.js
|
||||||
request_idle_timeout_sec: 10
|
request_idle_timeout_sec: 10
|
||||||
-
|
-
|
||||||
url_regex: '^https?://(?:www\.)?vimeo\.com/.*$'
|
url_regex: '^https?://(?:www\.)?vimeo\.com/.*$'
|
||||||
behavior_js_template: vimeo.js
|
behavior_js: vimeo.js
|
||||||
request_idle_timeout_sec: 10
|
request_idle_timeout_sec: 10
|
||||||
-
|
-
|
||||||
url_regex: '^https?://(?:www\.)?psu24.psu.edu/.*$'
|
url_regex: '^https?://(?:www\.)?psu24.psu.edu/.*$'
|
||||||
behavior_js_template: psu24.js
|
behavior_js: psu24.js
|
||||||
request_idle_timeout_sec: 10
|
request_idle_timeout_sec: 10
|
||||||
-
|
-
|
||||||
url_regex: '^https?://(?:www\.)?instagram\.com/.*$'
|
url_regex: '^https?://(?:www\.)?instagram\.com/.*$'
|
||||||
behavior_js_template: instagram.js
|
behavior_js: instagram.js
|
||||||
request_idle_timeout_sec: 10
|
request_idle_timeout_sec: 10
|
||||||
-
|
-
|
||||||
url_regex: '^https?://(?:www\.)?brooklynmuseum\.org/exhibitions/.*$'
|
url_regex: '^https?://(?:www\.)?brooklynmuseum\.org/exhibitions/.*$'
|
||||||
behavior_js_template: simpleclicks.js.j2
|
behavior_js_template: simpleclicks.js.template
|
||||||
default_parameters:
|
default_parameters:
|
||||||
click_css_selector: img.img-responsive
|
click_css_selector: img.img-responsive
|
||||||
click_until_hard_timeout: False
|
|
||||||
request_idle_timeout_sec: 10
|
request_idle_timeout_sec: 10
|
||||||
- # acalog https://webarchive.jira.com/browse/ARI-3775
|
- # acalog https://webarchive.jira.com/browse/ARI-3775
|
||||||
url_regex: '^https?://.*[?&]catoid=[^?]*$'
|
url_regex: '^https?://.*[?&]catoid=[^?]*$'
|
||||||
behavior_js_template: simpleclicks.js.j2
|
behavior_js_template: simpleclicks.js.template
|
||||||
default_parameters:
|
default_parameters:
|
||||||
click_css_selector: a[onclick]
|
click_css_selector: a[onclick]
|
||||||
click_until_hard_timeout: False
|
|
||||||
request_idle_timeout_sec: 10
|
request_idle_timeout_sec: 10
|
||||||
- # https://webarchive.jira.com/browse/ARI-3956
|
- # https://webarchive.jira.com/browse/ARI-3956
|
||||||
url_regex: '^https?://(?:www\.)?usask.ca/.*$'
|
url_regex: '^https?://(?:www\.)?usask.ca/.*$'
|
||||||
behavior_js_template: simpleclicks.js.j2
|
behavior_js_template: simpleclicks.js.template
|
||||||
default_parameters:
|
default_parameters:
|
||||||
click_css_selector: a[id='feature-next']
|
click_css_selector: a[id='feature-next']
|
||||||
click_until_hard_timeout: False
|
|
||||||
request_idle_timeout_sec: 10
|
request_idle_timeout_sec: 10
|
||||||
- # https://webarchive.jira.com/browse/AITFIVE-451
|
- # https://webarchive.jira.com/browse/AITFIVE-451
|
||||||
url_regex: '^https?://(?:www\.)?soundcloud.com/.*$'
|
url_regex: '^https?://(?:www\.)?soundcloud.com/.*$'
|
||||||
behavior_js_template: simpleclicks.js.j2
|
behavior_js_template: simpledo.js.template
|
||||||
default_parameters:
|
default_parameters:
|
||||||
click_css_selector: button.sc-button-play, button.playButton
|
sdo_css_selector: button.sc-button-play, button.playButton
|
||||||
click_until_hard_timeout: False
|
sdo_action: click
|
||||||
request_idle_timeout_sec: 10
|
request_idle_timeout_sec: 10
|
||||||
- # https://webarchive.jira.com/browse/AITFIVE-463
|
- # https://webarchive.jira.com/browse/AITFIVE-463
|
||||||
url_regex: '^https?://(?:www\.)?christophercerrone.com/.*$'
|
url_regex: '^https?://(?:www\.)?christophercerrone.com/.*$'
|
||||||
behavior_js_template: simpleclicks.js.j2
|
behavior_js_template: simpleclicks.js.template
|
||||||
default_parameters:
|
default_parameters:
|
||||||
click_css_selector: button.playButton.medium
|
click_css_selector: button.playButton.medium
|
||||||
click_until_hard_timeout: False
|
|
||||||
request_idle_timeout_sec: 10
|
request_idle_timeout_sec: 10
|
||||||
- # https://webarchive.jira.com/browse/ARI-4690
|
- # https://webarchive.jira.com/browse/ARI-4690
|
||||||
url_regex: '^https?://(?:www\.)?youtube.com/.*$'
|
url_regex: '^https?://(?:www\.)?youtube.com/.*$'
|
||||||
behavior_js_template: simpleclicks.js.j2
|
behavior_js_template: simpleclicks.js.template
|
||||||
default_parameters:
|
default_parameters:
|
||||||
click_css_selector: span.load-more-text
|
click_css_selector: span.load-more-text
|
||||||
click_until_hard_timeout: False
|
|
||||||
request_idle_timeout_sec: 10
|
request_idle_timeout_sec: 10
|
||||||
- # https://webarchive.jira.com/browse/ARI-4725
|
- # https://webarchive.jira.com/browse/ARI-4725
|
||||||
url_regex: '^https?://(?:www\.)?moma.org/.*$'
|
url_regex: '^https?://(?:www\.)?moma.org/.*$'
|
||||||
behavior_js_template: simpleclicks.js.j2
|
behavior_js_template: simpleclicks.js.template
|
||||||
default_parameters:
|
default_parameters:
|
||||||
click_css_selector: button[data-more-results-bottom-button]
|
click_css_selector: button[data-more-results-bottom-button]
|
||||||
click_until_hard_timeout: True
|
click_until_hard_timeout: True
|
||||||
request_idle_timeout_sec: 10
|
request_idle_timeout_sec: 10
|
||||||
- # https://webarchive.jira.com/browse/ARI-4692
|
- # https://webarchive.jira.com/browse/ARI-4692
|
||||||
url_regex: '^https?://(?:www\.)?fec.gov/data/.*$'
|
url_regex: '^https?://(?:www\.)?fec.gov/data/.*$'
|
||||||
behavior_js_template: fec_gov.js
|
behavior_js: fec_gov.js
|
||||||
request_idle_timeout_sec: 10
|
request_idle_timeout_sec: 10
|
||||||
- url_regex: '^https?://(?:www\.)?news\.com\.au/.*$'
|
- url_regex: '^https?://(?:www\.)?news\.com\.au/.*$'
|
||||||
behavior_js_template: mouseovers.js.j2
|
behavior_js_template: simpledo.js.template
|
||||||
default_parameters:
|
default_parameters:
|
||||||
mouseover_css_selector: .menu-item a
|
sdo_css_selector: .menu-item a
|
||||||
mouseover_until_hard_timeout: False
|
sdo_action: mouseover
|
||||||
request_idle_timeout_sec: 10
|
request_idle_timeout_sec: 10
|
||||||
- # default fallback behavior
|
- # default fallback behavior
|
||||||
url_regex: '^.*$'
|
url_regex: '^.*$'
|
||||||
request_idle_timeout_sec: 10
|
request_idle_timeout_sec: 10
|
||||||
behavior_js_template: default.js
|
behavior_js: default.js
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue