simpledo first draft

This commit is contained in:
Barbara Miller 2016-12-10 17:23:01 -08:00
parent d68053764c
commit 013adbc0c4
2 changed files with 146 additions and 4 deletions

View File

@ -0,0 +1,140 @@
/*
* brozzler/behaviors.d/simpledo.js.template - simpledo behavior template,
* acting on elements matching templatized css selector,
* based on simpleclicks.js.template and mouseovers.js.template
*
* Copyright (C) 2016 Internet Archive
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
 * Templated page behavior: on a timer, finds elements matching a css selector
 * in the top document and in every iframe document that can be read, performs
 * the configured action on the ones currently in view, and scrolls the page
 * to reach the rest. The "${...}" placeholders are filled in by the template
 * engine before the script runs.
 */
var umbraBehavior = {
    // Seconds without activity after which isFinished() reports true.
    IDLE_TIMEOUT_SEC : 10,
    // Date.now() timestamp of the most recent activity (acting on a target or
    // scrolling); null until the first tick has run.
    idleSince : null,

    /*
     * One tick of the behavior. Acts on at most one on-screen target per
     * document; if nothing was done, scrolls towards targets that are off
     * screen or towards the bottom of the page.
     */
    intervalFunc : function() {
        var didSomething = false;
        var somethingLeftBelow = false;
        var somethingLeftAbove = false;
        var cssSelector = "${sdo_css_selector}";
        var doAction = "${sdo_action}"; // currently supports click, mouseover
        // The template value is a Python boolean rendered as text ("True" or
        // "False"); anything other than "True" counts as false.
        var doUntilTimeout = "${sdo_until_hard_timeout}" === "True";

        var documents = [document];
        var iframes = document.querySelectorAll("iframe");
        for (var f = 0; f < iframes.length; f++) {
            // Reading the document of a cross-origin iframe throws. Skip such
            // frames instead of letting the exception abort the whole tick,
            // which would also keep the idle timer from ever being updated.
            try {
                var frameDocument = iframes[f].contentWindow.document;
                if (frameDocument) {
                    documents.push(frameDocument);
                }
            } catch (e) {
                // inaccessible iframe: nothing we can do with it
            }
        }

        for (var j = 0; j < documents.length; j++) {
            var doTargets = documents[j].querySelectorAll(cssSelector);
            for (var i = 0; i < doTargets.length; i++) {
                // Targets already acted on are skipped, unless configured to
                // keep repeating the action until the hard timeout.
                if (doTargets[i].umbraDone && !doUntilTimeout) {
                    continue;
                }
                // NOTE(review): for targets inside an iframe the position is
                // relative to that iframe's viewport, not the top window's -
                // confirm this is acceptable.
                var where = this.aboveBelowOrOnScreen(doTargets[i]);
                if (where === 0) {
                    console.log("doing " + doAction + doTargets[i].outerHTML);
                    // do mouse over event on target
                    // since some urls are requested only on
                    // this event - see
                    // https://webarchive.jira.com/browse/AITFIVE-451
                    var mouseOverEvent = document.createEvent('Events');
                    mouseOverEvent.initEvent('mouseover', true, false);
                    doTargets[i].dispatchEvent(mouseOverEvent);
                    if (doAction === 'click') {
                        doTargets[i].click();
                    } // add new do's here!
                    didSomething = true;
                    this.idleSince = null;
                    doTargets[i].umbraDone = true;
                    break; // break from doTargets loop (not from iframe loop)
                } else if (where > 0) {
                    somethingLeftBelow = true;
                } else if (where < 0) {
                    somethingLeftAbove = true;
                }
            }
        }

        if (!didSomething) {
            if (somethingLeftAbove) {
                // everything on this screen has been done but we missed
                // something above
                window.scrollBy(0, -500);
                this.idleSince = null;
            } else if (somethingLeftBelow) {
                // everything on this screen has been done but there's more
                // below
                window.scrollBy(0, 200);
                this.idleSince = null;
            } else if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) {
                // not at the bottom of the page yet
                window.scrollBy(0, 200);
                this.idleSince = null;
            } else if (this.idleSince == null) {
                this.idleSince = Date.now();
            }
        }

        // Any activity above cleared idleSince; restart the idle clock from
        // now. If nothing happened the earlier timestamp is kept, so the idle
        // time keeps accumulating.
        if (!this.idleSince) {
            this.idleSince = Date.now();
        }
    },

    /* Schedules intervalFunc to run every 250 ms. */
    start : function() {
        var that = this;
        this.intervalId = setInterval(function() {
            that.intervalFunc();
        }, 250);
    },

    /*
     * Returns true once more than IDLE_TIMEOUT_SEC seconds have passed since
     * the last activity; at that point the interval timer is also cancelled.
     * Returns false otherwise.
     */
    isFinished : function() {
        if (this.idleSince != null) {
            var idleTimeMs = Date.now() - this.idleSince;
            if (idleTimeMs / 1000 > this.IDLE_TIMEOUT_SEC) {
                clearInterval(this.intervalId);
                return true;
            }
        }
        return false;
    },

    /*
     * Classifies element e by the position of its top edge relative to the
     * visible viewport: -1 if above, 1 if below, 0 if on screen.
     */
    aboveBelowOrOnScreen : function(e) {
        // getBoundingClientRect() is already relative to the viewport, so the
        // visible range is 0..innerHeight; window.scrollY must not be added.
        var eTop = e.getBoundingClientRect().top;
        if (eTop < 0) {
            return -1; // above
        } else if (eTop > window.innerHeight) {
            return 1; // below
        } else {
            return 0; // on screen
        }
    },
};
// Entry point called from outside of this script: reports whether the
// behavior considers itself done by delegating to umbraBehavior.isFinished().
var umbraBehaviorFinished = function() {
    var finished = umbraBehavior.isFinished();
    return finished;
};
// Start the behavior as soon as this script is evaluated; start() schedules
// intervalFunc on a 250 ms timer.
umbraBehavior.start();

View File

@ -61,9 +61,10 @@ behaviors:
request_idle_timeout_sec: 10
- # https://webarchive.jira.com/browse/AITFIVE-451
url_regex: '^https?://(?:www\.)?soundcloud.com/.*$'
behavior_js_template: simpleclicks.js.template
behavior_js_template: simpledo.js.template
default_parameters:
click_css_selector: button.sc-button-play, button.playButton
sdo_css_selector: button.sc-button-play, button.playButton
sdo_action: click
request_idle_timeout_sec: 10
- # https://webarchive.jira.com/browse/AITFIVE-463
url_regex: '^https?://(?:www\.)?christophercerrone.com/.*$'
@ -89,9 +90,10 @@ behaviors:
behavior_js: fec_gov.js
request_idle_timeout_sec: 10
- url_regex: '^https?://(?:www\.)?news\.com\.au/.*$'
behavior_js_template: mouseovers.js.template
behavior_js_template: simpledo.js.template
default_parameters:
mouseover_css_selector: .menu-item a
sdo_css_selector: .menu-item a
sdo_action: mouseover
request_idle_timeout_sec: 10
- # default fallback behavior
url_regex: '^.*$'