Merge pull request #71 from internetarchive/brofurb

JS class-based generalized behavior
This commit is contained in:
Noah Levitt 2018-01-18 12:23:18 -08:00 committed by GitHub
commit 9e80a3b0d3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 190 additions and 503 deletions

View File

@ -1,7 +1,7 @@
#
# brozzler/behaviors.yaml - behavior configuration
#
# Copyright (C) 2014-2016 Internet Archive
# Copyright (C) 2014-2018 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -37,6 +37,19 @@
url_regex: '^https?://(?:www\.)?instagram\.com/.*$'
behavior_js_template: instagram.js
request_idle_timeout_sec: 10
-
url_regex: '^https?://(?:www\.)?pm\.gc\.ca/.*$'
behavior_js_template: umbraBehavior.js.j2
default_parameters:
actions:
- selector: div.teaser, li.pager__item a
- # https://webarchive.jira.com/browse/ARI-5389
  # Click each performer button in the festival lineup; the artist modal it
  # opens is dismissed through closeSelector. The selector mirrors the one the
  # dedicated pitchfork.js behavior used (button.performer.full-lineup) rather
  # than the pm.gc.ca teaser/pager selector.
  url_regex: '^https?://pitchfork\.com/.*$'
  behavior_js_template: umbraBehavior.js.j2
  default_parameters:
    actions:
      - selector: button.performer.full-lineup
        closeSelector: .pmf-artist-modal__close-btn
-
url_regex: '^https?://(?:www\.)?huffingtonpost\.com/.*$'
behavior_js_template: huffpostslides.js
@ -109,11 +122,11 @@
behavior_js_template: fec_gov.js
request_idle_timeout_sec: 10
- url_regex: '^https?://(?:www\.)?news\.com\.au/.*$'
behavior_js_template: mouseovers.js.j2
behavior_js_template: umbraBehavior.js.j2
default_parameters:
mouseover_css_selector: .menu-item a
mouseover_until_hard_timeout: False
request_idle_timeout_sec: 10
actions:
- selector: .menu-item a
do: mouseover
- # https://webarchive.jira.com/browse/ARI-5259
url_regex: '^https?://blog\.sina\.com\.cn/.*$'
behavior_js_template: simpleclicks.js.j2
@ -142,13 +155,6 @@
click_css_selector: img.link-overlay
click_until_hard_timeout: False
request_idle_timeout_sec: 10
- # https://webarchive.jira.com/browse/ARI-5389
url_regex: '^https?://pitchfork\.com/.*$'
behavior_js_template: pitchfork.js
- # https://webarchive.jira.com/browse/ARI-5379
url_regex: '^https?://(?:www\.)?pm\.gc\.ca/.*$'
behavior_js_template: pm-ca.js
request_idle_timeout_sec: 10
- # https://webarchive.jira.com/browse/ARI-4960
url_regex: '^https?://(?:www\.)?fortstjames.ca/community-events-calendar/$'
behavior_js_template: simpleclicks.js.j2
@ -158,5 +164,7 @@
request_idle_timeout_sec: 10
- # default fallback behavior
url_regex: '^.*$'
request_idle_timeout_sec: 10
behavior_js_template: default.js
behavior_js_template: umbraBehavior.js.j2
default_parameters:
actions:
- selector: button.sc-button-play, .playButton, div.soundItem, .jwlist>a

View File

@ -1,177 +0,0 @@
/*
* brozzler/behaviors.d/default.js - default behavior, scrolls to the bottom of
* the page and clicks on selected embedded elements
*
* Copyright (C) 2014-2016 Internet Archive
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Classify an element's vertical position using the top of its bounding
 * client rect compared against window.scrollY and
 * window.scrollY + window.innerHeight.
 *
 * NOTE(review): getBoundingClientRect() is viewport-relative while scrollY is
 * a document offset -- confirm this comparison is the intended one.
 *
 * @param {Element} e - element to classify
 * @returns {number} -1 (above), 1 (below) or 0 (on screen)
 */
var umbraAboveBelowOrOnScreen = function(e) {
    var rectTop = e.getBoundingClientRect().top;
    if (rectTop < window.scrollY) {
        return -1; // above
    }
    if (rectTop > window.scrollY + window.innerHeight) {
        return 1; // below
    }
    return 0; // on screen
};
// Selector used to find embedded frames to descend into.
var UMBRA_IFRAME_EMBEDDED_SELECTOR = "iframe";
// Comma-separated selector list of the elements the default behavior clicks.
var UMBRA_THINGS_TO_CLICK_EMBEDDED_SELECTOR = [
    // elements selected for SoundCloud.com
    "button.sc-button-play",
    ".playButton",
    "div.soundItem",
    // elements selected for Archive.org Playlists
    ".jwlist>a"
].join(", ");
// Deepest iframe nesting level that is searched (0-based; 0 is the page itself).
var MAX_IFRAME_RECURSE_DEPTH = 1;
// idleSince: timestamp (ms) recorded by the interval function, or null.
var umbraState = { idleSince: null };
// Ids (css paths) of elements that have already been clicked.
var umbraAlreadyClicked = {};
var umbraFinished = false;
/**
 * One tick of the default behavior.
 *
 * Collects the clickable embedded elements, clicks the first one that has not
 * been clicked yet and is classified as on screen, and otherwise scrolls
 * (up 500px if an unclicked element is above, down 200px if one is below or
 * the bottom of the page has not been reached). umbraState.idleSince is
 * cleared by every click or scroll and is always a timestamp by the time the
 * function returns.
 */
var umbraIntervalFunc = function() {
    var candidates = [];
    getUmbraEmbeddedElements(candidates);

    var clicked = false;
    var pendingBelow = false;
    var pendingAbove = false;

    // At most one click per tick: the loop stops as soon as something is clicked.
    for (var idx = 0; idx < candidates.length && !clicked; idx++) {
        var entryId = candidates[idx].id;
        var entryTarget = candidates[idx].target;
        if (entryId in umbraAlreadyClicked) {
            continue;
        }
        var position = umbraAboveBelowOrOnScreen(entryTarget);
        if (position < 0) {
            pendingAbove = true;
        } else if (position > 0) {
            pendingBelow = true;
        } else if (position == 0) { // on screen
            console.log("clicking at " + entryTarget.getBoundingClientRect().top + " on " + entryTarget.outerHTML);
            // Targets without a click method are still recorded as handled.
            if (entryTarget.click != undefined) {
                entryTarget.click();
            }
            umbraAlreadyClicked[entryId] = true;
            clicked = true;
            umbraState.idleSince = null;
        }
    }

    if (!clicked) {
        var scrollMessage = null;
        var scrollDelta = 0;
        if (pendingAbove) {
            scrollMessage = "scrolling UP because everything on this screen has been clicked but we missed something above";
            scrollDelta = -500;
        } else if (pendingBelow) {
            scrollMessage = "scrolling because everything on this screen has been clicked but there's more below document.body.clientHeight=" + document.body.clientHeight;
            scrollDelta = 200;
        } else if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) {
            scrollMessage = "scrolling because we're not to the bottom yet document.body.clientHeight=" + document.body.clientHeight;
            scrollDelta = 200;
        }

        if (scrollMessage !== null) {
            console.log(scrollMessage);
            window.scrollBy(0, scrollDelta);
            umbraState.idleSince = null;
        } else if (umbraState.idleSince == null) {
            umbraState.idleSince = Date.now();
        }
    }

    // Any activity above cleared idleSince; restart the idle clock from now.
    if (umbraState.idleSince == null) {
        umbraState.idleSince = Date.now();
    }
};
/**
 * Collect click targets (e.g. SoundCloud "Play" buttons) from a document and,
 * recursively, from the iframes embedded in it.
 *
 * Each match of UMBRA_THINGS_TO_CLICK_EMBEDDED_SELECTOR is appended to
 * embeddedElements as {"id": <css path>, "target": <element>}; for matches
 * found inside an iframe the id is prefixed with the iframe's own css path.
 *
 * @param {Array} embeddedElements - output array, appended to in place
 * @param {number} [currentIframeDepth] - nesting level, defaults to 0
 * @param {Object} [currentDocument] - node to search, defaults to `document`
 * @param {Element} [iframeElement] - iframe that hosts currentDocument, if any
 */
var getUmbraEmbeddedElements = function(embeddedElements, currentIframeDepth, currentDocument,
        iframeElement) {
    var depth = currentIframeDepth || 0;
    var searchRoot = currentDocument || document;

    // Stop descending once the configured nesting limit is exceeded.
    if (depth > MAX_IFRAME_RECURSE_DEPTH) {
        return;
    }

    // Clickable elements directly in this document come first.
    var clickables = searchRoot.querySelectorAll(UMBRA_THINGS_TO_CLICK_EMBEDDED_SELECTOR);
    var idPrefix = iframeElement ? getElementCssPath(iframeElement) : "";
    for (var c = 0; c < clickables.length; c++) {
        var clickable = clickables.item(c);
        embeddedElements.push({"id" : idPrefix + getElementCssPath(clickable), "target" : clickable});
    }

    // Then recurse into the body of every embedded iframe.
    var frames = searchRoot.querySelectorAll(UMBRA_IFRAME_EMBEDDED_SELECTOR);
    for (var f = 0; f < frames.length; f++) {
        var frame = frames[f];
        getUmbraEmbeddedElements(embeddedElements, depth + 1, frame.contentWindow.document.body, frame);
    }
};
// Number of seconds without any user action (scroll, click, etc) after which
// the behavior considers itself finished with the page.
var UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC = 10;

/**
 * Called from outside of this script to ask whether the behavior is done.
 *
 * @returns {boolean} true once umbraState.idleSince is set and lies more than
 *     UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC seconds in the past; in that case the
 *     interval timer is cancelled as well. false otherwise.
 */
var umbraBehaviorFinished = function() {
    if (umbraState.idleSince == null) {
        return false;
    }
    var idleSeconds = (Date.now() - umbraState.idleSince) / 1000;
    if (!(idleSeconds > UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC)) {
        return false;
    }
    clearInterval(umbraIntervalId)
    return true;
};
//copied from http://stackoverflow.com/questions/4588119/get-elements-css-selector-without-element-id
/**
 * Build a css-path string identifying an element by walking up its ancestors.
 *
 * Every visited node that has a parentNode contributes one segment: "#<id>"
 * (which also ends the walk), the bare tag name for the document element, or
 * "<tagName>:nth-child(<n>)" where n is one plus the number of preceding
 * element siblings. Segments are joined with " > ", outermost first.
 *
 * @param {Element} element - element to describe
 * @returns {string} the css path ("" if the element has no parentNode)
 */
var getElementCssPath = function(element) {
    // Segments are gathered innermost-first and reversed at the end.
    var reversedSegments = [];
    var node = element;
    while (node.parentNode) {
        if (node.id) {
            reversedSegments.push('#' + node.id);
            break;
        }
        if (node == node.ownerDocument.documentElement) {
            reversedSegments.push(node.tagName);
        } else {
            var childIndex = 1;
            var sibling = node.previousElementSibling;
            while (sibling) {
                childIndex++;
                sibling = sibling.previousElementSibling;
            }
            reversedSegments.push(node.tagName + ":nth-child(" + childIndex + ")");
        }
        node = node.parentNode;
    }
    return reversedSegments.reverse().join(" > ");
};
// Start the behavior: run umbraIntervalFunc every 100 ms. The timer id is kept
// so that umbraBehaviorFinished can cancel it with clearInterval once the
// idle timeout has elapsed.
var umbraIntervalId = setInterval(umbraIntervalFunc, 100);

View File

@ -1,171 +0,0 @@
/*
* brozzler/behaviors.d/pitchfork.js - behavior for http://pitchfork.com/festival/chicago/
*
* Copyright (C) 2014-2017 Internet Archive
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Behavior for the pitchfork festival lineup: on every tick it clicks a
 * visible on-screen modal close button (if any) and the first visible,
 * on-screen, not-yet-clicked performer button in the page and in each of its
 * iframes, scrolling when nothing could be clicked.
 */
var umbraBehavior = {
    // Seconds of inactivity after which isFinished() reports true.
    IDLE_TIMEOUT_SEC : 10,
    // Timestamp (ms) of the start of the current idle period, or null.
    idleSince : null,
    // Concatenated innerText of every performer button clicked so far; used
    // to avoid clicking the same item twice.
    itemsText : "",

    // https://github.com/jquery/jquery/blob/master/src/css/hiddenVisibleSelectors.js
    // n.b. returns true for elements with visibility:hidden, which occupy
    // screen real estate but are not visible, or clickable with the ui
    isVisible : function(elem) {
        if (elem.offsetWidth) {
            return true;
        }
        if (elem.offsetHeight) {
            return true;
        }
        return !!elem.getClientRects().length;
    },

    // One tick of the behavior; see the description on the object.
    intervalFunc : function() {
        var self = this;
        var PERFORMER_SELECTOR = "button.performer.full-lineup";
        var MODAL_CLOSE_SELECTOR = ".pmf-artist-modal__close-btn";
        var acted = false;
        var moreBelow = false;
        var moreAbove = false;

        // Dispatch a mouseover before clicking, since some urls are
        // requested only on this event - see
        // https://webarchive.jira.com/browse/AITFIVE-451
        var hoverThenClick = function(node) {
            var hover = document.createEvent('Events');
            hover.initEvent("mouseover",true, false);
            node.dispatchEvent(hover);
            node.click();
            acted = true;
            self.idleSince = null;
        };

        // Remember whether an off-screen candidate lies above or below.
        var noteOffScreen = function(position) {
            if (position > 0) {
                moreBelow = true;
            } else if (position < 0) {
                moreAbove = true;
            }
        };

        // The page's own document first, then the document of every iframe.
        var docs = [document];
        var frames = document.querySelectorAll("iframe");
        for (var f = 0; f < frames.length; f++) {
            docs.push(frames[f].contentWindow.document);
        }

        for (var d = 0; d < docs.length; d++) {
            var closers = docs[d].querySelectorAll(MODAL_CLOSE_SELECTOR);
            for (var c = 0; c < closers.length; c++) {
                var closer = closers[c];
                if (!this.isVisible(closer)) {
                    continue;
                }
                var closerPosition = this.aboveBelowOrOnScreen(closer);
                if (closerPosition == 0) {
                    hoverThenClick(closer);
                    break; // done with close buttons here, keep going with this document
                }
                noteOffScreen(closerPosition);
            }

            var performers = docs[d].querySelectorAll(PERFORMER_SELECTOR);
            for (var p = 0; p < performers.length; p++) {
                var performer = performers[p];
                if (!this.isVisible(performer) || this.itemsText.indexOf(performer.innerText) !== -1) {
                    continue;
                }
                var performerPosition = this.aboveBelowOrOnScreen(performer);
                if (performerPosition == 0) {
                    hoverThenClick(performer);
                    this.itemsText += performer.innerText;
                    break; // one performer per document per tick, then on to the next document
                }
                noteOffScreen(performerPosition);
            }
        }

        if (!acted) {
            var scrollDelta = 0;
            if (moreAbove) {
                // everything on this screen has been clicked but something above was missed
                scrollDelta = -500;
            } else if (moreBelow || window.scrollY + window.innerHeight < document.documentElement.scrollHeight) {
                // more candidates below, or simply not at the bottom yet
                scrollDelta = 200;
            }
            if (scrollDelta !== 0) {
                window.scrollBy(0, scrollDelta);
                this.idleSince = null;
            } else if (this.idleSince == null) {
                this.idleSince = Date.now();
            }
        }

        // Any activity above cleared idleSince; restart the idle clock from now.
        if (!this.idleSince) {
            this.idleSince = Date.now();
        }
    },

    // Run intervalFunc every 500 ms, remembering the timer id.
    start : function() {
        this.intervalId = setInterval(() => {
            this.intervalFunc()
        }, 500);
    },

    // True (and the timer is cancelled) once idle for more than IDLE_TIMEOUT_SEC.
    isFinished : function() {
        if (this.idleSince == null) {
            return false;
        }
        var idleSeconds = (Date.now() - this.idleSince) / 1000;
        if (!(idleSeconds > this.IDLE_TIMEOUT_SEC)) {
            return false;
        }
        clearInterval(this.intervalId);
        return true;
    },

    // Returns -1 (above), 1 (below) or 0 (on screen) by comparing the top of
    // the element's bounding client rect with window.scrollY and
    // window.scrollY + window.innerHeight.
    aboveBelowOrOnScreen : function(e) {
        var rectTop = e.getBoundingClientRect().top;
        if (rectTop < window.scrollY) {
            return -1; // above
        }
        if (rectTop > window.scrollY + window.innerHeight) {
            return 1; // below
        }
        return 0; // on screen
    },
};
// Entry point polled from outside of this script: reports whether the
// behavior has been idle long enough to be considered done.
var umbraBehaviorFinished = () => umbraBehavior.isFinished();

// Kick off the periodic behavior as soon as the script is evaluated.
umbraBehavior.start();

View File

@ -1,141 +0,0 @@
/*
* brozzler/behaviors.d/pm-ca.js - behavior for http://www.pm.gc.ca/
*
* Copyright (C) 2014-2017 Internet Archive
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Behavior for http://www.pm.gc.ca/: on every tick it clicks the first
 * visible, on-screen, not-yet-clicked "div.teaser" in the page and in each of
 * its iframes, scrolling when nothing could be clicked.
 */
var umbraBehavior = {
    // Seconds of inactivity after which isFinished() reports true.
    IDLE_TIMEOUT_SEC : 10,
    // Timestamp (ms) of the start of the current idle period, or null.
    idleSince : null,
    // Concatenated innerText of every teaser clicked so far; used to avoid
    // clicking the same item twice.
    itemsText : "",

    // https://github.com/jquery/jquery/blob/master/src/css/hiddenVisibleSelectors.js
    // n.b. returns true for elements with visibility:hidden, which occupy
    // screen real estate but are not visible, or clickable with the ui
    isVisible : function(elem) {
        if (elem.offsetWidth) {
            return true;
        }
        if (elem.offsetHeight) {
            return true;
        }
        return !!elem.getClientRects().length;
    },

    // One tick of the behavior; see the description on the object.
    intervalFunc : function() {
        var TEASER_SELECTOR = "div.teaser";
        var acted = false;
        var moreBelow = false;
        var moreAbove = false;

        // The page's own document first, then the document of every iframe.
        var docs = [document];
        var frames = document.querySelectorAll("iframe");
        for (var f = 0; f < frames.length; f++) {
            docs.push(frames[f].contentWindow.document);
        }

        for (var d = 0; d < docs.length; d++) {
            var teasers = docs[d].querySelectorAll(TEASER_SELECTOR);
            for (var t = 0; t < teasers.length; t++) {
                var teaser = teasers[t];
                if (!this.isVisible(teaser) || this.itemsText.indexOf(teaser.innerText) !== -1) {
                    continue;
                }
                var position = this.aboveBelowOrOnScreen(teaser);
                if (position < 0) {
                    moreAbove = true;
                } else if (position > 0) {
                    moreBelow = true;
                } else if (position == 0) {
                    // Dispatch a mouseover before clicking, since some urls
                    // are requested only on this event - see
                    // https://webarchive.jira.com/browse/AITFIVE-451
                    var hover = document.createEvent('Events');
                    hover.initEvent("mouseover",true, false);
                    teaser.dispatchEvent(hover);
                    teaser.click();
                    acted = true;
                    this.idleSince = null;
                    this.itemsText += teaser.innerText;
                    break; // one teaser per document per tick, then on to the next document
                }
            }
        }

        if (!acted) {
            var scrollDelta = 0;
            if (moreAbove) {
                // everything on this screen has been clicked but something above was missed
                scrollDelta = -500;
            } else if (moreBelow || window.scrollY + window.innerHeight < document.documentElement.scrollHeight) {
                // more candidates below, or simply not at the bottom yet
                scrollDelta = 200;
            }
            if (scrollDelta !== 0) {
                window.scrollBy(0, scrollDelta);
                this.idleSince = null;
            } else if (this.idleSince == null) {
                this.idleSince = Date.now();
            }
        }

        // Any activity above cleared idleSince; restart the idle clock from now.
        if (!this.idleSince) {
            this.idleSince = Date.now();
        }
    },

    // Run intervalFunc every 500 ms, remembering the timer id.
    start : function() {
        this.intervalId = setInterval(() => {
            this.intervalFunc()
        }, 500);
    },

    // True (and the timer is cancelled) once idle for more than IDLE_TIMEOUT_SEC.
    isFinished : function() {
        if (this.idleSince == null) {
            return false;
        }
        var idleSeconds = (Date.now() - this.idleSince) / 1000;
        if (!(idleSeconds > this.IDLE_TIMEOUT_SEC)) {
            return false;
        }
        clearInterval(this.intervalId);
        return true;
    },

    // Returns -1 (above), 1 (below) or 0 (on screen) by comparing the top of
    // the element's bounding client rect with window.scrollY and
    // window.scrollY + window.innerHeight.
    aboveBelowOrOnScreen : function(e) {
        var rectTop = e.getBoundingClientRect().top;
        if (rectTop < window.scrollY) {
            return -1; // above
        }
        if (rectTop > window.scrollY + window.innerHeight) {
            return 1; // below
        }
        return 0; // on screen
    },
};
// Entry point polled from outside of this script: reports whether the
// behavior has been idle long enough to be considered done.
var umbraBehaviorFinished = () => umbraBehavior.isFinished();

// Kick off the periodic behavior as soon as the script is evaluated.
umbraBehavior.start();

View File

@ -0,0 +1,168 @@
/*
* brozzler/js-templates/umbraBehavior.js.j2 - an umbra/brozzler behavior class
*
* Copyright (C) 2017-2018 Internet Archive
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Generic, data-driven umbra/brozzler page behavior.
 *
 * The behavior is configured with an ordered list of actions. Each action is
 * an object with:
 *   - selector:      CSS selector of the elements to act on
 *   - do:            optional; defaults to 'click'. Every target receives a
 *                    synthetic mouseover; it is additionally clicked only
 *                    when the action is 'click'.
 *   - closeSelector: optional CSS selector of an element (e.g. a modal close
 *                    button) to click before handling the targets.
 *
 * On every tick the current action is applied to at most one target per
 * document (the page plus each accessible iframe). If nothing was done the
 * page is scrolled towards the remaining targets or the bottom. After
 * (IDLE_TIMEOUT_SEC - 1) idle seconds the behavior moves on to the next
 * action, if any; after IDLE_TIMEOUT_SEC idle seconds isFinished() is true.
 */
class UmbraBehavior {
    /**
     * @param {Array<Object>} actions - ordered action descriptors (see above)
     */
    constructor(actions) {
        this.IDLE_TIMEOUT_SEC = 10;
        this.actions = actions;
        // Elements that have already been acted on; never handled twice.
        this.alreadyDone = [];
        // Timestamp (ms) of the start of the current idle period, or null.
        this.idleSince = null;
        this.intervalId = null;
        this.intervalTimeMs = 300;
        // Index into this.actions of the action currently being performed.
        this.index = 0;
    }

    /**
     * Return the page's own document followed by the document of every
     * iframe that can be reached. Frames whose document cannot be accessed
     * (access throws, or there is no window/document) are skipped on purpose
     * so that one unreachable frame does not abort the whole tick.
     *
     * @returns {Array<Document>}
     */
    accessibleDocuments() {
        const documents = [document];
        const iframes = document.querySelectorAll("iframe");
        for (let i = 0; i < iframes.length; i++) {
            try {
                const frameDocument = iframes[i].contentWindow.document;
                if (frameDocument) {
                    documents.push(frameDocument);
                }
            } catch (e) {
                // best effort: this frame is not reachable from the page, ignore it
            }
        }
        return documents;
    }

    /**
     * One tick of the behavior.
     * Should match older default and simpleclicks behavior, and more.
     */
    simpleIntervalFunc() {
        const current = this.actions ? this.actions[this.index] : undefined;
        if (!current) {
            // Nothing is configured for this index, so there is nothing to do:
            // just let the idle clock run so that isFinished() can become true.
            if (this.idleSince == null) {
                this.idleSince = Date.now();
            }
            return;
        }

        const selector = current.selector;
        const action = current.do ? current.do : 'click';
        const closeSelector = current.closeSelector ? current.closeSelector : null;

        let didSomething = false;
        let somethingLeftAbove = false;
        let somethingLeftBelow = false;

        for (const doc of this.accessibleDocuments()) {
            if (closeSelector) {
                const closeTargets = doc.querySelectorAll(closeSelector);
                if ((closeTargets.length > 0) &&
                        (this.alreadyDone.indexOf(closeTargets[0]) === -1) &&
                        (this.isVisible(closeTargets[0]))) {
                    // must go through `this`: class bodies are strict mode
                    // and there is no global doTarget function
                    this.doTarget(closeTargets[0], 'click');
                }
            }

            const doTargets = doc.querySelectorAll(selector);
            for (let i = 0; i < doTargets.length; i++) {
                const target = doTargets[i];
                if (this.alreadyDone.indexOf(target) > -1) {
                    continue;
                }
                if (!this.isVisible(target)) {
                    continue;
                }
                const where = this.aboveBelowOrOnScreen(target);
                if (where == 0) {
                    this.doTarget(target, action);
                    didSomething = true;
                    break; // one target per document per tick
                } else if (where > 0) {
                    somethingLeftBelow = true;
                } else if (where < 0) {
                    somethingLeftAbove = true;
                }
            }
        }

        if (!didSomething) {
            if (somethingLeftAbove) {
                window.scrollBy(0, -500);
                this.idleSince = null;
            } else if (somethingLeftBelow || ( (window.scrollY + window.innerHeight) < document.documentElement.scrollHeight)) {
                window.scrollBy(0, 200);
                this.idleSince = null;
            } else if (this.idleSince == null) {
                this.idleSince = Date.now();
            }
        }

        if (!this.idleSince) {
            // Something happened during this tick; restart the idle clock.
            this.idleSince = Date.now();
        } else {
            // Move on to the next action one second before the overall idle
            // timeout would declare the behavior finished.
            const idleTimeMs = Date.now() - this.idleSince;
            if ((idleTimeMs / 1000) > (this.IDLE_TIMEOUT_SEC - 1) && (this.index < (this.actions.length - 1))) {
                console.log("ready for next action"); // untested!
                this.index += 1;
                this.idleSince = null;
                window.scroll(0,0);
            }
        }
    }

    /**
     * Classify an element by comparing the top of its bounding client rect
     * with window.scrollY and window.scrollY + window.innerHeight.
     *
     * @param {Element} elem
     * @returns {number} -1 (above), 1 (below) or 0 (on screen)
     */
    aboveBelowOrOnScreen(elem) {
        const eTop = elem.getBoundingClientRect().top;
        if (eTop < window.scrollY) {
            return -1; // above
        } else if (eTop > window.scrollY + window.innerHeight) {
            return 1; // below
        } else {
            return 0; // on screen
        }
    }

    /**
     * @param {Element} elem
     * @returns {boolean} true if the element has a non-zero offset width or
     *     height, or at least one client rect
     */
    isVisible(elem) {
        return !!(elem.offsetWidth || elem.offsetHeight || elem.getClientRects().length);
    }

    /**
     * Perform an action on a target and record it as done.
     *
     * A mouseover event is always dispatched first, since some urls are
     * requested only on this event - see
     * https://webarchive.jira.com/browse/AITFIVE-451
     *
     * @param {Element} target - element to act on
     * @param {string} action - "click" additionally clicks the target
     */
    doTarget(target, action) {
        const mouseOverEvent = document.createEvent("Events");
        mouseOverEvent.initEvent("mouseover", true, false);
        target.dispatchEvent(mouseOverEvent);
        if (action == "click") {
            target.click();
        } // add new do's here!
        this.alreadyDone.push(target);
        this.idleSince = null;
    }

    /** Run simpleIntervalFunc every intervalTimeMs milliseconds. */
    start() {
        this.intervalId = setInterval(() => {
            this.simpleIntervalFunc();
        }, this.intervalTimeMs);
    }

    /**
     * @returns {boolean} true once the behavior has been idle for more than
     *     IDLE_TIMEOUT_SEC seconds; the interval timer is cancelled as well
     */
    isFinished() {
        if (this.idleSince != null) {
            const idleTimeMs = Date.now() - this.idleSince;
            if (idleTimeMs / 1000 > this.IDLE_TIMEOUT_SEC) {
                clearInterval(this.intervalId);
                return true;
            }
        }
        return false;
    }
}
// The action list is filled in when this template is rendered: the template
// parameter "actions" is serialized as JSON and passed to the constructor.
var umbraBehavior = new UmbraBehavior( {{actions|json}} );
// Called from outside of this script.
// Returns true once the behavior has been idle for longer than its idle
// timeout (see UmbraBehavior.isFinished).
var umbraBehaviorFinished = function() {
return umbraBehavior.isFinished();
};
// Start the periodic behavior as soon as the script is evaluated.
umbraBehavior.start();