mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-10 23:40:30 -04:00
clickGetPDFs for kansascityfed
This commit is contained in:
parent
d7bd19a7f6
commit
73454d2ac7
2 changed files with 47 additions and 25 deletions
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* brozzler/behaviors.d/clickGetPDFs.js.template - click on each of several elements and
|
* brozzler/behaviors.d/clickGetPDFs.js.template - click on each of several elements and
|
||||||
* click on linked PDFs found
|
* click on linked PDFs found
|
||||||
*
|
*
|
||||||
* Copyright (C) 2014-2016 Internet Archive
|
* Copyright (C) 2014-2016 Internet Archive
|
||||||
|
@ -17,6 +17,12 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* to do:
|
||||||
|
* <blocker> disable multiple file download warning dialog
|
||||||
|
* <major> enable storage across page loads to capture all PDFs AND decrease work
|
||||||
|
*/
|
||||||
|
|
||||||
var umbraAboveBelowOrOnScreen = function(e) {
|
var umbraAboveBelowOrOnScreen = function(e) {
|
||||||
var eTop = e.getBoundingClientRect().top;
|
var eTop = e.getBoundingClientRect().top;
|
||||||
if (eTop < window.scrollY) {
|
if (eTop < window.scrollY) {
|
||||||
|
@ -26,35 +32,58 @@ var umbraAboveBelowOrOnScreen = function(e) {
|
||||||
} else {
|
} else {
|
||||||
return 0; // on screen
|
return 0; // on screen
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
|
|
||||||
var umbraState = {'idleSince':null};
|
var umbraState = {'idleSince':null};
|
||||||
var umbraAlreadyClicked = {};
|
var umbraAlreadyClicked = {'2016':true};
|
||||||
|
|
||||||
|
var clickTargets = document.querySelectorAll("${css_selector}");
|
||||||
|
|
||||||
|
var pdfSelector = "a";
|
||||||
|
|
||||||
var umbraIntervalFunc = function() {
|
var umbraIntervalFunc = function() {
|
||||||
var clickedSomething = false;
|
var clickedSomething = false;
|
||||||
var somethingLeftBelow = false;
|
var somethingLeftBelow = false;
|
||||||
var somethingLeftAbove = false;
|
var somethingLeftAbove = false;
|
||||||
var cssSelector = "${css_selector}";
|
|
||||||
var pdfSelector = "a";
|
|
||||||
var i = 0;
|
var i = 0;
|
||||||
|
|
||||||
var clickTargets = document.querySelectorAll(cssSelector);
|
|
||||||
var pdfTargets = document.querySelectorAll(pdfSelector);
|
var pdfTargets = document.querySelectorAll(pdfSelector);
|
||||||
|
|
||||||
if (pdfTargets.length > 0) {
|
if (!(clickTargets[0].text in umbraAlreadyClicked)) {
|
||||||
|
targetID = clickTargets[0].text;
|
||||||
|
logmsg = 'clicking ' + targetID;
|
||||||
|
console.log(logmsg);
|
||||||
|
var where = umbraAboveBelowOrOnScreen(clickTargets[0]);
|
||||||
|
if (where === 0) {
|
||||||
|
var mouseOverEvent = document.createEvent('Events');
|
||||||
|
mouseOverEvent.initEvent("mouseover", true, false);
|
||||||
|
clickTargets[0].dispatchEvent(mouseOverEvent);
|
||||||
|
clickTargets[0].click();
|
||||||
|
clickedSomething = true;
|
||||||
|
umbraState.idleSince = null;
|
||||||
|
umbraAlreadyClicked[targetID] = true;
|
||||||
|
} else if (where > 0) {
|
||||||
|
somethingLeftBelow = true;
|
||||||
|
} else if (where < 0) {
|
||||||
|
somethingLeftAbove = true;
|
||||||
|
}
|
||||||
|
} else if (pdfTargets.length > 0) {
|
||||||
for (i = 0; i < pdfTargets.length; i++) {
|
for (i = 0; i < pdfTargets.length; i++) {
|
||||||
if (pdfTargets[i].href.toLowerCase().lastIndexOf('pdf') > 0) {
|
if (pdfTargets[i].href.toLowerCase().lastIndexOf('pdf') > 0) {
|
||||||
if (!(pdfTargets[i].href in umbraAlreadyClicked)){
|
pdfID = pdfTargets[i].href;
|
||||||
|
logmsg = 'clicking ' + pdfID;
|
||||||
|
console.log(logmsg);
|
||||||
|
if (!(pdfID in umbraAlreadyClicked)){
|
||||||
var where = umbraAboveBelowOrOnScreen(pdfTargets[i]);
|
var where = umbraAboveBelowOrOnScreen(pdfTargets[i]);
|
||||||
if (where === 0) {
|
if (where === 0) {
|
||||||
|
pdfTargets[i].setAttribute('download','');
|
||||||
var mouseOverEvent = document.createEvent('Events');
|
var mouseOverEvent = document.createEvent('Events');
|
||||||
mouseOverEvent.initEvent("mouseover", true, false);
|
mouseOverEvent.initEvent("mouseover", true, false);
|
||||||
pdfTargets[i].dispatchEvent(mouseOverEvent);
|
pdfTargets[i].dispatchEvent(mouseOverEvent);
|
||||||
pdfTargets[i].click();
|
pdfTargets[i].click();
|
||||||
clickedSomething = true;
|
clickedSomething = true;
|
||||||
umbraState.idleSince = null;
|
umbraState.idleSince = null;
|
||||||
umbraAlreadyClicked[pdfTargets[i].href] = true;
|
umbraAlreadyClicked[pdfID] = true;
|
||||||
break; // break from loop
|
break; // break from loop
|
||||||
} else if (where > 0) {
|
} else if (where > 0) {
|
||||||
somethingLeftBelow = true;
|
somethingLeftBelow = true;
|
||||||
|
@ -67,11 +96,10 @@ var umbraIntervalFunc = function() {
|
||||||
}
|
}
|
||||||
|
|
||||||
if((i === pdfTargets.length) && !clickedSomething) {
|
if((i === pdfTargets.length) && !clickedSomething) {
|
||||||
for (var i = 0; i < clickTargets.length; i++) {
|
for (i = 1; i < clickTargets.length; i++) {
|
||||||
targetID = clickTargets[i].id;
|
targetID = clickTargets[i].text;
|
||||||
if (targetID === "") {
|
logmsg = 'clicking ' + targetID;
|
||||||
targetID = location.host + "-" + i;
|
console.log(logmsg);
|
||||||
}
|
|
||||||
if (!(targetID in umbraAlreadyClicked)) {
|
if (!(targetID in umbraAlreadyClicked)) {
|
||||||
var where = umbraAboveBelowOrOnScreen(clickTargets[i]);
|
var where = umbraAboveBelowOrOnScreen(clickTargets[i]);
|
||||||
if (where === 0) {
|
if (where === 0) {
|
||||||
|
@ -105,15 +133,15 @@ var umbraIntervalFunc = function() {
|
||||||
} else if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) {
|
} else if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) {
|
||||||
window.scrollBy(0, 200);
|
window.scrollBy(0, 200);
|
||||||
umbraState.idleSince = null;
|
umbraState.idleSince = null;
|
||||||
} else if (umbraState.idleSince == null) {
|
} else if (umbraState.idleSince === null) {
|
||||||
umbraState.idleSince = Date.now();
|
umbraState.idleSince = Date.now();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (umbraState.idleSince == null) {
|
if (umbraState.idleSince === null) {
|
||||||
umbraState.idleSince = Date.now();
|
umbraState.idleSince = Date.now();
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
|
|
||||||
// If we haven't had anything to do (scrolled, clicked, etc) in this amount of
|
// If we haven't had anything to do (scrolled, clicked, etc) in this amount of
|
||||||
// time, then we consider ourselves finished with the page.
|
// time, then we consider ourselves finished with the page.
|
||||||
|
@ -121,7 +149,7 @@ var UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC = 7;
|
||||||
|
|
||||||
// Called from outside of this script.
|
// Called from outside of this script.
|
||||||
var umbraBehaviorFinished = function() {
|
var umbraBehaviorFinished = function() {
|
||||||
if (umbraState.idleSince != null) {
|
if (umbraState.idleSince !== null) {
|
||||||
var idleTimeMs = Date.now() - umbraState.idleSince;
|
var idleTimeMs = Date.now() - umbraState.idleSince;
|
||||||
if (idleTimeMs / 1000 > UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC) {
|
if (idleTimeMs / 1000 > UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC) {
|
||||||
clearInterval(umbraIntervalId);
|
clearInterval(umbraIntervalId);
|
||||||
|
@ -129,6 +157,6 @@ var umbraBehaviorFinished = function() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
};
|
||||||
|
|
||||||
var umbraIntervalId = setInterval(umbraIntervalFunc, 5000);
|
var umbraIntervalId = setInterval(umbraIntervalFunc, 5000);
|
||||||
|
|
|
@ -45,12 +45,6 @@ behaviors:
|
||||||
url_regex: '^https?://(?:www\.)?instagram\.com/.*$'
|
url_regex: '^https?://(?:www\.)?instagram\.com/.*$'
|
||||||
behavior_js: instagram.js
|
behavior_js: instagram.js
|
||||||
request_idle_timeout_sec: 10
|
request_idle_timeout_sec: 10
|
||||||
- # ARI-4838 racineco.com document viewers
|
|
||||||
url_regex: '^https?://(?:www\.)?racineco\.com/.*$'
|
|
||||||
behavior_js_template: clickGetPDFs.js.template
|
|
||||||
default_parameters:
|
|
||||||
css_selector: img[id^='NavtwocolUserControl11_NavMeeting_item']
|
|
||||||
request_idle_timeout_sec: 10
|
|
||||||
- # ARI-4930 test
|
- # ARI-4930 test
|
||||||
url_regex: '^https?://(?:www\.)?kansascityfed\.org/publications/research/er/archive/.*$'
|
url_regex: '^https?://(?:www\.)?kansascityfed\.org/publications/research/er/archive/.*$'
|
||||||
behavior_js_template: clickGetPDFs.js.template
|
behavior_js_template: clickGetPDFs.js.template
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue