diff --git a/brozzler/behaviors.d/clickGetPDFs.js.template b/brozzler/behaviors.d/clickGetPDFs.js.template
new file mode 100644
index 0000000..65f62e7
--- /dev/null
+++ b/brozzler/behaviors.d/clickGetPDFs.js.template
@@ -0,0 +1,173 @@
+/*
+ * brozzler/behaviors.d/clickGetPDFs.js.template - click on each of several elements and
+ * click on linked PDFs found
+ *
+ * Copyright (C) 2014-2016 Internet Archive
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Classify an element's vertical position relative to the visible viewport.
+ *
+ * getBoundingClientRect() reports coordinates relative to the viewport, so the
+ * top edge is compared against 0 and window.innerHeight directly; adding
+ * window.scrollY would mix document and viewport coordinates.
+ *
+ * Returns -1 if the top edge is above the viewport, 1 if it is below the
+ * viewport, and 0 if it is on screen.
+ */
+var umbraAboveBelowOrOnScreen = function(e) {
+    var eTop = e.getBoundingClientRect().top;
+    if (eTop < 0) {
+        return -1; // above
+    } else if (eTop > window.innerHeight) {
+        return 1;  // below
+    }
+    return 0;      // on screen
+};
+
+// idleSince is the Date.now() timestamp of the most recent tick that clicked
+// or scrolled (or of the very first tick, if none has done so yet); it is
+// null until the first tick has run.
+var umbraState = {'idleSince':null};
+
+// Keys (link hrefs and click target ids) that have already been clicked, so
+// that nothing is clicked twice.
+var umbraAlreadyClicked = {};
+
+// Dispatch a bubbling mouseover event on the element, then click it.
+var umbraMouseOverAndClick = function(element) {
+    var mouseOverEvent = document.createEvent('Events');
+    mouseOverEvent.initEvent("mouseover", true, false);
+    element.dispatchEvent(mouseOverEvent);
+    element.click();
+};
+
+/*
+ * Key under which a link is recorded in umbraAlreadyClicked, or null if the
+ * link should not be clicked. A link qualifies when its lowercased href
+ * contains "pdf" anywhere after the first character.
+ */
+var umbraPdfLinkKey = function(link) {
+    var href = link.href;
+    // An SVG "a" element exposes href as an object, not a string; skip it
+    // instead of throwing and aborting the whole tick.
+    if (typeof href !== 'string') {
+        return null;
+    }
+    return href.toLowerCase().lastIndexOf('pdf') > 0 ? href : null;
+};
+
+/*
+ * Key under which a click target is recorded in umbraAlreadyClicked: its id,
+ * or location.host + "-" + index when the id is empty.
+ */
+var umbraClickTargetKey = function(target, index) {
+    return target.id === "" ? location.host + "-" + index : target.id;
+};
+
+/*
+ * Click the first element that is on screen and has not been clicked yet.
+ *
+ * elements: array-like collection of candidate elements
+ * keyOf: function(element, index) returning the element's key in
+ *     umbraAlreadyClicked, or null to skip the element
+ * pending: object whose "above" / "below" flags are set to true for every
+ *     unclicked candidate found off screen in that direction
+ *
+ * Returns true if an element was clicked, false otherwise.
+ */
+var umbraClickFirstOnScreen = function(elements, keyOf, pending) {
+    for (var i = 0; i < elements.length; i++) {
+        var key = keyOf(elements[i], i);
+        if (key === null || key in umbraAlreadyClicked) {
+            continue;
+        }
+        var where = umbraAboveBelowOrOnScreen(elements[i]);
+        if (where === 0) {
+            umbraMouseOverAndClick(elements[i]);
+            umbraAlreadyClicked[key] = true;
+            return true; // at most one click per call
+        } else if (where > 0) {
+            pending.below = true;
+        } else {
+            pending.above = true;
+        }
+    }
+    return false;
+};
+
+/*
+ * One tick of the behavior. Clicks at most one thing: the first unclicked
+ * on-screen PDF link or, if there is none, the first unclicked on-screen
+ * element matching the css_selector template parameter. When nothing was
+ * clicked it scrolls instead: up 500px if an unclicked candidate is above the
+ * viewport, otherwise down 200px if one is below or the bottom of the page
+ * has not been reached. Finally it updates umbraState.idleSince.
+ */
+var umbraIntervalFunc = function() {
+    var cssSelector = "${css_selector}";
+    var pdfSelector = "a";
+    var clickTargets = document.querySelectorAll(cssSelector);
+    var pdfTargets = document.querySelectorAll(pdfSelector);
+    var pending = {above: false, below: false};
+
+    // PDF links take priority; the css_selector targets are only tried when
+    // no PDF link was clicked on this tick.
+    var acted = umbraClickFirstOnScreen(pdfTargets, umbraPdfLinkKey, pending) ||
+            umbraClickFirstOnScreen(clickTargets, umbraClickTargetKey, pending);
+
+    if (!acted) {
+        if (pending.above) {
+            // everything on this screen has been clicked but we missed
+            // something above
+            window.scrollBy(0, -500);
+            acted = true;
+        } else if (pending.below ||
+                window.scrollY + window.innerHeight < document.documentElement.scrollHeight) {
+            // there is more to click below, or more page left to reveal
+            window.scrollBy(0, 200);
+            acted = true;
+        }
+    }
+
+    // Restart the idle clock whenever something happened on this tick, and
+    // start it the first time nothing did.
+    if (acted || umbraState.idleSince == null) {
+        umbraState.idleSince = Date.now();
+    }
+};
+
+// If we haven't had anything to do (scrolled, clicked, etc) in this amount of
+// time, then we consider ourselves finished with the page.
+var UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC = 7;
+
+/*
+ * Called from outside of this script. Returns true, and stops the interval
+ * timer, once the idle time exceeds UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC;
+ * returns false otherwise.
+ */
+var umbraBehaviorFinished = function() {
+    if (umbraState.idleSince == null) {
+        return false;
+    }
+    var idleTimeMs = Date.now() - umbraState.idleSince;
+    if (idleTimeMs / 1000 > UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC) {
+        clearInterval(umbraIntervalId);
+        return true;
+    }
+    return false;
+};
+
+var umbraIntervalId = setInterval(umbraIntervalFunc, 5000);
diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml
index 2bc56a4..e4b7b90 100644
--- a/brozzler/behaviors.yaml
+++ b/brozzler/behaviors.yaml
@@ -47,7 +47,7 @@ behaviors:
     request_idle_timeout_sec: 10
   - # ARI-4838 racineco.com document viewers
     url_regex: '^https?://(?:www\.)?racineco\.com/.*$'
-    behavior_js_template: multiclicks.js.template
+    behavior_js_template: clickGetPDFs.js.template
     default_parameters:
       css_selector: img[id^='NavtwocolUserControl11_NavMeeting_item']
     request_idle_timeout_sec: 10