From 46d4c89431a4c1285f78aee7a4c834d6dfe7e6c0 Mon Sep 17 00:00:00 2001
From: Barbara Miller
Date: Wed, 12 Oct 2016 17:42:00 -0700
Subject: [PATCH] custom behavior for catalogue.noguchi.org

---
 brozzler/behaviors.d/noguchi.js.template | 136 +++++++++++++++++++++++
 brozzler/behaviors.yaml                  |  14 +--
 2 files changed, 144 insertions(+), 6 deletions(-)
 create mode 100644 brozzler/behaviors.d/noguchi.js.template

diff --git a/brozzler/behaviors.d/noguchi.js.template b/brozzler/behaviors.d/noguchi.js.template
new file mode 100644
index 0000000..258df80
--- /dev/null
+++ b/brozzler/behaviors.d/noguchi.js.template
@@ -0,0 +1,136 @@
+/*
+ * brozzler/behaviors.d/noguchi.js - from ARTWORKS or EXHIBITIONS main pages,
+ * click through end
+ *
+ * Copyright (C) 2014-2016 Internet Archive
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Classifies element e relative to the visible viewport: -1 when its top
+// edge is above it, 1 when below it, 0 when on screen. The value returned
+// by getBoundingClientRect() is already viewport-relative, so it is
+// compared with 0 and window.innerHeight, not with window.scrollY.
+var umbraAboveBelowOrOnScreen = function(e) {
+    var eTop = e.getBoundingClientRect().top;
+    if (eTop < 0) {
+        return -1; // above
+    } else if (eTop > window.innerHeight) {
+        return 1; // below
+    } else {
+        return 0; // on screen
+    }
+}
+
+// Login credentials, filled in from behavior parameters. A value that still
+// contains "parameter" is treated as unset by the check at the bottom.
+var UMBRA_N_USER_NAME = "${parameter_username}";
+var UMBRA_N_PASSWORD = "${parameter_password}";
+
+var umbraState = {'idleSince':null};
+var umbraAlreadyClicked = {};
+// Extracts the "page N/M" label from the pager text. The "/" is escaped so
+// it does not end the regex literal early; \s+ accepts regular as well as
+// non-breaking spaces around the label.
+var re = /(?:‹ Previous){0,1}\s+(page [\d]+\/[\d]+)\s+(?:Next ›){0,1}/;
+
+// Runs on every timer tick: clicks the second link in div.nav when it is on
+// screen, otherwise scrolls toward it or down to the bottom of the page.
+var umbraIntervalFunc = function() {
+    var clickedSomething = false;
+    var somethingLeftBelow = false;
+    var somethingLeftAbove = false;
+
+    var target = document.querySelectorAll("div.nav > a")[1];
+    if (target) {
+        var where = umbraAboveBelowOrOnScreen(target);
+        if (where === 0) {
+            var mouseOverEvent = document.createEvent('Events');
+            mouseOverEvent.initEvent("mouseover", true, false);
+            target.dispatchEvent(mouseOverEvent);
+            target.click();
+            clickedSomething = true;
+            umbraState.idleSince = null;
+            // exec() yields null when the pager text does not match.
+            var target_page = re.exec(document.querySelector("div.nav").textContent);
+            console.log('clicked ' + target_page);
+            umbraAlreadyClicked[target_page] = true;
+        } else if (where > 0) {
+            somethingLeftBelow = true;
+        } else if (where < 0) {
+            somethingLeftAbove = true;
+        }
+    }
+
+    if (!clickedSomething) {
+        if (somethingLeftAbove) {
+            // console.log("scrolling UP because everything on this screen has been clicked but we missed something above");
+            window.scrollBy(0, -500);
+            umbraState.idleSince = null;
+        } else if (somethingLeftBelow) {
+            // console.log("scrolling because everything on this screen has been clicked but there's more below document.body.clientHeight="
+            //    + document.body.clientHeight);
+            window.scrollBy(0, 200);
+            umbraState.idleSince = null;
+        } else if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) {
+            window.scrollBy(0, 200);
+            umbraState.idleSince = null;
+        } else if (umbraState.idleSince == null) {
+            umbraState.idleSince = Date.now();
+        }
+    }
+
+    // A null idleSince means something was just done on this tick; restart
+    // the idle clock from now.
+    if (umbraState.idleSince == null) {
+        umbraState.idleSince = Date.now();
+    }
+}
+
+// If we haven't had anything to do (scrolled, clicked, etc) in this amount of
+// time, then we consider ourselves finished with the page.
+var UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC = 5;
+
+// Called from outside of this script. Returns true, and stops the timer,
+// once idleSince is older than UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC.
+var umbraBehaviorFinished = function() {
+    if (umbraState.idleSince != null) {
+        var idleTimeMs = Date.now() - umbraState.idleSince;
+        if (idleTimeMs / 1000 > UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC) {
+            clearInterval(umbraIntervalId);
+            return true;
+        }
+    }
+    return false;
+}
+
+// Fills the first two inputs of the login form with the credentials and
+// clicks the submit element.
+var umbraNLogin = function() {
+    var login_inputs = document.querySelectorAll("#loginForm > form > div > input");
+    var emailInput = login_inputs[0];
+    var passwordInput = login_inputs[1];
+    var loginButton = document.querySelector("div.form-submit");
+    emailInput.value=UMBRA_N_USER_NAME;
+    passwordInput.value=UMBRA_N_PASSWORD;
+    loginButton.click();
+}
+
+if (document.getElementById("loginForm") == null || UMBRA_N_USER_NAME.indexOf("parameter")>0 || UMBRA_N_PASSWORD.indexOf("parameter")>0 ) {//check for unset parameters
+    console.log("missing #loginForm or login credentials; maybe already logged in for " + location.href);
+    var umbraIntervalId = setInterval(umbraIntervalFunc, 200);
+}
+else {//login
+    console.log("#loginForm and credentials found for " + location.href);
+    umbraNLogin();
+}
diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml
index 764012c..90322e6 100644
--- a/brozzler/behaviors.yaml
+++ b/brozzler/behaviors.yaml
@@ -45,12 +45,14 @@ behaviors:
     url_regex: '^https?://(?:www\.)?instagram\.com/.*$'
     behavior_js: instagram.js
     request_idle_timeout_sec: 10
-#  - # ARI-4930 test
-#    url_regex: '^https?://(?:www\.)?kansascityfed\.org/publications/research/er/archive/.*$'
-#    behavior_js_template: clickGetPDFs.js.template
-#    default_parameters:
-#      css_selector: li.years>a
-#    request_idle_timeout_sec: 10
+  -
+    url_regex: '^https?://catalogue\.noguchi\.org/index.php/LoginReg/form$'
+    behavior_js_template: noguchi.js.template
+    request_idle_timeout_sec: 10
+  -
+    url_regex: '^https?://catalogue\.noguchi\.org/index.php/Search/Index/search/.*/target/ca_.*$'
+    behavior_js_template: noguchi.js.template
+    request_idle_timeout_sec: 10
   -
     url_regex: '^https?://(?:www\.)?huffingtonpost\.com/.*$'
     behavior_js_template: huffpostslides.js