From 3becbb5b10381fa9c206b73f808422714d2397df Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Tue, 24 Oct 2017 12:38:39 -0700 Subject: [PATCH] test www.careers.ox.ac.uk --- brozzler/behaviors.yaml | 7 + brozzler/js-templates/umbraBehavior.js.j2 | 228 ++++++++++++++++++++++ 2 files changed, 235 insertions(+) create mode 100644 brozzler/js-templates/umbraBehavior.js.j2 diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml index 3425b51..36fbc8b 100644 --- a/brozzler/behaviors.yaml +++ b/brozzler/behaviors.yaml @@ -43,6 +43,13 @@ default_parameters: actions: - selector: div.teaser, li.pager__item a +- # https://webarchive.jira.com/browse/ARI-5430 + url_regex: '^https?://www\.careers\.ox\.ac\.uk/.*$' + behavior_js_template: umbraBehavior.js.j2 + default_parameters: + actions: + - selector: li.breadcrumbOnPage a + closeSelector: .exit - # https://webarchive.jira.com/browse/ARI-5389 url_regex: '^https?://pitchfork\.com/.*$' behavior_js_template: umbraBehavior.js.j2 diff --git a/brozzler/js-templates/umbraBehavior.js.j2 b/brozzler/js-templates/umbraBehavior.js.j2 new file mode 100644 index 0000000..10b8e14 --- /dev/null +++ b/brozzler/js-templates/umbraBehavior.js.j2 @@ -0,0 +1,228 @@ +/* + * brozzler/js-templates/umbrabehavior.js.j2 - a generalized umbra/brozzler behavior + * + * Copyright (C) 2017 Internet Archive + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +var umbraBehavior = { + IDLE_TIMEOUT_SEC : 10, + + alreadyDone : [], + idleSince : null, + intervalId : null, + state : null, + + //actions : [{'selector': 'div.teaser, li.pager__item a'}], + //actions : [{'selector': '.slideshow-card__overlay'}, {'selector': '.slideshow__next'}], + actions : {{actions|json}}, + k : 0, + + intervalFunc: function() { + if (!this.state) { + this.state = this.actions.length === 1 ? "simple" : "fancy"; + } else if (this.actions.length === k + 1) { + // last action always uses simple block + this.state = "simple"; + } + + var k = this.k; + var selector = this.actions[k].selector; + var action = this.actions[k].do ? this.actions[k].do : 'click'; + var closeSelector = this.actions[k].closeSelector ? this.actions[k].closeSelector : null; + + // need to figure out more about how to end more complex actions... + // var limit = this.actions[k].limit ? this.actions[k].limit : 0; + // if (limit && this.actions[k].alreadyDone && this.actions[k].alreadyDone.length >= limit) { + // continue; + // } + // if (limit && !(this.actions[k].alreadyDone)) { + // this.actions[k].alreadyDone = []; + // } + + if (this.state === "fancy") { + + var moreButton = document.querySelectorAll(selector); + if (moreButton.length > 0) { + console.log("clicking more button"); + this.doTarget(moreButton[0],action); + this.k++; // use next action at next run of interval function + return; + } + + if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) { + window.scrollBy(0, 200); + this.idleSince = null; + return; + } + + if (this.idleSince === null) { + console.log("nothing to do at the moment, might be waiting for something to load, setting this.idleSince=Date.now()"); + this.idleSince = Date.now(); + return; + } else { + if ((Date.now() - this.idleSince) > 9000) { + console.log("finished loading-thumbs, it appears we have reached the bottom"); + this.state = "clicking-first-thumb"; + this.idleSince = null; + } + return; + } + } + + if (this.state === "simple") { + var didSomething = false; + var somethingLeftBelow = false; + var somethingLeftAbove = false; + + var iframes = document.querySelectorAll("iframe"); + var documents = Array(iframes.length + 1); + documents[0] = document; + + iframesLength = iframes.length; + for (var i = 0; i < iframesLength; i++) { + documents[i+1] = iframes[i].contentWindow.document; + } + + documentsLength = documents.length; + for (var j = 0; j < documentsLength; j++) { + + if (closeSelector) { + var closeTargets = documents[j].querySelectorAll(closeSelector); + if (closeTargets != []) { + for ( var i = 0; i < closeTargets.length; i++) { + this.doTarget(closeTargets[i], 'click'); + didSomething = true; + break; // break from closeTargets loop + } + } + } + + var doTargets = documents[j].querySelectorAll(selector); + if (doTargets == []) { + continue; + } + + doTargetsLength = doTargets.length; + for ( var i = 0; i < doTargetsLength; i++) { + // if using limits... + // if (limit && this.actions[k].alreadyDone && this.actions[k].alreadyDone.length >= limit) { + // break; + // } + + if (this.alreadyDone.indexOf(doTargets[i]) > -1) { + continue; + } + if (!this.isVisible(doTargets[i])) { + continue; + } + var where = this.aboveBelowOrOnScreen(doTargets[i]); + if (where == 0) { + this.doTarget(doTargets[i], action); + // if (this.actions[k].alreadyDone) { + // this.actions[k].alreadyDone.push(doTargets[i]); + // } + didSomething = true; + break; // break from doTargets loop, but not from documents loop + } else if (where > 0) { + somethingLeftBelow = true; + } else if (where < 0) { + somethingLeftAbove = true; + } + } + } + if (!didSomething) { + if (somethingLeftAbove) { + // console.log("scrolling UP because everything on this screen has been clicked but we missed something above"); + window.scrollBy(0, -500); + this.idleSince = null; + } else if (somethingLeftBelow) { + // console.log("scrolling because everything on this screen has been clicked but there's more below document.body.clientHeight=" + // + document.body.clientHeight); + window.scrollBy(0, 200); + this.idleSince = null; + } else if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) { + // console.log("scrolling because we're not to the bottom yet document.body.clientHeight=" + // + document.body.clientHeight); + window.scrollBy(0, 200); + this.idleSince = null; + } else if (this.idleSince == null) { + this.idleSince = Date.now(); + } + } + } + if (!this.idleSince) { + this.idleSince = Date.now(); + } + }, + + aboveBelowOrOnScreen : function(elem) { + var eTop = elem.getBoundingClientRect().top; + if (eTop < window.scrollY) { + return -1; // above + } else if (eTop > window.scrollY + window.innerHeight) { + return 1; // below + } else { + return 0; // on screen + } + }, + + isVisible : function(elem) { + return !!(elem.offsetWidth || elem.offsetHeight || elem.getClientRects().length); + }, + + doTarget : function(target, action) { + // console.log("doing " + action + target.outerHTML); + // do mouse over event on target + // since some urls are requsted only on + // this event - see + // https://webarchive.jira.com/browse/AITFIVE-451 + var mouseOverEvent = document.createEvent("Events"); + mouseOverEvent.initEvent("mouseover", true, false); + target.dispatchEvent(mouseOverEvent); + + if (action == "click") { + target.click(); + } // add new do's here! + + this.alreadyDone.push(target); + this.idleSince = null; + }, + + start : function() { + var that = this; + this.intervalId = setInterval(function() { + that.intervalFunc() + }, 500); + }, + + isFinished : function() { + if (this.idleSince != null) { + var idleTimeMs = Date.now() - this.idleSince; + if (idleTimeMs / 1000 > this.IDLE_TIMEOUT_SEC) { + clearInterval(this.intervalId); + return true; + } + } + return false; + }, +// end umbraBehavior definition +}; + +// Called from outside of this script. +var umbraBehaviorFinished = function() { + return umbraBehavior.isFinished(); +}; + +umbraBehavior.start();