From 9db30b089cb3778308054ad41cbbfa307d31f6a1 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Wed, 5 Jul 2017 18:46:18 -0700 Subject: [PATCH 1/3] supports rewritten www.news.com.au yaml --- brozzler/behaviors.yaml | 16 +++ brozzler/js-templates/umbraBehavior.js.j2 | 158 ++++++++++++++++++++++ 2 files changed, 174 insertions(+) create mode 100644 brozzler/js-templates/umbraBehavior.js.j2 diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml index a7e80ef..3bfa3bd 100644 --- a/brozzler/behaviors.yaml +++ b/brozzler/behaviors.yaml @@ -37,6 +37,15 @@ url_regex: '^https?://(?:www\.)?instagram\.com/.*$' behavior_js_template: instagram.js request_idle_timeout_sec: 10 +- + url_regex: '^https?://(?:www\.)?huffingtonpost\.com/.*$' + behavior_js_template: umbraBehavior.js.j2 + default_parameters: + actions: + - selector: .slideshow + do: click + - selector: .slideshow-overlay__container__left__nav__next + do: click - url_regex: '^https?://(?:www\.)?huffingtonpost\.com/.*$' behavior_js_template: huffpostslides.js @@ -94,6 +103,13 @@ url_regex: '^https?://(?:www\.)?fec.gov/data/.*$' behavior_js_template: fec_gov.js request_idle_timeout_sec: 10 +- url_regex: '^https?://(?:www\.)?news\.com\.au/.*$' + behavior_js_template: umbraBehavior.js.j2 + default_parameters: + actions: + - selector: .menu-item a + do: mouseover + do_until_hard_timeout: False - url_regex: '^https?://(?:www\.)?news\.com\.au/.*$' behavior_js_template: mouseovers.js.j2 default_parameters: diff --git a/brozzler/js-templates/umbraBehavior.js.j2 b/brozzler/js-templates/umbraBehavior.js.j2 new file mode 100644 index 0000000..cfe39c8 --- /dev/null +++ b/brozzler/js-templates/umbraBehavior.js.j2 @@ -0,0 +1,158 @@ +/* + * brozzler/js-templates/umbraBehavior.js.j2 - a library for umbra/brozzler behaviors + * + * Copyright (C) 2017 Internet Archive + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +var umbraBehavior = { + IDLE_TIMEOUT_SEC : 10, + + alreadyDone : [], + idleSince : null, + intervalId : null, + + intervalFunc: function() { + var didSomething = false; + var somethingLeftBelow = false; + var somethingLeftAbove = false; + + var actions = {{actions|json}}; + var actionsLength = actions.length; + + for (var k = 0; k < actionsLength; k++) { + var selector = actions[k].selector; + var action = actions[k].do; + + var iframes = document.querySelectorAll("iframe"); + var documents = Array(iframes.length + 1); + documents[0] = document; + + iframesLength = iframes.length; + for (var i = 0; i < iframesLength; i++) { + documents[i+1] = iframes[i].contentWindow.document; + } + + documentsLength = documents.length; + for (var j = 0; j < documentsLength; j++) { + + var doTargets = documents[j].querySelectorAll(selector); + + doTargetsLength = doTargets.length; + for ( var i = 0; i < doTargetsLength; i++) { + if (this.alreadyDone.indexOf(doTargets[i]) > -1) { + continue; + } + if (!this.isVisible(doTargets[i])) { + continue; + } + var where = this.aboveBelowOrOnScreen(doTargets[i]); + if (where == 0) { + this.doTarget(doTargets[i], action); + didSomething = true; + } else if (where > 0) { + somethingLeftBelow = true; + } else if (where < 0) { + somethingLeftAbove = true; + } + if (didSomething) { + break; // break from doTargets loop, but not from documents loop + } + } + } + + if (!didSomething) { + if (somethingLeftAbove) { + // console.log("scrolling UP because everything on this screen has been done but we missed something above"); + 
+ window.scrollBy(0, -500); + this.idleSince = null; + } else if (somethingLeftBelow) { + // console.log("scrolling because everything on this screen has been done but there's more below"); + window.scrollBy(0, 200); + this.idleSince = null; + } else if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) { + // console.log("scrolling because we're not to the bottom yet"); + window.scrollBy(0, 200); + this.idleSince = null; + } else if (this.idleSince == null) { + this.idleSince = Date.now(); + } + } + + if (!idleSince) { + this.idleSince = Date.now(); + } + } + }, + + start : function() { + var that = this; + this.intervalId = setInterval(function() { + that.intervalFunc() + }, 250); + }, + + isFinished : function() { + if (this.idleSince != null) { + var idleTimeMs = Date.now() - this.idleSince; + if (idleTimeMs / 1000 > this.IDLE_TIMEOUT_SEC) { + clearInterval(this.intervalId); + return true; + } + } + return false; + }, + + aboveBelowOrOnScreen : function(elem) { + var eTop = elem.getBoundingClientRect().top; + if (eTop < window.scrollY) { + return -1; // above + } else if (eTop > window.scrollY + window.innerHeight) { + return 1; // below + } else { + return 0; // on screen + } + }, + + isVisible : function(elem) { + return !!(elem.offsetWidth || elem.offsetHeight || elem.getClientRects().length); + }, + + doTarget : function(target, action) { + // console.log("doing " + action + target.outerHTML); + // do mouse over event on target + // since some urls are requested only on + // this event - see + // https://webarchive.jira.com/browse/AITFIVE-451 + var mouseOverEvent = document.createEvent("Events"); + mouseOverEvent.initEvent("mouseover", true, false); + target.dispatchEvent(mouseOverEvent); + + if (action == "click") { + target.click(); + } // add new do's here! + + this.alreadyDone.push(target); + this.idleSince = null; + }, + +// end umbraBehavior definition +}; + +// Called from outside of this script. 
+var umbraBehaviorFinished = function() { + return umbraBehavior.isFinished(); +}; + +umbraBehavior.start(); From ac70617a05b32f53f070e577ec0a3b49d23f21e2 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Wed, 12 Jul 2017 20:00:02 -0700 Subject: [PATCH 2/3] add limits --- brozzler/behaviors.yaml | 9 +++---- brozzler/js-templates/umbraBehavior.js.j2 | 30 ++++++++++++++++++----- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml index 3bfa3bd..892fd31 100644 --- a/brozzler/behaviors.yaml +++ b/brozzler/behaviors.yaml @@ -38,14 +38,13 @@ behavior_js_template: instagram.js request_idle_timeout_sec: 10 - - url_regex: '^https?://(?:www\.)?huffingtonpost\.com/.*$' + url_regex: '^https?://(?:www\.)?pm\.gc\.ca/.*$' behavior_js_template: umbraBehavior.js.j2 default_parameters: actions: - - selector: .slideshow - do: click - - selector: .slideshow-overlay__container__left__nav__next - do: click + - selector: li.pager__item a + limit: 4 + - selector: div.teaser - url_regex: '^https?://(?:www\.)?huffingtonpost\.com/.*$' behavior_js_template: huffpostslides.js diff --git a/brozzler/js-templates/umbraBehavior.js.j2 b/brozzler/js-templates/umbraBehavior.js.j2 index cfe39c8..d4e1320 100644 --- a/brozzler/js-templates/umbraBehavior.js.j2 +++ b/brozzler/js-templates/umbraBehavior.js.j2 @@ -22,18 +22,23 @@ var umbraBehavior = { alreadyDone : [], idleSince : null, intervalId : null, + actions : {{actions|json}}, intervalFunc: function() { var didSomething = false; var somethingLeftBelow = false; var somethingLeftAbove = false; - - var actions = {{actions|json}}; - var actionsLength = actions.length; + var actionsLength = this.actions.length; for (var k = 0; k < actionsLength; k++) { - var selector = actions[k].selector; - var action = actions[k].do; + var selector = this.actions[k].selector; + var action = this.actions[k].do ? 
this.actions[k].do : 'click'; + if (this.actions[k].limit && this.actions[k].alreadyDone && this.actions[k].alreadyDone.length >= this.actions[k].limit) { + continue; + } + if (this.actions[k].limit && !(this.actions[k].alreadyDone)) { + this.actions[k].alreadyDone = []; + } var iframes = document.querySelectorAll("iframe"); var documents = Array(iframes.length + 1); @@ -47,20 +52,33 @@ var umbraBehavior = { documentsLength = documents.length; for (var j = 0; j < documentsLength; j++) { + if (this.actions[k].limit && this.actions[k].alreadyDone && this.actions[k].alreadyDone.length >= this.actions[k].limit) { + break; + } var doTargets = documents[j].querySelectorAll(selector); + if (doTargets == []) { + continue; + } doTargetsLength = doTargets.length; for ( var i = 0; i < doTargetsLength; i++) { + if (this.actions[k].limit && this.actions[k].alreadyDone && this.actions[k].alreadyDone.length >= this.actions[k].limit) { + break; + } if (this.alreadyDone.indexOf(doTargets[i]) > -1) { continue; } if (!this.isVisible(doTargets[i])) { continue; } + var where = this.aboveBelowOrOnScreen(doTargets[i]); if (where == 0) { this.doTarget(doTargets[i], action); didSomething = true; + if (this.actions[k].alreadyDone) { + this.actions[k].alreadyDone.push(doTargets[i]); + } } else if (where > 0) { somethingLeftBelow = true; } else if (where < 0) { @@ -90,7 +108,7 @@ var umbraBehavior = { } } - if (!idleSince) { + if (!this.idleSince) { this.idleSince = Date.now(); } } From 85249928409754a806428d5ddd7bbe22a7cfc5b0 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Sat, 15 Jul 2017 14:26:35 -0700 Subject: [PATCH 3/3] var limit --- brozzler/js-templates/umbraBehavior.js.j2 | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/brozzler/js-templates/umbraBehavior.js.j2 b/brozzler/js-templates/umbraBehavior.js.j2 index d4e1320..1cbcb08 100644 --- a/brozzler/js-templates/umbraBehavior.js.j2 +++ b/brozzler/js-templates/umbraBehavior.js.j2 @@ -33,10 +33,11 @@ var 
umbraBehavior = { for (var k = 0; k < actionsLength; k++) { var selector = this.actions[k].selector; var action = this.actions[k].do ? this.actions[k].do : 'click'; - if (this.actions[k].limit && this.actions[k].alreadyDone && this.actions[k].alreadyDone.length >= this.actions[k].limit) { + var limit = this.actions[k].limit ? this.actions[k].limit : 0; + if (limit && this.actions[k].alreadyDone && this.actions[k].alreadyDone.length >= limit) { continue; } - if (this.actions[k].limit && !(this.actions[k].alreadyDone)) { + if (limit && !(this.actions[k].alreadyDone)) { this.actions[k].alreadyDone = []; } @@ -52,9 +53,6 @@ var umbraBehavior = { documentsLength = documents.length; for (var j = 0; j < documentsLength; j++) { - if (this.actions[k].limit && this.actions[k].alreadyDone && this.actions[k].alreadyDone.length >= this.actions[k].limit) { - break; - } var doTargets = documents[j].querySelectorAll(selector); if (doTargets == []) { continue; @@ -62,7 +60,7 @@ var umbraBehavior = { doTargetsLength = doTargets.length; for ( var i = 0; i < doTargetsLength; i++) { - if (this.actions[k].limit && this.actions[k].alreadyDone && this.actions[k].alreadyDone.length >= this.actions[k].limit) { + if (limit && this.actions[k].alreadyDone && this.actions[k].alreadyDone.length >= limit) { break; } if (this.alreadyDone.indexOf(doTargets[i]) > -1) {