From 97904a0bdf4b9075d6074f00d4b9297f0569f7d5 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Wed, 5 Jul 2017 18:46:18 -0700 Subject: [PATCH 1/4] umbraBehavior refactor initial commit --- brozzler/behaviors.yaml | 16 +++ brozzler/js-templates/umbraBehavior.js.j2 | 158 ++++++++++++++++++++++ 2 files changed, 174 insertions(+) create mode 100644 brozzler/js-templates/umbraBehavior.js.j2 diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml index fb2e15b..76f0ce2 100644 --- a/brozzler/behaviors.yaml +++ b/brozzler/behaviors.yaml @@ -37,6 +37,15 @@ url_regex: '^https?://(?:www\.)?instagram\.com/.*$' behavior_js_template: instagram.js request_idle_timeout_sec: 10 +- + url_regex: '^https?://(?:www\.)?huffingtonpost\.com/.*$' + behavior_js_template: umbraBehavior.js.j2 + default_parameters: + actions: + - selector: .slideshow + do: click + - selector: .slideshow-overlay__container__left__nav__next + do: click - url_regex: '^https?://(?:www\.)?huffingtonpost\.com/.*$' behavior_js_template: huffpostslides.js @@ -108,6 +117,13 @@ url_regex: '^https?://(?:www\.)?fec.gov/data/.*$' behavior_js_template: fec_gov.js request_idle_timeout_sec: 10 +- url_regex: '^https?://(?:www\.)?news\.com\.au/.*$' + behavior_js_template: umbraBehavior.js.j2 + default_parameters: + actions: + - selector: .menu-item a + do: mouseover + do_until_hard_timeout: False - url_regex: '^https?://(?:www\.)?news\.com\.au/.*$' behavior_js_template: mouseovers.js.j2 default_parameters: diff --git a/brozzler/js-templates/umbraBehavior.js.j2 b/brozzler/js-templates/umbraBehavior.js.j2 new file mode 100644 index 0000000..cfe39c8 --- /dev/null +++ b/brozzler/js-templates/umbraBehavior.js.j2 @@ -0,0 +1,158 @@ +/* + * brozzler/js-templates/umbrabehavior.js.j2 - a library for umbra/brozzler behaviors + * + * Copyright (C) 2017 Internet Archive + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +var umbraBehavior = { + IDLE_TIMEOUT_SEC : 10, + + alreadyDone : [], + idleSince : null, + intervalId : null, + + intervalFunc: function() { + var didSomething = false; + var somethingLeftBelow = false; + var somethingLeftAbove = false; + + var actions = {{actions|json}}; + var actionsLength = actions.length; + + for (var k = 0; k < actionsLength; k++) { + var selector = actions[k].selector; + var action = actions[k].do; + + var iframes = document.querySelectorAll("iframe"); + var documents = Array(iframes.length + 1); + documents[0] = document; + + iframesLength = iframes.length; + for (var i = 0; i < iframesLength; i++) { + documents[i+1] = iframes[i].contentWindow.document; + } + + documentsLength = documents.length; + for (var j = 0; j < documentsLength; j++) { + + var doTargets = documents[j].querySelectorAll(selector); + + doTargetsLength = doTargets.length; + for ( var i = 0; i < doTargetsLength; i++) { + if (this.alreadyDone.indexOf(doTargets[i]) > -1) { + continue; + } + if (!this.isVisible(doTargets[i])) { + continue; + } + var where = this.aboveBelowOrOnScreen(doTargets[i]); + if (where == 0) { + this.doTarget(doTargets[i], action); + didSomething = true; + } else if (where > 0) { + somethingLeftBelow = true; + } else if (where < 0) { + somethingLeftAbove = true; + } + if (didSomething) { + break; // break from doTargets loop, but not from documents loop + } + } + } + + if (!didSomething) { + if (somethingLeftAbove) { + // console.log("scrolling UP because everything on this screen has been done but we missed something above"); + 
+ window.scrollBy(0, -500); + this.idleSince = null; + } else if (somethingLeftBelow) { + // console.log("scrolling because everything on this screen has been done but there's more below"); + window.scrollBy(0, 200); + this.idleSince = null; + } else if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) { + // console.log("scrolling because we're not to the bottom yet"); + window.scrollBy(0, 200); + this.idleSince = null; + } else if (this.idleSince == null) { + this.idleSince = Date.now(); + } + } + + if (!idleSince) { + this.idleSince = Date.now(); + } + } + }, + + start : function() { + var that = this; + this.intervalId = setInterval(function() { + that.intervalFunc() + }, 250); + }, + + isFinished : function() { + if (this.idleSince != null) { + var idleTimeMs = Date.now() - this.idleSince; + if (idleTimeMs / 1000 > this.IDLE_TIMEOUT_SEC) { + clearInterval(this.intervalId); + return true; + } + } + return false; + }, + + aboveBelowOrOnScreen : function(elem) { + var eTop = elem.getBoundingClientRect().top; + if (eTop < window.scrollY) { + return -1; // above + } else if (eTop > window.scrollY + window.innerHeight) { + return 1; // below + } else { + return 0; // on screen + } + }, + + isVisible : function(elem) { + return !!(elem.offsetWidth || elem.offsetHeight || elem.getClientRects().length); + }, + + doTarget : function(target, action) { + // console.log("doing " + action + target.outerHTML); + // do mouse over event on target + // since some urls are requested only on + // this event - see + // https://webarchive.jira.com/browse/AITFIVE-451 + var mouseOverEvent = document.createEvent("Events"); + mouseOverEvent.initEvent("mouseover", true, false); + target.dispatchEvent(mouseOverEvent); + + if (action == "click") { + target.click(); + } // add new do's here! + + this.alreadyDone.push(target); + this.idleSince = null; + }, + +// end umbraBehavior definition +}; + +// Called from outside of this script. 
+var umbraBehaviorFinished = function() { + return umbraBehavior.isFinished(); +}; + +umbraBehavior.start(); From 5f0bb7b5836464f6b562ba06d9149db325a77d7a Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Wed, 12 Jul 2017 20:00:02 -0700 Subject: [PATCH 2/4] add limits --- brozzler/behaviors.yaml | 9 ++- brozzler/js-templates/umbraBehavior.js.j2 | 73 ++++++++++++++--------- 2 files changed, 48 insertions(+), 34 deletions(-) diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml index 76f0ce2..16d9070 100644 --- a/brozzler/behaviors.yaml +++ b/brozzler/behaviors.yaml @@ -38,14 +38,13 @@ behavior_js_template: instagram.js request_idle_timeout_sec: 10 - - url_regex: '^https?://(?:www\.)?huffingtonpost\.com/.*$' + url_regex: '^https?://(?:www\.)?pm\.gc\.ca/.*$' behavior_js_template: umbraBehavior.js.j2 default_parameters: actions: - - selector: .slideshow - do: click - - selector: .slideshow-overlay__container__left__nav__next - do: click + - selector: li.pager__item a + limit: 4 + - selector: div.teaser - url_regex: '^https?://(?:www\.)?huffingtonpost\.com/.*$' behavior_js_template: huffpostslides.js diff --git a/brozzler/js-templates/umbraBehavior.js.j2 b/brozzler/js-templates/umbraBehavior.js.j2 index cfe39c8..c3716ae 100644 --- a/brozzler/js-templates/umbraBehavior.js.j2 +++ b/brozzler/js-templates/umbraBehavior.js.j2 @@ -22,18 +22,24 @@ var umbraBehavior = { alreadyDone : [], idleSince : null, intervalId : null, + actions : {{actions|json}}, intervalFunc: function() { - var didSomething = false; - var somethingLeftBelow = false; - var somethingLeftAbove = false; - - var actions = {{actions|json}}; - var actionsLength = actions.length; - + var actionsLength = this.actions.length; for (var k = 0; k < actionsLength; k++) { - var selector = actions[k].selector; - var action = actions[k].do; + var didSomething = false; + var somethingLeftBelow = false; + var somethingLeftAbove = false; + + var selector = this.actions[k].selector; + var action = 
this.actions[k].do ? this.actions[k].do : 'click'; + var limit = this.actions[k].limit ? this.actions[k].limit : 0; + if (limit && this.actions[k].alreadyDone && this.actions[k].alreadyDone.length >= limit) { + continue; + } + if (limit && !(this.actions[k].alreadyDone)) { + this.actions[k].alreadyDone = []; + } var iframes = document.querySelectorAll("iframe"); var documents = Array(iframes.length + 1); @@ -48,49 +54,58 @@ var umbraBehavior = { for (var j = 0; j < documentsLength; j++) { var doTargets = documents[j].querySelectorAll(selector); + if (doTargets == []) { + continue; + } doTargetsLength = doTargets.length; for ( var i = 0; i < doTargetsLength; i++) { + if (limit && this.actions[k].alreadyDone && this.actions[k].alreadyDone.length >= limit) { + break; + } if (this.alreadyDone.indexOf(doTargets[i]) > -1) { continue; } if (!this.isVisible(doTargets[i])) { continue; } + var where = this.aboveBelowOrOnScreen(doTargets[i]); if (where == 0) { this.doTarget(doTargets[i], action); didSomething = true; + if (this.actions[k].alreadyDone) { + this.actions[k].alreadyDone.push(doTargets[i]); + } } else if (where > 0) { somethingLeftBelow = true; } else if (where < 0) { somethingLeftAbove = true; } + if (didSomething) { break; // break from doTargets loop, but not from documents loop } + + if (somethingLeftAbove) { + // console.log("scrolling UP because everything on this screen has been done but we missed something above"); + window.scrollBy(0, -500); + this.idleSince = null; + } else if (somethingLeftBelow) { + // console.log("scrolling because everything on this screen has been done but there's more below"); + window.scrollBy(0, 200); + this.idleSince = null; + } else if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) { + // console.log("scrolling because we're not to the bottom yet"); + window.scrollBy(0, 200); + this.idleSince = null; + } else if (this.idleSince == null) { + this.idleSince = Date.now(); + } } } - if (!didSomething) 
{ - if (somethingLeftAbove) { - // console.log("scrolling UP because everything on this screen has been done but we missed something above"); - window.scrollBy(0, -500); - this.idleSince = null; - } else if (somethingLeftBelow) { - // console.log("scrolling because everything on this screen has been done but there's more below"); - window.scrollBy(0, 200); - this.idleSince = null; - } else if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) { - // console.log("scrolling because we're not to the bottom yet"); - window.scrollBy(0, 200); - this.idleSince = null; - } else if (this.idleSince == null) { - this.idleSince = Date.now(); - } - } - - if (!idleSince) { + if (!this.idleSince) { this.idleSince = Date.now(); } } @@ -100,7 +115,7 @@ var umbraBehavior = { var that = this; this.intervalId = setInterval(function() { that.intervalFunc() - }, 250); + }, 1000); }, isFinished : function() { From 0a52f7fe848361d75b360bdcd499164463a93dff Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Fri, 1 Sep 2017 18:51:35 -0700 Subject: [PATCH 3/4] first support 'simple' behaviors --- brozzler/behaviors.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml index 16d9070..0b02e45 100644 --- a/brozzler/behaviors.yaml +++ b/brozzler/behaviors.yaml @@ -42,9 +42,7 @@ behavior_js_template: umbraBehavior.js.j2 default_parameters: actions: - - selector: li.pager__item a - limit: 4 - - selector: div.teaser + - selector: div.teaser, li.pager__item a - url_regex: '^https?://(?:www\.)?huffingtonpost\.com/.*$' behavior_js_template: huffpostslides.js @@ -122,7 +120,6 @@ actions: - selector: .menu-item a do: mouseover - do_until_hard_timeout: False - url_regex: '^https?://(?:www\.)?news\.com\.au/.*$' behavior_js_template: mouseovers.js.j2 default_parameters: From 03e14e3db8c0b3a90aabd0a70a382eae88ee5a1a Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Fri, 1 Sep 2017 18:52:26 -0700 Subject: 
[PATCH 4/4] checking state: first support only 'simple' behaviors --- brozzler/js-templates/umbraBehavior.js.j2 | 135 +++++++++++----------- 1 file changed, 69 insertions(+), 66 deletions(-) diff --git a/brozzler/js-templates/umbraBehavior.js.j2 b/brozzler/js-templates/umbraBehavior.js.j2 index c3716ae..ffe9d22 100644 --- a/brozzler/js-templates/umbraBehavior.js.j2 +++ b/brozzler/js-templates/umbraBehavior.js.j2 @@ -22,15 +22,15 @@ var umbraBehavior = { alreadyDone : [], idleSince : null, intervalId : null, + state : null, + actions : {{actions|json}}, intervalFunc: function() { - var actionsLength = this.actions.length; - for (var k = 0; k < actionsLength; k++) { - var didSomething = false; - var somethingLeftBelow = false; - var somethingLeftAbove = false; - + if (!this.state) { + this.state = this.actions.length === 1 ? "simple" : "fancy"; + } + for (var k = 0; k < this.actions.length; k++) { var selector = this.actions[k].selector; var action = this.actions[k].do ? this.actions[k].do : 'click'; var limit = this.actions[k].limit ? 
this.actions[k].limit : 0; @@ -41,94 +41,80 @@ var umbraBehavior = { this.actions[k].alreadyDone = []; } - var iframes = document.querySelectorAll("iframe"); - var documents = Array(iframes.length + 1); - documents[0] = document; + if (this.state === "simple") { + var didSomething = false; + var somethingLeftBelow = false; + var somethingLeftAbove = false; - iframesLength = iframes.length; - for (var i = 0; i < iframesLength; i++) { - documents[i+1] = iframes[i].contentWindow.document; - } + var iframes = document.querySelectorAll("iframe"); + var documents = Array(iframes.length + 1); + documents[0] = document; - documentsLength = documents.length; - for (var j = 0; j < documentsLength; j++) { - - var doTargets = documents[j].querySelectorAll(selector); - if (doTargets == []) { - continue; + iframesLength = iframes.length; + for (var i = 0; i < iframesLength; i++) { + documents[i+1] = iframes[i].contentWindow.document; } - doTargetsLength = doTargets.length; - for ( var i = 0; i < doTargetsLength; i++) { - if (limit && this.actions[k].alreadyDone && this.actions[k].alreadyDone.length >= limit) { - break; - } - if (this.alreadyDone.indexOf(doTargets[i]) > -1) { - continue; - } - if (!this.isVisible(doTargets[i])) { + documentsLength = documents.length; + for (var j = 0; j < documentsLength; j++) { + + var doTargets = documents[j].querySelectorAll(selector); + if (doTargets == []) { continue; } - var where = this.aboveBelowOrOnScreen(doTargets[i]); - if (where == 0) { - this.doTarget(doTargets[i], action); - didSomething = true; - if (this.actions[k].alreadyDone) { - this.actions[k].alreadyDone.push(doTargets[i]); + doTargetsLength = doTargets.length; + for ( var i = 0; i < doTargetsLength; i++) { + if (limit && this.actions[k].alreadyDone && this.actions[k].alreadyDone.length >= limit) { + break; + } + if (this.alreadyDone.indexOf(doTargets[i]) > -1) { + continue; + } + if (!this.isVisible(doTargets[i])) { + continue; + } + var where = 
this.aboveBelowOrOnScreen(doTargets[i]); + if (where == 0) { + this.doTarget(doTargets[i], action); + if (this.actions[k].alreadyDone) { + this.actions[k].alreadyDone.push(doTargets[i]); + } + didSomething = true; + break; // break from doTargets loop, but not from documents loop + } else if (where > 0) { + somethingLeftBelow = true; + } else if (where < 0) { + somethingLeftAbove = true; } - } else if (where > 0) { - somethingLeftBelow = true; - } else if (where < 0) { - somethingLeftAbove = true; } - - if (didSomething) { - break; // break from doTargets loop, but not from documents loop - } - + } + if (!didSomething) { if (somethingLeftAbove) { - // console.log("scrolling UP because everything on this screen has been done but we missed something above"); + // console.log("scrolling UP because everything on this screen has been clicked but we missed something above"); window.scrollBy(0, -500); this.idleSince = null; } else if (somethingLeftBelow) { - // console.log("scrolling because everything on this screen has been done but there's more below"); + // console.log("scrolling because everything on this screen has been clicked but there's more below document.body.clientHeight=" + // + document.body.clientHeight); window.scrollBy(0, 200); this.idleSince = null; } else if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) { - // console.log("scrolling because we're not to the bottom yet"); + // console.log("scrolling because we're not to the bottom yet document.body.clientHeight=" + // + document.body.clientHeight); window.scrollBy(0, 200); this.idleSince = null; - } else if (this.idleSince == null) { + } else if (this.idleSince == null) { this.idleSince = Date.now(); } } } - if (!this.idleSince) { this.idleSince = Date.now(); } } }, - start : function() { - var that = this; - this.intervalId = setInterval(function() { - that.intervalFunc() - }, 1000); - }, - - isFinished : function() { - if (this.idleSince != null) { - var idleTimeMs = 
Date.now() - this.idleSince; - if (idleTimeMs / 1000 > this.IDLE_TIMEOUT_SEC) { - clearInterval(this.intervalId); - return true; - } - } - return false; - }, - aboveBelowOrOnScreen : function(elem) { var eTop = elem.getBoundingClientRect().top; if (eTop < window.scrollY) { @@ -162,6 +148,23 @@ var umbraBehavior = { this.idleSince = null; }, + start : function() { + var that = this; + this.intervalId = setInterval(function() { + that.intervalFunc() + }, 500); + }, + + isFinished : function() { + if (this.idleSince != null) { + var idleTimeMs = Date.now() - this.idleSince; + if (idleTimeMs / 1000 > this.IDLE_TIMEOUT_SEC) { + clearInterval(this.intervalId); + return true; + } + } + return false; + }, // end umbraBehavior definition };