From 6c21a9f77319b9dbe00e230f3aa7ee0b702bd019 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Sun, 9 Dec 2018 14:25:59 -0800 Subject: [PATCH 1/3] iframe option and other instagram updates --- brozzler/behaviors.yaml | 8 +- brozzler/js-templates/umbraBehavior18q4.js.j2 | 177 ++++++++++++++++++ 2 files changed, 182 insertions(+), 3 deletions(-) create mode 100644 brozzler/js-templates/umbraBehavior18q4.js.j2 diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml index 28cceba..6c46c7a 100644 --- a/brozzler/behaviors.yaml +++ b/brozzler/behaviors.yaml @@ -23,11 +23,13 @@ request_idle_timeout_sec: 30 - url_regex: '^https?://(?:www\.)?instagram\.com/.*$' - behavior_js_template: umbraBehavior.js.j2 + behavior_js_template: umbraBehavior18q4.js.j2 default_parameters: + interval: 500 + iframes: false actions: - - selector: a.coreSpriteDismissLarge - - selector: a>div[role='button'] + - selector: button.coreSpriteDismissLarge + - selector: 'a>.eLAPa>.KL4Bh' firstMatchOnly: true - selector: a.coreSpriteRightPaginationArrow repeatSameElement: true diff --git a/brozzler/js-templates/umbraBehavior18q4.js.j2 b/brozzler/js-templates/umbraBehavior18q4.js.j2 new file mode 100644 index 0000000..100b5b7 --- /dev/null +++ b/brozzler/js-templates/umbraBehavior18q4.js.j2 @@ -0,0 +1,177 @@ +/* + * brozzler/js-templates/umbrabehavior.js.j2 - an umbra/brozzler behavior class + * + * Copyright (C) 2017-2018 Internet Archive + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +class UmbraBehavior { + + constructor(actions) { + this.IDLE_TIMEOUT_SEC = 10; + this.actions = actions; + this.alreadyDone = []; + this.idleSince = null; + this.intervalId = null; + this.intervalTimeMs = {{interval or 300}}; + this.doIframes = {{iframes or true}}; + this.index = 0; + } + + simpleIntervalFunc() { + // should match older default and simpleclicks behavior, and more + var k = this.index; + var selector = this.actions[k].selector; + var repeatSameElement = this.actions[k].repeatSameElement ? this.actions[k].repeatSameElement : false; + var firstMatchOnly = this.actions[k].firstMatchOnly ? this.actions[k].firstMatchOnly : false; + var action = this.actions[k].do ? this.actions[k].do : 'click'; + var closeSelector = this.actions[k].closeSelector ? this.actions[k].closeSelector : null; + var didSomething = false; + var somethingLeftAbove = false; + var somethingLeftBelow = false; + + var documents = []; + documents[0] = document; + if (this.doIframes) { + var iframes = document.querySelectorAll("iframe"); + var iframesLength = iframes.length; + for (var i = 0; i < iframesLength; i++) { + documents.push(iframes[i].contentWindow.document); + } + } + var documentsLength = documents.length; + for (var j = 0; j < documentsLength; j++) { + if (closeSelector) { + var closeTargets = documents[j].querySelectorAll(closeSelector); + if ((closeTargets.length > 0) && + (this.alreadyDone.indexOf(closeTargets[0]) === -1) && + (this.isVisible(closeTargets[0]))) { + doTarget(closeTargets[0], 'click'); + } + } + if (firstMatchOnly) { + var doTargets = [ documents[j].querySelector(selector) ]; + } else { + var doTargets = documents[j].querySelectorAll(selector); + } + var doTargetsLength = doTargets.length; + if (!(doTargetsLength > 0)) { + continue; + } + for ( var i = 0; i < doTargetsLength; i++) { + if (!repeatSameElement && this.alreadyDone.indexOf(doTargets[i]) > -1) { + continue; + } + if (!this.isVisible(doTargets[i])) { + continue; + } + var where = this.aboveBelowOrOnScreen(doTargets[i]); + if (where == 0) { + this.doTarget(doTargets[i], action); + didSomething = true; + break; + } else if (where > 0) { + somethingLeftBelow = true; + } else if (where < 0) { + somethingLeftAbove = true; + } + } + } + + if (!didSomething) { + if (somethingLeftAbove) { + window.scrollBy(0, -500); + this.idleSince = null; + } else if (somethingLeftBelow || ( (window.scrollY + window.innerHeight) < document.documentElement.scrollHeight)) { + window.scrollBy(0, 200); + this.idleSince = null; + } else if (this.idleSince == null) { + this.idleSince = Date.now(); + } + } + + if (!this.idleSince) { + this.idleSince = Date.now(); + } else { + var idleTimeMs = Date.now() - this.idleSince; + if ((idleTimeMs / 1000) > (this.IDLE_TIMEOUT_SEC - 1) && (this.index < (this.actions.length - 1))) { + console.log("ready for next action"); + this.index += 1; + this.idleSince = null; + window.scroll(0,0); + } + } + } + + aboveBelowOrOnScreen(elem) { + var eTop = elem.getBoundingClientRect().top; + if (eTop < window.scrollY) { + return -1; // above + } else if (eTop > window.scrollY + window.innerHeight) { + return 1; // below + } else { + return 0; // on screen + } + } + + isVisible(elem) { + return elem && !!(elem.offsetWidth || elem.offsetHeight || elem.getClientRects().length); + } + + doTarget(target, action) { + // console.log("doing " + action + target.outerHTML); + // do mouse over event on target + // since some urls are requsted only on + // this event - see + // https://webarchive.jira.com/browse/AITFIVE-451 + var mouseOverEvent = document.createEvent("Events"); + mouseOverEvent.initEvent("mouseover", true, false); + target.dispatchEvent(mouseOverEvent); + + if (action == "click") { + target.click(); + } // add new do's here! + + this.alreadyDone.push(target); + this.idleSince = null; + } + + start() { + var that = this; + this.intervalId = setInterval(function() { + that.simpleIntervalFunc() + }, this.intervalTimeMs); + } + + isFinished() { + if (this.idleSince != null) { + var idleTimeMs = Date.now() - this.idleSince; + if (idleTimeMs / 1000 > this.IDLE_TIMEOUT_SEC) { + clearInterval(this.intervalId); + return true; + } + } + return false; + } +} + +var umbraBehavior = new UmbraBehavior( {{actions|json}} ); + +// Called from outside of this script. +var umbraBehaviorFinished = function() { + return umbraBehavior.isFinished(); +}; + +umbraBehavior.start(); From 425d44bf4ac8eec097a0ce490e63dea94971d7de Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Thu, 13 Dec 2018 15:13:03 -0800 Subject: [PATCH 2/3] updates for jina2 --- brozzler/behaviors.yaml | 2 +- brozzler/js-templates/umbraBehavior18q4.js.j2 | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml index 6c46c7a..e29fab6 100644 --- a/brozzler/behaviors.yaml +++ b/brozzler/behaviors.yaml @@ -26,7 +26,7 @@ behavior_js_template: umbraBehavior18q4.js.j2 default_parameters: interval: 500 - iframes: false + skip_iframes: true actions: - selector: button.coreSpriteDismissLarge - selector: 'a>.eLAPa>.KL4Bh' diff --git a/brozzler/js-templates/umbraBehavior18q4.js.j2 b/brozzler/js-templates/umbraBehavior18q4.js.j2 index 100b5b7..7c24a13 100644 --- a/brozzler/js-templates/umbraBehavior18q4.js.j2 +++ b/brozzler/js-templates/umbraBehavior18q4.js.j2 @@ -26,7 +26,11 @@ class UmbraBehavior { this.idleSince = null; this.intervalId = null; this.intervalTimeMs = {{interval or 300}}; - this.doIframes = {{iframes or true}}; + {% if skip_iframes %} + this.skipIframes = true; + {% else %} + this.skipIframes = false; + {% endif %} this.index = 0; } @@ -44,7 +48,7 @@ class UmbraBehavior { var documents = []; documents[0] = document; - if (this.doIframes) { + if (!(this.skipIframes)) { var iframes = document.querySelectorAll("iframe"); var iframesLength = iframes.length; for (var i = 0; i < iframesLength; i++) { From 4a0d95277f9b078bd4783e94695c5e9958501d0c Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Mon, 17 Dec 2018 15:04:36 -0800 Subject: [PATCH 3/3] update umbraBehavior --- brozzler/behaviors.yaml | 2 +- brozzler/js-templates/umbraBehavior.js.j2 | 17 +- brozzler/js-templates/umbraBehavior18q4.js.j2 | 181 ------------------ 3 files changed, 13 insertions(+), 187 deletions(-) delete mode 100644 brozzler/js-templates/umbraBehavior18q4.js.j2 diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml index e29fab6..0a3d3c0 100644 --- a/brozzler/behaviors.yaml +++ b/brozzler/behaviors.yaml @@ -23,7 +23,7 @@ request_idle_timeout_sec: 30 - url_regex: '^https?://(?:www\.)?instagram\.com/.*$' - behavior_js_template: umbraBehavior18q4.js.j2 + behavior_js_template: umbraBehavior.js.j2 default_parameters: interval: 500 skip_iframes: true diff --git a/brozzler/js-templates/umbraBehavior.js.j2 b/brozzler/js-templates/umbraBehavior.js.j2 index d743c20..7c24a13 100644 --- a/brozzler/js-templates/umbraBehavior.js.j2 +++ b/brozzler/js-templates/umbraBehavior.js.j2 @@ -26,6 +26,11 @@ class UmbraBehavior { this.idleSince = null; this.intervalId = null; this.intervalTimeMs = {{interval or 300}}; + {% if skip_iframes %} + this.skipIframes = true; + {% else %} + this.skipIframes = false; + {% endif %} this.index = 0; } @@ -41,12 +46,14 @@ class UmbraBehavior { var somethingLeftAbove = false; var somethingLeftBelow = false; - var iframes = document.querySelectorAll("iframe"); - var iframesLength = iframes.length; - var documents = Array(iframesLength + 1); + var documents = []; documents[0] = document; - for (var i = 0; i < iframesLength; i++) { - documents[i+1] = iframes[i].contentWindow.document; + if (!(this.skipIframes)) { + var iframes = document.querySelectorAll("iframe"); + var iframesLength = iframes.length; + for (var i = 0; i < iframesLength; i++) { + documents.push(iframes[i].contentWindow.document); + } } var documentsLength = documents.length; for (var j = 0; j < documentsLength; j++) { diff --git a/brozzler/js-templates/umbraBehavior18q4.js.j2 b/brozzler/js-templates/umbraBehavior18q4.js.j2 deleted file mode 100644 index 7c24a13..0000000 --- a/brozzler/js-templates/umbraBehavior18q4.js.j2 +++ /dev/null @@ -1,181 +0,0 @@ -/* - * brozzler/js-templates/umbrabehavior.js.j2 - an umbra/brozzler behavior class - * - * Copyright (C) 2017-2018 Internet Archive - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -class UmbraBehavior { - - constructor(actions) { - this.IDLE_TIMEOUT_SEC = 10; - this.actions = actions; - this.alreadyDone = []; - this.idleSince = null; - this.intervalId = null; - this.intervalTimeMs = {{interval or 300}}; - {% if skip_iframes %} - this.skipIframes = true; - {% else %} - this.skipIframes = false; - {% endif %} - this.index = 0; - } - - simpleIntervalFunc() { - // should match older default and simpleclicks behavior, and more - var k = this.index; - var selector = this.actions[k].selector; - var repeatSameElement = this.actions[k].repeatSameElement ? this.actions[k].repeatSameElement : false; - var firstMatchOnly = this.actions[k].firstMatchOnly ? this.actions[k].firstMatchOnly : false; - var action = this.actions[k].do ? this.actions[k].do : 'click'; - var closeSelector = this.actions[k].closeSelector ? this.actions[k].closeSelector : null; - var didSomething = false; - var somethingLeftAbove = false; - var somethingLeftBelow = false; - - var documents = []; - documents[0] = document; - if (!(this.skipIframes)) { - var iframes = document.querySelectorAll("iframe"); - var iframesLength = iframes.length; - for (var i = 0; i < iframesLength; i++) { - documents.push(iframes[i].contentWindow.document); - } - } - var documentsLength = documents.length; - for (var j = 0; j < documentsLength; j++) { - if (closeSelector) { - var closeTargets = documents[j].querySelectorAll(closeSelector); - if ((closeTargets.length > 0) && - (this.alreadyDone.indexOf(closeTargets[0]) === -1) && - (this.isVisible(closeTargets[0]))) { - doTarget(closeTargets[0], 'click'); - } - } - if (firstMatchOnly) { - var doTargets = [ documents[j].querySelector(selector) ]; - } else { - var doTargets = documents[j].querySelectorAll(selector); - } - var doTargetsLength = doTargets.length; - if (!(doTargetsLength > 0)) { - continue; - } - for ( var i = 0; i < doTargetsLength; i++) { - if (!repeatSameElement && this.alreadyDone.indexOf(doTargets[i]) > -1) { - continue; - } - if (!this.isVisible(doTargets[i])) { - continue; - } - var where = this.aboveBelowOrOnScreen(doTargets[i]); - if (where == 0) { - this.doTarget(doTargets[i], action); - didSomething = true; - break; - } else if (where > 0) { - somethingLeftBelow = true; - } else if (where < 0) { - somethingLeftAbove = true; - } - } - } - - if (!didSomething) { - if (somethingLeftAbove) { - window.scrollBy(0, -500); - this.idleSince = null; - } else if (somethingLeftBelow || ( (window.scrollY + window.innerHeight) < document.documentElement.scrollHeight)) { - window.scrollBy(0, 200); - this.idleSince = null; - } else if (this.idleSince == null) { - this.idleSince = Date.now(); - } - } - - if (!this.idleSince) { - this.idleSince = Date.now(); - } else { - var idleTimeMs = Date.now() - this.idleSince; - if ((idleTimeMs / 1000) > (this.IDLE_TIMEOUT_SEC - 1) && (this.index < (this.actions.length - 1))) { - console.log("ready for next action"); - this.index += 1; - this.idleSince = null; - window.scroll(0,0); - } - } - } - - aboveBelowOrOnScreen(elem) { - var eTop = elem.getBoundingClientRect().top; - if (eTop < window.scrollY) { - return -1; // above - } else if (eTop > window.scrollY + window.innerHeight) { - return 1; // below - } else { - return 0; // on screen - } - } - - isVisible(elem) { - return elem && !!(elem.offsetWidth || elem.offsetHeight || elem.getClientRects().length); - } - - doTarget(target, action) { - // console.log("doing " + action + target.outerHTML); - // do mouse over event on target - // since some urls are requsted only on - // this event - see - // https://webarchive.jira.com/browse/AITFIVE-451 - var mouseOverEvent = document.createEvent("Events"); - mouseOverEvent.initEvent("mouseover", true, false); - target.dispatchEvent(mouseOverEvent); - - if (action == "click") { - target.click(); - } // add new do's here! - - this.alreadyDone.push(target); - this.idleSince = null; - } - - start() { - var that = this; - this.intervalId = setInterval(function() { - that.simpleIntervalFunc() - }, this.intervalTimeMs); - } - - isFinished() { - if (this.idleSince != null) { - var idleTimeMs = Date.now() - this.idleSince; - if (idleTimeMs / 1000 > this.IDLE_TIMEOUT_SEC) { - clearInterval(this.intervalId); - return true; - } - } - return false; - } -} - -var umbraBehavior = new UmbraBehavior( {{actions|json}} ); - -// Called from outside of this script. -var umbraBehaviorFinished = function() { - return umbraBehavior.isFinished(); -}; - -umbraBehavior.start();