From 6c21a9f77319b9dbe00e230f3aa7ee0b702bd019 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Sun, 9 Dec 2018 14:25:59 -0800 Subject: [PATCH] iframe option and other instagram updates --- brozzler/behaviors.yaml | 8 +- brozzler/js-templates/umbraBehavior18q4.js.j2 | 177 ++++++++++++++++++ 2 files changed, 182 insertions(+), 3 deletions(-) create mode 100644 brozzler/js-templates/umbraBehavior18q4.js.j2 diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml index 28cceba..6c46c7a 100644 --- a/brozzler/behaviors.yaml +++ b/brozzler/behaviors.yaml @@ -23,11 +23,13 @@ request_idle_timeout_sec: 30 - url_regex: '^https?://(?:www\.)?instagram\.com/.*$' - behavior_js_template: umbraBehavior.js.j2 + behavior_js_template: umbraBehavior18q4.js.j2 default_parameters: + interval: 500 + iframes: false actions: - - selector: a.coreSpriteDismissLarge - - selector: a>div[role='button'] + - selector: button.coreSpriteDismissLarge + - selector: 'a>.eLAPa>.KL4Bh' firstMatchOnly: true - selector: a.coreSpriteRightPaginationArrow repeatSameElement: true diff --git a/brozzler/js-templates/umbraBehavior18q4.js.j2 b/brozzler/js-templates/umbraBehavior18q4.js.j2 new file mode 100644 index 0000000..100b5b7 --- /dev/null +++ b/brozzler/js-templates/umbraBehavior18q4.js.j2 @@ -0,0 +1,177 @@ +/* + * brozzler/js-templates/umbrabehavior.js.j2 - an umbra/brozzler behavior class + * + * Copyright (C) 2017-2018 Internet Archive + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +class UmbraBehavior { + + constructor(actions) { + this.IDLE_TIMEOUT_SEC = 10; + this.actions = actions; + this.alreadyDone = []; + this.idleSince = null; + this.intervalId = null; + this.intervalTimeMs = {{interval or 300}}; + this.doIframes = {{iframes or true}}; + this.index = 0; + } + + simpleIntervalFunc() { + // should match older default and simpleclicks behavior, and more + var k = this.index; + var selector = this.actions[k].selector; + var repeatSameElement = this.actions[k].repeatSameElement ? this.actions[k].repeatSameElement : false; + var firstMatchOnly = this.actions[k].firstMatchOnly ? this.actions[k].firstMatchOnly : false; + var action = this.actions[k].do ? this.actions[k].do : 'click'; + var closeSelector = this.actions[k].closeSelector ? this.actions[k].closeSelector : null; + var didSomething = false; + var somethingLeftAbove = false; + var somethingLeftBelow = false; + + var documents = []; + documents[0] = document; + if (this.doIframes) { + var iframes = document.querySelectorAll("iframe"); + var iframesLength = iframes.length; + for (var i = 0; i < iframesLength; i++) { + documents.push(iframes[i].contentWindow.document); + } + } + var documentsLength = documents.length; + for (var j = 0; j < documentsLength; j++) { + if (closeSelector) { + var closeTargets = documents[j].querySelectorAll(closeSelector); + if ((closeTargets.length > 0) && + (this.alreadyDone.indexOf(closeTargets[0]) === -1) && + (this.isVisible(closeTargets[0]))) { + doTarget(closeTargets[0], 'click'); + } + } + if (firstMatchOnly) { + var doTargets = [ documents[j].querySelector(selector) ]; + } else { + var doTargets = documents[j].querySelectorAll(selector); + } + var doTargetsLength = doTargets.length; + if (!(doTargetsLength > 0)) { + continue; + } + for ( var i = 0; i < doTargetsLength; i++) { + if (!repeatSameElement && this.alreadyDone.indexOf(doTargets[i]) > -1) { + continue; + } + if (!this.isVisible(doTargets[i])) { + continue; + } + var where = this.aboveBelowOrOnScreen(doTargets[i]); + if (where == 0) { + this.doTarget(doTargets[i], action); + didSomething = true; + break; + } else if (where > 0) { + somethingLeftBelow = true; + } else if (where < 0) { + somethingLeftAbove = true; + } + } + } + + if (!didSomething) { + if (somethingLeftAbove) { + window.scrollBy(0, -500); + this.idleSince = null; + } else if (somethingLeftBelow || ( (window.scrollY + window.innerHeight) < document.documentElement.scrollHeight)) { + window.scrollBy(0, 200); + this.idleSince = null; + } else if (this.idleSince == null) { + this.idleSince = Date.now(); + } + } + + if (!this.idleSince) { + this.idleSince = Date.now(); + } else { + var idleTimeMs = Date.now() - this.idleSince; + if ((idleTimeMs / 1000) > (this.IDLE_TIMEOUT_SEC - 1) && (this.index < (this.actions.length - 1))) { + console.log("ready for next action"); + this.index += 1; + this.idleSince = null; + window.scroll(0,0); + } + } + } + + aboveBelowOrOnScreen(elem) { + var eTop = elem.getBoundingClientRect().top; + if (eTop < window.scrollY) { + return -1; // above + } else if (eTop > window.scrollY + window.innerHeight) { + return 1; // below + } else { + return 0; // on screen + } + } + + isVisible(elem) { + return elem && !!(elem.offsetWidth || elem.offsetHeight || elem.getClientRects().length); + } + + doTarget(target, action) { + // console.log("doing " + action + target.outerHTML); + // do mouse over event on target + // since some urls are requsted only on + // this event - see + // https://webarchive.jira.com/browse/AITFIVE-451 + var mouseOverEvent = document.createEvent("Events"); + mouseOverEvent.initEvent("mouseover", true, false); + target.dispatchEvent(mouseOverEvent); + + if (action == "click") { + target.click(); + } // add new do's here! + + this.alreadyDone.push(target); + this.idleSince = null; + } + + start() { + var that = this; + this.intervalId = setInterval(function() { + that.simpleIntervalFunc() + }, this.intervalTimeMs); + } + + isFinished() { + if (this.idleSince != null) { + var idleTimeMs = Date.now() - this.idleSince; + if (idleTimeMs / 1000 > this.IDLE_TIMEOUT_SEC) { + clearInterval(this.intervalId); + return true; + } + } + return false; + } +} + +var umbraBehavior = new UmbraBehavior( {{actions|json}} ); + +// Called from outside of this script. +var umbraBehaviorFinished = function() { + return umbraBehavior.isFinished(); +}; + +umbraBehavior.start();