From f5ed91de6e7a4daade8f9f6d900d528e07faca5c Mon Sep 17 00:00:00 2001 From: Corentin Barreau Date: Fri, 27 Sep 2019 21:57:35 +0200 Subject: [PATCH 1/6] Replace facebook.js with behaviors.yaml --- brozzler/behaviors.yaml | 7 +- brozzler/js-templates/facebook.js | 194 ------------------------------ 2 files changed, 6 insertions(+), 195 deletions(-) delete mode 100644 brozzler/js-templates/facebook.js diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml index 7771ed0..46413ca 100644 --- a/brozzler/behaviors.yaml +++ b/brozzler/behaviors.yaml @@ -28,8 +28,13 @@ closeSelector: 'div._7lq1 > button' - url_regex: '^https?://(?:www\.)?facebook\.com/.*$' - behavior_js_template: facebook.js + behavior_js_template: umbraBehavior.js.j2 request_idle_timeout_sec: 30 + default_parameters: + interval: 500 + actions: + - selector: 'a[id="expanding_cta_close_button"], a[rel="theater"]' + closeSelector: 'a._xlt' - url_regex: '^https?://(?:www\.)?instagram\.com/.*$' behavior_js_template: umbraBehavior.js.j2 diff --git a/brozzler/js-templates/facebook.js b/brozzler/js-templates/facebook.js deleted file mode 100644 index c5c1770..0000000 --- a/brozzler/js-templates/facebook.js +++ /dev/null @@ -1,194 +0,0 @@ -/* - * brozzler/behaviors.d/facebook.js - facebook behavior, scrolls to the bottom - * of the page, clicks to expand images, a few other things - * - * Copyright (C) 2014-2016 Internet Archive - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -var umbraAboveBelowOrOnScreen = function(e) { - var eTop = e.getBoundingClientRect().top; - if (eTop < window.scrollY) { - return -1; // above - } else if (eTop > window.scrollY + window.innerHeight) { - // if (e.clientWidth != 0) { - // console.warn("e.clientWidth=" + e.clientWidth + " though it appears to be below the screen? e.getBoundingClientRect().top=" + eTop + " window.scrollY=" + window.scrollY + " window.innerHeight=" + window.innerHeight + " e=" + e); - // } - return 1; // below - } else { - // if (e.clientWidth != 0) { - // console.warn("e.clientWidth=" + e.clientWidth + " though it appears to be on screen? e.getBoundingClientRect().top=" + eTop + " window.scrollY=" + window.scrollY + " window.innerHeight=" + window.innerHeight + " e=" + e); - // } - return 0; // on screen - } -} - -// comments - 'a.UFIPagerLink > span, a.UFIPagerLink, span.UFIReplySocialSentenceLinkText' -var UMBRA_THINGS_TO_CLICK_SELECTOR = 'a[href^="/browse/likes"], *[rel="theater"]'; -//div[class="phm pluginLikeboxStream"] = facebook widget embedded in 3rd party pages -var UMBRA_THINGS_TO_SCROLL_SELECTOR = 'div[class="phm pluginLikeboxStream"]'; -var NUMBER_FAILED_SCROLL_ATTEMPTS_ON_THING_TO_SCROLL_BEFORE_STOP_SCROLLING = 5; -var umbraAlreadyClicked = {}; -var umbraAlreadyScrolledThing = {}; -var umbraScrolledThingFailedScrollAttempts = {}; -var umbraState = {'idleSince':null,'expectingSomething':null,'bottomReachedScrollY':0}; - -var umbraIntervalFunc = function() { - - var thingsToScroll = document.querySelectorAll(UMBRA_THINGS_TO_SCROLL_SELECTOR); - var everythingScrolled = true; - - for (var i = 0; i < thingsToScroll.length; i++) { - var target = thingsToScroll[i]; - - if (!(target in umbraAlreadyScrolledThing)) { - - everythingScrolled = false; - - console.log("scrolling to " + target.scrollHeight + " on element with nodeName " + target.nodeName + " with id of " + target.id); - var lastScrollTop = target.scrollTop; - target.scrollTop = target.scrollHeight; - - umbraState.idleSince = null; - - if (target.scrollTop >= target.scrollHeight) { - umbraAlreadyScrolledThing[target] = true; - } - else if (target.scrollTop == lastScrollTop) { - if (umbraScrolledThingFailedScrollAttempts[target]) { - umbraScrolledThingFailedScrollAttempts[target]++; - } - else { - umbraScrolledThingFailedScrollAttempts[target] = 1; - } - - if (umbraScrolledThingFailedScrollAttempts[target] >= NUMBER_FAILED_SCROLL_ATTEMPTS_ON_THING_TO_SCROLL_BEFORE_STOP_SCROLLING) { - umbraAlreadyScrolledThing[target] = true; - } - } - else { - //reset failed count on a successful scroll - umbraScrolledThingFailedScrollAttempts[target] = 0; - } - } - else { - console.log("done scrolling for element with nodeName " + target.nodeName + " with id of " + target.id) - } - - umbraState.expectingSomething = null; - } - - if (thingsToScroll && thingsToScroll.length > 0 && everythingScrolled) { - if (umbraState.idleSince == null) { - umbraState.idleSince = Date.now(); - } - - return; - } - - var closeButtons = document.querySelectorAll('a[title="Close"], a.closeTheater, a[aria-label="Press Esc to close"], div.fbPhotoSnowlift.fbxPhoto a._xlt'); - for (var i = 0; i < closeButtons.length; i++) { - // XXX closeTheater buttons stick around in the dom after closing, clientWidth>0 is one way to check if they're visible - if (closeButtons[i].clientWidth > 0) { - if (umbraState.expectingSomething == 'closeButton') { - console.log("found expected close button, clicking on it " + closeButtons[i].outerHTML); - umbraState.expectingSomething = null; - } else { - console.warn("found UNexpected close button, umbraState.expectingSomething=" + umbraState.expectingSomething + " ... clicking on it " + closeButtons[i].outerHTML); - } - closeButtons[i].click(); - return; - } - } - if (umbraState.expectingSomething == 'closeButton') { - console.log("waiting for close button, haven't seen it yet"); - return; - } - - var thingsToClick = document.querySelectorAll(UMBRA_THINGS_TO_CLICK_SELECTOR); - var clickedSomething = false; - var somethingLeftBelow = false; - var somethingLeftAbove = false; - var missedAbove = 0; - - for (var i = 0; i < thingsToClick.length; i++) { - var target = thingsToClick[i]; - if (!(target in umbraAlreadyClicked)) { - var where = umbraAboveBelowOrOnScreen(target); - if (where == 0) { // on screen - // var pos = target.getBoundingClientRect().top; - // window.scrollTo(0, target.getBoundingClientRect().top - 100); - console.log("clicking at " + target.getBoundingClientRect().top + " on " + target.outerHTML); - if (target.click != undefined) { - umbraState.expectingSomething = 'closeButton'; - target.click(); - } - target.style.border = '1px solid #0a0'; - umbraAlreadyClicked[target] = true; - clickedSomething = true; - umbraState.idleSince = null; - break; - } else if (where > 0) { - somethingLeftBelow = true; - } else if (where < 0) { - somethingLeftAbove = true; - } - } - } - - if (window.scrollY > umbraState.bottomReachedScrollY) { - umbraState.bottomReachedScrollY = window.scrollY; - } - - if (!clickedSomething) { - if (somethingLeftBelow) { - // console.log("scrolling down because everything on this screen has been clicked but there's more below document.body.clientHeight=" + document.body.clientHeight); - window.scrollBy(0, 300); - umbraState.idleSince = null; - } else if (umbraState.bottomReachedScrollY + window.innerHeight < document.documentElement.scrollHeight) { - // console.log("scrolling down because we haven't reached the bottom yet document.body.clientHeight=" + document.body.clientHeight); - window.scrollBy(0, 300); - umbraState.idleSince = null; - } else if (somethingLeftAbove) { - // console.log("scrolling UP because we've already been to the bottom, everything on or below this screen has been clicked, but we missed something above"); - window.scrollBy(0, -600); - umbraState.idleSince = null; - } else if (umbraState.idleSince == null) { - umbraState.idleSince = Date.now(); - } - } -} - -// If we haven't had anything to do (scrolled, clicked, etc) in this amount of -// time, then we consider ourselves finished with the page. -var UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC = 10; - -// Called from outside of this script. -var umbraBehaviorFinished = function() { - - if (umbraState.idleSince != null) { - var idleTimeMs = Date.now() - umbraState.idleSince; - if (idleTimeMs / 1000 > UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC) { - clearInterval(umbraIntervalId); - return true; - } - } - return false; -} - -if (document.querySelector("div.captcha_interstitial") != null) { // found a captcha - console.log("captcha found for " + location.href); -} - -var umbraIntervalId = setInterval(umbraIntervalFunc, 200); From 473fd9e3936be5b179bf7a0b6091bef91fade0c0 Mon Sep 17 00:00:00 2001 From: Corentin Barreau Date: Thu, 31 Oct 2019 23:02:59 +0100 Subject: [PATCH 2/6] Change interval speed --- brozzler/js-templates/umbraBehavior.js.j2 | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/brozzler/js-templates/umbraBehavior.js.j2 b/brozzler/js-templates/umbraBehavior.js.j2 index 7931a62..50ae065 100644 --- a/brozzler/js-templates/umbraBehavior.js.j2 +++ b/brozzler/js-templates/umbraBehavior.js.j2 @@ -80,13 +80,13 @@ class UmbraBehavior { continue; } - for ( var i = 0; i < doTargetsLength; i++) { + for (var i = 0; i < doTargetsLength; i++) { if (!repeatSameElement && this.alreadyDone.indexOf(doTargets[i]) > -1) { continue; } - if (!this.isVisible(doTargets[i])) { - continue; - } + //if (!this.isVisible(doTargets[i])) { + // continue; + //} var where = this.aboveBelowOrOnScreen(doTargets[i]); if (where == 0) { this.doTarget(doTargets[i], action); @@ -101,10 +101,7 @@ class UmbraBehavior { } if (!didSomething) { - if (somethingLeftAbove) { - window.scrollBy(0, -500); - this.idleSince = null; - } else if (somethingLeftBelow || ( (window.scrollY + window.innerHeight) < document.documentElement.scrollHeight)) { + if (somethingLeftBelow || ( (window.scrollY + window.innerHeight) < document.documentElement.scrollHeight)) { window.scrollBy(0, 200); this.idleSince = null; } else if (this.idleSince == null) { From c3e4597d1ad3b20ea20b1dc4cefc8de4f35da7a6 Mon Sep 17 00:00:00 2001 From: Corentin Barreau Date: Thu, 31 Oct 2019 23:04:50 +0100 Subject: [PATCH 3/6] Revert "Change interval speed" This reverts commit 473fd9e3936be5b179bf7a0b6091bef91fade0c0. --- brozzler/js-templates/umbraBehavior.js.j2 | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/brozzler/js-templates/umbraBehavior.js.j2 b/brozzler/js-templates/umbraBehavior.js.j2 index 50ae065..7931a62 100644 --- a/brozzler/js-templates/umbraBehavior.js.j2 +++ b/brozzler/js-templates/umbraBehavior.js.j2 @@ -80,13 +80,13 @@ class UmbraBehavior { continue; } - for (var i = 0; i < doTargetsLength; i++) { + for ( var i = 0; i < doTargetsLength; i++) { if (!repeatSameElement && this.alreadyDone.indexOf(doTargets[i]) > -1) { continue; } - //if (!this.isVisible(doTargets[i])) { - // continue; - //} + if (!this.isVisible(doTargets[i])) { + continue; + } var where = this.aboveBelowOrOnScreen(doTargets[i]); if (where == 0) { this.doTarget(doTargets[i], action); @@ -101,7 +101,10 @@ class UmbraBehavior { } if (!didSomething) { - if (somethingLeftBelow || ( (window.scrollY + window.innerHeight) < document.documentElement.scrollHeight)) { + if (somethingLeftAbove) { + window.scrollBy(0, -500); + this.idleSince = null; + } else if (somethingLeftBelow || ( (window.scrollY + window.innerHeight) < document.documentElement.scrollHeight)) { window.scrollBy(0, 200); this.idleSince = null; } else if (this.idleSince == null) { From 9b5472380228d53676675bb1a2f081cca069630d Mon Sep 17 00:00:00 2001 From: Corentin Barreau Date: Thu, 31 Oct 2019 23:05:54 +0100 Subject: [PATCH 4/6] Change interval speed --- brozzler/behaviors.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml index 46413ca..5297e2d 100644 --- a/brozzler/behaviors.yaml +++ b/brozzler/behaviors.yaml @@ -31,9 +31,9 @@ behavior_js_template: umbraBehavior.js.j2 request_idle_timeout_sec: 30 default_parameters: - interval: 500 + interval: 200 actions: - - selector: 'a[id="expanding_cta_close_button"], a[rel="theater"]' + - selector: a[rel="theater"], a[id="expanding_cta_close_button"] closeSelector: 'a._xlt' - url_regex: '^https?://(?:www\.)?instagram\.com/.*$' From 06fba51b7fb626c135ea4070b56702d4aaed3d17 Mon Sep 17 00:00:00 2001 From: Corentin Barreau Date: Wed, 6 Nov 2019 14:11:19 +0100 Subject: [PATCH 5/6] Restore 500ms interval speed --- brozzler/behaviors.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml index 5297e2d..8decab7 100644 --- a/brozzler/behaviors.yaml +++ b/brozzler/behaviors.yaml @@ -31,7 +31,7 @@ behavior_js_template: umbraBehavior.js.j2 request_idle_timeout_sec: 30 default_parameters: - interval: 200 + interval: 500 actions: - selector: a[rel="theater"], a[id="expanding_cta_close_button"] closeSelector: 'a._xlt' From 0c7e93c941c391853bde32ea3f03ce4471663ba9 Mon Sep 17 00:00:00 2001 From: Corentin Barreau Date: Sat, 16 Nov 2019 02:11:05 +0100 Subject: [PATCH 6/6] Remove custom interval --- brozzler/behaviors.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml index 8decab7..7e178ef 100644 --- a/brozzler/behaviors.yaml +++ b/brozzler/behaviors.yaml @@ -31,7 +31,6 @@ behavior_js_template: umbraBehavior.js.j2 request_idle_timeout_sec: 30 default_parameters: - interval: 500 actions: - selector: a[rel="theater"], a[id="expanding_cta_close_button"] closeSelector: 'a._xlt'