From 770ea6de1e4e63bdf6f82be9914e0c2c7c57ac4b Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Thu, 10 Jan 2019 15:54:47 -0800 Subject: [PATCH 1/3] no more simpleclicks/mouseovers --- brozzler/behaviors.yaml | 99 ++++++++++++++++++----------------------- 1 file changed, 43 insertions(+), 56 deletions(-) diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml index 646af5e..1ba1a92 100644 --- a/brozzler/behaviors.yaml +++ b/brozzler/behaviors.yaml @@ -72,11 +72,10 @@ request_idle_timeout_sec: 10 - url_regex: '^https?://(?:www\.)?brooklynmuseum\.org/exhibitions/.*$' - behavior_js_template: simpleclicks.js.j2 + behavior_js_template: umbraBehavior.js.j2 default_parameters: - click_css_selector: img.img-responsive - click_until_hard_timeout: False - request_idle_timeout_sec: 10 + actions: + - selector: img.img-responsive - # https://webarchive.jira.com/browse/ARI-5517 url_regex: '^https?://(?:www\.)?thejewishnews.com/.*$' behavior_js_template: umbraBehavior.js.j2 @@ -85,11 +84,10 @@ - selector: a#get_more - # acalog https://webarchive.jira.com/browse/ARI-3775 url_regex: '^https?://.*[?&]catoid=[^?]*$' - behavior_js_template: simpleclicks.js.j2 + behavior_js_template: umbraBehavior.js.j2 default_parameters: - click_css_selector: a[onclick] - click_until_hard_timeout: False - request_idle_timeout_sec: 10 + actions: + - selector: a[onclick] - # https://webarchive.jira.com/browse/ARI-5294 url_regex: '^https?://citymedfordwi\.civicweb\.net/.*$' behavior_js_template: umbraBehavior.js.j2 @@ -98,53 +96,46 @@ - selector: div.meeting-document-type-buttons button.button-small - # https://webarchive.jira.com/browse/ARI-5409 url_regex: '^https?://(?:www\.)?tuebingen.de/.*$' - behavior_js_template: simpleclicks.js.j2 + behavior_js_template: umbraBehavior.js.j2 default_parameters: - click_css_selector: a.kl - click_until_hard_timeout: False - request_idle_timeout_sec: 10 + actions: + - selector: a.kl - # https://webarchive.jira.com/browse/ARI-3956 url_regex: '^https?://(?:www\.)?usask.ca/.*$' - behavior_js_template: simpleclicks.js.j2 + behavior_js_template: umbraBehavior.js.j2 default_parameters: - click_css_selector: a[id='feature-next'] - click_until_hard_timeout: False - request_idle_timeout_sec: 10 + actions: + - selector: a[id='feature-next'] - # https://webarchive.jira.com/browse/AITFIVE-451 url_regex: '^https?://(?:www\.)?soundcloud.com/.*$' - behavior_js_template: simpleclicks.js.j2 + behavior_js_template: umbraBehavior.js.j2 default_parameters: - click_css_selector: button.sc-button-play, .playButton, div.compactTrackListItem - click_until_hard_timeout: False - request_idle_timeout_sec: 10 + actions: + - selector: button.sc-button-play, .playButton, div.compactTrackListItem - # https://webarchive.jira.com/browse/AITFIVE-463 url_regex: '^https?://(?:www\.)?christophercerrone.com/.*$' - behavior_js_template: simpleclicks.js.j2 + behavior_js_template: umbraBehavior.js.j2 default_parameters: - click_css_selector: button.playButton.medium - click_until_hard_timeout: False - request_idle_timeout_sec: 10 + actions: + - selector: button.playButton.medium - # https://webarchive.jira.com/browse/ARI-4690 url_regex: '^https?://(?:www\.)?youtube.com/.*$' - behavior_js_template: simpleclicks.js.j2 + behavior_js_template: umbraBehavior.js.j2 default_parameters: - click_css_selector: span.load-more-text - click_until_hard_timeout: False - request_idle_timeout_sec: 10 + actions: + - selector: span.load-more-text - # https://webarchive.jira.com/browse/ARI-5453 url_regex: '^https?://.*\.wixsite.com/.*$' - behavior_js_template: simpleclicks.js.j2 + behavior_js_template: umbraBehavior.js.j2 default_parameters: - click_css_selector: .ddm1repeaterButtonlabel - click_until_hard_timeout: False - request_idle_timeout_sec: 10 + actions: + - selector: .ddm1repeaterButtonlabel - # https://webarchive.jira.com/browse/ARI-4725 url_regex: '^https?://(?:www\.)?moma.org/.*$' - behavior_js_template: simpleclicks.js.j2 + behavior_js_template: umbraBehavior.js.j2 default_parameters: - click_css_selector: button[data-more-results-bottom-button] - click_until_hard_timeout: True - request_idle_timeout_sec: 10 + actions: + - selector: button[data-more-results-bottom-button] - # https://webarchive.jira.com/browse/ARI-4692 url_regex: '^https?://(?:www\.)?fec.gov/data/.*$' behavior_js_template: fec_gov.js @@ -157,39 +148,35 @@ do: mouseover - # https://webarchive.jira.com/browse/ARI-5259 url_regex: '^https?://blog\.sina\.com\.cn/.*$' - behavior_js_template: simpleclicks.js.j2 + behavior_js_template: umbraBehavior.js.j2 default_parameters: - click_css_selector: li.SG_pgnext a - click_until_hard_timeout: False - request_idle_timeout_sec: 10 + actions: + - selector: li.SG_pgnext a - # https://webarchive.jira.com/browse/ARI-5313 url_regex: '^https?://.*\.ky\.gov/.*$' - behavior_js_template: mouseovers.js.j2 + behavior_js_template: umbraBehavior.js.j2 default_parameters: - mouseover_css_selector: .zz1_AgencyListingMenu_1 - mouseover_until_hard_timeout: False - request_idle_timeout_sec: 10 + actions: + - selector: .zz1_AgencyListingMenu_1 + do: mouseover - # https://webarchive.jira.com/browse/ARI-5384 url_regex: '^https?://issuu\.com/.*$' - behavior_js_template: simpleclicks.js.j2 + behavior_js_template: umbraBehavior.js.j2 default_parameters: - click_css_selector: button#ird3-button-next - click_until_hard_timeout: True - request_idle_timeout_sec: 10 + actions: + - selector: button#ird3-button-next - # https://webarchive.jira.com/browse/ARI-5241 url_regex: '^https?://(?:www\.)?colonialart\.org/.*$' - behavior_js_template: simpleclicks.js.j2 + behavior_js_template: umbraBehavior.js.j2 default_parameters: - click_css_selector: img.link-overlay - click_until_hard_timeout: False - request_idle_timeout_sec: 10 + actions: + - selector: img.link-overlay - # https://webarchive.jira.com/browse/ARI-4960 url_regex: '^https?://(?:www\.)?fortstjames.ca/community-events-calendar/$' - behavior_js_template: simpleclicks.js.j2 + behavior_js_template: umbraBehavior.js.j2 default_parameters: - click_css_selector: img#navForward1 - click_until_hard_timeout: True - request_idle_timeout_sec: 10 + actions: + - selector: img#navForward1 - # default fallback behavior url_regex: '^.*$' behavior_js_template: umbraBehavior.js.j2 From 9001156b54269754befa63b49b54f0c98cd7c41c Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Thu, 10 Jan 2019 15:58:38 -0800 Subject: [PATCH 2/3] rm simpleclicks.js.j2 mouseovers.js.j2 --- brozzler/js-templates/mouseovers.js.j2 | 133 --------------------- brozzler/js-templates/simpleclicks.js.j2 | 142 ----------------------- 2 files changed, 275 deletions(-) delete mode 100644 brozzler/js-templates/mouseovers.js.j2 delete mode 100644 brozzler/js-templates/simpleclicks.js.j2 diff --git a/brozzler/js-templates/mouseovers.js.j2 b/brozzler/js-templates/mouseovers.js.j2 deleted file mode 100644 index 8521387..0000000 --- a/brozzler/js-templates/mouseovers.js.j2 +++ /dev/null @@ -1,133 +0,0 @@ -/* - * brozzler/behaviors.d/mouseovers.js.in - mouseovers behavior template, - * mouseovers on elements matching templatized css selector - * - * Copyright (C) 2014-2016 Internet Archive - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -var umbraBehavior = { - IDLE_TIMEOUT_SEC : 10, - idleSince : null, - alreadyMouseovered : {}, - - intervalFunc : function() { - var mouseoveredSomething = false; - var somethingLeftBelow = false; - var somethingLeftAbove = false; - var cssSelector = {{mouseover_css_selector|json}}; - var mouseoverUntilTimeout = {{mouseover_until_hard_timeout|json}}; - - var iframes = document.querySelectorAll("iframe"); - var documents = Array(iframes.length + 1); - documents[0] = document; - - for (var i = 0; i < iframes.length; i++) { - documents[i+1] = iframes[i].contentWindow.document; - } - - for (var j = 0; j < documents.length; j++) { - - var mouseoverTargets = documents[j].querySelectorAll(cssSelector); - - for ( var i = 0; i < mouseoverTargets.length; i++) { - if (mouseoverTargets[i].umbraMouseovered && !mouseoverUntilTimeout) { - continue; - } - - var where = this.aboveBelowOrOnScreen(mouseoverTargets[i]); - - if (where == 0) { - console.log("mouseovering on " + mouseoverTargets[i].outerHTML); - // do mouse over event on mouseover target - // since some urls are requsted only on - // this event - see - // https://webarchive.jira.com/browse/AITFIVE-451 - var mouseOverEvent = document.createEvent('Events'); - mouseOverEvent.initEvent("mouseover",true, false); - mouseoverTargets[i].dispatchEvent(mouseOverEvent); - mouseoveredSomething = true; - this.idleSince = null; - mouseoverTargets[i].umbraMouseovered = true; - - break; //break from mouseoverTargets loop, but not from iframe loop - } else if (where > 0) { - somethingLeftBelow = true; - } else if (where < 0) { - somethingLeftAbove = true; - } - } - } - - if (!mouseoveredSomething) { - if (somethingLeftAbove) { - // console.log("scrolling UP because everything on this screen has been mouseovered but we missed something above"); - window.scrollBy(0, -500); - this.idleSince = null; - } else if (somethingLeftBelow) { - // console.log("scrolling because everything on this screen has been mouseovered but there's more below document.body.clientHeight=" - // + document.body.clientHeight); - window.scrollBy(0, 200); - this.idleSince = null; - } else if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) { - // console.log("scrolling because we're not to the bottom yet document.body.clientHeight=" - // + document.body.clientHeight); - window.scrollBy(0, 200); - this.idleSince = null; - } else if (this.idleSince == null) { - this.idleSince = Date.now(); - } - } - - if (!this.idleSince) { - this.idleSince = Date.now(); - } - }, - - start : function() { - var that = this; - this.intervalId = setInterval(function() { - that.intervalFunc() - }, 250); - }, - - isFinished : function() { - if (this.idleSince != null) { - var idleTimeMs = Date.now() - this.idleSince; - if (idleTimeMs / 1000 > this.IDLE_TIMEOUT_SEC) { - clearInterval(this.intervalId); - return true; - } - } - return false; - }, - - aboveBelowOrOnScreen : function(e) { - var eTop = e.getBoundingClientRect().top; - if (eTop < window.scrollY) { - return -1; // above - } else if (eTop > window.scrollY + window.innerHeight) { - return 1; // below - } else { - return 0; // on screen - } - }, -}; - -// Called from outside of this script. -var umbraBehaviorFinished = function() { - return umbraBehavior.isFinished() -}; - -umbraBehavior.start(); diff --git a/brozzler/js-templates/simpleclicks.js.j2 b/brozzler/js-templates/simpleclicks.js.j2 deleted file mode 100644 index 14652b8..0000000 --- a/brozzler/js-templates/simpleclicks.js.j2 +++ /dev/null @@ -1,142 +0,0 @@ -/* - * brozzler/behaviors.d/simpleclicks.js.in - simpleclicks behavior template, - * clicks on elements matching templatized css selector - * - * Copyright (C) 2014-2016 Internet Archive - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -var umbraBehavior = { - IDLE_TIMEOUT_SEC : 10, - idleSince : null, - alreadyClicked : {}, - - // https://github.com/jquery/jquery/blob/master/src/css/hiddenVisibleSelectors.js - // n.b. returns true for elements with visibility:hidden, which occupy - // screen real estate but are not visible, or clickable with the ui - isVisible : function(elem) { - return !!(elem.offsetWidth || elem.offsetHeight || elem.getClientRects().length); - }, - - intervalFunc : function() { - var clickedSomething = false; - var somethingLeftBelow = false; - var somethingLeftAbove = false; - var cssSelector = {{click_css_selector|json}}; - var clickUntilTimeout = {{click_until_hard_timeout|json}}; - - var iframes = document.querySelectorAll("iframe"); - var documents = Array(iframes.length + 1); - documents[0] = document; - - for (var i = 0; i < iframes.length; i++) { - documents[i+1] = iframes[i].contentWindow.document; - } - - for (var j = 0; j < documents.length; j++) { - var clickTargets = documents[j].querySelectorAll(cssSelector); - for ( var i = 0; i < clickTargets.length; i++) { - if (!this.isVisible(clickTargets[i])) { - continue; - } - if (clickTargets[i].umbraClicked && !clickUntilTimeout) { - continue; - } - - var where = this.aboveBelowOrOnScreen(clickTargets[i]); - - if (where == 0) { - console.log("clicking on " + clickTargets[i].outerHTML); - // do mouse over event on click target - // since some urls are requsted only on - // this event - see - // https://webarchive.jira.com/browse/AITFIVE-451 - var mouseOverEvent = document.createEvent('Events'); - mouseOverEvent.initEvent("mouseover",true, false); - clickTargets[i].dispatchEvent(mouseOverEvent); - clickTargets[i].click(); - clickedSomething = true; - this.idleSince = null; - clickTargets[i].umbraClicked = true; - - break; //break from clickTargets loop, but not from iframe loop - } else if (where > 0) { - somethingLeftBelow = true; - } else if (where < 0) { - somethingLeftAbove = true; - } - } - } - - if (!clickedSomething) { - if (somethingLeftAbove) { - // console.log("scrolling UP because everything on this screen has been clicked but we missed something above"); - window.scrollBy(0, -500); - this.idleSince = null; - } else if (somethingLeftBelow) { - // console.log("scrolling because everything on this screen has been clicked but there's more below document.body.clientHeight=" - // + document.body.clientHeight); - window.scrollBy(0, 200); - this.idleSince = null; - } else if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) { - // console.log("scrolling because we're not to the bottom yet document.body.clientHeight=" - // + document.body.clientHeight); - window.scrollBy(0, 200); - this.idleSince = null; - } else if (this.idleSince == null) { - this.idleSince = Date.now(); - } - } - - if (!this.idleSince) { - this.idleSince = Date.now(); - } - }, - - start : function() { - var that = this; - this.intervalId = setInterval(function() { - that.intervalFunc() - }, 250); - }, - - isFinished : function() { - if (this.idleSince != null) { - var idleTimeMs = Date.now() - this.idleSince; - if (idleTimeMs / 1000 > this.IDLE_TIMEOUT_SEC) { - clearInterval(this.intervalId); - return true; - } - } - return false; - }, - - aboveBelowOrOnScreen : function(e) { - var eTop = e.getBoundingClientRect().top; - if (eTop < window.scrollY) { - return -1; // above - } else if (eTop > window.scrollY + window.innerHeight) { - return 1; // below - } else { - return 0; // on screen - } - }, -}; - -// Called from outside of this script. -var umbraBehaviorFinished = function() { - return umbraBehavior.isFinished() -}; - -umbraBehavior.start(); From f6ffb4acea2e68e648d07f44c4261f9fd90a2f10 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Thu, 10 Jan 2019 16:11:24 -0800 Subject: [PATCH 3/3] update (C) --- brozzler/behaviors.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml index 1ba1a92..e792e69 100644 --- a/brozzler/behaviors.yaml +++ b/brozzler/behaviors.yaml @@ -1,7 +1,7 @@ # # brozzler/behaviors.yaml - behavior configuration # -# Copyright (C) 2014-2018 Internet Archive +# Copyright (C) 2014-2019 Internet Archive # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.