From b30cc2d68bca0bad1b955cf9a9e655fa5d55b1af Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Mon, 14 Sep 2015 17:57:01 -0700 Subject: [PATCH 1/9] simpler implementation for https://github.com/internetarchive/umbra/pull/42/files --- umbra/behaviors.d/simpleclicks.js.in | 81 +++++++++++++++++----------- umbra/behaviors.yaml | 11 ++++ 2 files changed, 60 insertions(+), 32 deletions(-) diff --git a/umbra/behaviors.d/simpleclicks.js.in b/umbra/behaviors.d/simpleclicks.js.in index d97bac5..bc5d8ac 100644 --- a/umbra/behaviors.d/simpleclicks.js.in +++ b/umbra/behaviors.d/simpleclicks.js.in @@ -1,41 +1,58 @@ var umbraSimpleClicksBehavior = { - IDLE_TIMEOUT_SEC: 10, - idleSince: null, - alreadyClicked: {}, + IDLE_TIMEOUT_SEC: 10, + idleSince: null, + alreadyClicked: {}, - intervalFunc: function() { - var clickTargets = document.querySelectorAll("${click_css_selector}"); + intervalFunc: function() { + var iframes = document.querySelectorAll("iframe"); + var documents = Array(iframes.length + 1); + documents[0] = document; + for (var i = 0; i < iframes.length; i++) { + documents[i+1] = iframes[i].contentWindow.document; + } - for (var i = 0; i < clickTargets.length; i++) { - var key = clickTargets[i].outerHTML; - if (!this.alreadyClicked[key]) { - console.log("clicking on " + key); - clickTargets[i].click(); - this.alreadyClicked[key] = true; - this.idleSince = null; - return; - } - } + for (var j = 0; j < documents.length; j++) { + var clickTargets = documents[j].querySelectorAll("${click_css_selector}"); + for (var i = 0; i < clickTargets.length; i++) { + var key = clickTargets[i].outerHTML; + if (!this.alreadyClicked[key]) { + console.log("clicking on " + key); - if (!this.idleSince) { - this.idleSince = Date.now(); - } - }, + // do mouse over event on click target + // since some urls are requsted only on + // this event - see + // https://webarchive.jira.com/browse/AITFIVE-451 + var mouseOverEvent = document.createEvent('Events'); + mouseOverEvent.initEvent("mouseover", true, false); + clickTargets[i].dispatchEvent(mouseOverEvent); - start: function() { - var that = this; - this.intervalId = setInterval(function(){ that.intervalFunc() }, 250); - }, + clickTargets[i].click(); + this.alreadyClicked[key] = true; + this.idleSince = null; + return; + } + } + } - isFinished: function() { - if (this.idleSince != null) { - var idleTimeMs = Date.now() - this.idleSince; - if (idleTimeMs / 1000 > this.IDLE_TIMEOUT_SEC) { - return true; - } - } - return false; - }, + if (!this.idleSince) { + this.idleSince = Date.now(); + } + }, + + start: function() { + var that = this; + this.intervalId = setInterval(function(){ that.intervalFunc() }, 250); + }, + + isFinished: function() { + if (this.idleSince != null) { + var idleTimeMs = Date.now() - this.idleSince; + if (idleTimeMs / 1000 > this.IDLE_TIMEOUT_SEC) { + return true; + } + } + return false; + }, }; // Called from outside of this script. diff --git a/umbra/behaviors.yaml b/umbra/behaviors.yaml index 687cc1b..14f88b3 100644 --- a/umbra/behaviors.yaml +++ b/umbra/behaviors.yaml @@ -28,6 +28,17 @@ behaviors: url_regex: '^https?://(?:www\.)?usask.ca/.*$' click_css_selector: a[id='feature-next'] request_idle_timeout_sec: 10 + # https://webarchive.jira.com/browse/ARI-4128 + url_regex: '^https?://(?:www\.)?psu24.psu.edu/.*$' + click_css_selector: a[id='load-more'] + - # https://webarchive.jira.com/browse/AITFIVE-451 + url_regex: '^https?://(?:www\.)?soundcloud.com/.*$' + click_css_selector: button.sc-button-play, button.playButton + request_idle_timeout_sec: 10 + - # https://webarchive.jira.com/browse/AITFIVE-463 + url_regex: '^https?://(?:www\.)?christophercerrone.com/.*$' + click_css_selector: button.playButton.medium + request_idle_timeout_sec: 10 - # default fallback brhavior url_regex: '^.*$' request_idle_timeout_sec: 10 From ea41653c4404ab0818bb355b4b4908f8efb2d958 Mon Sep 17 00:00:00 2001 From: Hunter Stern Date: Tue, 15 Sep 2015 11:53:53 -0700 Subject: [PATCH 2/9] Pulled in changes from https://github.com/nlevitt/umbra/tree/aitfive-451-alt --- umbra/behaviors.d/simpleclicks.js.in | 111 ++++++++++++++++++++------- 1 file changed, 85 insertions(+), 26 deletions(-) diff --git a/umbra/behaviors.d/simpleclicks.js.in b/umbra/behaviors.d/simpleclicks.js.in index bc5d8ac..ee9bc04 100644 --- a/umbra/behaviors.d/simpleclicks.js.in +++ b/umbra/behaviors.d/simpleclicks.js.in @@ -1,36 +1,91 @@ -var umbraSimpleClicksBehavior = { - IDLE_TIMEOUT_SEC: 10, - idleSince: null, - alreadyClicked: {}, +var umbraAboveBelowOrOnScreen = function(e) { + var eTop = e.getBoundingClientRect().top; + if (eTop < window.scrollY) { + return -1; // above + } else if (eTop > window.scrollY + window.innerHeight) { + return 1; // below + } else { + return 0; // on screen + } +} + +var umbraSimpleScrollsAndClicksBehavior = { + IDLE_TIMEOUT_SEC : 10, + idleSince : null, + alreadyClicked : {}, + + intervalFunc : function() { + var clickedSomething = false; + var somethingLeftBelow = false; + var somethingLeftAbove = false; + var cssSelector = "${click_css_selector}"; - intervalFunc: function() { var iframes = document.querySelectorAll("iframe"); var documents = Array(iframes.length + 1); documents[0] = document; + for (var i = 0; i < iframes.length; i++) { documents[i+1] = iframes[i].contentWindow.document; } for (var j = 0; j < documents.length; j++) { - var clickTargets = documents[j].querySelectorAll("${click_css_selector}"); - for (var i = 0; i < clickTargets.length; i++) { - var key = clickTargets[i].outerHTML; - if (!this.alreadyClicked[key]) { - console.log("clicking on " + key); - // do mouse over event on click target - // since some urls are requsted only on - // this event - see - // https://webarchive.jira.com/browse/AITFIVE-451 - var mouseOverEvent = document.createEvent('Events'); - mouseOverEvent.initEvent("mouseover", true, false); - clickTargets[i].dispatchEvent(mouseOverEvent); + var clickTargets = documents[j].querySelectorAll("button.sc-button-play, button.playButton"); - clickTargets[i].click(); - this.alreadyClicked[key] = true; - this.idleSince = null; - return; + for ( var i = 0; i < clickTargets.length; i++) { + if (clickTargets[i].umbraClicked) { + //has already been clicked so no need to check again. if we did check + // and element was above screen, we would go back up and never reach the bottom + continue; } + + var where = umbraAboveBelowOrOnScreen(clickTargets[i]); + + if (where == 0) { + if (!clickTargets[i].umbraClicked) { + console.log("clicking on " + clickTargets[i].outerHTML); + // do mouse over event on click target + // since some urls are requsted only on + // this event - see + // https://webarchive.jira.com/browse/AITFIVE-451 + var mouseOverEvent = document.createEvent('Events'); + mouseOverEvent.initEvent("mouseover",true, false); + clickTargets[i].dispatchEvent(mouseOverEvent); + clickTargets[i].click(); + clickedSomething = true; + this.idleSince = null; + clickTargets[i].umbraClicked = true; + + break; + } + } else if (where > 0) { + somethingLeftBelow = true; + } else if (where < 0) { + somethingLeftAbove = true; + } + } + } + + if (!clickedSomething) { + if (somethingLeftAbove) { + console + .log("scrolling UP because everything on this screen has been clicked but we missed something above"); + window.scrollBy(0, -500); + this.idleSince = null; + } else if (somethingLeftBelow) { + console + .log("scrolling because everything on this screen has been clicked but there's more below document.body.clientHeight=" + + document.body.clientHeight); + window.scrollBy(0, 200); + this.idleSince = null; + } else if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) { + console + .log("scrolling because we're not to the bottom yet document.body.clientHeight=" + + document.body.clientHeight); + window.scrollBy(0, 200); + this.idleSince = null; + } else if (this.idleSince == null) { + this.idleSince = Date.now(); } } @@ -39,12 +94,14 @@ var umbraSimpleClicksBehavior = { } }, - start: function() { + start : function() { var that = this; - this.intervalId = setInterval(function(){ that.intervalFunc() }, 250); + this.intervalId = setInterval(function() { + that.intervalFunc() + }, 250); }, - isFinished: function() { + isFinished : function() { if (this.idleSince != null) { var idleTimeMs = Date.now() - this.idleSince; if (idleTimeMs / 1000 > this.IDLE_TIMEOUT_SEC) { @@ -56,6 +113,8 @@ var umbraSimpleClicksBehavior = { }; // Called from outside of this script. -var umbraBehaviorFinished = function() { return umbraSimpleClicksBehavior.isFinished() }; +var umbraBehaviorFinished = function() { + return umbraSimpleScrollsAndClicksBehavior.isFinished() +}; -umbraSimpleClicksBehavior.start(); +umbraSimpleScrollsAndClicksBehavior.start(); \ No newline at end of file From 3467670900e9636da07733679f9832e153e526bd Mon Sep 17 00:00:00 2001 From: Hunter Stern Date: Tue, 15 Sep 2015 18:03:08 -0700 Subject: [PATCH 3/9] More changes for handling psu24 site --- umbra/behaviors.d/simpleclicks.js.in | 65 ++++++++++++++++++++-------- umbra/behaviors.py | 8 +++- umbra/behaviors.yaml | 2 + 3 files changed, 55 insertions(+), 20 deletions(-) diff --git a/umbra/behaviors.d/simpleclicks.js.in b/umbra/behaviors.d/simpleclicks.js.in index ee9bc04..ee4a23c 100644 --- a/umbra/behaviors.d/simpleclicks.js.in +++ b/umbra/behaviors.d/simpleclicks.js.in @@ -1,3 +1,5 @@ +var umbraEndConditionTarget; + var umbraAboveBelowOrOnScreen = function(e) { var eTop = e.getBoundingClientRect().top; if (eTop < window.scrollY) { @@ -19,7 +21,14 @@ var umbraSimpleScrollsAndClicksBehavior = { var somethingLeftBelow = false; var somethingLeftAbove = false; var cssSelector = "${click_css_selector}"; + var cssSelectorClickEndCondition = "${click_css_selector_end_condition}"; + var cssSelectorClickComputedStyleEndCondition = "${click_css_selector_computed_style_end_condition}"; + //fixup cssSelectorClickEndCondition value if this argument has not been set in behaviors.yaml + if (cssSelectorClickEndCondition == "") { + cssSelectorClickEndCondition = "#uniq-selector-for-nothing"; + } + var iframes = document.querySelectorAll("iframe"); var documents = Array(iframes.length + 1); documents[0] = document; @@ -30,34 +39,46 @@ var umbraSimpleScrollsAndClicksBehavior = { for (var j = 0; j < documents.length; j++) { - var clickTargets = documents[j].querySelectorAll("button.sc-button-play, button.playButton"); + var clickTargets = documents[j].querySelectorAll(cssSelector); + umbraEndConditionTarget = documents[j].querySelector(cssSelectorClickEndCondition); + if (umbraEndConditionTarget) { + if (cssSelectorClickComputedStyleEndCondition != "") { + var dynamicCode = umbraCreateDynamicCodeToCheckSelectorCondition(cssSelectorClickComputedStyleEndCondition); + + if (eval(dynamicCode)) { + return; + } + } + else if (cssSelectorClickComputedStyleEndCondition == "") { + return; + } + } + for ( var i = 0; i < clickTargets.length; i++) { - if (clickTargets[i].umbraClicked) { - //has already been clicked so no need to check again. if we did check - // and element was above screen, we would go back up and never reach the bottom + if (clickTargets[i].umbraClicked && cssSelectorClickEndCondition == "#uniq-selector-for-nothing") { + //has already been clicked so no need to check again unless there is a click end condition value set + //for this url in behaviors.yaml. then we keep clicking until the end condition is met continue; } var where = umbraAboveBelowOrOnScreen(clickTargets[i]); if (where == 0) { - if (!clickTargets[i].umbraClicked) { - console.log("clicking on " + clickTargets[i].outerHTML); - // do mouse over event on click target - // since some urls are requsted only on - // this event - see - // https://webarchive.jira.com/browse/AITFIVE-451 - var mouseOverEvent = document.createEvent('Events'); - mouseOverEvent.initEvent("mouseover",true, false); - clickTargets[i].dispatchEvent(mouseOverEvent); - clickTargets[i].click(); - clickedSomething = true; - this.idleSince = null; - clickTargets[i].umbraClicked = true; + console.log("clicking on " + clickTargets[i].outerHTML); + // do mouse over event on click target + // since some urls are requsted only on + // this event - see + // https://webarchive.jira.com/browse/AITFIVE-451 + var mouseOverEvent = document.createEvent('Events'); + mouseOverEvent.initEvent("mouseover",true, false); + clickTargets[i].dispatchEvent(mouseOverEvent); + clickTargets[i].click(); + clickedSomething = true; + this.idleSince = null; + clickTargets[i].umbraClicked = true; - break; - } + break; //break from clickTargets loop, but not from iframe loop } else if (where > 0) { somethingLeftBelow = true; } else if (where < 0) { @@ -112,6 +133,12 @@ var umbraSimpleScrollsAndClicksBehavior = { }, }; +var umbraCreateDynamicCodeToCheckSelectorCondition = function(condition) { + if (!condition || condition == "") return; + + return eval("var dynamicFunction = new Function('return function testCondition(target){ if (!target) return false; var computedStyle=window.getComputedStyle(target); return computedStyle." + condition + " }' )(); dynamicFunction(umbraEndConditionTarget);"); +} + // Called from outside of this script. var umbraBehaviorFinished = function() { return umbraSimpleScrollsAndClicksBehavior.isFinished() diff --git a/umbra/behaviors.py b/umbra/behaviors.py index 2085690..9c8faca 100644 --- a/umbra/behaviors.py +++ b/umbra/behaviors.py @@ -32,7 +32,13 @@ class Behavior: behavior_js = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ["behaviors.d"] + [behavior["behavior_js"]]) behavior["script"] = open(behavior_js, encoding="utf-8").read() elif "click_css_selector" in behavior: - behavior["script"] = simpleclicks_js_template.substitute(click_css_selector=behavior["click_css_selector"]) + if "click_css_selector_end_condition" not in behavior: + behavior["click_css_selector_end_condition"] = ""; + + if "click_css_selector_computed_style_end_condition" not in behavior: + behavior["click_css_selector_computed_style_end_condition"] = ""; + + behavior["script"] = simpleclicks_js_template.substitute(click_css_selector=behavior["click_css_selector"], click_css_selector_end_condition=behavior["click_css_selector_end_condition"], click_css_selector_computed_style_end_condition=behavior["click_css_selector_computed_style_end_condition"]) return Behavior._behaviors diff --git a/umbra/behaviors.yaml b/umbra/behaviors.yaml index 14f88b3..e63f52e 100644 --- a/umbra/behaviors.yaml +++ b/umbra/behaviors.yaml @@ -31,6 +31,8 @@ behaviors: # https://webarchive.jira.com/browse/ARI-4128 url_regex: '^https?://(?:www\.)?psu24.psu.edu/.*$' click_css_selector: a[id='load-more'] + click_css_selector_end_condition: a[id='load-more'][class='disabled'] + click_css_selector_computed_style_end_condition: visibility=='hidden' - # https://webarchive.jira.com/browse/AITFIVE-451 url_regex: '^https?://(?:www\.)?soundcloud.com/.*$' click_css_selector: button.sc-button-play, button.playButton From 5ccc535f5197c021b5ce47a3f56d77fc087f21aa Mon Sep 17 00:00:00 2001 From: Hunter Stern Date: Wed, 16 Sep 2015 09:23:13 -0700 Subject: [PATCH 4/9] More changes --- umbra/behaviors.d/simpleclicks.js.in | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/umbra/behaviors.d/simpleclicks.js.in b/umbra/behaviors.d/simpleclicks.js.in index ee4a23c..1f6bcbd 100644 --- a/umbra/behaviors.d/simpleclicks.js.in +++ b/umbra/behaviors.d/simpleclicks.js.in @@ -44,13 +44,11 @@ var umbraSimpleScrollsAndClicksBehavior = { if (umbraEndConditionTarget) { if (cssSelectorClickComputedStyleEndCondition != "") { - var dynamicCode = umbraCreateDynamicCodeToCheckSelectorCondition(cssSelectorClickComputedStyleEndCondition); - - if (eval(dynamicCode)) { + if (umbraCheckSelectorEndCondition(cssSelectorClickComputedStyleEndCondition)) { return; } } - else if (cssSelectorClickComputedStyleEndCondition == "") { + else { return; } } @@ -89,19 +87,16 @@ var umbraSimpleScrollsAndClicksBehavior = { if (!clickedSomething) { if (somethingLeftAbove) { - console - .log("scrolling UP because everything on this screen has been clicked but we missed something above"); + console.log("scrolling UP because everything on this screen has been clicked but we missed something above"); window.scrollBy(0, -500); this.idleSince = null; } else if (somethingLeftBelow) { - console - .log("scrolling because everything on this screen has been clicked but there's more below document.body.clientHeight=" + console.log("scrolling because everything on this screen has been clicked but there's more below document.body.clientHeight=" + document.body.clientHeight); window.scrollBy(0, 200); this.idleSince = null; } else if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) { - console - .log("scrolling because we're not to the bottom yet document.body.clientHeight=" + console.log("scrolling because we're not to the bottom yet document.body.clientHeight=" + document.body.clientHeight); window.scrollBy(0, 200); this.idleSince = null; @@ -133,7 +128,7 @@ var umbraSimpleScrollsAndClicksBehavior = { }, }; -var umbraCreateDynamicCodeToCheckSelectorCondition = function(condition) { +var umbraCheckSelectorEndCondition = function(condition) { if (!condition || condition == "") return; return eval("var dynamicFunction = new Function('return function testCondition(target){ if (!target) return false; var computedStyle=window.getComputedStyle(target); return computedStyle." + condition + " }' )(); dynamicFunction(umbraEndConditionTarget);"); From f282213981c5da4a7b25353700de90b76814fdf1 Mon Sep 17 00:00:00 2001 From: Hunter Stern Date: Thu, 17 Sep 2015 08:43:30 -0700 Subject: [PATCH 5/9] Add fix for https://webarchive.jira.com/browse/ARI-4518 --- umbra/behaviors.yaml | 4 ++++ umbra/browser.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/umbra/behaviors.yaml b/umbra/behaviors.yaml index e63f52e..40852b4 100644 --- a/umbra/behaviors.yaml +++ b/umbra/behaviors.yaml @@ -41,6 +41,10 @@ behaviors: url_regex: '^https?://(?:www\.)?christophercerrone.com/.*$' click_css_selector: button.playButton.medium request_idle_timeout_sec: 10 + - # https://webarchive.jira.com/browse/ARI-4518 + url_regex: '^https?://(?:www\.)?pitkincounty.com/.*$' + click_css_selector: a[onclick^='window.open(\\'http://pitkincounty.granicus.com/MediaPlayer.php'] + request_idle_timeout_sec: 10 - # default fallback brhavior url_regex: '^.*$' request_idle_timeout_sec: 10 diff --git a/umbra/browser.py b/umbra/browser.py index c72bd3e..88ef19c 100644 --- a/umbra/browser.py +++ b/umbra/browser.py @@ -242,7 +242,7 @@ class Chrome: "--window-size=1100,900", "--no-default-browser-check", "--disable-first-run-ui", "--no-first-run", "--homepage=about:blank", "--disable-direct-npapi-requests", - "--disable-web-security", + "--disable-web-security", "--disable-popup-blocking", "about:blank"] self.logger.info("running {}".format(chrome_args)) self.chrome_process = subprocess.Popen(chrome_args, env=new_env, start_new_session=True) From 8829323a386eec4e253a627438b152758bd1938f Mon Sep 17 00:00:00 2001 From: Hunter Stern Date: Thu, 17 Sep 2015 09:07:03 -0700 Subject: [PATCH 6/9] Remove changes for https://webarchive.jira.com/browse/ARI-4518: --- umbra/behaviors.yaml | 4 ---- umbra/browser.py | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/umbra/behaviors.yaml b/umbra/behaviors.yaml index 40852b4..e63f52e 100644 --- a/umbra/behaviors.yaml +++ b/umbra/behaviors.yaml @@ -41,10 +41,6 @@ behaviors: url_regex: '^https?://(?:www\.)?christophercerrone.com/.*$' click_css_selector: button.playButton.medium request_idle_timeout_sec: 10 - - # https://webarchive.jira.com/browse/ARI-4518 - url_regex: '^https?://(?:www\.)?pitkincounty.com/.*$' - click_css_selector: a[onclick^='window.open(\\'http://pitkincounty.granicus.com/MediaPlayer.php'] - request_idle_timeout_sec: 10 - # default fallback brhavior url_regex: '^.*$' request_idle_timeout_sec: 10 diff --git a/umbra/browser.py b/umbra/browser.py index 88ef19c..c72bd3e 100644 --- a/umbra/browser.py +++ b/umbra/browser.py @@ -242,7 +242,7 @@ class Chrome: "--window-size=1100,900", "--no-default-browser-check", "--disable-first-run-ui", "--no-first-run", "--homepage=about:blank", "--disable-direct-npapi-requests", - "--disable-web-security", "--disable-popup-blocking", + "--disable-web-security", "about:blank"] self.logger.info("running {}".format(chrome_args)) self.chrome_process = subprocess.Popen(chrome_args, env=new_env, start_new_session=True) From f8a70f38420d19140a77e488776fd6ed5b8d0142 Mon Sep 17 00:00:00 2001 From: Hunter Stern Date: Thu, 17 Sep 2015 16:24:41 -0700 Subject: [PATCH 7/9] More changes. --- umbra/behaviors.d/psu.js | 131 +++++++++++++++++++++++++++ umbra/behaviors.d/simpleclicks.js.in | 29 +----- umbra/behaviors.py | 8 +- umbra/behaviors.yaml | 9 +- 4 files changed, 137 insertions(+), 40 deletions(-) create mode 100644 umbra/behaviors.d/psu.js diff --git a/umbra/behaviors.d/psu.js b/umbra/behaviors.d/psu.js new file mode 100644 index 0000000..17e8412 --- /dev/null +++ b/umbra/behaviors.d/psu.js @@ -0,0 +1,131 @@ +var umbraAboveBelowOrOnScreen = function(e) { + var eTop = e.getBoundingClientRect().top; + if (eTop < window.scrollY) { + return -1; // above + } else if (eTop > window.scrollY + window.innerHeight) { + return 1; // below + } else { + return 0; // on screen + } +} + +var umbraSimpleScrollsAndClicksBehavior = { + IDLE_TIMEOUT_SEC : 10, + idleSince : null, + alreadyClicked : {}, + + intervalFunc : function() { + var clickedSomething = false; + var somethingLeftBelow = false; + var somethingLeftAbove = false; + + var iframes = document.querySelectorAll("iframe"); + var documents = Array(iframes.length + 1); + documents[0] = document; + + for (var i = 0; i < iframes.length; i++) { + documents[i+1] = iframes[i].contentWindow.document; + } + + for (var j = 0; j < documents.length; j++) { + + var clickTargets = documents[j].querySelectorAll("a[id='load-more']"); + + if (umbraCheckAtEndOfScrollingContent(documents[j])) { + return; + } + + for ( var i = 0; i < clickTargets.length; i++) { + if (clickTargets[i].umbraClicked) { + continue; + } + + var where = umbraAboveBelowOrOnScreen(clickTargets[i]); + + if (where == 0) { + console.log("clicking on " + clickTargets[i].outerHTML); + // do mouse over event on click target + // since some urls are requsted only on + // this event - see + // https://webarchive.jira.com/browse/AITFIVE-451 + var mouseOverEvent = document.createEvent('Events'); + mouseOverEvent.initEvent("mouseover",true, false); + clickTargets[i].dispatchEvent(mouseOverEvent); + clickTargets[i].click(); + clickedSomething = true; + this.idleSince = null; + clickTargets[i].umbraClicked = true; + + break; //break from clickTargets loop, but not from iframe loop + } else if (where > 0) { + somethingLeftBelow = true; + } else if (where < 0) { + somethingLeftAbove = true; + } + } + } + + if (!clickedSomething) { + if (somethingLeftAbove) { + console.log("scrolling UP because everything on this screen has been clicked but we missed something above"); + window.scrollBy(0, -500); + this.idleSince = null; + } else if (somethingLeftBelow) { + console.log("scrolling because everything on this screen has been clicked but there's more below document.body.clientHeight=" + + document.body.clientHeight); + window.scrollBy(0, 200); + this.idleSince = null; + } else if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) { + console.log("scrolling because we're not to the bottom yet document.body.clientHeight=" + + document.body.clientHeight); + window.scrollBy(0, 200); + this.idleSince = null; + } else if (this.idleSince == null) { + this.idleSince = Date.now(); + } + } + + if (!this.idleSince) { + this.idleSince = Date.now(); + } + }, + + start : function() { + var that = this; + this.intervalId = setInterval(function() { + that.intervalFunc() + }, 250); + }, + + isFinished : function() { + if (this.idleSince != null) { + var idleTimeMs = Date.now() - this.idleSince; + if (idleTimeMs / 1000 > this.IDLE_TIMEOUT_SEC) { + return true; + } + } + return false; + }, +}; + + +var umbraCheckAtEndOfScrollingContent = function(document) { + var elementToCheck = document.querySelector("a[id='load-more'][class='disabled']"); + + if (elementToCheck) { + var computedStyle = window.getComputedStyle(elementToCheck); + + if (computedStyle) { + return computerStyle.visibility=='hidden'; + } + } + + return false; +} + +// Called from outside of this script. +var umbraBehaviorFinished = function() { + return umbraSimpleScrollsAndClicksBehavior.isFinished() +}; + +umbraSimpleScrollsAndClicksBehavior.start(); \ No newline at end of file diff --git a/umbra/behaviors.d/simpleclicks.js.in b/umbra/behaviors.d/simpleclicks.js.in index 1f6bcbd..143ce14 100644 --- a/umbra/behaviors.d/simpleclicks.js.in +++ b/umbra/behaviors.d/simpleclicks.js.in @@ -21,14 +21,7 @@ var umbraSimpleScrollsAndClicksBehavior = { var somethingLeftBelow = false; var somethingLeftAbove = false; var cssSelector = "${click_css_selector}"; - var cssSelectorClickEndCondition = "${click_css_selector_end_condition}"; - var cssSelectorClickComputedStyleEndCondition = "${click_css_selector_computed_style_end_condition}"; - //fixup cssSelectorClickEndCondition value if this argument has not been set in behaviors.yaml - if (cssSelectorClickEndCondition == "") { - cssSelectorClickEndCondition = "#uniq-selector-for-nothing"; - } - var iframes = document.querySelectorAll("iframe"); var documents = Array(iframes.length + 1); documents[0] = document; @@ -40,23 +33,9 @@ var umbraSimpleScrollsAndClicksBehavior = { for (var j = 0; j < documents.length; j++) { var clickTargets = documents[j].querySelectorAll(cssSelector); - umbraEndConditionTarget = documents[j].querySelector(cssSelectorClickEndCondition); - if (umbraEndConditionTarget) { - if (cssSelectorClickComputedStyleEndCondition != "") { - if (umbraCheckSelectorEndCondition(cssSelectorClickComputedStyleEndCondition)) { - return; - } - } - else { - return; - } - } - for ( var i = 0; i < clickTargets.length; i++) { - if (clickTargets[i].umbraClicked && cssSelectorClickEndCondition == "#uniq-selector-for-nothing") { - //has already been clicked so no need to check again unless there is a click end condition value set - //for this url in behaviors.yaml. then we keep clicking until the end condition is met + if (clickTargets[i].umbraClicked) { continue; } @@ -128,12 +107,6 @@ var umbraSimpleScrollsAndClicksBehavior = { }, }; -var umbraCheckSelectorEndCondition = function(condition) { - if (!condition || condition == "") return; - - return eval("var dynamicFunction = new Function('return function testCondition(target){ if (!target) return false; var computedStyle=window.getComputedStyle(target); return computedStyle." + condition + " }' )(); dynamicFunction(umbraEndConditionTarget);"); -} - // Called from outside of this script. var umbraBehaviorFinished = function() { return umbraSimpleScrollsAndClicksBehavior.isFinished() diff --git a/umbra/behaviors.py b/umbra/behaviors.py index 9c8faca..2085690 100644 --- a/umbra/behaviors.py +++ b/umbra/behaviors.py @@ -32,13 +32,7 @@ class Behavior: behavior_js = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ["behaviors.d"] + [behavior["behavior_js"]]) behavior["script"] = open(behavior_js, encoding="utf-8").read() elif "click_css_selector" in behavior: - if "click_css_selector_end_condition" not in behavior: - behavior["click_css_selector_end_condition"] = ""; - - if "click_css_selector_computed_style_end_condition" not in behavior: - behavior["click_css_selector_computed_style_end_condition"] = ""; - - behavior["script"] = simpleclicks_js_template.substitute(click_css_selector=behavior["click_css_selector"], click_css_selector_end_condition=behavior["click_css_selector_end_condition"], click_css_selector_computed_style_end_condition=behavior["click_css_selector_computed_style_end_condition"]) + behavior["script"] = simpleclicks_js_template.substitute(click_css_selector=behavior["click_css_selector"]) return Behavior._behaviors diff --git a/umbra/behaviors.yaml b/umbra/behaviors.yaml index e63f52e..21b1945 100644 --- a/umbra/behaviors.yaml +++ b/umbra/behaviors.yaml @@ -16,6 +16,10 @@ behaviors: url_regex: '^https?://(?:www\.)?vimeo\.com/.*$' behavior_js: vimeo.js request_idle_timeout_sec: 10 + - + url_regex: '^https?://(?:www\.)?psu24.psu.edu/.*$' + behavior_js: psu.js + request_idle_timeout_sec: 10 - url_regex: '^https?://(?:www\.)?instagram\.com/.*$' behavior_js: instagram.js @@ -28,11 +32,6 @@ behaviors: url_regex: '^https?://(?:www\.)?usask.ca/.*$' click_css_selector: a[id='feature-next'] request_idle_timeout_sec: 10 - # https://webarchive.jira.com/browse/ARI-4128 - url_regex: '^https?://(?:www\.)?psu24.psu.edu/.*$' - click_css_selector: a[id='load-more'] - click_css_selector_end_condition: a[id='load-more'][class='disabled'] - click_css_selector_computed_style_end_condition: visibility=='hidden' - # https://webarchive.jira.com/browse/AITFIVE-451 url_regex: '^https?://(?:www\.)?soundcloud.com/.*$' click_css_selector: button.sc-button-play, button.playButton From f2ead0570e24c41cf648ad53ff1b41c98fd84d6a Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Thu, 24 Sep 2015 12:20:19 -0700 Subject: [PATCH 8/9] fixes for psu24 behavior --- umbra/behaviors.d/{psu.js => psu24.js} | 63 +++++++++----------------- umbra/behaviors.yaml | 2 +- 2 files changed, 22 insertions(+), 43 deletions(-) rename umbra/behaviors.d/{psu.js => psu24.js} (70%) diff --git a/umbra/behaviors.d/psu.js b/umbra/behaviors.d/psu24.js similarity index 70% rename from umbra/behaviors.d/psu.js rename to umbra/behaviors.d/psu24.js index 17e8412..8084bca 100644 --- a/umbra/behaviors.d/psu.js +++ b/umbra/behaviors.d/psu24.js @@ -1,20 +1,10 @@ -var umbraAboveBelowOrOnScreen = function(e) { - var eTop = e.getBoundingClientRect().top; - if (eTop < window.scrollY) { - return -1; // above - } else if (eTop > window.scrollY + window.innerHeight) { - return 1; // below - } else { - return 0; // on screen - } -} -var umbraSimpleScrollsAndClicksBehavior = { +var umbraBehavior = { IDLE_TIMEOUT_SEC : 10, idleSince : null, alreadyClicked : {}, - intervalFunc : function() { + intervalFunc: function() { var clickedSomething = false; var somethingLeftBelow = false; var somethingLeftAbove = false; @@ -28,20 +18,13 @@ var umbraSimpleScrollsAndClicksBehavior = { } for (var j = 0; j < documents.length; j++) { - var clickTargets = documents[j].querySelectorAll("a[id='load-more']"); - - if (umbraCheckAtEndOfScrollingContent(documents[j])) { - return; - } - - for ( var i = 0; i < clickTargets.length; i++) { - if (clickTargets[i].umbraClicked) { + for (var i = 0; i < clickTargets.length; i++) { + if (clickTargets[i].className === "disabled") { continue; } - var where = umbraAboveBelowOrOnScreen(clickTargets[i]); - + var where = this.aboveBelowOrOnScreen(clickTargets[i]); if (where == 0) { console.log("clicking on " + clickTargets[i].outerHTML); // do mouse over event on click target @@ -54,7 +37,6 @@ var umbraSimpleScrollsAndClicksBehavior = { clickTargets[i].click(); clickedSomething = true; this.idleSince = null; - clickTargets[i].umbraClicked = true; break; //break from clickTargets loop, but not from iframe loop } else if (where > 0) { @@ -90,14 +72,25 @@ var umbraSimpleScrollsAndClicksBehavior = { } }, - start : function() { + aboveBelowOrOnScreen: function(e) { + var eTop = e.getBoundingClientRect().top; + if (eTop < window.scrollY) { + return -1; // above + } else if (eTop > window.scrollY + window.innerHeight) { + return 1; // below + } else { + return 0; // on screen + } + }, + + start: function() { var that = this; this.intervalId = setInterval(function() { that.intervalFunc() }, 250); }, - isFinished : function() { + isFinished: function() { if (this.idleSince != null) { var idleTimeMs = Date.now() - this.idleSince; if (idleTimeMs / 1000 > this.IDLE_TIMEOUT_SEC) { @@ -108,24 +101,10 @@ var umbraSimpleScrollsAndClicksBehavior = { }, }; - -var umbraCheckAtEndOfScrollingContent = function(document) { - var elementToCheck = document.querySelector("a[id='load-more'][class='disabled']"); - - if (elementToCheck) { - var computedStyle = window.getComputedStyle(elementToCheck); - - if (computedStyle) { - return computerStyle.visibility=='hidden'; - } - } - - return false; -} - // Called from outside of this script. var umbraBehaviorFinished = function() { - return umbraSimpleScrollsAndClicksBehavior.isFinished() + return umbraBehavior.isFinished() }; -umbraSimpleScrollsAndClicksBehavior.start(); \ No newline at end of file +umbraBehavior.start(); + diff --git a/umbra/behaviors.yaml b/umbra/behaviors.yaml index 21b1945..e5ea670 100644 --- a/umbra/behaviors.yaml +++ b/umbra/behaviors.yaml @@ -18,7 +18,7 @@ behaviors: request_idle_timeout_sec: 10 - url_regex: '^https?://(?:www\.)?psu24.psu.edu/.*$' - behavior_js: psu.js + behavior_js: psu24.js request_idle_timeout_sec: 10 - url_regex: '^https?://(?:www\.)?instagram\.com/.*$' From a17b0f3b8dc180d6f508d63696988a458d7666dc Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Thu, 24 Sep 2015 12:34:55 -0700 Subject: [PATCH 9/9] refactor umbraAboveBelowOrOnScreen into umbraBehavior object --- umbra/behaviors.d/simpleclicks.js.in | 42 +++++++++++++--------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/umbra/behaviors.d/simpleclicks.js.in b/umbra/behaviors.d/simpleclicks.js.in index 143ce14..aabb0c0 100644 --- a/umbra/behaviors.d/simpleclicks.js.in +++ b/umbra/behaviors.d/simpleclicks.js.in @@ -1,17 +1,4 @@ -var umbraEndConditionTarget; - -var umbraAboveBelowOrOnScreen = function(e) { - var eTop = e.getBoundingClientRect().top; - if (eTop < window.scrollY) { - return -1; // above - } else if (eTop > window.scrollY + window.innerHeight) { - return 1; // below - } else { - return 0; // on screen - } -} - -var umbraSimpleScrollsAndClicksBehavior = { +var umbraBehavior = { IDLE_TIMEOUT_SEC : 10, idleSince : null, alreadyClicked : {}, @@ -39,7 +26,7 @@ var umbraSimpleScrollsAndClicksBehavior = { continue; } - var where = umbraAboveBelowOrOnScreen(clickTargets[i]); + var where = this.aboveBelowOrOnScreen(clickTargets[i]); if (where == 0) { console.log("clicking on " + clickTargets[i].outerHTML); @@ -66,17 +53,17 @@ var umbraSimpleScrollsAndClicksBehavior = { if (!clickedSomething) { if (somethingLeftAbove) { - console.log("scrolling UP because everything on this screen has been clicked but we missed something above"); + // console.log("scrolling UP because everything on this screen has been clicked but we missed something above"); window.scrollBy(0, -500); this.idleSince = null; } else if (somethingLeftBelow) { - console.log("scrolling because everything on this screen has been clicked but there's more below document.body.clientHeight=" - + document.body.clientHeight); + // console.log("scrolling because everything on this screen has been clicked but there's more below document.body.clientHeight=" + // + document.body.clientHeight); window.scrollBy(0, 200); this.idleSince = null; } else if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) { - console.log("scrolling because we're not to the bottom yet document.body.clientHeight=" - + document.body.clientHeight); + // console.log("scrolling because we're not to the bottom yet document.body.clientHeight=" + // + document.body.clientHeight); window.scrollBy(0, 200); this.idleSince = null; } else if (this.idleSince == null) { @@ -105,11 +92,22 @@ var umbraSimpleScrollsAndClicksBehavior = { } return false; }, + + aboveBelowOrOnScreen : function(e) { + var eTop = e.getBoundingClientRect().top; + if (eTop < window.scrollY) { + return -1; // above + } else if (eTop > window.scrollY + window.innerHeight) { + return 1; // below + } else { + return 0; // on screen + } + }, }; // Called from outside of this script. var umbraBehaviorFinished = function() { - return umbraSimpleScrollsAndClicksBehavior.isFinished() + return umbraBehavior.isFinished() }; -umbraSimpleScrollsAndClicksBehavior.start(); \ No newline at end of file +umbraBehavior.start();