Merge branch 'master' of github.com:nlevitt/brozzler

* 'master' of github.com:nlevitt/brozzler:
  update detection of modal close button for facebook changes
  refactor umbraAboveBelowOrOnScreen into umbraBehavior object
  fixes for psu24 behavior
  More changes.
  Remove changes for https://webarchive.jira.com/browse/ARI-4518:
  Add fix for https://webarchive.jira.com/browse/ARI-4518
  More changes
  More changes for handling psu24 site
  Pulled in changes from https://github.com/nlevitt/umbra/tree/aitfive-451-alt
  simpler implementation for https://github.com/internetarchive/umbra/pull/42/files
  Adds routing_key to queue Queue creation
This commit is contained in:
Noah Levitt 2015-11-05 20:10:22 +00:00
commit 29b6a0b0d4
4 changed files with 227 additions and 36 deletions

View File

@ -80,7 +80,7 @@ var umbraIntervalFunc = function() {
return;
}
var closeButtons = document.querySelectorAll('a[title="Close"], a.closeTheater');
var closeButtons = document.querySelectorAll('a[title="Close"], a.closeTheater, a[aria-label="Press Esc to close"]');
for (var i = 0; i < closeButtons.length; i++) {
// XXX closeTheater buttons stick around in the dom after closing, clientWidth>0 is one way to check if they're visible
if (closeButtons[i].clientWidth > 0) {

View File

@ -0,0 +1,110 @@
/**
 * Page behavior that repeatedly clicks enabled "load more" links
 * (a[id='load-more']) in the top document and in every iframe whose document
 * is reachable, scrolling the window as needed to bring targets on screen.
 *
 * intervalFunc() is run on a timer by start(); isFinished() reports true once
 * IDLE_TIMEOUT_SEC seconds have passed without a click or a scroll.
 */
var umbraBehavior = {
    // Seconds without any click/scroll before isFinished() returns true.
    IDLE_TIMEOUT_SEC : 10,
    // Date.now() timestamp (ms) of the most recent tick that clicked or
    // scrolled, or of the first idle tick; null until the first tick has run.
    idleSince : null,
    // Not used by this behavior; retained so the object's shape is unchanged.
    alreadyClicked : {},

    /**
     * Collects the top-level document plus the document of every iframe that
     * can actually be reached from this script.
     *
     * Accessing contentWindow.document of a cross-origin iframe throws, and
     * contentWindow can be null for a detached iframe. Such frames are skipped
     * so that one inaccessible frame cannot abort the whole tick.
     *
     * @returns {Array} documents to search, top document first
     */
    accessibleDocuments: function() {
        var documents = [document];
        var iframes = document.querySelectorAll("iframe");
        for (var i = 0; i < iframes.length; i++) {
            try {
                var frameWindow = iframes[i].contentWindow;
                if (frameWindow && frameWindow.document) {
                    documents.push(frameWindow.document);
                }
            } catch (e) {
                // Deliberately ignored: the frame is not accessible (e.g.
                // cross-origin), so there is nothing in it we could click.
            }
        }
        return documents;
    },

    /**
     * One tick of the behavior: click at most one on-screen target per
     * document; if nothing was clicked, scroll towards remaining targets or
     * towards the bottom of the page; then update the idle timestamp.
     */
    intervalFunc: function() {
        var clickedSomething = false;
        var somethingLeftBelow = false;
        var somethingLeftAbove = false;

        var documents = this.accessibleDocuments();
        for (var j = 0; j < documents.length; j++) {
            var clickTargets = documents[j].querySelectorAll("a[id='load-more']");
            for (var k = 0; k < clickTargets.length; k++) {
                var target = clickTargets[k];
                if (target.className === "disabled") {
                    continue;
                }

                var where = this.aboveBelowOrOnScreen(target);
                if (where === 0) {
                    console.log("clicking on " + target.outerHTML);

                    // do mouse over event on click target
                    // since some urls are requested only on
                    // this event - see
                    // https://webarchive.jira.com/browse/AITFIVE-451
                    var mouseOverEvent = document.createEvent('Events');
                    mouseOverEvent.initEvent("mouseover", true, false);
                    target.dispatchEvent(mouseOverEvent);
                    target.click();
                    clickedSomething = true;
                    this.idleSince = null;

                    break; // break from clickTargets loop, but not from iframe loop
                } else if (where > 0) {
                    somethingLeftBelow = true;
                } else {
                    somethingLeftAbove = true;
                }
            }
        }

        if (!clickedSomething) {
            if (somethingLeftAbove) {
                console.log("scrolling UP because everything on this screen has been clicked but we missed something above");
                window.scrollBy(0, -500);
                this.idleSince = null;
            } else if (somethingLeftBelow) {
                console.log("scrolling because everything on this screen has been clicked but there's more below document.body.clientHeight="
                        + document.body.clientHeight);
                window.scrollBy(0, 200);
                this.idleSince = null;
            } else if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) {
                console.log("scrolling because we're not to the bottom yet document.body.clientHeight="
                        + document.body.clientHeight);
                window.scrollBy(0, 200);
                this.idleSince = null;
            }
        }

        // Activity above cleared idleSince, so this restarts the idle clock at
        // "now"; on an idle tick idleSince is already set and is left alone,
        // except on the very first tick where it is initialised here.
        if (!this.idleSince) {
            this.idleSince = Date.now();
        }
    },

    /**
     * Classifies an element's top edge relative to the visible viewport.
     *
     * getBoundingClientRect() is already relative to the viewport, so the top
     * edge is compared against 0 and window.innerHeight directly; adding
     * window.scrollY would mix viewport and document coordinates and
     * misclassify visible elements as soon as the page has been scrolled.
     *
     * NOTE(review): for targets inside an iframe the rectangle is relative to
     * that frame's viewport while window here is the top window - confirm
     * whether that needs separate handling.
     *
     * @param e element exposing getBoundingClientRect()
     * @returns {number} -1 if above the viewport, 1 if below, 0 if on screen
     */
    aboveBelowOrOnScreen: function(e) {
        var eTop = e.getBoundingClientRect().top;
        if (eTop < 0) {
            return -1; // above
        } else if (eTop > window.innerHeight) {
            return 1; // below
        } else {
            return 0; // on screen
        }
    },

    /**
     * Starts running intervalFunc() every 250 ms and stores the timer id in
     * this.intervalId.
     */
    start: function() {
        var that = this;
        this.intervalId = setInterval(function() {
            that.intervalFunc();
        }, 250);
    },

    /**
     * @returns {boolean} true once more than IDLE_TIMEOUT_SEC seconds have
     *     elapsed since idleSince; false while idleSince is still null.
     */
    isFinished: function() {
        if (this.idleSince != null) {
            var idleTimeMs = Date.now() - this.idleSince;
            if (idleTimeMs / 1000 > this.IDLE_TIMEOUT_SEC) {
                return true;
            }
        }
        return false;
    },
};
// Entry point invoked from outside of this script: reports whether
// umbraBehavior considers itself finished.
var umbraBehaviorFinished = function() {
    var finished = umbraBehavior.isFinished();
    return finished;
};
// Begin the periodic intervalFunc() ticks when this script is evaluated.
umbraBehavior.start();

View File

@ -1,19 +1,73 @@
var umbraSimpleClicksBehavior = {
var umbraBehavior = {
IDLE_TIMEOUT_SEC : 10,
idleSince : null,
alreadyClicked : {},
intervalFunc : function() {
var clickTargets = document.querySelectorAll("${click_css_selector}");
var clickedSomething = false;
var somethingLeftBelow = false;
var somethingLeftAbove = false;
var cssSelector = "${click_css_selector}";
var iframes = document.querySelectorAll("iframe");
var documents = Array(iframes.length + 1);
documents[0] = document;
for (var i = 0; i < iframes.length; i++) {
documents[i+1] = iframes[i].contentWindow.document;
}
for (var j = 0; j < documents.length; j++) {
var clickTargets = documents[j].querySelectorAll(cssSelector);
for ( var i = 0; i < clickTargets.length; i++) {
var key = clickTargets[i].outerHTML;
if (!this.alreadyClicked[key]) {
console.log("clicking on " + key);
if (clickTargets[i].umbraClicked) {
continue;
}
var where = this.aboveBelowOrOnScreen(clickTargets[i]);
if (where == 0) {
console.log("clicking on " + clickTargets[i].outerHTML);
// do mouse over event on click target
// since some urls are requsted only on
// this event - see
// https://webarchive.jira.com/browse/AITFIVE-451
var mouseOverEvent = document.createEvent('Events');
mouseOverEvent.initEvent("mouseover",true, false);
clickTargets[i].dispatchEvent(mouseOverEvent);
clickTargets[i].click();
this.alreadyClicked[key] = true;
clickedSomething = true;
this.idleSince = null;
return;
clickTargets[i].umbraClicked = true;
break; //break from clickTargets loop, but not from iframe loop
} else if (where > 0) {
somethingLeftBelow = true;
} else if (where < 0) {
somethingLeftAbove = true;
}
}
}
if (!clickedSomething) {
if (somethingLeftAbove) {
// console.log("scrolling UP because everything on this screen has been clicked but we missed something above");
window.scrollBy(0, -500);
this.idleSince = null;
} else if (somethingLeftBelow) {
// console.log("scrolling because everything on this screen has been clicked but there's more below document.body.clientHeight="
// + document.body.clientHeight);
window.scrollBy(0, 200);
this.idleSince = null;
} else if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) {
// console.log("scrolling because we're not to the bottom yet document.body.clientHeight="
// + document.body.clientHeight);
window.scrollBy(0, 200);
this.idleSince = null;
} else if (this.idleSince == null) {
this.idleSince = Date.now();
}
}
@ -24,7 +78,9 @@ var umbraSimpleClicksBehavior = {
start : function() {
var that = this;
this.intervalId = setInterval(function(){ that.intervalFunc() }, 250);
this.intervalId = setInterval(function() {
that.intervalFunc()
}, 250);
},
isFinished : function() {
@ -36,9 +92,22 @@ var umbraSimpleClicksBehavior = {
}
return false;
},
aboveBelowOrOnScreen : function(e) {
var eTop = e.getBoundingClientRect().top;
if (eTop < window.scrollY) {
return -1; // above
} else if (eTop > window.scrollY + window.innerHeight) {
return 1; // below
} else {
return 0; // on screen
}
},
};
// Called from outside of this script.
var umbraBehaviorFinished = function() { return umbraSimpleClicksBehavior.isFinished() };
var umbraBehaviorFinished = function() {
return umbraBehavior.isFinished()
};
umbraSimpleClicksBehavior.start();
umbraBehavior.start();

View File

@ -16,6 +16,10 @@ behaviors:
url_regex: '^https?://(?:www\.)?vimeo\.com/.*$'
behavior_js: vimeo.js
request_idle_timeout_sec: 10
-
url_regex: '^https?://(?:www\.)?psu24.psu.edu/.*$'
behavior_js: psu24.js
request_idle_timeout_sec: 10
-
url_regex: '^https?://(?:www\.)?instagram\.com/.*$'
behavior_js: instagram.js
@ -28,6 +32,14 @@ behaviors:
url_regex: '^https?://(?:www\.)?usask.ca/.*$'
click_css_selector: a[id='feature-next']
request_idle_timeout_sec: 10
- # https://webarchive.jira.com/browse/AITFIVE-451
url_regex: '^https?://(?:www\.)?soundcloud.com/.*$'
click_css_selector: button.sc-button-play, button.playButton
request_idle_timeout_sec: 10
- # https://webarchive.jira.com/browse/AITFIVE-463
url_regex: '^https?://(?:www\.)?christophercerrone.com/.*$'
click_css_selector: button.playButton.medium
request_idle_timeout_sec: 10
- # default fallback behavior
url_regex: '^.*$'
request_idle_timeout_sec: 10