Merge remote-tracking branch 'upstream/master' into qa

This commit is contained in:
Barbara Miller 2019-10-01 16:06:25 -07:00
commit dd2921af69
4 changed files with 27 additions and 12 deletions

View file

@@ -356,13 +356,12 @@ class Browser:
# tell browser to send us messages we're interested in # tell browser to send us messages we're interested in
self.send_to_chrome(method='Network.enable') self.send_to_chrome(method='Network.enable')
self.send_to_chrome(method='Page.enable') self.send_to_chrome(method='Page.enable')
# Enable Console & Runtime output only when debugging.
# After all, we just print these events with debug(), we don't use
# them in Brozzler logic.
if self.logger.isEnabledFor(logging.DEBUG):
self.send_to_chrome(method='Console.enable') self.send_to_chrome(method='Console.enable')
self.send_to_chrome(method='Runtime.enable') self.send_to_chrome(method='Runtime.enable')
# Network.requestIntercepted needs more work...
#self.send_to_chrome(
# method='Network.setRequestInterception',
# params={'patterns': [{'urlPattern': '*'}]})
self.send_to_chrome(method='ServiceWorker.enable') self.send_to_chrome(method='ServiceWorker.enable')
self.send_to_chrome(method='ServiceWorker.setForceUpdateOnPageLoad') self.send_to_chrome(method='ServiceWorker.setForceUpdateOnPageLoad')
@@ -661,6 +660,7 @@ class Browser:
method='Runtime.evaluate', suppress_logging=True, method='Runtime.evaluate', suppress_logging=True,
params={'expression': behavior_script}) params={'expression': behavior_script})
check_interval = min(timeout, 7)
start = time.time() start = time.time()
while True: while True:
elapsed = time.time() - start elapsed = time.time() - start
@@ -669,7 +669,7 @@ class Browser:
'behavior reached hard timeout after %.1fs', elapsed) 'behavior reached hard timeout after %.1fs', elapsed)
return return
brozzler.sleep(7) brozzler.sleep(check_interval)
self.websock_thread.expect_result(self._command_id.peek()) self.websock_thread.expect_result(self._command_id.peek())
msg_id = self.send_to_chrome( msg_id = self.send_to_chrome(

View file

@@ -250,10 +250,16 @@ class Chrome:
# XXX select doesn't work on windows # XXX select doesn't work on windows
def readline_nonblock(f): def readline_nonblock(f):
buf = b'' buf = b''
try:
while not self._shutdown.is_set() and ( while not self._shutdown.is_set() and (
len(buf) == 0 or buf[-1] != 0xa) and select.select( len(buf) == 0 or buf[-1] != 0xa) and select.select(
[f],[],[],0.5)[0]: [f],[],[],0.5)[0]:
buf += f.read(1) buf += f.read(1)
except (ValueError, OSError):
# When the chrome process crashes, stdout & stderr are closed
# and trying to read from them raises these exceptions. We just
# stop reading and return current `buf`.
pass
return buf return buf
try: try:

View file

@@ -43,6 +43,7 @@ class UmbraBehavior {
var documents = []; var documents = [];
documents[0] = document; documents[0] = document;
var iframes = document.querySelectorAll("iframe"); var iframes = document.querySelectorAll("iframe");
var iframesLength = iframes.length; var iframesLength = iframes.length;
for (var i = 0; i < iframesLength; i++) { for (var i = 0; i < iframesLength; i++) {
@@ -54,23 +55,31 @@ class UmbraBehavior {
// console.log("exception looking at iframe" + iframes[i] + ": " + e); // console.log("exception looking at iframe" + iframes[i] + ": " + e);
} }
} }
var documentsLength = documents.length; var documentsLength = documents.length;
for (var j = 0; j < documentsLength; j++) { for (var j = 0; j < documentsLength; j++) {
if (closeSelector) { if (closeSelector) {
var closeTargets = documents[j].querySelectorAll(closeSelector); var closeTargets = documents[j].querySelectorAll(closeSelector);
for (var i = 0; i < closeTargets.length; i++) { for (var i = 0; i < closeTargets.length; i++) {
this.doTarget(closeTargets[i], "click"); if (this.isVisible(closeTargets[i])) {
closeTargets[i].click();
didSomething = true;
break;
} }
} }
}
if (firstMatchOnly) { if (firstMatchOnly) {
var doTargets = [ documents[j].querySelector(selector) ]; var doTargets = [ documents[j].querySelector(selector) ];
} else { } else {
var doTargets = documents[j].querySelectorAll(selector); var doTargets = documents[j].querySelectorAll(selector);
} }
var doTargetsLength = doTargets.length; var doTargetsLength = doTargets.length;
if (!(doTargetsLength > 0)) { if (!(doTargetsLength > 0)) {
continue; continue;
} }
for ( var i = 0; i < doTargetsLength; i++) { for ( var i = 0; i < doTargetsLength; i++) {
if (!repeatSameElement && this.alreadyDone.indexOf(doTargets[i]) > -1) { if (!repeatSameElement && this.alreadyDone.indexOf(doTargets[i]) > -1) {
continue; continue;

View file

@@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup( setuptools.setup(
name='brozzler', name='brozzler',
version='1.5.7', version='1.5.8',
description='Distributed web crawling with browsers', description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler', url='https://github.com/internetarchive/brozzler',
author='Noah Levitt', author='Noah Levitt',