Merge branch 'insta18q4' into qa

This commit is contained in:
Barbara Miller 2018-12-13 17:29:36 -08:00
commit cbd6f0f90a
5 changed files with 17 additions and 9 deletions

View file

@@ -26,7 +26,7 @@
behavior_js_template: umbraBehavior18q4.js.j2 behavior_js_template: umbraBehavior18q4.js.j2
default_parameters: default_parameters:
interval: 500 interval: 500
iframes: false skip_iframes: true
actions: actions:
- selector: button.coreSpriteDismissLarge - selector: button.coreSpriteDismissLarge
- selector: 'a>.eLAPa>.KL4Bh' - selector: 'a>.eLAPa>.KL4Bh'

View file

@@ -256,6 +256,8 @@ class WebsockReceiverThread(threading.Thread):
self.logger.debug( self.logger.debug(
'console.%s %s', message['params']['message']['level'], 'console.%s %s', message['params']['message']['level'],
message['params']['message']['text']) message['params']['message']['text'])
elif message['method'] == 'Runtime.exceptionThrown':
self.logger.debug('uncaught exception: %s', message)
elif message['method'] == 'Page.javascriptDialogOpening': elif message['method'] == 'Page.javascriptDialogOpening':
self._javascript_dialog_opening(message) self._javascript_dialog_opening(message)
elif (message['method'] == 'Network.loadingFailed' elif (message['method'] == 'Network.loadingFailed'

View file

@@ -26,7 +26,11 @@ class UmbraBehavior {
this.idleSince = null; this.idleSince = null;
this.intervalId = null; this.intervalId = null;
this.intervalTimeMs = {{interval or 300}}; this.intervalTimeMs = {{interval or 300}};
this.doIframes = {{iframes or true}}; {% if skip_iframes %}
this.skipIframes = true;
{% else %}
this.skipIframes = false;
{% endif %}
this.index = 0; this.index = 0;
} }
@@ -44,7 +48,7 @@ class UmbraBehavior {
var documents = []; var documents = [];
documents[0] = document; documents[0] = document;
if (this.doIframes) { if (!(this.skipIframes)) {
var iframes = document.querySelectorAll("iframe"); var iframes = document.querySelectorAll("iframe");
var iframesLength = iframes.length; var iframesLength = iframes.length;
for (var i = 0; i < iframesLength; i++) { for (var i = 0; i < iframesLength; i++) {

View file

@@ -278,11 +278,13 @@ class BrozzlerWorker:
def _on_service_worker_version_updated(chrome_msg): def _on_service_worker_version_updated(chrome_msg):
# https://github.com/internetarchive/brozzler/issues/140 # https://github.com/internetarchive/brozzler/issues/140
self.logger.trace('%r', chrome_msg) self.logger.trace('%r', chrome_msg)
url = chrome_msg.get('params', {}).get('versions', [{}])[0].get('scriptURL') if chrome_msg.get('params', {}).get('versions'):
if url not in sw_fetched: url = chrome_msg.get('params', {}).get('versions')[0]\
self.logger.info('fetching service worker script %s', url) .get('scriptURL')
self._fetch_url(site, url) if url and url not in sw_fetched:
sw_fetched.add(url) self.logger.info('fetching service worker script %s', url)
self._fetch_url(site, url)
sw_fetched.add(url)
if not browser.is_running(): if not browser.is_running():
browser.start( browser.start(

View file

@@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup( setuptools.setup(
name='brozzler', name='brozzler',
version='1.5.dev315', version='1.5.dev317',
description='Distributed web crawling with browsers', description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler', url='https://github.com/internetarchive/brozzler',
author='Noah Levitt', author='Noah Levitt',