diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml index daf40f5..264a785 100644 --- a/brozzler/behaviors.yaml +++ b/brozzler/behaviors.yaml @@ -26,7 +26,7 @@ behavior_js_template: umbraBehavior18q4.js.j2 default_parameters: interval: 500 - iframes: false + skip_iframes: true actions: - selector: button.coreSpriteDismissLarge - selector: 'a>.eLAPa>.KL4Bh' diff --git a/brozzler/browser.py b/brozzler/browser.py index 000ed8c..0548b1e 100644 --- a/brozzler/browser.py +++ b/brozzler/browser.py @@ -256,6 +256,8 @@ class WebsockReceiverThread(threading.Thread): self.logger.debug( 'console.%s %s', message['params']['message']['level'], message['params']['message']['text']) + elif message['method'] == 'Runtime.exceptionThrown': + self.logger.debug('uncaught exception: %s', message) elif message['method'] == 'Page.javascriptDialogOpening': self._javascript_dialog_opening(message) elif (message['method'] == 'Network.loadingFailed' diff --git a/brozzler/js-templates/umbraBehavior18q4.js.j2 b/brozzler/js-templates/umbraBehavior18q4.js.j2 index 100b5b7..7c24a13 100644 --- a/brozzler/js-templates/umbraBehavior18q4.js.j2 +++ b/brozzler/js-templates/umbraBehavior18q4.js.j2 @@ -26,7 +26,11 @@ class UmbraBehavior { this.idleSince = null; this.intervalId = null; this.intervalTimeMs = {{interval or 300}}; - this.doIframes = {{iframes or true}}; + {% if skip_iframes %} + this.skipIframes = true; + {% else %} + this.skipIframes = false; + {% endif %} this.index = 0; } @@ -44,7 +48,7 @@ class UmbraBehavior { var documents = []; documents[0] = document; - if (this.doIframes) { + if (!(this.skipIframes)) { var iframes = document.querySelectorAll("iframe"); var iframesLength = iframes.length; for (var i = 0; i < iframesLength; i++) { diff --git a/brozzler/worker.py b/brozzler/worker.py index 3cfa9fc..fba83aa 100644 --- a/brozzler/worker.py +++ b/brozzler/worker.py @@ -278,11 +278,13 @@ class BrozzlerWorker: def _on_service_worker_version_updated(chrome_msg): # https://github.com/internetarchive/brozzler/issues/140 self.logger.trace('%r', chrome_msg) - url = chrome_msg.get('params', {}).get('versions', [{}])[0].get('scriptURL') - if url not in sw_fetched: - self.logger.info('fetching service worker script %s', url) - self._fetch_url(site, url) - sw_fetched.add(url) + if chrome_msg.get('params', {}).get('versions'): + url = chrome_msg.get('params', {}).get('versions')[0]\ + .get('scriptURL') + if url and url not in sw_fetched: + self.logger.info('fetching service worker script %s', url) + self._fetch_url(site, url) + sw_fetched.add(url) if not browser.is_running(): browser.start( diff --git a/setup.py b/setup.py index bc4b868..1fb7ff8 100755 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ def find_package_data(package): setuptools.setup( name='brozzler', - version='1.5.dev315', + version='1.5.dev317', description='Distributed web crawling with browsers', url='https://github.com/internetarchive/brozzler', author='Noah Levitt',