mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-07 22:12:15 -04:00
Merge branch 'insta18q4' into qa
This commit is contained in:
commit
cbd6f0f90a
5 changed files with 17 additions and 9 deletions
|
@ -26,7 +26,7 @@
|
|||
behavior_js_template: umbraBehavior18q4.js.j2
|
||||
default_parameters:
|
||||
interval: 500
|
||||
iframes: false
|
||||
skip_iframes: true
|
||||
actions:
|
||||
- selector: button.coreSpriteDismissLarge
|
||||
- selector: 'a>.eLAPa>.KL4Bh'
|
||||
|
|
|
@ -256,6 +256,8 @@ class WebsockReceiverThread(threading.Thread):
|
|||
self.logger.debug(
|
||||
'console.%s %s', message['params']['message']['level'],
|
||||
message['params']['message']['text'])
|
||||
elif message['method'] == 'Runtime.exceptionThrown':
|
||||
self.logger.debug('uncaught exception: %s', message)
|
||||
elif message['method'] == 'Page.javascriptDialogOpening':
|
||||
self._javascript_dialog_opening(message)
|
||||
elif (message['method'] == 'Network.loadingFailed'
|
||||
|
|
|
@ -26,7 +26,11 @@ class UmbraBehavior {
|
|||
this.idleSince = null;
|
||||
this.intervalId = null;
|
||||
this.intervalTimeMs = {{interval or 300}};
|
||||
this.doIframes = {{iframes or true}};
|
||||
{% if skip_iframes %}
|
||||
this.skipIframes = true;
|
||||
{% else %}
|
||||
this.skipIframes = false;
|
||||
{% endif %}
|
||||
this.index = 0;
|
||||
}
|
||||
|
||||
|
@ -44,7 +48,7 @@ class UmbraBehavior {
|
|||
|
||||
var documents = [];
|
||||
documents[0] = document;
|
||||
if (this.doIframes) {
|
||||
if (!(this.skipIframes)) {
|
||||
var iframes = document.querySelectorAll("iframe");
|
||||
var iframesLength = iframes.length;
|
||||
for (var i = 0; i < iframesLength; i++) {
|
||||
|
|
|
@ -278,8 +278,10 @@ class BrozzlerWorker:
|
|||
def _on_service_worker_version_updated(chrome_msg):
|
||||
# https://github.com/internetarchive/brozzler/issues/140
|
||||
self.logger.trace('%r', chrome_msg)
|
||||
url = chrome_msg.get('params', {}).get('versions', [{}])[0].get('scriptURL')
|
||||
if url not in sw_fetched:
|
||||
if chrome_msg.get('params', {}).get('versions'):
|
||||
url = chrome_msg.get('params', {}).get('versions')[0]\
|
||||
.get('scriptURL')
|
||||
if url and url not in sw_fetched:
|
||||
self.logger.info('fetching service worker script %s', url)
|
||||
self._fetch_url(site, url)
|
||||
sw_fetched.add(url)
|
||||
|
|
2
setup.py
2
setup.py
|
@ -32,7 +32,7 @@ def find_package_data(package):
|
|||
|
||||
setuptools.setup(
|
||||
name='brozzler',
|
||||
version='1.5.dev315',
|
||||
version='1.5.dev317',
|
||||
description='Distributed web crawling with browsers',
|
||||
url='https://github.com/internetarchive/brozzler',
|
||||
author='Noah Levitt',
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue