mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-08 06:22:23 -04:00
Merge branch 'insta18q4' into qa
This commit is contained in:
commit
cbd6f0f90a
5 changed files with 17 additions and 9 deletions
|
@ -26,7 +26,7 @@
|
||||||
behavior_js_template: umbraBehavior18q4.js.j2
|
behavior_js_template: umbraBehavior18q4.js.j2
|
||||||
default_parameters:
|
default_parameters:
|
||||||
interval: 500
|
interval: 500
|
||||||
iframes: false
|
skip_iframes: true
|
||||||
actions:
|
actions:
|
||||||
- selector: button.coreSpriteDismissLarge
|
- selector: button.coreSpriteDismissLarge
|
||||||
- selector: 'a>.eLAPa>.KL4Bh'
|
- selector: 'a>.eLAPa>.KL4Bh'
|
||||||
|
|
|
@ -256,6 +256,8 @@ class WebsockReceiverThread(threading.Thread):
|
||||||
self.logger.debug(
|
self.logger.debug(
|
||||||
'console.%s %s', message['params']['message']['level'],
|
'console.%s %s', message['params']['message']['level'],
|
||||||
message['params']['message']['text'])
|
message['params']['message']['text'])
|
||||||
|
elif message['method'] == 'Runtime.exceptionThrown':
|
||||||
|
self.logger.debug('uncaught exception: %s', message)
|
||||||
elif message['method'] == 'Page.javascriptDialogOpening':
|
elif message['method'] == 'Page.javascriptDialogOpening':
|
||||||
self._javascript_dialog_opening(message)
|
self._javascript_dialog_opening(message)
|
||||||
elif (message['method'] == 'Network.loadingFailed'
|
elif (message['method'] == 'Network.loadingFailed'
|
||||||
|
|
|
@ -26,7 +26,11 @@ class UmbraBehavior {
|
||||||
this.idleSince = null;
|
this.idleSince = null;
|
||||||
this.intervalId = null;
|
this.intervalId = null;
|
||||||
this.intervalTimeMs = {{interval or 300}};
|
this.intervalTimeMs = {{interval or 300}};
|
||||||
this.doIframes = {{iframes or true}};
|
{% if skip_iframes %}
|
||||||
|
this.skipIframes = true;
|
||||||
|
{% else %}
|
||||||
|
this.skipIframes = false;
|
||||||
|
{% endif %}
|
||||||
this.index = 0;
|
this.index = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -44,7 +48,7 @@ class UmbraBehavior {
|
||||||
|
|
||||||
var documents = [];
|
var documents = [];
|
||||||
documents[0] = document;
|
documents[0] = document;
|
||||||
if (this.doIframes) {
|
if (!(this.skipIframes)) {
|
||||||
var iframes = document.querySelectorAll("iframe");
|
var iframes = document.querySelectorAll("iframe");
|
||||||
var iframesLength = iframes.length;
|
var iframesLength = iframes.length;
|
||||||
for (var i = 0; i < iframesLength; i++) {
|
for (var i = 0; i < iframesLength; i++) {
|
||||||
|
|
|
@ -278,11 +278,13 @@ class BrozzlerWorker:
|
||||||
def _on_service_worker_version_updated(chrome_msg):
|
def _on_service_worker_version_updated(chrome_msg):
|
||||||
# https://github.com/internetarchive/brozzler/issues/140
|
# https://github.com/internetarchive/brozzler/issues/140
|
||||||
self.logger.trace('%r', chrome_msg)
|
self.logger.trace('%r', chrome_msg)
|
||||||
url = chrome_msg.get('params', {}).get('versions', [{}])[0].get('scriptURL')
|
if chrome_msg.get('params', {}).get('versions'):
|
||||||
if url not in sw_fetched:
|
url = chrome_msg.get('params', {}).get('versions')[0]\
|
||||||
self.logger.info('fetching service worker script %s', url)
|
.get('scriptURL')
|
||||||
self._fetch_url(site, url)
|
if url and url not in sw_fetched:
|
||||||
sw_fetched.add(url)
|
self.logger.info('fetching service worker script %s', url)
|
||||||
|
self._fetch_url(site, url)
|
||||||
|
sw_fetched.add(url)
|
||||||
|
|
||||||
if not browser.is_running():
|
if not browser.is_running():
|
||||||
browser.start(
|
browser.start(
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -32,7 +32,7 @@ def find_package_data(package):
|
||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.5.dev315',
|
version='1.5.dev317',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue