mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-08 06:22:23 -04:00
Merge branch 'senate-videos' into qa
This commit is contained in:
commit
5304ba4491
3 changed files with 12 additions and 3 deletions
|
@ -366,11 +366,12 @@ class Browser:
|
|||
self.send_to_chrome(method='ServiceWorker.enable')
|
||||
self.send_to_chrome(method='ServiceWorker.setForceUpdateOnPageLoad')
|
||||
|
||||
# disable google analytics
|
||||
# disable google analytics and amp analytics
|
||||
self.send_to_chrome(
|
||||
method='Network.setBlockedURLs',
|
||||
params={'urls': ['*google-analytics.com/analytics.js',
|
||||
'*google-analytics.com/ga.js']})
|
||||
'*google-analytics.com/ga.js',
|
||||
'*cdn.ampproject.org/*/amp-analytics*.js']})
|
||||
|
||||
def stop(self):
|
||||
'''
|
||||
|
|
|
@ -8,6 +8,14 @@ var __brzl_compileOutlinks = function(frame) {
|
|||
if (frame && frame.document) {
|
||||
outlinks = Array.prototype.slice.call(
|
||||
frame.document.querySelectorAll('a[href], area[href]'));
|
||||
popups = Array.prototype.slice.call(
|
||||
frame.document.querySelectorAll('a[onclick]'));
|
||||
if (popups && popups.length > 0) {
|
||||
for (var p=0; p < popups.length; p++) {
|
||||
popups[p] = popups[p].onclick.toString().split("'")[1];
|
||||
}
|
||||
outlinks = outlinks.concat(popups);
|
||||
}
|
||||
for (var i = 0; i < frame.frames.length; i++) {
|
||||
if (frame.frames[i] && !__brzl_framesDone.has(frame.frames[i])) {
|
||||
outlinks = outlinks.concat(
|
||||
|
|
2
setup.py
2
setup.py
|
@ -32,7 +32,7 @@ def find_package_data(package):
|
|||
|
||||
setuptools.setup(
|
||||
name='brozzler',
|
||||
version='1.5.6',
|
||||
version='1.5.7',
|
||||
description='Distributed web crawling with browsers',
|
||||
url='https://github.com/internetarchive/brozzler',
|
||||
author='Noah Levitt',
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue