mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-23 16:19:49 -05:00
Merge branch 'master' into adds-hop-path-support
This commit is contained in:
commit
d61cec399e
@ -14,7 +14,7 @@ or Chromium) to fetch pages and embedded URLs and to extract links. It employs
|
||||
capabilities and `rethinkdb <https://github.com/rethinkdb/rethinkdb>`_ to
|
||||
manage crawl state.
|
||||
|
||||
Brozzler is designed to work in conjuction with warcprox for web archiving.
|
||||
Brozzler is designed to work in conjunction with warcprox for web archiving.
|
||||
|
||||
Requirements
|
||||
------------
|
||||
|
@ -168,7 +168,7 @@ class Chrome:
|
||||
'--remote-debugging-port=%s' % self.port,
|
||||
'--use-mock-keychain', # mac thing
|
||||
'--user-data-dir=%s' % self._chrome_user_data_dir,
|
||||
'--disable-background-networking',
|
||||
'--disable-background-networking', '--disable-breakpad',
|
||||
'--disable-renderer-backgrounding', '--disable-hang-monitor',
|
||||
'--disable-background-timer-throttling', '--mute-audio',
|
||||
'--disable-web-sockets',
|
||||
@ -176,7 +176,8 @@ class Chrome:
|
||||
'--disable-first-run-ui', '--no-first-run',
|
||||
'--homepage=about:blank', '--disable-direct-npapi-requests',
|
||||
'--disable-web-security', '--disable-notifications',
|
||||
'--disable-extensions', '--disable-save-password-bubble']
|
||||
'--disable-extensions', '--disable-save-password-bubble',
|
||||
'--disable-sync']
|
||||
|
||||
extra_chrome_args = os.environ.get('BROZZLER_EXTRA_CHROME_ARGS')
|
||||
if extra_chrome_args:
|
||||
|
@ -65,7 +65,7 @@ var umbraBehavior = {
|
||||
if (where == 0) {
|
||||
console.log("clicking on " + clickRadioButtonTargets[k]);
|
||||
// do mouse over event on click target
|
||||
// since some urls are requsted only on
|
||||
// since some urls are requested only on
|
||||
// this event - see
|
||||
// https://webarchive.jira.com/browse/AITFIVE-451
|
||||
var mouseOverEvent = document.createEvent('Events');
|
||||
|
@ -46,7 +46,7 @@ var umbraBehavior = {
|
||||
if (where == 0) {
|
||||
console.log("clicking on " + clickTargets[i].outerHTML);
|
||||
// do mouse over event on click target
|
||||
// since some urls are requsted only on
|
||||
// since some urls are requested only on
|
||||
// this event - see
|
||||
// https://webarchive.jira.com/browse/AITFIVE-451
|
||||
var mouseOverEvent = document.createEvent('Events');
|
||||
|
@ -168,7 +168,7 @@ class UmbraBehavior {
|
||||
doTarget(target, action) {
|
||||
// console.log("doing " + action + target.outerHTML);
|
||||
// do mouse over event on target
|
||||
// since some urls are requsted only on
|
||||
// since some urls are requested only on
|
||||
// this event - see
|
||||
// https://webarchive.jira.com/browse/AITFIVE-451
|
||||
var mouseOverEvent = document.createEvent("Events");
|
||||
|
4
setup.py
4
setup.py
@ -2,7 +2,7 @@
|
||||
'''
|
||||
setup.py - brozzler setup script
|
||||
|
||||
Copyright (C) 2014-2021 Internet Archive
|
||||
Copyright (C) 2014-2022 Internet Archive
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@ -32,7 +32,7 @@ def find_package_data(package):
|
||||
|
||||
setuptools.setup(
|
||||
name='brozzler',
|
||||
version='1.5.25',
|
||||
version='1.5.26',
|
||||
description='Distributed web crawling with browsers',
|
||||
url='https://github.com/internetarchive/brozzler',
|
||||
author='Noah Levitt',
|
||||
|
@ -75,7 +75,7 @@ def httpd(request):
|
||||
def do_POST(self):
|
||||
if self.path == '/login-action':
|
||||
self.send_response(200)
|
||||
payload = b'login successfull\n'
|
||||
payload = b'login successful\n'
|
||||
self.send_header('Content-Type', 'text/plain;charset=utf-8')
|
||||
self.send_header('Content-Length', len(payload))
|
||||
self.end_headers()
|
||||
|
Loading…
x
Reference in New Issue
Block a user