mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 08:39:59 -05:00
Merge branch 'master' into adds-hop-path-support
This commit is contained in:
commit
d61cec399e
@ -14,7 +14,7 @@ or Chromium) to fetch pages and embedded URLs and to extract links. It employs
|
|||||||
capabilities and `rethinkdb <https://github.com/rethinkdb/rethinkdb>`_ to
|
capabilities and `rethinkdb <https://github.com/rethinkdb/rethinkdb>`_ to
|
||||||
manage crawl state.
|
manage crawl state.
|
||||||
|
|
||||||
Brozzler is designed to work in conjuction with warcprox for web archiving.
|
Brozzler is designed to work in conjunction with warcprox for web archiving.
|
||||||
|
|
||||||
Requirements
|
Requirements
|
||||||
------------
|
------------
|
||||||
|
@ -168,7 +168,7 @@ class Chrome:
|
|||||||
'--remote-debugging-port=%s' % self.port,
|
'--remote-debugging-port=%s' % self.port,
|
||||||
'--use-mock-keychain', # mac thing
|
'--use-mock-keychain', # mac thing
|
||||||
'--user-data-dir=%s' % self._chrome_user_data_dir,
|
'--user-data-dir=%s' % self._chrome_user_data_dir,
|
||||||
'--disable-background-networking',
|
'--disable-background-networking', '--disable-breakpad',
|
||||||
'--disable-renderer-backgrounding', '--disable-hang-monitor',
|
'--disable-renderer-backgrounding', '--disable-hang-monitor',
|
||||||
'--disable-background-timer-throttling', '--mute-audio',
|
'--disable-background-timer-throttling', '--mute-audio',
|
||||||
'--disable-web-sockets',
|
'--disable-web-sockets',
|
||||||
@ -176,7 +176,8 @@ class Chrome:
|
|||||||
'--disable-first-run-ui', '--no-first-run',
|
'--disable-first-run-ui', '--no-first-run',
|
||||||
'--homepage=about:blank', '--disable-direct-npapi-requests',
|
'--homepage=about:blank', '--disable-direct-npapi-requests',
|
||||||
'--disable-web-security', '--disable-notifications',
|
'--disable-web-security', '--disable-notifications',
|
||||||
'--disable-extensions', '--disable-save-password-bubble']
|
'--disable-extensions', '--disable-save-password-bubble',
|
||||||
|
'--disable-sync']
|
||||||
|
|
||||||
extra_chrome_args = os.environ.get('BROZZLER_EXTRA_CHROME_ARGS')
|
extra_chrome_args = os.environ.get('BROZZLER_EXTRA_CHROME_ARGS')
|
||||||
if extra_chrome_args:
|
if extra_chrome_args:
|
||||||
|
@ -65,7 +65,7 @@ var umbraBehavior = {
|
|||||||
if (where == 0) {
|
if (where == 0) {
|
||||||
console.log("clicking on " + clickRadioButtonTargets[k]);
|
console.log("clicking on " + clickRadioButtonTargets[k]);
|
||||||
// do mouse over event on click target
|
// do mouse over event on click target
|
||||||
// since some urls are requsted only on
|
// since some urls are requested only on
|
||||||
// this event - see
|
// this event - see
|
||||||
// https://webarchive.jira.com/browse/AITFIVE-451
|
// https://webarchive.jira.com/browse/AITFIVE-451
|
||||||
var mouseOverEvent = document.createEvent('Events');
|
var mouseOverEvent = document.createEvent('Events');
|
||||||
|
@ -46,7 +46,7 @@ var umbraBehavior = {
|
|||||||
if (where == 0) {
|
if (where == 0) {
|
||||||
console.log("clicking on " + clickTargets[i].outerHTML);
|
console.log("clicking on " + clickTargets[i].outerHTML);
|
||||||
// do mouse over event on click target
|
// do mouse over event on click target
|
||||||
// since some urls are requsted only on
|
// since some urls are requested only on
|
||||||
// this event - see
|
// this event - see
|
||||||
// https://webarchive.jira.com/browse/AITFIVE-451
|
// https://webarchive.jira.com/browse/AITFIVE-451
|
||||||
var mouseOverEvent = document.createEvent('Events');
|
var mouseOverEvent = document.createEvent('Events');
|
||||||
|
@ -168,7 +168,7 @@ class UmbraBehavior {
|
|||||||
doTarget(target, action) {
|
doTarget(target, action) {
|
||||||
// console.log("doing " + action + target.outerHTML);
|
// console.log("doing " + action + target.outerHTML);
|
||||||
// do mouse over event on target
|
// do mouse over event on target
|
||||||
// since some urls are requsted only on
|
// since some urls are requested only on
|
||||||
// this event - see
|
// this event - see
|
||||||
// https://webarchive.jira.com/browse/AITFIVE-451
|
// https://webarchive.jira.com/browse/AITFIVE-451
|
||||||
var mouseOverEvent = document.createEvent("Events");
|
var mouseOverEvent = document.createEvent("Events");
|
||||||
|
4
setup.py
4
setup.py
@ -2,7 +2,7 @@
|
|||||||
'''
|
'''
|
||||||
setup.py - brozzler setup script
|
setup.py - brozzler setup script
|
||||||
|
|
||||||
Copyright (C) 2014-2021 Internet Archive
|
Copyright (C) 2014-2022 Internet Archive
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
you may not use this file except in compliance with the License.
|
you may not use this file except in compliance with the License.
|
||||||
@ -32,7 +32,7 @@ def find_package_data(package):
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.5.25',
|
version='1.5.26',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
@ -75,7 +75,7 @@ def httpd(request):
|
|||||||
def do_POST(self):
|
def do_POST(self):
|
||||||
if self.path == '/login-action':
|
if self.path == '/login-action':
|
||||||
self.send_response(200)
|
self.send_response(200)
|
||||||
payload = b'login successfull\n'
|
payload = b'login successful\n'
|
||||||
self.send_header('Content-Type', 'text/plain;charset=utf-8')
|
self.send_header('Content-Type', 'text/plain;charset=utf-8')
|
||||||
self.send_header('Content-Length', len(payload))
|
self.send_header('Content-Length', len(payload))
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user