mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-20 23:56:34 -04:00
Merge branch 'master' into qa
* master: move behavior_parameters into top level of site configuration; install the virtualenv package with pip because the apt version is old and conflicts with the recent version of pip we're using; logging tweak; rename webconsole to dashboard; add login details to behavior parameters; initial login additions
This commit is contained in:
commit
fbd540244b
4
.gitmodules
vendored
4
.gitmodules
vendored
@ -1,3 +1,3 @@
|
||||
[submodule "brozzler/webconsole/static/noVNC"]
|
||||
path = brozzler/webconsole/static/noVNC
|
||||
[submodule "noVNC"]
|
||||
path = brozzler/dashboard/static/noVNC
|
||||
url = https://github.com/kanaka/noVNC.git
|
||||
|
12
README.rst
12
README.rst
@ -33,7 +33,7 @@ Getting Started
|
||||
|
||||
The easiest way to get started with brozzler for web archiving is with
|
||||
``brozzler-easy``. Brozzler-easy runs brozzler-worker, warcprox,
|
||||
`pywb <https://github.com/ikreymer/pywb>`_, and brozzler-webconsole, configured
|
||||
`pywb <https://github.com/ikreymer/pywb>`_, and brozzler-dashboard, configured
|
||||
to work with each other, in a single process.
|
||||
|
||||
Mac instructions:
|
||||
@ -118,24 +118,24 @@ must be specified, everything else is optional. For details, see
|
||||
scope:
|
||||
surt: http://(org,example,
|
||||
|
||||
Brozzler Web Console
|
||||
--------------------
|
||||
Brozzler Dashboard
|
||||
------------------
|
||||
|
||||
Brozzler comes with a rudimentary web application for viewing crawl job status.
|
||||
To install brozzler with the dependencies required to run this app, run
|
||||
|
||||
::
|
||||
|
||||
pip install brozzler[webconsole]
|
||||
pip install brozzler[dashboard]
|
||||
|
||||
|
||||
To start the app, run
|
||||
|
||||
::
|
||||
|
||||
brozzler-webconsole
|
||||
brozzler-dashboard
|
||||
|
||||
See ``brozzler-webconsole --help`` for configuration options.
|
||||
See ``brozzler-dashboard --help`` for configuration options.
|
||||
|
||||
Headless Chromium
|
||||
-----------------
|
||||
|
@ -9,7 +9,7 @@ localhost
|
||||
[brozzler-worker]
|
||||
localhost
|
||||
|
||||
[brozzler-webconsole]
|
||||
[brozzler-dashboard]
|
||||
localhost
|
||||
|
||||
[pywb]
|
||||
|
@ -16,7 +16,7 @@ work_dir=/vagrant
|
||||
[brozzler-worker]
|
||||
10.9.9.9
|
||||
|
||||
[brozzler-webconsole]
|
||||
[brozzler-dashboard]
|
||||
10.9.9.9
|
||||
|
||||
[pywb]
|
||||
|
@ -14,10 +14,10 @@
|
||||
roles:
|
||||
- brozzler-worker
|
||||
|
||||
- name: deploy brozzler-webconsole
|
||||
hosts: brozzler-webconsole
|
||||
- name: deploy brozzler-dashboard
|
||||
hosts: brozzler-dashboard
|
||||
roles:
|
||||
- brozzler-webconsole
|
||||
- brozzler-dashboard
|
||||
|
||||
- name: deploy pywb
|
||||
hosts: pywb
|
||||
|
4
ansible/roles/brozzler-dashboard/handlers/main.yml
Normal file
4
ansible/roles/brozzler-dashboard/handlers/main.yml
Normal file
@ -0,0 +1,4 @@
|
||||
---
|
||||
- name: restart brozzler-dashboard
|
||||
service: name=brozzler-dashboard state=restarted
|
||||
become: true
|
20
ansible/roles/brozzler-dashboard/tasks/main.yml
Normal file
20
ansible/roles/brozzler-dashboard/tasks/main.yml
Normal file
@ -0,0 +1,20 @@
|
||||
---
|
||||
- name: mkdir {{venv_root}}/brozzler-dashboard-ve34
|
||||
file: path={{venv_root}}/brozzler-dashboard-ve34 state=directory
|
||||
owner={{user}}
|
||||
become: true
|
||||
- name: install brozzler[dashboard] in virtualenv
|
||||
pip: name='{{brozzler_pip_name}}[dashboard]'
|
||||
virtualenv={{venv_root}}/brozzler-dashboard-ve34
|
||||
virtualenv_python=python3.4
|
||||
extra_args='--no-input --upgrade --pre --cache-dir=/tmp/pip-cache'
|
||||
become: true
|
||||
become_user: '{{user}}'
|
||||
notify:
|
||||
- restart brozzler-dashboard
|
||||
- name: install upstart config /etc/init/brozzler-dashboard.conf
|
||||
become: true
|
||||
template: src=templates/brozzler-dashboard.conf.j2
|
||||
dest=/etc/init/brozzler-dashboard.conf
|
||||
notify:
|
||||
- restart brozzler-dashboard
|
@ -1,10 +1,10 @@
|
||||
description "brozzler-webconsole"
|
||||
description "brozzler-dashboard"
|
||||
|
||||
start on runlevel [2345]
|
||||
stop on runlevel [!2345]
|
||||
|
||||
env PYTHONPATH={{venv_root}}/brozzler-webconsole-ve34/lib/python3.4/site-packages
|
||||
env PATH={{venv_root}}/brozzler-webconsole-ve34/bin:/usr/bin:/bin
|
||||
env PYTHONPATH={{venv_root}}/brozzler-dashboard-ve34/lib/python3.4/site-packages
|
||||
env PATH={{venv_root}}/brozzler-dashboard-ve34/bin:/usr/bin:/bin
|
||||
env LC_ALL=C.UTF-8
|
||||
|
||||
env WAYBACK_BASEURL=http://{{groups['pywb'][0]}}:8880/brozzler
|
||||
@ -15,4 +15,4 @@ setuid {{user}}
|
||||
|
||||
console log
|
||||
|
||||
exec gunicorn --bind=0.0.0.0:8881 brozzler.webconsole:app
|
||||
exec gunicorn --bind=0.0.0.0:8881 brozzler.dashboard:app
|
@ -1,4 +0,0 @@
|
||||
---
|
||||
- name: restart brozzler-webconsole
|
||||
service: name=brozzler-webconsole state=restarted
|
||||
become: true
|
@ -1,20 +0,0 @@
|
||||
---
|
||||
- name: mkdir {{venv_root}}/brozzler-webconsole-ve34
|
||||
file: path={{venv_root}}/brozzler-webconsole-ve34 state=directory
|
||||
owner={{user}}
|
||||
become: true
|
||||
- name: install brozzler[webconsole] in virtualenv
|
||||
pip: name='{{brozzler_pip_name}}[webconsole]'
|
||||
virtualenv={{venv_root}}/brozzler-webconsole-ve34
|
||||
virtualenv_python=python3.4
|
||||
extra_args='--no-input --upgrade --pre --cache-dir=/tmp/pip-cache'
|
||||
become: true
|
||||
become_user: '{{user}}'
|
||||
notify:
|
||||
- restart brozzler-webconsole
|
||||
- name: install upstart config /etc/init/brozzler-webconsole.conf
|
||||
become: true
|
||||
template: src=templates/brozzler-webconsole.conf.j2
|
||||
dest=/etc/init/brozzler-webconsole.conf
|
||||
notify:
|
||||
- restart brozzler-webconsole
|
@ -9,7 +9,6 @@
|
||||
become: true
|
||||
apt: name={{item}} state=present
|
||||
with_items:
|
||||
- python-virtualenv
|
||||
- vnc4server
|
||||
- chromium-browser
|
||||
- xfonts-base
|
||||
|
@ -10,6 +10,6 @@ console log
|
||||
env PYTHONPATH={{venv_root}}/websockify-ve34/lib/python3.4/site-packages
|
||||
env PATH={{venv_root}}/websockify-ve34/bin:/usr/bin:/bin
|
||||
|
||||
# port 8901 is hard-coded in brozzler/webconsole/static/partials/workers.html
|
||||
# port 8901 is hard-coded in brozzler/dashboard/static/partials/workers.html
|
||||
exec nice websockify 0.0.0.0:8901 localhost:5901
|
||||
|
||||
|
@ -1,24 +1,28 @@
|
||||
---
|
||||
## # get latest pip (had problems with version from apt-get, specifically
|
||||
## # "pip install pyopenssl" did not install the dependency "cryptography")
|
||||
## # http://stackoverflow.com/questions/34587473/what-is-get-pip-py-checksum-where-can-i-get-it-for-sure
|
||||
## - name: install setuptools for python 2 and 3
|
||||
## become: true
|
||||
## apt: name={{item}} state=present
|
||||
## with_items:
|
||||
## - python-setuptools
|
||||
## - python3-setuptools
|
||||
## - name: download pip-8.1.2.tar.gz
|
||||
## get_url:
|
||||
## url: https://pypi.python.org/packages/e7/a8/7556133689add8d1a54c0b14aeff0acb03c64707ce100ecd53934da1aa13/pip-8.1.2.tar.gz
|
||||
## dest: /tmp
|
||||
## checksum: sha1:1c13c247967ec5bee6de5fd104c5d78ba30951c7
|
||||
## - name: extract pip-8.1.2.tar.gz
|
||||
## unarchive: src=/tmp/pip-8.1.2.tar.gz dest=/tmp copy=no
|
||||
## - name: run "python3 setup.py install" in /tmp/pip-8.1.2
|
||||
## command: python3 setup.py install chdir=/tmp/pip-8.1.2
|
||||
## creates=/usr/local/lib/python2.7/dist-packages/pip-8.1.2-py2.7.egg/pip/__init__.py
|
||||
## become: true
|
||||
# get latest pip (had problems with version from apt-get, specifically
|
||||
# "pip install pyopenssl" did not install the dependency "cryptography")
|
||||
# http://stackoverflow.com/questions/34587473/what-is-get-pip-py-checksum-where-can-i-get-it-for-sure
|
||||
- name: install setuptools for python 2 and 3
|
||||
become: true
|
||||
apt: name={{item}} state=present
|
||||
with_items:
|
||||
- python-setuptools
|
||||
- python3-setuptools
|
||||
- name: download pip-8.1.2.tar.gz
|
||||
get_url:
|
||||
url: https://pypi.python.org/packages/e7/a8/7556133689add8d1a54c0b14aeff0acb03c64707ce100ecd53934da1aa13/pip-8.1.2.tar.gz
|
||||
dest: /tmp
|
||||
checksum: sha1:1c13c247967ec5bee6de5fd104c5d78ba30951c7
|
||||
- name: extract pip-8.1.2.tar.gz
|
||||
unarchive: src=/tmp/pip-8.1.2.tar.gz dest=/tmp copy=no
|
||||
- name: run "python3 setup.py install" in /tmp/pip-8.1.2
|
||||
command: python3 setup.py install chdir=/tmp/pip-8.1.2
|
||||
creates=/usr/local/lib/python2.7/dist-packages/pip-8.1.2-py2.7.egg/pip/__init__.py
|
||||
become: true
|
||||
- name: run "pip install virtualenv"
|
||||
command: pip install virtualenv
|
||||
creates=/usr/local/lib/python3.4/dist-packages/virtualenv.py
|
||||
become: true
|
||||
- command: id {{user}}
|
||||
register: id_user
|
||||
ignore_errors: true
|
||||
|
@ -4,7 +4,6 @@
|
||||
apt: name={{item}} state=present
|
||||
with_items:
|
||||
- gcc
|
||||
- python-virtualenv
|
||||
- python3.4
|
||||
- libpython3.4-dev
|
||||
- libffi-dev
|
||||
|
12
brozzler/cli.py
Normal file → Executable file
12
brozzler/cli.py
Normal file → Executable file
@ -120,6 +120,12 @@ def brozzle_page():
|
||||
'-e', '--chrome-exe', dest='chrome_exe',
|
||||
default=suggest_default_chrome_exe(),
|
||||
help='executable to use to invoke chrome')
|
||||
arg_parser.add_argument(
|
||||
'--behavior-parameters', dest='behavior_parameters',
|
||||
default=None, help=(
|
||||
'json blob of parameters to populate the javascript behavior '
|
||||
'template, e.g. {"parameter_username":"x",'
|
||||
'"parameter_password":"y"}'))
|
||||
arg_parser.add_argument(
|
||||
'--proxy', dest='proxy', default=None,
|
||||
help='http proxy')
|
||||
@ -133,9 +139,13 @@ def brozzle_page():
|
||||
args = arg_parser.parse_args(args=sys.argv[1:])
|
||||
_configure_logging(args)
|
||||
|
||||
behavior_parameters = {}
|
||||
if args.behavior_parameters:
|
||||
behavior_parameters = json.loads(args.behavior_parameters)
|
||||
site = brozzler.Site(
|
||||
id=-1, seed=args.url, proxy=args.proxy,
|
||||
enable_warcprox_features=args.enable_warcprox_features)
|
||||
enable_warcprox_features=args.enable_warcprox_features,
|
||||
behavior_parameters=behavior_parameters)
|
||||
page = brozzler.Page(url=args.url, site_id=site.id)
|
||||
worker = brozzler.BrozzlerWorker(frontier=None)
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
'''
|
||||
brozzler/webconsole/__init__.py - flask app for brozzler web console, defines
|
||||
api endpoints etc
|
||||
brozzler/dashboard/__init__.py - flask app for brozzler dashboard, defines api
|
||||
endpoints etc
|
||||
|
||||
Copyright (C) 2014-2016 Internet Archive
|
||||
|
||||
@ -24,7 +24,7 @@ try:
|
||||
except ImportError as e:
|
||||
logging.critical(
|
||||
'%s: %s\n\nYou might need to run "pip install '
|
||||
'brozzler[webconsole]".\nSee README.rst for more information.',
|
||||
'brozzler[dashboard]".\nSee README.rst for more information.',
|
||||
type(e).__name__, e)
|
||||
sys.exit(1)
|
||||
import rethinkstuff
|
||||
@ -210,11 +210,11 @@ try:
|
||||
import gunicorn.app.base
|
||||
from gunicorn.six import iteritems
|
||||
|
||||
class GunicornBrozzlerWebConsole(gunicorn.app.base.BaseApplication):
|
||||
class GunicornBrozzlerDashboard(gunicorn.app.base.BaseApplication):
|
||||
def __init__(self, app, options=None):
|
||||
self.options = options or {}
|
||||
self.application = app
|
||||
super(GunicornBrozzlerWebConsole, self).__init__()
|
||||
super(GunicornBrozzlerDashboard, self).__init__()
|
||||
|
||||
def load_config(self):
|
||||
config = dict(
|
||||
@ -227,12 +227,12 @@ try:
|
||||
return self.application
|
||||
|
||||
def run(**options):
|
||||
logging.info('running brozzler-webconsole using gunicorn')
|
||||
GunicornBrozzlerWebConsole(app, options).run()
|
||||
logging.info('running brozzler-dashboard using gunicorn')
|
||||
GunicornBrozzlerDashboard(app, options).run()
|
||||
|
||||
except ImportError:
|
||||
def run():
|
||||
logging.info('running brozzler-webconsole using simple flask app.run')
|
||||
logging.info('running brozzler-dashboard using simple flask app.run')
|
||||
app.run()
|
||||
|
||||
def main():
|
||||
@ -241,10 +241,10 @@ def main():
|
||||
prog=os.path.basename(sys.argv[0]),
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
description=(
|
||||
'brozzler-webconsole - web application for viewing brozzler '
|
||||
'brozzler-dashboard - web application for viewing brozzler '
|
||||
'crawl status'),
|
||||
epilog=(
|
||||
'brozzler-webconsole has no command line options, but can be '
|
||||
'brozzler-dashboard has no command line options, but can be '
|
||||
'configured using the following environment variables:\n\n'
|
||||
' RETHINKDB_SERVERS rethinkdb servers, e.g. db0.foo.org,'
|
||||
'db0.foo.org:38015,db1.foo.org (default: localhost)\n'
|
Before Width: | Height: | Size: 9.1 KiB After Width: | Height: | Size: 9.1 KiB |
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* brozzler-webconsole/static/js/app.js - brozzler console angularjs code
|
||||
* brozzler/dashboard/static/js/app.js - brozzler dashboard angularjs code
|
||||
*
|
||||
* Copyright (C) 2014-2016 Internet Archive
|
||||
*
|
||||
@ -18,12 +18,12 @@
|
||||
|
||||
"use strict";
|
||||
|
||||
var brozzlerConsoleApp = angular.module("brozzlerConsoleApp", [
|
||||
var brozzlerDashboardApp = angular.module("brozzlerDashboardApp", [
|
||||
"ngRoute",
|
||||
"brozzlerControllers",
|
||||
]);
|
||||
|
||||
brozzlerConsoleApp.config(["$routeProvider", "$locationProvider",
|
||||
brozzlerDashboardApp.config(["$routeProvider", "$locationProvider",
|
||||
function($routeProvider, $locationProvider) {
|
||||
$routeProvider.
|
||||
when("/workers", {
|
||||
@ -53,7 +53,7 @@ brozzlerConsoleApp.config(["$routeProvider", "$locationProvider",
|
||||
}]);
|
||||
|
||||
// copied from https://bitbucket.org/webarchive/ait5/src/master/archiveit/static/app/js/filters/ByteFormat.js
|
||||
brozzlerConsoleApp.filter("byteformat", function() {
|
||||
brozzlerDashboardApp.filter("byteformat", function() {
|
||||
return function(bytes, precision) {
|
||||
var bytes_f = parseFloat(bytes);
|
||||
if (bytes_f == 0 || isNaN(bytes_f) || !isFinite(bytes_f)) return "0";
|
1
brozzler/dashboard/static/noVNC
Submodule
1
brozzler/dashboard/static/noVNC
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit ef887cdb123df21b61043ff025e6208631e9eb7b
|
@ -1,12 +1,12 @@
|
||||
<!doctype html>
|
||||
<html lang="en" ng-app="brozzlerConsoleApp">
|
||||
<html lang="en" ng-app="brozzlerDashboardApp">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
|
||||
<meta name="apple-mobile-web-app-capable" content="yes" />
|
||||
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent" />
|
||||
<title>Brozzler Console</title>
|
||||
<title>Brozzler Dashboard</title>
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.3.5/css/bootstrap.css">
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.3.5/css/bootstrap-theme.css">
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.4.0/css/font-awesome.css">
|
@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
'''
|
||||
brozzler-easy - brozzler-worker, warcprox, pywb, and brozzler-webconsole all
|
||||
brozzler-easy - brozzler-worker, warcprox, pywb, and brozzler-dashboard all
|
||||
working together in a single process
|
||||
|
||||
Copyright (C) 2016 Internet Archive
|
||||
@ -27,7 +27,7 @@ try:
|
||||
import brozzler.pywb
|
||||
import wsgiref.simple_server
|
||||
import wsgiref.handlers
|
||||
import brozzler.webconsole
|
||||
import brozzler.dashboard
|
||||
except ImportError as e:
|
||||
logging.critical(
|
||||
'%s: %s\n\nYou might need to run "pip install '
|
||||
@ -51,7 +51,7 @@ def _build_arg_parser(prog=os.path.basename(sys.argv[0])):
|
||||
prog=prog, formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
description=(
|
||||
'brozzler-easy - easy deployment of brozzler, with '
|
||||
'brozzler-worker, warcprox, pywb, and brozzler-webconsole all '
|
||||
'brozzler-worker, warcprox, pywb, and brozzler-dashboard all '
|
||||
'running in a single process'))
|
||||
|
||||
# common args
|
||||
@ -104,14 +104,14 @@ def _build_arg_parser(prog=os.path.basename(sys.argv[0])):
|
||||
'--pywb-port', dest='pywb_port', type=int,
|
||||
default=8880, help='pywb wayback port')
|
||||
|
||||
# webconsole args
|
||||
# dashboard args
|
||||
arg_parser.add_argument(
|
||||
'--webconsole-address', dest='webconsole_address',
|
||||
'--dashboard-address', dest='dashboard_address',
|
||||
default='localhost',
|
||||
help='brozzler web console address to listen on')
|
||||
help='brozzler dashboard address to listen on')
|
||||
arg_parser.add_argument(
|
||||
'--webconsole-port', dest='webconsole_port',
|
||||
type=int, default=8881, help='brozzler web console port')
|
||||
'--dashboard-port', dest='dashboard_port',
|
||||
type=int, default=8881, help='brozzler dashboard port')
|
||||
|
||||
# common at the bottom args
|
||||
arg_parser.add_argument(
|
||||
@ -143,12 +143,12 @@ class BrozzlerEasyController:
|
||||
self._warcprox_args(args))
|
||||
self.brozzler_worker = self._init_brozzler_worker(args)
|
||||
self.pywb_httpd = self._init_pywb(args)
|
||||
self.webconsole_httpd = self._init_brozzler_webconsole(args)
|
||||
self.dashboard_httpd = self._init_brozzler_dashboard(args)
|
||||
|
||||
def _init_brozzler_webconsole(self, args):
|
||||
def _init_brozzler_dashboard(self, args):
|
||||
return wsgiref.simple_server.make_server(
|
||||
args.webconsole_address, args.webconsole_port,
|
||||
brozzler.webconsole.app, ThreadingWSGIServer)
|
||||
args.dashboard_address, args.dashboard_port,
|
||||
brozzler.dashboard.app, ThreadingWSGIServer)
|
||||
|
||||
def _init_brozzler_worker(self, args):
|
||||
r = rethinkstuff.Rethinker(
|
||||
@ -212,13 +212,13 @@ class BrozzlerEasyController:
|
||||
threading.Thread(target=self.pywb_httpd.serve_forever).start()
|
||||
|
||||
self.logger.info(
|
||||
'starting brozzler-webconsole at %s:%s',
|
||||
*self.webconsole_httpd.server_address)
|
||||
threading.Thread(target=self.webconsole_httpd.serve_forever).start()
|
||||
'starting brozzler-dashboard at %s:%s',
|
||||
*self.dashboard_httpd.server_address)
|
||||
threading.Thread(target=self.dashboard_httpd.serve_forever).start()
|
||||
|
||||
def shutdown(self):
|
||||
self.logger.info('shutting down brozzler-webconsole')
|
||||
self.webconsole_httpd.shutdown()
|
||||
self.logger.info('shutting down brozzler-dashboard')
|
||||
self.dashboard_httpd.shutdown()
|
||||
|
||||
self.logger.info('shutting down brozzler-worker')
|
||||
self.brozzler_worker.shutdown_now()
|
||||
|
@ -75,6 +75,8 @@ def new_job(frontier, job_conf):
|
||||
sites = []
|
||||
for seed_conf in job_conf["seeds"]:
|
||||
merged_conf = merge(seed_conf, job_conf)
|
||||
if "login" in merged_conf and "metadata" in merged_conf:
|
||||
merged_conf["metadata"]["login"] = merged_conf["login"]
|
||||
site = brozzler.Site(
|
||||
job_id=job.id, seed=merged_conf["url"],
|
||||
scope=merged_conf.get("scope"),
|
||||
|
@ -69,6 +69,9 @@ id:
|
||||
user_agent:
|
||||
type: string
|
||||
|
||||
behavior_parameters:
|
||||
type: dict
|
||||
|
||||
seeds:
|
||||
type: list
|
||||
required: true
|
||||
|
@ -96,7 +96,7 @@ class Site(brozzler.BaseDictable):
|
||||
status="ACTIVE", claimed=False, start_time=None,
|
||||
last_disclaimed=_EPOCH_UTC, last_claimed_by=None,
|
||||
last_claimed=_EPOCH_UTC, metadata={}, remember_outlinks=None,
|
||||
cookie_db=None, user_agent=None):
|
||||
cookie_db=None, user_agent=None, behavior_parameters=None):
|
||||
|
||||
self.seed = seed
|
||||
self.id = id
|
||||
@ -117,6 +117,7 @@ class Site(brozzler.BaseDictable):
|
||||
self.remember_outlinks = remember_outlinks
|
||||
self.cookie_db = cookie_db
|
||||
self.user_agent = user_agent
|
||||
self.behavior_parameters = behavior_parameters
|
||||
|
||||
self.scope = scope or {}
|
||||
if not "surt" in self.scope:
|
||||
|
@ -1 +0,0 @@
|
||||
Subproject commit 6a90803feb124791960e3962e328aa3cfb729aeb
|
@ -273,6 +273,7 @@ class BrozzlerWorker:
|
||||
browser.start(proxy=self._proxy(site), cookie_db=site.cookie_db)
|
||||
outlinks = browser.browse_page(
|
||||
page.url, extra_headers=site.extra_headers(),
|
||||
behavior_parameters=site.behavior_parameters,
|
||||
user_agent=site.user_agent,
|
||||
on_screenshot=_on_screenshot,
|
||||
on_url_change=page.note_redirect)
|
||||
@ -388,7 +389,9 @@ class BrozzlerWorker:
|
||||
try:
|
||||
site = self._frontier.claim_site("{}:{}".format(
|
||||
socket.gethostname(), browser.chrome_port))
|
||||
self.logger.info("brozzling site %s", site)
|
||||
self.logger.info(
|
||||
"brozzling site (proxy=%s) %s",
|
||||
repr(self._proxy(site)), site)
|
||||
th = threading.Thread(
|
||||
target=lambda: self._brozzle_site(
|
||||
browser, site),
|
||||
|
10
setup.py
10
setup.py
@ -32,17 +32,17 @@ def find_package_data(package):
|
||||
|
||||
setuptools.setup(
|
||||
name='brozzler',
|
||||
version='1.1b7.dev109',
|
||||
version='1.1b7.dev113',
|
||||
description='Distributed web crawling with browsers',
|
||||
url='https://github.com/internetarchive/brozzler',
|
||||
author='Noah Levitt',
|
||||
author_email='nlevitt@archive.org',
|
||||
long_description=open('README.rst', mode='rb').read().decode('UTF-8'),
|
||||
license='Apache License 2.0',
|
||||
packages=['brozzler', 'brozzler.webconsole'],
|
||||
packages=['brozzler', 'brozzler.dashboard'],
|
||||
package_data={
|
||||
'brozzler': ['behaviors.d/*.js*', 'behaviors.yaml', 'job_schema.yaml'],
|
||||
'brozzler.webconsole': find_package_data('brozzler.webconsole'),
|
||||
'brozzler.dashboard': find_package_data('brozzler.dashboard'),
|
||||
},
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
@ -51,7 +51,7 @@ setuptools.setup(
|
||||
'brozzler-new-site=brozzler.cli:brozzler_new_site',
|
||||
'brozzler-worker=brozzler.cli:brozzler_worker',
|
||||
'brozzler-ensure-tables=brozzler.cli:brozzler_ensure_tables',
|
||||
'brozzler-webconsole=brozzler.webconsole:main',
|
||||
'brozzler-dashboard=brozzler.dashboard:main',
|
||||
'brozzler-easy=brozzler.easy:main',
|
||||
'brozzler-wayback=brozzler.pywb:main',
|
||||
],
|
||||
@ -70,7 +70,7 @@ setuptools.setup(
|
||||
'cerberus==1.0.1',
|
||||
],
|
||||
extras_require={
|
||||
'webconsole': ['flask>=0.11', 'gunicorn'],
|
||||
'dashboard': ['flask>=0.11', 'gunicorn'],
|
||||
'easy': ['warcprox>=2.0b1', 'pywb', 'flask>=0.11', 'gunicorn'],
|
||||
},
|
||||
zip_safe=False,
|
||||
|
@ -86,7 +86,7 @@ def test_services_up():
|
||||
# if the connect fails an exception is raised and the test fails
|
||||
s.connect(('localhost', 8880))
|
||||
|
||||
# check that brozzler webconsole is listening
|
||||
# check that brozzler dashboard is listening
|
||||
with socket.socket() as s:
|
||||
# if the connect fails an exception is raised and the test fails
|
||||
s.connect(('localhost', 8881))
|
||||
|
@ -6,7 +6,7 @@ echo service status:
|
||||
vagrant ssh -- 'status warcprox ;
|
||||
status Xvnc ;
|
||||
status brozzler-worker ;
|
||||
status brozzler-webconsole ;
|
||||
status brozzler-dashboard ;
|
||||
status vnc-websock'
|
||||
echo
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user