mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-08 22:42:32 -04:00
Merge branch 'master' into qa
* master: move behavior_parameters into top level of site configuration; install the virtualenv package with pip because the apt version is old and conflicts with the recent version of pip we're using; logging tweak; rename webconsole to dashboard; add login details to behavior parameters; initial login additions
This commit is contained in:
commit
fbd540244b
34 changed files with 129 additions and 108 deletions
4
.gitmodules
vendored
4
.gitmodules
vendored
|
@ -1,3 +1,3 @@
|
||||||
[submodule "brozzler/webconsole/static/noVNC"]
|
[submodule "noVNC"]
|
||||||
path = brozzler/webconsole/static/noVNC
|
path = brozzler/dashboard/static/noVNC
|
||||||
url = https://github.com/kanaka/noVNC.git
|
url = https://github.com/kanaka/noVNC.git
|
||||||
|
|
12
README.rst
12
README.rst
|
@ -33,7 +33,7 @@ Getting Started
|
||||||
|
|
||||||
The easiest way to get started with brozzler for web archiving is with
|
The easiest way to get started with brozzler for web archiving is with
|
||||||
``brozzler-easy``. Brozzler-easy runs brozzler-worker, warcprox,
|
``brozzler-easy``. Brozzler-easy runs brozzler-worker, warcprox,
|
||||||
`pywb <https://github.com/ikreymer/pywb>`_, and brozzler-webconsole, configured
|
`pywb <https://github.com/ikreymer/pywb>`_, and brozzler-dashboard, configured
|
||||||
to work with each other, in a single process.
|
to work with each other, in a single process.
|
||||||
|
|
||||||
Mac instructions:
|
Mac instructions:
|
||||||
|
@ -118,24 +118,24 @@ must be specified, everything else is optional. For details, see
|
||||||
scope:
|
scope:
|
||||||
surt: http://(org,example,
|
surt: http://(org,example,
|
||||||
|
|
||||||
Brozzler Web Console
|
Brozzler Dashboard
|
||||||
--------------------
|
------------------
|
||||||
|
|
||||||
Brozzler comes with a rudimentary web application for viewing crawl job status.
|
Brozzler comes with a rudimentary web application for viewing crawl job status.
|
||||||
To install brozzler with the dependencies required to run this app, run
|
To install brozzler with the dependencies required to run this app, run
|
||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
pip install brozzler[webconsole]
|
pip install brozzler[dashboard]
|
||||||
|
|
||||||
|
|
||||||
To start the app, run
|
To start the app, run
|
||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
brozzler-webconsole
|
brozzler-dashboard
|
||||||
|
|
||||||
See ``brozzler-webconsole --help`` for configuration options.
|
See ``brozzler-dashboard --help`` for configuration options.
|
||||||
|
|
||||||
Headless Chromium
|
Headless Chromium
|
||||||
-----------------
|
-----------------
|
||||||
|
|
|
@ -9,7 +9,7 @@ localhost
|
||||||
[brozzler-worker]
|
[brozzler-worker]
|
||||||
localhost
|
localhost
|
||||||
|
|
||||||
[brozzler-webconsole]
|
[brozzler-dashboard]
|
||||||
localhost
|
localhost
|
||||||
|
|
||||||
[pywb]
|
[pywb]
|
||||||
|
|
|
@ -16,7 +16,7 @@ work_dir=/vagrant
|
||||||
[brozzler-worker]
|
[brozzler-worker]
|
||||||
10.9.9.9
|
10.9.9.9
|
||||||
|
|
||||||
[brozzler-webconsole]
|
[brozzler-dashboard]
|
||||||
10.9.9.9
|
10.9.9.9
|
||||||
|
|
||||||
[pywb]
|
[pywb]
|
||||||
|
|
|
@ -14,10 +14,10 @@
|
||||||
roles:
|
roles:
|
||||||
- brozzler-worker
|
- brozzler-worker
|
||||||
|
|
||||||
- name: deploy brozzler-webconsole
|
- name: deploy brozzler-dashboard
|
||||||
hosts: brozzler-webconsole
|
hosts: brozzler-dashboard
|
||||||
roles:
|
roles:
|
||||||
- brozzler-webconsole
|
- brozzler-dashboard
|
||||||
|
|
||||||
- name: deploy pywb
|
- name: deploy pywb
|
||||||
hosts: pywb
|
hosts: pywb
|
||||||
|
|
4
ansible/roles/brozzler-dashboard/handlers/main.yml
Normal file
4
ansible/roles/brozzler-dashboard/handlers/main.yml
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
---
|
||||||
|
- name: restart brozzler-dashboard
|
||||||
|
service: name=brozzler-dashboard state=restarted
|
||||||
|
become: true
|
20
ansible/roles/brozzler-dashboard/tasks/main.yml
Normal file
20
ansible/roles/brozzler-dashboard/tasks/main.yml
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
---
|
||||||
|
- name: mkdir {{venv_root}}/brozzler-dashboard-ve34
|
||||||
|
file: path={{venv_root}}/brozzler-dashboard-ve34 state=directory
|
||||||
|
owner={{user}}
|
||||||
|
become: true
|
||||||
|
- name: install brozzler[dashboard] in virtualenv
|
||||||
|
pip: name='{{brozzler_pip_name}}[dashboard]'
|
||||||
|
virtualenv={{venv_root}}/brozzler-dashboard-ve34
|
||||||
|
virtualenv_python=python3.4
|
||||||
|
extra_args='--no-input --upgrade --pre --cache-dir=/tmp/pip-cache'
|
||||||
|
become: true
|
||||||
|
become_user: '{{user}}'
|
||||||
|
notify:
|
||||||
|
- restart brozzler-dashboard
|
||||||
|
- name: install upstart config /etc/init/brozzler-dashboard.conf
|
||||||
|
become: true
|
||||||
|
template: src=templates/brozzler-dashboard.conf.j2
|
||||||
|
dest=/etc/init/brozzler-dashboard.conf
|
||||||
|
notify:
|
||||||
|
- restart brozzler-dashboard
|
|
@ -1,10 +1,10 @@
|
||||||
description "brozzler-webconsole"
|
description "brozzler-dashboard"
|
||||||
|
|
||||||
start on runlevel [2345]
|
start on runlevel [2345]
|
||||||
stop on runlevel [!2345]
|
stop on runlevel [!2345]
|
||||||
|
|
||||||
env PYTHONPATH={{venv_root}}/brozzler-webconsole-ve34/lib/python3.4/site-packages
|
env PYTHONPATH={{venv_root}}/brozzler-dashboard-ve34/lib/python3.4/site-packages
|
||||||
env PATH={{venv_root}}/brozzler-webconsole-ve34/bin:/usr/bin:/bin
|
env PATH={{venv_root}}/brozzler-dashboard-ve34/bin:/usr/bin:/bin
|
||||||
env LC_ALL=C.UTF-8
|
env LC_ALL=C.UTF-8
|
||||||
|
|
||||||
env WAYBACK_BASEURL=http://{{groups['pywb'][0]}}:8880/brozzler
|
env WAYBACK_BASEURL=http://{{groups['pywb'][0]}}:8880/brozzler
|
||||||
|
@ -15,4 +15,4 @@ setuid {{user}}
|
||||||
|
|
||||||
console log
|
console log
|
||||||
|
|
||||||
exec gunicorn --bind=0.0.0.0:8881 brozzler.webconsole:app
|
exec gunicorn --bind=0.0.0.0:8881 brozzler.dashboard:app
|
|
@ -1,4 +0,0 @@
|
||||||
---
|
|
||||||
- name: restart brozzler-webconsole
|
|
||||||
service: name=brozzler-webconsole state=restarted
|
|
||||||
become: true
|
|
|
@ -1,20 +0,0 @@
|
||||||
---
|
|
||||||
- name: mkdir {{venv_root}}/brozzler-webconsole-ve34
|
|
||||||
file: path={{venv_root}}/brozzler-webconsole-ve34 state=directory
|
|
||||||
owner={{user}}
|
|
||||||
become: true
|
|
||||||
- name: install brozzler[webconsole] in virtualenv
|
|
||||||
pip: name='{{brozzler_pip_name}}[webconsole]'
|
|
||||||
virtualenv={{venv_root}}/brozzler-webconsole-ve34
|
|
||||||
virtualenv_python=python3.4
|
|
||||||
extra_args='--no-input --upgrade --pre --cache-dir=/tmp/pip-cache'
|
|
||||||
become: true
|
|
||||||
become_user: '{{user}}'
|
|
||||||
notify:
|
|
||||||
- restart brozzler-webconsole
|
|
||||||
- name: install upstart config /etc/init/brozzler-webconsole.conf
|
|
||||||
become: true
|
|
||||||
template: src=templates/brozzler-webconsole.conf.j2
|
|
||||||
dest=/etc/init/brozzler-webconsole.conf
|
|
||||||
notify:
|
|
||||||
- restart brozzler-webconsole
|
|
|
@ -9,7 +9,6 @@
|
||||||
become: true
|
become: true
|
||||||
apt: name={{item}} state=present
|
apt: name={{item}} state=present
|
||||||
with_items:
|
with_items:
|
||||||
- python-virtualenv
|
|
||||||
- vnc4server
|
- vnc4server
|
||||||
- chromium-browser
|
- chromium-browser
|
||||||
- xfonts-base
|
- xfonts-base
|
||||||
|
|
|
@ -10,6 +10,6 @@ console log
|
||||||
env PYTHONPATH={{venv_root}}/websockify-ve34/lib/python3.4/site-packages
|
env PYTHONPATH={{venv_root}}/websockify-ve34/lib/python3.4/site-packages
|
||||||
env PATH={{venv_root}}/websockify-ve34/bin:/usr/bin:/bin
|
env PATH={{venv_root}}/websockify-ve34/bin:/usr/bin:/bin
|
||||||
|
|
||||||
# port 8901 is hard-coded in brozzler/webconsole/static/partials/workers.html
|
# port 8901 is hard-coded in brozzler/dashboard/static/partials/workers.html
|
||||||
exec nice websockify 0.0.0.0:8901 localhost:5901
|
exec nice websockify 0.0.0.0:8901 localhost:5901
|
||||||
|
|
||||||
|
|
|
@ -1,24 +1,28 @@
|
||||||
---
|
---
|
||||||
## # get latest pip (had problems with version from apt-get, specifically
|
# get latest pip (had problems with version from apt-get, specifically
|
||||||
## # "pip install pyopenssl" did not install the dependency "cryptography")
|
# "pip install pyopenssl" did not install the dependency "cryptography")
|
||||||
## # http://stackoverflow.com/questions/34587473/what-is-get-pip-py-checksum-where-can-i-get-it-for-sure
|
# http://stackoverflow.com/questions/34587473/what-is-get-pip-py-checksum-where-can-i-get-it-for-sure
|
||||||
## - name: install setuptools for python 2 and 3
|
- name: install setuptools for python 2 and 3
|
||||||
## become: true
|
become: true
|
||||||
## apt: name={{item}} state=present
|
apt: name={{item}} state=present
|
||||||
## with_items:
|
with_items:
|
||||||
## - python-setuptools
|
- python-setuptools
|
||||||
## - python3-setuptools
|
- python3-setuptools
|
||||||
## - name: download pip-8.1.2.tar.gz
|
- name: download pip-8.1.2.tar.gz
|
||||||
## get_url:
|
get_url:
|
||||||
## url: https://pypi.python.org/packages/e7/a8/7556133689add8d1a54c0b14aeff0acb03c64707ce100ecd53934da1aa13/pip-8.1.2.tar.gz
|
url: https://pypi.python.org/packages/e7/a8/7556133689add8d1a54c0b14aeff0acb03c64707ce100ecd53934da1aa13/pip-8.1.2.tar.gz
|
||||||
## dest: /tmp
|
dest: /tmp
|
||||||
## checksum: sha1:1c13c247967ec5bee6de5fd104c5d78ba30951c7
|
checksum: sha1:1c13c247967ec5bee6de5fd104c5d78ba30951c7
|
||||||
## - name: extract pip-8.1.2.tar.gz
|
- name: extract pip-8.1.2.tar.gz
|
||||||
## unarchive: src=/tmp/pip-8.1.2.tar.gz dest=/tmp copy=no
|
unarchive: src=/tmp/pip-8.1.2.tar.gz dest=/tmp copy=no
|
||||||
## - name: run "python3 setup.py install" in /tmp/pip-8.1.2
|
- name: run "python3 setup.py install" in /tmp/pip-8.1.2
|
||||||
## command: python3 setup.py install chdir=/tmp/pip-8.1.2
|
command: python3 setup.py install chdir=/tmp/pip-8.1.2
|
||||||
## creates=/usr/local/lib/python2.7/dist-packages/pip-8.1.2-py2.7.egg/pip/__init__.py
|
creates=/usr/local/lib/python2.7/dist-packages/pip-8.1.2-py2.7.egg/pip/__init__.py
|
||||||
## become: true
|
become: true
|
||||||
|
- name: run "pip install virtualenv"
|
||||||
|
command: pip install virtualenv
|
||||||
|
creates=/usr/local/lib/python3.4/dist-packages/virtualenv.py
|
||||||
|
become: true
|
||||||
- command: id {{user}}
|
- command: id {{user}}
|
||||||
register: id_user
|
register: id_user
|
||||||
ignore_errors: true
|
ignore_errors: true
|
||||||
|
|
|
@ -4,7 +4,6 @@
|
||||||
apt: name={{item}} state=present
|
apt: name={{item}} state=present
|
||||||
with_items:
|
with_items:
|
||||||
- gcc
|
- gcc
|
||||||
- python-virtualenv
|
|
||||||
- python3.4
|
- python3.4
|
||||||
- libpython3.4-dev
|
- libpython3.4-dev
|
||||||
- libffi-dev
|
- libffi-dev
|
||||||
|
|
12
brozzler/cli.py
Normal file → Executable file
12
brozzler/cli.py
Normal file → Executable file
|
@ -120,6 +120,12 @@ def brozzle_page():
|
||||||
'-e', '--chrome-exe', dest='chrome_exe',
|
'-e', '--chrome-exe', dest='chrome_exe',
|
||||||
default=suggest_default_chrome_exe(),
|
default=suggest_default_chrome_exe(),
|
||||||
help='executable to use to invoke chrome')
|
help='executable to use to invoke chrome')
|
||||||
|
arg_parser.add_argument(
|
||||||
|
'--behavior-parameters', dest='behavior_parameters',
|
||||||
|
default=None, help=(
|
||||||
|
'json blob of parameters to populate the javascript behavior '
|
||||||
|
'template, e.g. {"parameter_username":"x",'
|
||||||
|
'"parameter_password":"y"}'))
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
'--proxy', dest='proxy', default=None,
|
'--proxy', dest='proxy', default=None,
|
||||||
help='http proxy')
|
help='http proxy')
|
||||||
|
@ -133,9 +139,13 @@ def brozzle_page():
|
||||||
args = arg_parser.parse_args(args=sys.argv[1:])
|
args = arg_parser.parse_args(args=sys.argv[1:])
|
||||||
_configure_logging(args)
|
_configure_logging(args)
|
||||||
|
|
||||||
|
behavior_parameters = {}
|
||||||
|
if args.behavior_parameters:
|
||||||
|
behavior_parameters = json.loads(args.behavior_parameters)
|
||||||
site = brozzler.Site(
|
site = brozzler.Site(
|
||||||
id=-1, seed=args.url, proxy=args.proxy,
|
id=-1, seed=args.url, proxy=args.proxy,
|
||||||
enable_warcprox_features=args.enable_warcprox_features)
|
enable_warcprox_features=args.enable_warcprox_features,
|
||||||
|
behavior_parameters=behavior_parameters)
|
||||||
page = brozzler.Page(url=args.url, site_id=site.id)
|
page = brozzler.Page(url=args.url, site_id=site.id)
|
||||||
worker = brozzler.BrozzlerWorker(frontier=None)
|
worker = brozzler.BrozzlerWorker(frontier=None)
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
'''
|
'''
|
||||||
brozzler/webconsole/__init__.py - flask app for brozzler web console, defines
|
brozzler/dashboard/__init__.py - flask app for brozzler dashboard, defines api
|
||||||
api endpoints etc
|
endpoints etc
|
||||||
|
|
||||||
Copyright (C) 2014-2016 Internet Archive
|
Copyright (C) 2014-2016 Internet Archive
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ try:
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
logging.critical(
|
logging.critical(
|
||||||
'%s: %s\n\nYou might need to run "pip install '
|
'%s: %s\n\nYou might need to run "pip install '
|
||||||
'brozzler[webconsole]".\nSee README.rst for more information.',
|
'brozzler[dashboard]".\nSee README.rst for more information.',
|
||||||
type(e).__name__, e)
|
type(e).__name__, e)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
import rethinkstuff
|
import rethinkstuff
|
||||||
|
@ -210,11 +210,11 @@ try:
|
||||||
import gunicorn.app.base
|
import gunicorn.app.base
|
||||||
from gunicorn.six import iteritems
|
from gunicorn.six import iteritems
|
||||||
|
|
||||||
class GunicornBrozzlerWebConsole(gunicorn.app.base.BaseApplication):
|
class GunicornBrozzlerDashboard(gunicorn.app.base.BaseApplication):
|
||||||
def __init__(self, app, options=None):
|
def __init__(self, app, options=None):
|
||||||
self.options = options or {}
|
self.options = options or {}
|
||||||
self.application = app
|
self.application = app
|
||||||
super(GunicornBrozzlerWebConsole, self).__init__()
|
super(GunicornBrozzlerDashboard, self).__init__()
|
||||||
|
|
||||||
def load_config(self):
|
def load_config(self):
|
||||||
config = dict(
|
config = dict(
|
||||||
|
@ -227,12 +227,12 @@ try:
|
||||||
return self.application
|
return self.application
|
||||||
|
|
||||||
def run(**options):
|
def run(**options):
|
||||||
logging.info('running brozzler-webconsole using gunicorn')
|
logging.info('running brozzler-dashboard using gunicorn')
|
||||||
GunicornBrozzlerWebConsole(app, options).run()
|
GunicornBrozzlerDashboard(app, options).run()
|
||||||
|
|
||||||
except ImportError:
|
except ImportError:
|
||||||
def run():
|
def run():
|
||||||
logging.info('running brozzler-webconsole using simple flask app.run')
|
logging.info('running brozzler-dashboard using simple flask app.run')
|
||||||
app.run()
|
app.run()
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
@ -241,10 +241,10 @@ def main():
|
||||||
prog=os.path.basename(sys.argv[0]),
|
prog=os.path.basename(sys.argv[0]),
|
||||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||||
description=(
|
description=(
|
||||||
'brozzler-webconsole - web application for viewing brozzler '
|
'brozzler-dashboard - web application for viewing brozzler '
|
||||||
'crawl status'),
|
'crawl status'),
|
||||||
epilog=(
|
epilog=(
|
||||||
'brozzler-webconsole has no command line options, but can be '
|
'brozzler-dashboard has no command line options, but can be '
|
||||||
'configured using the following environment variables:\n\n'
|
'configured using the following environment variables:\n\n'
|
||||||
' RETHINKDB_SERVERS rethinkdb servers, e.g. db0.foo.org,'
|
' RETHINKDB_SERVERS rethinkdb servers, e.g. db0.foo.org,'
|
||||||
'db0.foo.org:38015,db1.foo.org (default: localhost)\n'
|
'db0.foo.org:38015,db1.foo.org (default: localhost)\n'
|
Before Width: | Height: | Size: 9.1 KiB After Width: | Height: | Size: 9.1 KiB |
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* brozzler-webconsole/static/js/app.js - brozzler console angularjs code
|
* brozzler/dashboard/static/js/app.js - brozzler dashboard angularjs code
|
||||||
*
|
*
|
||||||
* Copyright (C) 2014-2016 Internet Archive
|
* Copyright (C) 2014-2016 Internet Archive
|
||||||
*
|
*
|
||||||
|
@ -18,12 +18,12 @@
|
||||||
|
|
||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
var brozzlerConsoleApp = angular.module("brozzlerConsoleApp", [
|
var brozzlerDashboardApp = angular.module("brozzlerDashboardApp", [
|
||||||
"ngRoute",
|
"ngRoute",
|
||||||
"brozzlerControllers",
|
"brozzlerControllers",
|
||||||
]);
|
]);
|
||||||
|
|
||||||
brozzlerConsoleApp.config(["$routeProvider", "$locationProvider",
|
brozzlerDashboardApp.config(["$routeProvider", "$locationProvider",
|
||||||
function($routeProvider, $locationProvider) {
|
function($routeProvider, $locationProvider) {
|
||||||
$routeProvider.
|
$routeProvider.
|
||||||
when("/workers", {
|
when("/workers", {
|
||||||
|
@ -53,7 +53,7 @@ brozzlerConsoleApp.config(["$routeProvider", "$locationProvider",
|
||||||
}]);
|
}]);
|
||||||
|
|
||||||
// copied from https://bitbucket.org/webarchive/ait5/src/master/archiveit/static/app/js/filters/ByteFormat.js
|
// copied from https://bitbucket.org/webarchive/ait5/src/master/archiveit/static/app/js/filters/ByteFormat.js
|
||||||
brozzlerConsoleApp.filter("byteformat", function() {
|
brozzlerDashboardApp.filter("byteformat", function() {
|
||||||
return function(bytes, precision) {
|
return function(bytes, precision) {
|
||||||
var bytes_f = parseFloat(bytes);
|
var bytes_f = parseFloat(bytes);
|
||||||
if (bytes_f == 0 || isNaN(bytes_f) || !isFinite(bytes_f)) return "0";
|
if (bytes_f == 0 || isNaN(bytes_f) || !isFinite(bytes_f)) return "0";
|
1
brozzler/dashboard/static/noVNC
Submodule
1
brozzler/dashboard/static/noVNC
Submodule
|
@ -0,0 +1 @@
|
||||||
|
Subproject commit ef887cdb123df21b61043ff025e6208631e9eb7b
|
|
@ -1,12 +1,12 @@
|
||||||
<!doctype html>
|
<!doctype html>
|
||||||
<html lang="en" ng-app="brozzlerConsoleApp">
|
<html lang="en" ng-app="brozzlerDashboardApp">
|
||||||
<head>
|
<head>
|
||||||
<meta charset="utf-8">
|
<meta charset="utf-8">
|
||||||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
|
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
|
||||||
<meta name="apple-mobile-web-app-capable" content="yes" />
|
<meta name="apple-mobile-web-app-capable" content="yes" />
|
||||||
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent" />
|
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent" />
|
||||||
<title>Brozzler Console</title>
|
<title>Brozzler Dashboard</title>
|
||||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.3.5/css/bootstrap.css">
|
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.3.5/css/bootstrap.css">
|
||||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.3.5/css/bootstrap-theme.css">
|
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.3.5/css/bootstrap-theme.css">
|
||||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.4.0/css/font-awesome.css">
|
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.4.0/css/font-awesome.css">
|
|
@ -1,6 +1,6 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
'''
|
'''
|
||||||
brozzler-easy - brozzler-worker, warcprox, pywb, and brozzler-webconsole all
|
brozzler-easy - brozzler-worker, warcprox, pywb, and brozzler-dashboard all
|
||||||
working together in a single process
|
working together in a single process
|
||||||
|
|
||||||
Copyright (C) 2016 Internet Archive
|
Copyright (C) 2016 Internet Archive
|
||||||
|
@ -27,7 +27,7 @@ try:
|
||||||
import brozzler.pywb
|
import brozzler.pywb
|
||||||
import wsgiref.simple_server
|
import wsgiref.simple_server
|
||||||
import wsgiref.handlers
|
import wsgiref.handlers
|
||||||
import brozzler.webconsole
|
import brozzler.dashboard
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
logging.critical(
|
logging.critical(
|
||||||
'%s: %s\n\nYou might need to run "pip install '
|
'%s: %s\n\nYou might need to run "pip install '
|
||||||
|
@ -51,7 +51,7 @@ def _build_arg_parser(prog=os.path.basename(sys.argv[0])):
|
||||||
prog=prog, formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
prog=prog, formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||||
description=(
|
description=(
|
||||||
'brozzler-easy - easy deployment of brozzler, with '
|
'brozzler-easy - easy deployment of brozzler, with '
|
||||||
'brozzler-worker, warcprox, pywb, and brozzler-webconsole all '
|
'brozzler-worker, warcprox, pywb, and brozzler-dashboard all '
|
||||||
'running in a single process'))
|
'running in a single process'))
|
||||||
|
|
||||||
# common args
|
# common args
|
||||||
|
@ -104,14 +104,14 @@ def _build_arg_parser(prog=os.path.basename(sys.argv[0])):
|
||||||
'--pywb-port', dest='pywb_port', type=int,
|
'--pywb-port', dest='pywb_port', type=int,
|
||||||
default=8880, help='pywb wayback port')
|
default=8880, help='pywb wayback port')
|
||||||
|
|
||||||
# webconsole args
|
# dashboard args
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
'--webconsole-address', dest='webconsole_address',
|
'--dashboard-address', dest='dashboard_address',
|
||||||
default='localhost',
|
default='localhost',
|
||||||
help='brozzler web console address to listen on')
|
help='brozzler dashboard address to listen on')
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
'--webconsole-port', dest='webconsole_port',
|
'--dashboard-port', dest='dashboard_port',
|
||||||
type=int, default=8881, help='brozzler web console port')
|
type=int, default=8881, help='brozzler dashboard port')
|
||||||
|
|
||||||
# common at the bottom args
|
# common at the bottom args
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
|
@ -143,12 +143,12 @@ class BrozzlerEasyController:
|
||||||
self._warcprox_args(args))
|
self._warcprox_args(args))
|
||||||
self.brozzler_worker = self._init_brozzler_worker(args)
|
self.brozzler_worker = self._init_brozzler_worker(args)
|
||||||
self.pywb_httpd = self._init_pywb(args)
|
self.pywb_httpd = self._init_pywb(args)
|
||||||
self.webconsole_httpd = self._init_brozzler_webconsole(args)
|
self.dashboard_httpd = self._init_brozzler_dashboard(args)
|
||||||
|
|
||||||
def _init_brozzler_webconsole(self, args):
|
def _init_brozzler_dashboard(self, args):
|
||||||
return wsgiref.simple_server.make_server(
|
return wsgiref.simple_server.make_server(
|
||||||
args.webconsole_address, args.webconsole_port,
|
args.dashboard_address, args.dashboard_port,
|
||||||
brozzler.webconsole.app, ThreadingWSGIServer)
|
brozzler.dashboard.app, ThreadingWSGIServer)
|
||||||
|
|
||||||
def _init_brozzler_worker(self, args):
|
def _init_brozzler_worker(self, args):
|
||||||
r = rethinkstuff.Rethinker(
|
r = rethinkstuff.Rethinker(
|
||||||
|
@ -212,13 +212,13 @@ class BrozzlerEasyController:
|
||||||
threading.Thread(target=self.pywb_httpd.serve_forever).start()
|
threading.Thread(target=self.pywb_httpd.serve_forever).start()
|
||||||
|
|
||||||
self.logger.info(
|
self.logger.info(
|
||||||
'starting brozzler-webconsole at %s:%s',
|
'starting brozzler-dashboard at %s:%s',
|
||||||
*self.webconsole_httpd.server_address)
|
*self.dashboard_httpd.server_address)
|
||||||
threading.Thread(target=self.webconsole_httpd.serve_forever).start()
|
threading.Thread(target=self.dashboard_httpd.serve_forever).start()
|
||||||
|
|
||||||
def shutdown(self):
|
def shutdown(self):
|
||||||
self.logger.info('shutting down brozzler-webconsole')
|
self.logger.info('shutting down brozzler-dashboard')
|
||||||
self.webconsole_httpd.shutdown()
|
self.dashboard_httpd.shutdown()
|
||||||
|
|
||||||
self.logger.info('shutting down brozzler-worker')
|
self.logger.info('shutting down brozzler-worker')
|
||||||
self.brozzler_worker.shutdown_now()
|
self.brozzler_worker.shutdown_now()
|
||||||
|
|
|
@ -75,6 +75,8 @@ def new_job(frontier, job_conf):
|
||||||
sites = []
|
sites = []
|
||||||
for seed_conf in job_conf["seeds"]:
|
for seed_conf in job_conf["seeds"]:
|
||||||
merged_conf = merge(seed_conf, job_conf)
|
merged_conf = merge(seed_conf, job_conf)
|
||||||
|
if "login" in merged_conf and "metadata" in merged_conf:
|
||||||
|
merged_conf["metadata"]["login"] = merged_conf["login"]
|
||||||
site = brozzler.Site(
|
site = brozzler.Site(
|
||||||
job_id=job.id, seed=merged_conf["url"],
|
job_id=job.id, seed=merged_conf["url"],
|
||||||
scope=merged_conf.get("scope"),
|
scope=merged_conf.get("scope"),
|
||||||
|
|
|
@ -69,6 +69,9 @@ id:
|
||||||
user_agent:
|
user_agent:
|
||||||
type: string
|
type: string
|
||||||
|
|
||||||
|
behavior_parameters:
|
||||||
|
type: dict
|
||||||
|
|
||||||
seeds:
|
seeds:
|
||||||
type: list
|
type: list
|
||||||
required: true
|
required: true
|
||||||
|
|
|
@ -96,7 +96,7 @@ class Site(brozzler.BaseDictable):
|
||||||
status="ACTIVE", claimed=False, start_time=None,
|
status="ACTIVE", claimed=False, start_time=None,
|
||||||
last_disclaimed=_EPOCH_UTC, last_claimed_by=None,
|
last_disclaimed=_EPOCH_UTC, last_claimed_by=None,
|
||||||
last_claimed=_EPOCH_UTC, metadata={}, remember_outlinks=None,
|
last_claimed=_EPOCH_UTC, metadata={}, remember_outlinks=None,
|
||||||
cookie_db=None, user_agent=None):
|
cookie_db=None, user_agent=None, behavior_parameters=None):
|
||||||
|
|
||||||
self.seed = seed
|
self.seed = seed
|
||||||
self.id = id
|
self.id = id
|
||||||
|
@ -117,6 +117,7 @@ class Site(brozzler.BaseDictable):
|
||||||
self.remember_outlinks = remember_outlinks
|
self.remember_outlinks = remember_outlinks
|
||||||
self.cookie_db = cookie_db
|
self.cookie_db = cookie_db
|
||||||
self.user_agent = user_agent
|
self.user_agent = user_agent
|
||||||
|
self.behavior_parameters = behavior_parameters
|
||||||
|
|
||||||
self.scope = scope or {}
|
self.scope = scope or {}
|
||||||
if not "surt" in self.scope:
|
if not "surt" in self.scope:
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
Subproject commit 6a90803feb124791960e3962e328aa3cfb729aeb
|
|
|
@ -273,6 +273,7 @@ class BrozzlerWorker:
|
||||||
browser.start(proxy=self._proxy(site), cookie_db=site.cookie_db)
|
browser.start(proxy=self._proxy(site), cookie_db=site.cookie_db)
|
||||||
outlinks = browser.browse_page(
|
outlinks = browser.browse_page(
|
||||||
page.url, extra_headers=site.extra_headers(),
|
page.url, extra_headers=site.extra_headers(),
|
||||||
|
behavior_parameters=site.behavior_parameters,
|
||||||
user_agent=site.user_agent,
|
user_agent=site.user_agent,
|
||||||
on_screenshot=_on_screenshot,
|
on_screenshot=_on_screenshot,
|
||||||
on_url_change=page.note_redirect)
|
on_url_change=page.note_redirect)
|
||||||
|
@ -388,7 +389,9 @@ class BrozzlerWorker:
|
||||||
try:
|
try:
|
||||||
site = self._frontier.claim_site("{}:{}".format(
|
site = self._frontier.claim_site("{}:{}".format(
|
||||||
socket.gethostname(), browser.chrome_port))
|
socket.gethostname(), browser.chrome_port))
|
||||||
self.logger.info("brozzling site %s", site)
|
self.logger.info(
|
||||||
|
"brozzling site (proxy=%s) %s",
|
||||||
|
repr(self._proxy(site)), site)
|
||||||
th = threading.Thread(
|
th = threading.Thread(
|
||||||
target=lambda: self._brozzle_site(
|
target=lambda: self._brozzle_site(
|
||||||
browser, site),
|
browser, site),
|
||||||
|
|
10
setup.py
10
setup.py
|
@ -32,17 +32,17 @@ def find_package_data(package):
|
||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.1b7.dev109',
|
version='1.1b7.dev113',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
author_email='nlevitt@archive.org',
|
author_email='nlevitt@archive.org',
|
||||||
long_description=open('README.rst', mode='rb').read().decode('UTF-8'),
|
long_description=open('README.rst', mode='rb').read().decode('UTF-8'),
|
||||||
license='Apache License 2.0',
|
license='Apache License 2.0',
|
||||||
packages=['brozzler', 'brozzler.webconsole'],
|
packages=['brozzler', 'brozzler.dashboard'],
|
||||||
package_data={
|
package_data={
|
||||||
'brozzler': ['behaviors.d/*.js*', 'behaviors.yaml', 'job_schema.yaml'],
|
'brozzler': ['behaviors.d/*.js*', 'behaviors.yaml', 'job_schema.yaml'],
|
||||||
'brozzler.webconsole': find_package_data('brozzler.webconsole'),
|
'brozzler.dashboard': find_package_data('brozzler.dashboard'),
|
||||||
},
|
},
|
||||||
entry_points={
|
entry_points={
|
||||||
'console_scripts': [
|
'console_scripts': [
|
||||||
|
@ -51,7 +51,7 @@ setuptools.setup(
|
||||||
'brozzler-new-site=brozzler.cli:brozzler_new_site',
|
'brozzler-new-site=brozzler.cli:brozzler_new_site',
|
||||||
'brozzler-worker=brozzler.cli:brozzler_worker',
|
'brozzler-worker=brozzler.cli:brozzler_worker',
|
||||||
'brozzler-ensure-tables=brozzler.cli:brozzler_ensure_tables',
|
'brozzler-ensure-tables=brozzler.cli:brozzler_ensure_tables',
|
||||||
'brozzler-webconsole=brozzler.webconsole:main',
|
'brozzler-dashboard=brozzler.dashboard:main',
|
||||||
'brozzler-easy=brozzler.easy:main',
|
'brozzler-easy=brozzler.easy:main',
|
||||||
'brozzler-wayback=brozzler.pywb:main',
|
'brozzler-wayback=brozzler.pywb:main',
|
||||||
],
|
],
|
||||||
|
@ -70,7 +70,7 @@ setuptools.setup(
|
||||||
'cerberus==1.0.1',
|
'cerberus==1.0.1',
|
||||||
],
|
],
|
||||||
extras_require={
|
extras_require={
|
||||||
'webconsole': ['flask>=0.11', 'gunicorn'],
|
'dashboard': ['flask>=0.11', 'gunicorn'],
|
||||||
'easy': ['warcprox>=2.0b1', 'pywb', 'flask>=0.11', 'gunicorn'],
|
'easy': ['warcprox>=2.0b1', 'pywb', 'flask>=0.11', 'gunicorn'],
|
||||||
},
|
},
|
||||||
zip_safe=False,
|
zip_safe=False,
|
||||||
|
|
|
@ -86,7 +86,7 @@ def test_services_up():
|
||||||
# if the connect fails an exception is raised and the test fails
|
# if the connect fails an exception is raised and the test fails
|
||||||
s.connect(('localhost', 8880))
|
s.connect(('localhost', 8880))
|
||||||
|
|
||||||
# check that brozzler webconsole is listening
|
# check that brozzler dashboard is listening
|
||||||
with socket.socket() as s:
|
with socket.socket() as s:
|
||||||
# if the connect fails an exception is raised and the test fails
|
# if the connect fails an exception is raised and the test fails
|
||||||
s.connect(('localhost', 8881))
|
s.connect(('localhost', 8881))
|
||||||
|
|
|
@ -6,7 +6,7 @@ echo service status:
|
||||||
vagrant ssh -- 'status warcprox ;
|
vagrant ssh -- 'status warcprox ;
|
||||||
status Xvnc ;
|
status Xvnc ;
|
||||||
status brozzler-worker ;
|
status brozzler-worker ;
|
||||||
status brozzler-webconsole ;
|
status brozzler-dashboard ;
|
||||||
status vnc-websock'
|
status vnc-websock'
|
||||||
echo
|
echo
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue