mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-06-19 12:24:20 -04:00
Merge branch 'master' into ydl-stitched
* master: vagrant readme fixes (thanks funkyfuture); update cryptography dep version
This commit is contained in:
commit
8cdc3dee21
2 changed files with 26 additions and 24 deletions
29
setup.py
29
setup.py
|
@@ -32,7 +32,7 @@ def find_package_data(package):
|
||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.4.dev297',
|
version='1.4.dev299',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
@@ -63,27 +63,30 @@ setuptools.setup(
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
install_requires=[
|
install_requires=[
|
||||||
'PyYAML',
|
'PyYAML>=3.12',
|
||||||
'youtube-dl',
|
'youtube-dl>=2018.7.21',
|
||||||
'reppy==0.3.4',
|
'reppy==0.3.4',
|
||||||
'requests',
|
'requests>=2.18.4',
|
||||||
'websocket-client!=0.39.0,!=0.49.0',
|
'websocket-client>=0.39.0,!=0.49.0',
|
||||||
'pillow>=5.2.0',
|
'pillow>=5.2.0',
|
||||||
'urlcanon>=0.1.dev23',
|
'urlcanon>=0.1.dev23',
|
||||||
'doublethink>=0.2.0.dev88',
|
'doublethink>=0.2.0.dev88',
|
||||||
'rethinkdb>=2.3,<2.4',
|
'rethinkdb>=2.3',
|
||||||
'cerberus==1.0.1',
|
'cerberus>=1.0.1',
|
||||||
'jinja2',
|
'jinja2>=2.10',
|
||||||
'cryptography!=2.1.1', # 2.1.1 installation is failing on ubuntu
|
'cryptography>=2.3',
|
||||||
'python-magic',
|
'python-magic>=0.4.15',
|
||||||
],
|
],
|
||||||
extras_require={
|
extras_require={
|
||||||
'dashboard': ['flask>=0.11', 'gunicorn'],
|
'dashboard': [
|
||||||
|
'flask>=0.11',
|
||||||
|
'gunicorn>=19.8.1'
|
||||||
|
],
|
||||||
'easy': [
|
'easy': [
|
||||||
'warcprox>=2.4b2.dev173',
|
'warcprox>=2.4b2.dev173',
|
||||||
'pywb<2',
|
'pywb>=0.33.2,<2',
|
||||||
'flask>=0.11',
|
'flask>=0.11',
|
||||||
'gunicorn'
|
'gunicorn>=19.8.1'
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
zip_safe=False,
|
zip_safe=False,
|
||||||
|
|
|
@@ -1,15 +1,14 @@
|
||||||
Single-VM Vagrant Brozzler Deployment
|
Single-VM Vagrant Brozzler Deployment
|
||||||
-------------------------------------
|
-------------------------------------
|
||||||
|
|
||||||
This is a work in progress. Vagrant + ansible configuration for a single-vm
|
This is a vagrant + ansible configuration for a single-vm deployment of
|
||||||
deployment of brozzler and warcprox with dependencies (notably rethinkdb).
|
brozzler and warcprox with dependencies (notably rethinkdb).
|
||||||
|
|
||||||
The idea is for this to be a quick way for people to get up and running with a
|
The idea is for this to be a quick way for people to get up and running with a
|
||||||
deployment resembling a real distributed deployment, and to offer a starting
|
deployment resembling a real distributed deployment, and to offer a starting
|
||||||
configuration for people to adapt to their clusters.
|
configuration for people to adapt to their clusters.
|
||||||
|
|
||||||
And equally important, as a harness for integration tests. (As of now brozzler
|
And equally important, as a harness for integration tests.
|
||||||
itself has no automated tests!)
|
|
||||||
|
|
||||||
You'll need vagrant installed.
|
You'll need vagrant installed.
|
||||||
https://www.vagrantup.com/docs/installation/
|
https://www.vagrantup.com/docs/installation/
|
||||||
|
@@ -25,27 +24,27 @@ the brozzler virtualenv.
|
||||||
::
|
::
|
||||||
|
|
||||||
my-laptop$ vagrant ssh
|
my-laptop$ vagrant ssh
|
||||||
vagrant@brozzler-easy:~$ source ~/brozzler-ve34/bin/activate
|
vagrant@brzl:~$ source /opt/brozzler-ve34/bin/activate
|
||||||
(brozzler-ve34)vagrant@brozzler-easy:~$
|
(brozzler-ve34)vagrant@brzl:~$
|
||||||
|
|
||||||
Then you can run brozzler-new-site:
|
Then you can run brozzler-new-site:
|
||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
(brozzler-ve34)vagrant@brozzler-easy:~$ brozzler-new-site \
|
(brozzler-ve34)vagrant@brzl:~$ brozzler-new-site --proxy=localhost:8000 http://example.com/
|
||||||
--proxy=localhost:8000 http://example.com/
|
|
||||||
|
|
||||||
|
|
||||||
Or brozzler-new-job (make sure to set the proxy to localhost:8000):
|
Or brozzler-new-job (make sure to set the proxy to localhost:8000):
|
||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
(brozzler-ve34)vagrant@brozzler-easy:~$ cat >job1.yml
|
(brozzler-ve34)vagrant@brzl:~$ cat >job1.yml <<EOF
|
||||||
id: job1
|
id: job1
|
||||||
proxy: localhost:8000 # point at warcprox for archiving
|
proxy: localhost:8000 # point at warcprox for archiving
|
||||||
seeds:
|
seeds:
|
||||||
- url: https://example.org/
|
- url: https://example.org/
|
||||||
(brozzler-ve34)vagrant@brozzler-easy:~$ brozzler-new-job job1.yml
|
EOF
|
||||||
|
(brozzler-ve34)vagrant@brzl:~$ brozzler-new-job job1.yml
|
||||||
|
|
||||||
WARC files will appear in ./warcs and brozzler, warcprox and rethinkdb logs in
|
WARC files will appear in ./warcs and brozzler, warcprox and rethinkdb logs in
|
||||||
./logs (via vagrant folders syncing).
|
./logs (via vagrant folders syncing).
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue