mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-23 16:19:49 -05:00
explain --warcprox-auto briefly
This commit is contained in:
parent
d19e139101
commit
eaf7ef74be
10
README.rst
10
README.rst
@ -73,7 +73,7 @@ To install brozzler only::
|
|||||||
|
|
||||||
pip install brozzler # in a virtualenv if desired
|
pip install brozzler # in a virtualenv if desired
|
||||||
|
|
||||||
Launch one or more workers::
|
Launch one or more workers: [*]_ ::
|
||||||
|
|
||||||
brozzler-worker --warcprox-auto
|
brozzler-worker --warcprox-auto
|
||||||
|
|
||||||
@ -85,6 +85,14 @@ Submit sites not tied to a job::
|
|||||||
|
|
||||||
brozzler-new-site --time-limit=600 http://example.com/
|
brozzler-new-site --time-limit=600 http://example.com/
|
||||||
|
|
||||||
|
.. [*] A note about ``--warcprox-auto``: this option tells brozzler to
|
||||||
|
look for a healthy warcprox instance in the `rethinkdb service registry
|
||||||
|
<https://github.com/internetarchive/doublethink#service-registry>`_. For
|
||||||
|
this to work, you need to have at least one instance of warcprox running,
|
||||||
|
with the ``--rethinkdb-services-url`` option pointing to the same rethinkdb
|
||||||
|
services table that brozzler is using. Using ``--warcprox-auto`` is
|
||||||
|
recommended for clustered deployments.
|
||||||
|
|
||||||
Job Configuration
|
Job Configuration
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
2
setup.py
2
setup.py
@ -32,7 +32,7 @@ def find_package_data(package):
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.4.dev299',
|
version='1.4.dev300',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user