mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 00:29:53 -05:00
Document the job config format
This commit is contained in:
parent
182cbfd0ce
commit
02af30edd4
41
README.rst
41
README.rst
@ -27,6 +27,47 @@ Installation
|
||||
|
||||
Brozzler also requires a rethinkdb deployment.
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
Launch one or more workers:
|
||||
|
||||
::
|
||||
|
||||
brozzler-worker -e chromium
|
||||
|
||||
Submit jobs:
|
||||
|
||||
::
|
||||
|
||||
brozzler-new-job myjob.yaml
|
||||
|
||||
Job Configuration
|
||||
-----------------
|
||||
|
||||
Jobs are defined using YAML files. Options may be specified either at the
|
||||
top-level or on individual seeds. A job id and at least one seed url
|
||||
must be specified; everything else is optional.
|
||||
|
||||
::
|
||||
|
||||
id: myjob
|
||||
time_limit: 60 # seconds
|
||||
proxy: http://127.0.0.1:8000 # point at warcprox for archiving
|
||||
ignore_robots: false
|
||||
enable_warcprox_features: false
|
||||
warcprox_meta: null
|
||||
metadata: {}
|
||||
seeds:
|
||||
- url: http://one.example.org/
|
||||
- url: http://two.example.org/
|
||||
time_limit: 30
|
||||
- url: http://three.example.org/
|
||||
time_limit: 10
|
||||
ignore_robots: true
|
||||
scope:
|
||||
surt: http://(org,example,
|
||||
|
||||
Fonts (for decent screenshots)
|
||||
------------------------------
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user