From 02af30edd4670b7c823ae41cb9b158f680343cb0 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Tue, 17 May 2016 15:20:09 +1000 Subject: [PATCH] Document the job config format --- README.rst | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/README.rst b/README.rst index 759f751..d8414dd 100644 --- a/README.rst +++ b/README.rst @@ -27,6 +27,47 @@ Installation Brozzler also requires a rethinkdb deployment. +Usage +----- + +Launch one or more workers: + +:: + + brozzler-worker -e chromium + +Submit jobs: + +:: + + brozzler-new-job myjob.yaml + +Job Configuration +----------------- + +Jobs are defined using yaml files. Options may be specified either at the +top-level or on individual seeds. A job id and at least one seed url +must be specified; everything else is optional. + +:: + + id: myjob + time_limit: 60 # seconds + proxy: http://127.0.0.1:8000 # point at warcprox for archiving + ignore_robots: false + enable_warcprox_features: false + warcprox_meta: null + metadata: {} + seeds: + - url: http://one.example.org/ + - url: http://two.example.org/ + time_limit: 30 + - url: http://three.example.org/ + time_limit: 10 + ignore_robots: true + scope: + surt: http://(org,example, + Fonts (for decent screenshots) ------------------------------