# brozzler/brozzler/job_schema.yaml
# 2018-05-14 15:38:28 -07:00

# 103 lines
# 1.5 KiB
# YAML

# Job identifier. Optional; the value may be either a string or an integer.
id:
  type:
    - string
    - integer
  required: false
# Options shared by more than one level of the schema. The mapping is anchored
# as `multi_level_options` and merged into the top level right here through
# the `<<` merge key; the same anchor is merged again by alias further down
# (in the per-seed schema).
<<: &multi_level_options
  # Non-negative number.
  time_limit:
    type: number
    min: 0

  ignore_robots:
    type: boolean

  # Free-form mapping; an explicit null is also accepted.
  warcprox_meta:
    type: dict
    nullable: true

  # Scope settings.
  # NOTE(review): the nesting below was reconstructed from a copy of this file
  # whose indentation had been lost; confirm that max_hops and max_hops_off
  # belong inside `scope` rather than next to it.
  scope:
    type: dict
    schema:
      surt:
        type: string

      # List of rules; every entry must match the `scope_rule` schema.
      accepts:
        type: list
        schema: &scope_rule
          type: dict
          schema:
            domain:
              type: string
            substring:
              type: string
            regex:
              type: string  # code up a cerberus regex type?
            ssurt:
              type: string
            surt:
              type: string
            parent_url_regex:
              type: string
            # Restricted to the three values listed under `allowed`.
            url_match:
              type: string
              allowed:
                - STRING_MATCH
                - SURT_MATCH
                - REGEX_MATCH
            # Only valid when `url_match` is present in the same rule.
            value:
              type: string
              dependencies:
                - url_match

      # Same rule shape as `accepts`, reused through the alias.
      blocks:
        type: list
        schema: *scope_rule

      max_hops:
        type: integer

      max_hops_off:
        type: integer

  metadata:
    type: dict

  user_agent:
    type: string

  behavior_parameters:
    type: dict
# Required list holding at least one seed. Each seed is a mapping with a
# mandatory `url`, optional `username` / `password`, and any of the options
# merged in from the `multi_level_options` anchor.
seeds:
  type: list
  required: true
  minlength: 1
  schema:
    type: dict
    schema:
      # NOTE(review): `url` is presumably a custom type supplied by the
      # validator that consumes this schema - confirm.
      url:
        type: url
        required: true

      username:
        type: string

      password:
        type: string

      <<: *multi_level_options
# Optional integer setting, declared only at the top level of the schema.
max_claimed_sites:
  type: integer