# Mirror of https://github.com/internetarchive/brozzler.git
# (synced 2025-02-23 16:19:49 -05:00; 103 lines, 1.5 KiB, YAML)
#
# Validation schema for a job configuration, expressed with Cerberus-style
# rules (type / required / nullable / allowed / dependencies / minlength).
#
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been flattened -- confirm against the upstream file,
# in particular that max_hops / max_hops_off belong under `scope`.

# Optional job identifier; either a string or an integer is accepted.
id:
  type:
  - string
  - integer
  required: false

# Options shared by two levels: they are merged into this top-level mapping
# here, and merged again into each seed's schema through the
# *multi_level_options alias further down.
<<: &multi_level_options
  # Must be a non-negative number.
  time_limit:
    type: number
    min: 0

  ignore_robots:
    type: boolean

  # May be a dict or an explicit null.
  warcprox_meta:
    type: dict
    nullable: true

  scope:
    type: dict
    schema:
      surt:
        type: string

      # List of rules; each rule is validated against the &scope_rule schema.
      accepts:
        type: list
        schema: &scope_rule
          type: dict
          schema:

            domain:
              type: string

            substring:
              type: string

            regex:
              type: string # code up a cerberus regex type?

            ssurt:
              type: string

            surt:
              type: string

            parent_url_regex:
              type: string

            # Restricted to the three listed match kinds.
            url_match:
              type: string
              allowed:
              - STRING_MATCH
              - SURT_MATCH
              - REGEX_MATCH

            # Only valid when `url_match` is present in the same rule.
            value:
              type: string
              dependencies:
              - url_match

      # Same rule shape as `accepts` (reuses the anchored schema).
      blocks:
        type: list
        schema: *scope_rule

      max_hops:
        type: integer

      max_hops_off:
        type: integer

  metadata:
    type: dict

  user_agent:
    type: string

  behavior_parameters:
    type: dict

# At least one seed is required; each seed is a dict with a mandatory `url`.
seeds:
  type: list
  required: true
  minlength: 1
  schema:
    type: dict
    schema:

      # NOTE(review): `url` is not a built-in Cerberus type; presumably it is
      # supplied by a custom validator in the loading code -- confirm.
      url:
        type: url
        required: true

      username:
        type: string

      password:
        type: string

      # Every multi-level option may also be set on an individual seed.
      <<: *multi_level_options

max_claimed_sites:
  type: integer