mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-06 05:24:19 -04:00
add the new urlcanon.MatchRule conditions to job_schema.yaml
This commit is contained in:
parent
12fb9eaa15
commit
0021a9d5f0
2 changed files with 16 additions and 1 deletions
|
@@ -38,6 +38,21 @@ id:
|
||||||
domain:
|
domain:
|
||||||
type: string
|
type: string
|
||||||
|
|
||||||
|
substring:
|
||||||
|
type: string
|
||||||
|
|
||||||
|
regex:
|
||||||
|
type: string # code up a regex type?
|
||||||
|
|
||||||
|
ssurt:
|
||||||
|
type: string
|
||||||
|
|
||||||
|
surt:
|
||||||
|
type: string
|
||||||
|
|
||||||
|
parent_url_regex:
|
||||||
|
type: string
|
||||||
|
|
||||||
url_match:
|
url_match:
|
||||||
type: string
|
type: string
|
||||||
allowed:
|
allowed:
|
||||||
|
|
2
setup.py
2
setup.py
|
@@ -32,7 +32,7 @@ def find_package_data(package):
|
||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.1b9.dev203',
|
version='1.1b9.dev204',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue