mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-11-30 12:16:55 -05:00
Don't check robots.txt when scheduling a new site to be crawled; instead, mark the seed Page as needs_robots_check and delegate the robots check to brozzler-worker. Also add a new test of robots.txt adherence.
This commit is contained in:
parent
24cc8377fb
commit
72816d1058
7 changed files with 121 additions and 36 deletions
2
setup.py
2
setup.py
|
|
@ -32,7 +32,7 @@ def find_package_data(package):
|
|||
|
||||
setuptools.setup(
|
||||
name='brozzler',
|
||||
version='1.1b8.dev126',
|
||||
version='1.1b8.dev127',
|
||||
description='Distributed web crawling with browsers',
|
||||
url='https://github.com/internetarchive/brozzler',
|
||||
author='Noah Levitt',
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue