mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-05-03 07:05:12 -04:00
remove some vestiges of old proxy stuff
This commit is contained in:
parent
a826fdc7ef
commit
a836269e95
3 changed files with 3 additions and 9 deletions
|
@ -161,8 +161,7 @@ def brozzle_page():
|
||||||
if args.behavior_parameters:
|
if args.behavior_parameters:
|
||||||
behavior_parameters = json.loads(args.behavior_parameters)
|
behavior_parameters = json.loads(args.behavior_parameters)
|
||||||
site = brozzler.Site(None, {
|
site = brozzler.Site(None, {
|
||||||
'id': -1, 'seed': args.url, 'proxy': args.proxy,
|
'id': -1, 'seed': args.url, 'behavior_parameters': behavior_parameters,
|
||||||
'behavior_parameters': behavior_parameters,
|
|
||||||
'username': args.username, 'password': args.password})
|
'username': args.username, 'password': args.password})
|
||||||
page = brozzler.Page(None, {'url': args.url, 'site_id': site.id})
|
page = brozzler.Page(None, {'url': args.url, 'site_id': site.id})
|
||||||
worker = brozzler.BrozzlerWorker(frontier=None)
|
worker = brozzler.BrozzlerWorker(frontier=None)
|
||||||
|
@ -178,7 +177,7 @@ def brozzle_page():
|
||||||
logging.info('wrote screenshot to %s', filename)
|
logging.info('wrote screenshot to %s', filename)
|
||||||
|
|
||||||
browser = brozzler.Browser(chrome_exe=args.chrome_exe)
|
browser = brozzler.Browser(chrome_exe=args.chrome_exe)
|
||||||
browser.start(proxy=site.proxy)
|
browser.start(proxy=args.proxy)
|
||||||
try:
|
try:
|
||||||
outlinks = worker.brozzle_page(
|
outlinks = worker.brozzle_page(
|
||||||
browser, site, page, on_screenshot=on_screenshot)
|
browser, site, page, on_screenshot=on_screenshot)
|
||||||
|
@ -260,7 +259,6 @@ def brozzler_new_site():
|
||||||
rr = rethinker(args)
|
rr = rethinker(args)
|
||||||
site = brozzler.Site(rr, {
|
site = brozzler.Site(rr, {
|
||||||
'seed': args.seed,
|
'seed': args.seed,
|
||||||
'proxy': args.proxy,
|
|
||||||
'time_limit': int(args.time_limit) if args.time_limit else None,
|
'time_limit': int(args.time_limit) if args.time_limit else None,
|
||||||
'ignore_robots': args.ignore_robots,
|
'ignore_robots': args.ignore_robots,
|
||||||
'warcprox_meta': json.loads(
|
'warcprox_meta': json.loads(
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -32,7 +32,7 @@ def find_package_data(package):
|
||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.1b10.dev222',
|
version='1.1b10.dev223',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
|
|
@ -115,7 +115,6 @@ def test_brozzle_site(httpd):
|
||||||
rr = doublethink.Rethinker('localhost', db='brozzler')
|
rr = doublethink.Rethinker('localhost', db='brozzler')
|
||||||
site = brozzler.Site(rr, {
|
site = brozzler.Site(rr, {
|
||||||
'seed': 'http://localhost:%s/site1/' % httpd.server_port,
|
'seed': 'http://localhost:%s/site1/' % httpd.server_port,
|
||||||
'proxy': 'localhost:8000',
|
|
||||||
'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}})
|
'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}})
|
||||||
|
|
||||||
# the two pages we expect to be crawled
|
# the two pages we expect to be crawled
|
||||||
|
@ -336,7 +335,6 @@ def test_obey_robots(httpd):
|
||||||
rr = doublethink.Rethinker('localhost', db='brozzler')
|
rr = doublethink.Rethinker('localhost', db='brozzler')
|
||||||
site = brozzler.Site(rr, {
|
site = brozzler.Site(rr, {
|
||||||
'seed': 'http://localhost:%s/site1/' % httpd.server_port,
|
'seed': 'http://localhost:%s/site1/' % httpd.server_port,
|
||||||
'proxy': 'localhost:8000',
|
|
||||||
'user_agent': 'im a badbot', # robots.txt blocks badbot
|
'user_agent': 'im a badbot', # robots.txt blocks badbot
|
||||||
'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}})
|
'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}})
|
||||||
|
|
||||||
|
@ -389,7 +387,6 @@ def test_login(httpd):
|
||||||
rr = doublethink.Rethinker('localhost', db='brozzler')
|
rr = doublethink.Rethinker('localhost', db='brozzler')
|
||||||
site = brozzler.Site(rr, {
|
site = brozzler.Site(rr, {
|
||||||
'seed': 'http://localhost:%s/site2/' % httpd.server_port,
|
'seed': 'http://localhost:%s/site2/' % httpd.server_port,
|
||||||
'proxy': 'localhost:8000',
|
|
||||||
'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}},
|
'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}},
|
||||||
'username': 'test_username', 'password': 'test_password'})
|
'username': 'test_username', 'password': 'test_password'})
|
||||||
|
|
||||||
|
@ -430,7 +427,6 @@ def test_seed_redirect(httpd):
|
||||||
seed_url = 'http://localhost:%s/site5/redirect/' % httpd.server_port
|
seed_url = 'http://localhost:%s/site5/redirect/' % httpd.server_port
|
||||||
site = brozzler.Site(rr, {
|
site = brozzler.Site(rr, {
|
||||||
'seed': 'http://localhost:%s/site5/redirect/' % httpd.server_port,
|
'seed': 'http://localhost:%s/site5/redirect/' % httpd.server_port,
|
||||||
'proxy': 'localhost:8000',
|
|
||||||
'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}})
|
'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}})
|
||||||
assert site.scope['surt'] == 'http://(localhost:%s,)/site5/redirect/' % httpd.server_port
|
assert site.scope['surt'] == 'http://(localhost:%s,)/site5/redirect/' % httpd.server_port
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue