remove some vestiges of old proxy stuff

This commit is contained in:
Noah Levitt 2017-03-24 16:04:43 -07:00
parent a826fdc7ef
commit a836269e95
3 changed files with 3 additions and 9 deletions

View file

@@ -161,8 +161,7 @@ def brozzle_page():
if args.behavior_parameters:
behavior_parameters = json.loads(args.behavior_parameters)
site = brozzler.Site(None, {
-'id': -1, 'seed': args.url, 'proxy': args.proxy,
-'behavior_parameters': behavior_parameters,
+'id': -1, 'seed': args.url, 'behavior_parameters': behavior_parameters,
'username': args.username, 'password': args.password})
page = brozzler.Page(None, {'url': args.url, 'site_id': site.id})
worker = brozzler.BrozzlerWorker(frontier=None)
@@ -178,7 +177,7 @@ def brozzle_page():
logging.info('wrote screenshot to %s', filename)
browser = brozzler.Browser(chrome_exe=args.chrome_exe)
-browser.start(proxy=site.proxy)
+browser.start(proxy=args.proxy)
try:
outlinks = worker.brozzle_page(
browser, site, page, on_screenshot=on_screenshot)
@@ -260,7 +259,6 @@ def brozzler_new_site():
rr = rethinker(args)
site = brozzler.Site(rr, {
'seed': args.seed,
-'proxy': args.proxy,
'time_limit': int(args.time_limit) if args.time_limit else None,
'ignore_robots': args.ignore_robots,
'warcprox_meta': json.loads(

View file

@@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup(
name='brozzler',
-version='1.1b10.dev222',
+version='1.1b10.dev223',
description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler',
author='Noah Levitt',

View file

@@ -115,7 +115,6 @@ def test_brozzle_site(httpd):
rr = doublethink.Rethinker('localhost', db='brozzler')
site = brozzler.Site(rr, {
'seed': 'http://localhost:%s/site1/' % httpd.server_port,
-'proxy': 'localhost:8000',
'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}})
# the two pages we expect to be crawled
@@ -336,7 +335,6 @@ def test_obey_robots(httpd):
rr = doublethink.Rethinker('localhost', db='brozzler')
site = brozzler.Site(rr, {
'seed': 'http://localhost:%s/site1/' % httpd.server_port,
-'proxy': 'localhost:8000',
'user_agent': 'im a badbot', # robots.txt blocks badbot
'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}})
@@ -389,7 +387,6 @@ def test_login(httpd):
rr = doublethink.Rethinker('localhost', db='brozzler')
site = brozzler.Site(rr, {
'seed': 'http://localhost:%s/site2/' % httpd.server_port,
-'proxy': 'localhost:8000',
'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}},
'username': 'test_username', 'password': 'test_password'})
@@ -430,7 +427,6 @@ def test_seed_redirect(httpd):
seed_url = 'http://localhost:%s/site5/redirect/' % httpd.server_port
site = brozzler.Site(rr, {
'seed': 'http://localhost:%s/site5/redirect/' % httpd.server_port,
-'proxy': 'localhost:8000',
'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}})
assert site.scope['surt'] == 'http://(localhost:%s,)/site5/redirect/' % httpd.server_port