remove some vestiges of the old per-site proxy setting (the 'proxy' key in Site settings)

This commit is contained in:
Noah Levitt 2017-03-24 16:04:43 -07:00
parent a826fdc7ef
commit a836269e95
3 changed files with 3 additions and 9 deletions

View file

@@ -161,8 +161,7 @@ def brozzle_page():
if args.behavior_parameters: if args.behavior_parameters:
behavior_parameters = json.loads(args.behavior_parameters) behavior_parameters = json.loads(args.behavior_parameters)
site = brozzler.Site(None, { site = brozzler.Site(None, {
'id': -1, 'seed': args.url, 'proxy': args.proxy, 'id': -1, 'seed': args.url, 'behavior_parameters': behavior_parameters,
'behavior_parameters': behavior_parameters,
'username': args.username, 'password': args.password}) 'username': args.username, 'password': args.password})
page = brozzler.Page(None, {'url': args.url, 'site_id': site.id}) page = brozzler.Page(None, {'url': args.url, 'site_id': site.id})
worker = brozzler.BrozzlerWorker(frontier=None) worker = brozzler.BrozzlerWorker(frontier=None)
@@ -178,7 +177,7 @@ def brozzle_page():
logging.info('wrote screenshot to %s', filename) logging.info('wrote screenshot to %s', filename)
browser = brozzler.Browser(chrome_exe=args.chrome_exe) browser = brozzler.Browser(chrome_exe=args.chrome_exe)
browser.start(proxy=site.proxy) browser.start(proxy=args.proxy)
try: try:
outlinks = worker.brozzle_page( outlinks = worker.brozzle_page(
browser, site, page, on_screenshot=on_screenshot) browser, site, page, on_screenshot=on_screenshot)
@@ -260,7 +259,6 @@ def brozzler_new_site():
rr = rethinker(args) rr = rethinker(args)
site = brozzler.Site(rr, { site = brozzler.Site(rr, {
'seed': args.seed, 'seed': args.seed,
'proxy': args.proxy,
'time_limit': int(args.time_limit) if args.time_limit else None, 'time_limit': int(args.time_limit) if args.time_limit else None,
'ignore_robots': args.ignore_robots, 'ignore_robots': args.ignore_robots,
'warcprox_meta': json.loads( 'warcprox_meta': json.loads(

View file

@@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup( setuptools.setup(
name='brozzler', name='brozzler',
version='1.1b10.dev222', version='1.1b10.dev223',
description='Distributed web crawling with browsers', description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler', url='https://github.com/internetarchive/brozzler',
author='Noah Levitt', author='Noah Levitt',

View file

@@ -115,7 +115,6 @@ def test_brozzle_site(httpd):
rr = doublethink.Rethinker('localhost', db='brozzler') rr = doublethink.Rethinker('localhost', db='brozzler')
site = brozzler.Site(rr, { site = brozzler.Site(rr, {
'seed': 'http://localhost:%s/site1/' % httpd.server_port, 'seed': 'http://localhost:%s/site1/' % httpd.server_port,
'proxy': 'localhost:8000',
'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}}) 'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}})
# the two pages we expect to be crawled # the two pages we expect to be crawled
@@ -336,7 +335,6 @@ def test_obey_robots(httpd):
rr = doublethink.Rethinker('localhost', db='brozzler') rr = doublethink.Rethinker('localhost', db='brozzler')
site = brozzler.Site(rr, { site = brozzler.Site(rr, {
'seed': 'http://localhost:%s/site1/' % httpd.server_port, 'seed': 'http://localhost:%s/site1/' % httpd.server_port,
'proxy': 'localhost:8000',
'user_agent': 'im a badbot', # robots.txt blocks badbot 'user_agent': 'im a badbot', # robots.txt blocks badbot
'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}}) 'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}})
@@ -389,7 +387,6 @@ def test_login(httpd):
rr = doublethink.Rethinker('localhost', db='brozzler') rr = doublethink.Rethinker('localhost', db='brozzler')
site = brozzler.Site(rr, { site = brozzler.Site(rr, {
'seed': 'http://localhost:%s/site2/' % httpd.server_port, 'seed': 'http://localhost:%s/site2/' % httpd.server_port,
'proxy': 'localhost:8000',
'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}, 'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}},
'username': 'test_username', 'password': 'test_password'}) 'username': 'test_username', 'password': 'test_password'})
@@ -430,7 +427,6 @@ def test_seed_redirect(httpd):
seed_url = 'http://localhost:%s/site5/redirect/' % httpd.server_port seed_url = 'http://localhost:%s/site5/redirect/' % httpd.server_port
site = brozzler.Site(rr, { site = brozzler.Site(rr, {
'seed': 'http://localhost:%s/site5/redirect/' % httpd.server_port, 'seed': 'http://localhost:%s/site5/redirect/' % httpd.server_port,
'proxy': 'localhost:8000',
'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}}) 'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}})
assert site.scope['surt'] == 'http://(localhost:%s,)/site5/redirect/' % httpd.server_port assert site.scope['surt'] == 'http://(localhost:%s,)/site5/redirect/' % httpd.server_port