diff --git a/brozzler/cli.py b/brozzler/cli.py
index 97c2541..b0d5afa 100644
--- a/brozzler/cli.py
+++ b/brozzler/cli.py
@@ -161,8 +161,7 @@ def brozzle_page():
     if args.behavior_parameters:
         behavior_parameters = json.loads(args.behavior_parameters)
     site = brozzler.Site(None, {
-        'id': -1, 'seed': args.url, 'proxy': args.proxy,
-        'behavior_parameters': behavior_parameters,
+        'id': -1, 'seed': args.url, 'behavior_parameters': behavior_parameters,
         'username': args.username, 'password': args.password})
     page = brozzler.Page(None, {'url': args.url, 'site_id': site.id})
     worker = brozzler.BrozzlerWorker(frontier=None)
@@ -178,7 +177,7 @@ def brozzle_page():
         logging.info('wrote screenshot to %s', filename)
 
     browser = brozzler.Browser(chrome_exe=args.chrome_exe)
-    browser.start(proxy=site.proxy)
+    browser.start(proxy=args.proxy)
     try:
         outlinks = worker.brozzle_page(
                 browser, site, page, on_screenshot=on_screenshot)
@@ -260,7 +259,6 @@ def brozzler_new_site():
     rr = rethinker(args)
     site = brozzler.Site(rr, {
         'seed': args.seed,
-        'proxy': args.proxy,
         'time_limit': int(args.time_limit) if args.time_limit else None,
         'ignore_robots': args.ignore_robots,
         'warcprox_meta': json.loads(
diff --git a/setup.py b/setup.py
index a0f9152..ad9eea4 100644
--- a/setup.py
+++ b/setup.py
@@ -32,7 +32,7 @@ def find_package_data(package):
 
 setuptools.setup(
         name='brozzler',
-        version='1.1b10.dev222',
+        version='1.1b10.dev223',
         description='Distributed web crawling with browsers',
         url='https://github.com/internetarchive/brozzler',
         author='Noah Levitt',
diff --git a/tests/test_cluster.py b/tests/test_cluster.py
index 669c7bf..0f1e9e6 100644
--- a/tests/test_cluster.py
+++ b/tests/test_cluster.py
@@ -115,7 +115,6 @@ def test_brozzle_site(httpd):
     rr = doublethink.Rethinker('localhost', db='brozzler')
     site = brozzler.Site(rr, {
         'seed': 'http://localhost:%s/site1/' % httpd.server_port,
-        'proxy': 'localhost:8000',
         'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}})
 
     # the two pages we expect to be crawled
@@ -336,7 +335,6 @@ def test_obey_robots(httpd):
     rr = doublethink.Rethinker('localhost', db='brozzler')
     site = brozzler.Site(rr, {
         'seed': 'http://localhost:%s/site1/' % httpd.server_port,
-        'proxy': 'localhost:8000',
         'user_agent': 'im a badbot', # robots.txt blocks badbot
         'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}})
 
@@ -389,7 +387,6 @@ def test_login(httpd):
     rr = doublethink.Rethinker('localhost', db='brozzler')
     site = brozzler.Site(rr, {
         'seed': 'http://localhost:%s/site2/' % httpd.server_port,
-        'proxy': 'localhost:8000',
         'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}},
         'username': 'test_username', 'password': 'test_password'})
 
@@ -430,7 +427,6 @@ def test_seed_redirect(httpd):
     seed_url = 'http://localhost:%s/site5/redirect/' % httpd.server_port
     site = brozzler.Site(rr, {
         'seed': 'http://localhost:%s/site5/redirect/' % httpd.server_port,
-        'proxy': 'localhost:8000',
         'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}})
     assert site.scope['surt'] == 'http://(localhost:%s,)/site5/redirect/' % httpd.server_port
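
For context, a minimal sketch of the call pattern this patch leaves behind in brozzle_page(): the proxy is no longer stored on the Site record, it is handed straight to the browser. Only calls that appear in the diff are used, plus a Browser teardown; the seed URL, proxy address, and chrome executable path below are placeholder values, not taken from the patch.

```python
import brozzler

# Placeholders standing in for the parsed command line arguments
# (args.url, args.proxy, args.chrome_exe in the patch above).
url = 'http://example.com/'
proxy = 'localhost:8000'        # e.g. a warcprox instance (assumed)
chrome_exe = 'chromium-browser'

# The ephemeral Site document no longer carries a 'proxy' field.
site = brozzler.Site(None, {
        'id': -1, 'seed': url, 'behavior_parameters': {}})
page = brozzler.Page(None, {'url': url, 'site_id': site.id})

worker = brozzler.BrozzlerWorker(frontier=None)
browser = brozzler.Browser(chrome_exe=chrome_exe)
browser.start(proxy=proxy)      # proxy comes from the CLI args, not site.proxy
try:
    outlinks = worker.brozzle_page(browser, site, page)
    print('\n'.join(sorted(outlinks)))
finally:
    browser.stop()              # teardown call assumed from the Browser API
```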