mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-05-02 14:46:18 -04:00
Remove some vestiges of the old per-site proxy setting
This commit is contained in:
parent
a826fdc7ef
commit
a836269e95
3 changed files with 3 additions and 9 deletions
|
@ -161,8 +161,7 @@ def brozzle_page():
|
|||
if args.behavior_parameters:
|
||||
behavior_parameters = json.loads(args.behavior_parameters)
|
||||
site = brozzler.Site(None, {
|
||||
'id': -1, 'seed': args.url, 'proxy': args.proxy,
|
||||
'behavior_parameters': behavior_parameters,
|
||||
'id': -1, 'seed': args.url, 'behavior_parameters': behavior_parameters,
|
||||
'username': args.username, 'password': args.password})
|
||||
page = brozzler.Page(None, {'url': args.url, 'site_id': site.id})
|
||||
worker = brozzler.BrozzlerWorker(frontier=None)
|
||||
|
@ -178,7 +177,7 @@ def brozzle_page():
|
|||
logging.info('wrote screenshot to %s', filename)
|
||||
|
||||
browser = brozzler.Browser(chrome_exe=args.chrome_exe)
|
||||
browser.start(proxy=site.proxy)
|
||||
browser.start(proxy=args.proxy)
|
||||
try:
|
||||
outlinks = worker.brozzle_page(
|
||||
browser, site, page, on_screenshot=on_screenshot)
|
||||
|
@ -260,7 +259,6 @@ def brozzler_new_site():
|
|||
rr = rethinker(args)
|
||||
site = brozzler.Site(rr, {
|
||||
'seed': args.seed,
|
||||
'proxy': args.proxy,
|
||||
'time_limit': int(args.time_limit) if args.time_limit else None,
|
||||
'ignore_robots': args.ignore_robots,
|
||||
'warcprox_meta': json.loads(
|
||||
|
|
2
setup.py
2
setup.py
|
@ -32,7 +32,7 @@ def find_package_data(package):
|
|||
|
||||
setuptools.setup(
|
||||
name='brozzler',
|
||||
version='1.1b10.dev222',
|
||||
version='1.1b10.dev223',
|
||||
description='Distributed web crawling with browsers',
|
||||
url='https://github.com/internetarchive/brozzler',
|
||||
author='Noah Levitt',
|
||||
|
|
|
@ -115,7 +115,6 @@ def test_brozzle_site(httpd):
|
|||
rr = doublethink.Rethinker('localhost', db='brozzler')
|
||||
site = brozzler.Site(rr, {
|
||||
'seed': 'http://localhost:%s/site1/' % httpd.server_port,
|
||||
'proxy': 'localhost:8000',
|
||||
'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}})
|
||||
|
||||
# the two pages we expect to be crawled
|
||||
|
@ -336,7 +335,6 @@ def test_obey_robots(httpd):
|
|||
rr = doublethink.Rethinker('localhost', db='brozzler')
|
||||
site = brozzler.Site(rr, {
|
||||
'seed': 'http://localhost:%s/site1/' % httpd.server_port,
|
||||
'proxy': 'localhost:8000',
|
||||
'user_agent': 'im a badbot', # robots.txt blocks badbot
|
||||
'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}})
|
||||
|
||||
|
@ -389,7 +387,6 @@ def test_login(httpd):
|
|||
rr = doublethink.Rethinker('localhost', db='brozzler')
|
||||
site = brozzler.Site(rr, {
|
||||
'seed': 'http://localhost:%s/site2/' % httpd.server_port,
|
||||
'proxy': 'localhost:8000',
|
||||
'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}},
|
||||
'username': 'test_username', 'password': 'test_password'})
|
||||
|
||||
|
@ -430,7 +427,6 @@ def test_seed_redirect(httpd):
|
|||
seed_url = 'http://localhost:%s/site5/redirect/' % httpd.server_port
|
||||
site = brozzler.Site(rr, {
|
||||
'seed': 'http://localhost:%s/site5/redirect/' % httpd.server_port,
|
||||
'proxy': 'localhost:8000',
|
||||
'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}})
|
||||
assert site.scope['surt'] == 'http://(localhost:%s,)/site5/redirect/' % httpd.server_port
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue