raise new exception brozzler.ProxyError in case of proxy error browsing a page

This commit is contained in:
Noah Levitt 2017-04-17 18:14:02 -07:00
parent 87a7301f4d
commit 349b41ab32
4 changed files with 36 additions and 2 deletions

View File

@ -29,6 +29,9 @@ class NothingToClaim(Exception):
class CrawlStopped(Exception):
    '''
    Exception type for a stopped crawl.

    NOTE(review): meaning inferred from the class name only; the code that
    raises and handles it is not visible here -- confirm before relying on
    this description.
    '''
class ProxyError(Exception):
    '''
    Raised when browsing a page fails because of a proxy connection error.

    Carries no state of its own beyond what `Exception` provides.
    '''
class ReachedLimit(Exception):
def __init__(self, http_error=None, warcprox_meta=None, http_payload=None):
import json

View File

@ -239,6 +239,10 @@ class WebsockReceiverThread(threading.Thread):
message['params']['message']['text'])
elif message['method'] == 'Page.javascriptDialogOpening':
self._javascript_dialog_opening(message)
elif (message['method'] == 'Network.loadingFailed'
and 'params' in message and 'errorText' in message['params']
and message['params']['errorText'] == 'net::ERR_PROXY_CONNECTION_FAILED'):
brozzler.thread_raise(self.calling_thread, brozzler.ProxyError)
# else:
# self.logger.debug("%s %s", message["method"], json_message)
elif 'result' in message:
@ -411,7 +415,8 @@ class Browser:
outlinks: a list of navigational links extracted from the page
Raises:
BrowsingException: if browsing the page fails
brozzler.ProxyError: in case of proxy connection error
BrowsingException: if browsing the page fails in some other way
'''
if not self.is_running():
raise BrowsingException('browser has not been started')

View File

@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup(
name='brozzler',
version='1.1b11.dev231',
version='1.1b11.dev232',
description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler',
author='Noah Levitt',

View File

@ -26,6 +26,7 @@ import threading
import argparse
import urllib
import json
import threading
args = argparse.Namespace()
args.log_level = logging.INFO
@ -186,3 +187,28 @@ def test_extract_outlinks(httpd):
'http://localhost:%s/site8/fdjisapofdjisap#2' % httpd.server_port
}
def test_proxy_down():
    '''
    Browsing through a proxy that is down must raise `brozzler.ProxyError`.

    See also `test_proxy_down` in test_units.py.
    '''
    target_url = 'http://example.com/'
    site = brozzler.Site(None, {'seed': target_url})
    page = brozzler.Page(None, {'url': target_url})

    # Nobody listens on port 4, so this address stands in for a dead proxy.
    # A socket that is bound but not listening produces another type of
    # connection error; that could be tested as well, but it takes a while.
    dead_proxy = '127.0.0.1:4'

    worker = brozzler.BrozzlerWorker(frontier=None, proxy=dead_proxy)

    # Browser is the outer context and pytest.raises the inner one, so only
    # an exception escaping brozzle_page() satisfies the expectation.
    with brozzler.Browser(
            chrome_exe=brozzler.suggest_default_chrome_exe()) as browser, \
            pytest.raises(brozzler.ProxyError):
        worker.brozzle_page(browser, site, page)