Mirror of https://github.com/internetarchive/brozzler.git (synced 2025-02-23 16:19:49 -05:00)
Raise new exception brozzler.ProxyError when a proxy error occurs while browsing a page
This commit is contained in:
Parent: 87a7301f4d
Commit: 349b41ab32
@ -29,6 +29,9 @@ class NothingToClaim(Exception):
|
||||
class CrawlStopped(Exception):
|
||||
pass
|
||||
|
||||
class ProxyError(Exception):
|
||||
pass
|
||||
|
||||
class ReachedLimit(Exception):
|
||||
def __init__(self, http_error=None, warcprox_meta=None, http_payload=None):
|
||||
import json
|
||||
|
@ -239,6 +239,10 @@ class WebsockReceiverThread(threading.Thread):
|
||||
message['params']['message']['text'])
|
||||
elif message['method'] == 'Page.javascriptDialogOpening':
|
||||
self._javascript_dialog_opening(message)
|
||||
elif (message['method'] == 'Network.loadingFailed'
|
||||
and 'params' in message and 'errorText' in message['params']
|
||||
and message['params']['errorText'] == 'net::ERR_PROXY_CONNECTION_FAILED'):
|
||||
brozzler.thread_raise(self.calling_thread, brozzler.ProxyError)
|
||||
# else:
|
||||
# self.logger.debug("%s %s", message["method"], json_message)
|
||||
elif 'result' in message:
|
||||
@ -411,7 +415,8 @@ class Browser:
|
||||
outlinks: a list of navigational links extracted from the page
|
||||
|
||||
Raises:
|
||||
BrowsingException: if browsing the page fails
|
||||
brozzler.ProxyError: in case of proxy connection error
|
||||
BrowsingException: if browsing the page fails in some other way
|
||||
'''
|
||||
if not self.is_running():
|
||||
raise BrowsingException('browser has not been started')
|
||||
|
setup.py (2 lines changed)
@ -32,7 +32,7 @@ def find_package_data(package):
|
||||
|
||||
setuptools.setup(
|
||||
name='brozzler',
|
||||
version='1.1b11.dev231',
|
||||
version='1.1b11.dev232',
|
||||
description='Distributed web crawling with browsers',
|
||||
url='https://github.com/internetarchive/brozzler',
|
||||
author='Noah Levitt',
|
||||
|
@ -26,6 +26,7 @@ import threading
|
||||
import argparse
|
||||
import urllib
|
||||
import json
|
||||
import threading
|
||||
|
||||
args = argparse.Namespace()
|
||||
args.log_level = logging.INFO
|
||||
@ -186,3 +187,28 @@ def test_extract_outlinks(httpd):
|
||||
'http://localhost:%s/site8/fdjisapofdjisap#2' % httpd.server_port
|
||||
}
|
||||
|
||||
def test_proxy_down():
|
||||
'''
|
||||
Test that browsing raises `brozzler.ProxyError` when proxy is down.
|
||||
|
||||
See also `test_proxy_down` in test_units.py.
|
||||
'''
|
||||
site = brozzler.Site(None, {'seed':'http://example.com/'})
|
||||
page = brozzler.Page(None, {'url': 'http://example.com/'})
|
||||
|
||||
# nobody listens on port 4 :)
|
||||
not_listening_proxy = '127.0.0.1:4'
|
||||
|
||||
### binding and not listening produces another type of connection
|
||||
### error, which we could test, but it takes a while
|
||||
# sock = socket.socket()
|
||||
# sock.bind(('127.0.0.1', 0))
|
||||
# not_listening_proxy = '127.0.0.1:%s' % sock.getsockname()[1]
|
||||
|
||||
worker = brozzler.BrozzlerWorker(frontier=None, proxy=not_listening_proxy)
|
||||
chrome_exe = brozzler.suggest_default_chrome_exe()
|
||||
|
||||
with brozzler.Browser(chrome_exe=chrome_exe) as browser:
|
||||
with pytest.raises(brozzler.ProxyError):
|
||||
worker.brozzle_page(browser, site, page)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user