mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-05-02 06:36:20 -04:00
bubble up proxy errors fetching robots.txt, with unit test, and documentation
This commit is contained in:
parent
df7734f2ca
commit
0884b4cd56
2 changed files with 45 additions and 3 deletions
|
@ -27,6 +27,7 @@ import socket
|
|||
import logging
|
||||
import yaml
|
||||
import datetime
|
||||
import requests
|
||||
|
||||
@pytest.fixture(scope='module')
|
||||
def httpd(request):
|
||||
|
@ -107,6 +108,19 @@ blocks:
|
|||
assert site.is_in_scope(
|
||||
'https://www.youtube.com/watch?v=dUIn5OAPS5s', yt_user_page)
|
||||
|
||||
def test_robots_proxy_down(httpd):
    '''
    Test that exception fetching robots.txt bubbles up if proxy is down.

    A socket is bound to an ephemeral port on 127.0.0.1 but never put into
    listening mode, and that address is passed as the proxy. The robots.txt
    check is expected to raise `requests.exceptions.ProxyError` rather than
    swallow the failure.
    '''
    url = 'http://localhost:%s/' % httpd.server_port
    site = brozzler.Site(None, {'seed': url, 'user_agent': 'im/a/GoOdbot/yep'})

    # Keep the socket open (bound, not listening) for the duration of the
    # check so the port stays reserved, and close it deterministically
    # afterwards instead of leaking the file descriptor.
    with socket.socket() as sock:
        sock.bind(('127.0.0.1', 0))
        not_listening_proxy = '127.0.0.1:%s' % sock.getsockname()[1]
        with pytest.raises(requests.exceptions.ProxyError):
            brozzler.is_permitted_by_robots(
                    site, url, proxy=not_listening_proxy)
|
||||
|
||||
def test_start_stop_backwards_compat():
|
||||
site = brozzler.Site(None, {'seed': 'http://example.com/'})
|
||||
assert len(site.starts_and_stops) == 1
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue