raise brozzler.ProxyError in case of proxy error fetching robots.txt, doing youtube-dl, or doing raw fetch

This commit is contained in:
Noah Levitt 2017-04-17 18:15:22 -07:00
parent 349b41ab32
commit dc43794363
4 changed files with 61 additions and 26 deletions

View file

@ -23,11 +23,11 @@ import threading
import os
import brozzler
import brozzler.chrome
import socket
import logging
import yaml
import datetime
import requests
import tempfile
@pytest.fixture(scope='module')
def httpd(request):
@ -108,18 +108,41 @@ blocks:
assert site.is_in_scope(
'https://www.youtube.com/watch?v=dUIn5OAPS5s', yt_user_page)
def test_robots_proxy_down(httpd):
def test_proxy_down():
'''
Test that exception fetching robots.txt bubbles up if proxy is down.
'''
url = 'http://localhost:%s/' % httpd.server_port
site = brozzler.Site(None, {'seed':url,'user_agent':'im/a/GoOdbot/yep'})
Test that all fetching scenarios raise `brozzler.ProxyError` when the proxy is down.
sock = socket.socket()
sock.bind(('127.0.0.1', 0))
not_listening_proxy = '127.0.0.1:%s' % sock.getsockname()[1]
with pytest.raises(requests.exceptions.ProxyError):
brozzler.is_permitted_by_robots(site, url, proxy=not_listening_proxy)
This test needs to cover every possible fetch through the proxy other than
fetches from the browser. For that, see test_brozzling.py.
'''
# nobody listens on port 4 :)
not_listening_proxy = '127.0.0.1:4'
### binding and not listening produces another type of connection
### error, which we could test, but it takes a while
# sock = socket.socket()
# sock.bind(('127.0.0.1', 0))
# not_listening_proxy = '127.0.0.1:%s' % sock.getsockname()[1]
worker = brozzler.BrozzlerWorker(frontier=None, proxy=not_listening_proxy)
site = brozzler.Site(None, {'seed':'http://example.com/'})
page = brozzler.Page(None, {'url': 'http://example.com/'})
# robots.txt fetch
with pytest.raises(brozzler.ProxyError):
brozzler.is_permitted_by_robots(
site, 'http://example.com/', proxy=not_listening_proxy)
# youtube-dl fetch
with tempfile.TemporaryDirectory(prefix='brzl-ydl-') as tempdir:
ydl = worker._youtube_dl(tempdir, site)
with pytest.raises(brozzler.ProxyError):
worker._try_youtube_dl(ydl, site, page)
# raw fetch
with pytest.raises(brozzler.ProxyError):
worker._fetch_url(site, page)
def test_start_stop_backwards_compat():
site = brozzler.Site(None, {'seed': 'http://example.com/'})