mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-07-19 21:18:46 -04:00
raise brozzler.ProxyError in case of proxy error fetching robots.txt, doing youtube-dl, or doing raw fetch
This commit is contained in:
parent
349b41ab32
commit
dc43794363
4 changed files with 61 additions and 26 deletions
|
@@ -23,11 +23,11 @@ import threading
|
|||
import os
|
||||
import brozzler
|
||||
import brozzler.chrome
|
||||
import socket
|
||||
import logging
|
||||
import yaml
|
||||
import datetime
|
||||
import requests
|
||||
import tempfile
|
||||
|
||||
@pytest.fixture(scope='module')
|
||||
def httpd(request):
|
||||
|
@@ -108,18 +108,41 @@ blocks:
|
|||
assert site.is_in_scope(
|
||||
'https://www.youtube.com/watch?v=dUIn5OAPS5s', yt_user_page)
|
||||
|
||||
def test_robots_proxy_down(httpd):
|
||||
def test_proxy_down():
|
||||
'''
|
||||
Test that exception fetching robots.txt bubbles up if proxy is down.
|
||||
'''
|
||||
url = 'http://localhost:%s/' % httpd.server_port
|
||||
site = brozzler.Site(None, {'seed':url,'user_agent':'im/a/GoOdbot/yep'})
|
||||
Test all fetching scenarios raise `brozzler.ProxyError` when proxy is down.
|
||||
|
||||
sock = socket.socket()
|
||||
sock.bind(('127.0.0.1', 0))
|
||||
not_listening_proxy = '127.0.0.1:%s' % sock.getsockname()[1]
|
||||
with pytest.raises(requests.exceptions.ProxyError):
|
||||
brozzler.is_permitted_by_robots(site, url, proxy=not_listening_proxy)
|
||||
This test needs to cover every possible fetch through the proxy other than
|
||||
fetches from the browser. For that, see test_brozzling.py.
|
||||
'''
|
||||
# nobody listens on port 4 :)
|
||||
not_listening_proxy = '127.0.0.1:4'
|
||||
|
||||
### binding and not listening produces another type of connection
|
||||
### error, which we could test, but it takes a while
|
||||
# sock = socket.socket()
|
||||
# sock.bind(('127.0.0.1', 0))
|
||||
# not_listening_proxy = '127.0.0.1:%s' % sock.getsockname()[1]
|
||||
|
||||
worker = brozzler.BrozzlerWorker(frontier=None, proxy=not_listening_proxy)
|
||||
|
||||
site = brozzler.Site(None, {'seed':'http://example.com/'})
|
||||
page = brozzler.Page(None, {'url': 'http://example.com/'})
|
||||
|
||||
# robots.txt fetch
|
||||
with pytest.raises(brozzler.ProxyError):
|
||||
brozzler.is_permitted_by_robots(
|
||||
site, 'http://example.com/', proxy=not_listening_proxy)
|
||||
|
||||
# youtube-dl fetch
|
||||
with tempfile.TemporaryDirectory(prefix='brzl-ydl-') as tempdir:
|
||||
ydl = worker._youtube_dl(tempdir, site)
|
||||
with pytest.raises(brozzler.ProxyError):
|
||||
worker._try_youtube_dl(ydl, site, page)
|
||||
|
||||
# raw fetch
|
||||
with pytest.raises(brozzler.ProxyError):
|
||||
worker._fetch_url(site, page)
|
||||
|
||||
def test_start_stop_backwards_compat():
|
||||
site = brozzler.Site(None, {'seed': 'http://example.com/'})
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue