raise brozzler.ProxyError in case of proxy error fetching robots.txt, doing youtube-dl, or doing raw fetch

2025-12-15 00:29:03 -05:00 · 2017-04-17 18:15:22 -07:00 · 2017-04-17 18:15:22 -07:00 · dc43794363
commit dc43794363
parent 349b41ab32
4 changed files with 61 additions and 26 deletions
--- a/tests/test_units.py
+++ b/tests/test_units.py
@@ -23,11 +23,11 @@ import threading
 import os
 import brozzler
 import brozzler.chrome
-import socket
 import logging
 import yaml
 import datetime
 import requests
+import tempfile

 @pytest.fixture(scope='module')
 def httpd(request):
@@ -108,18 +108,41 @@ blocks:
    assert site.is_in_scope(
            'https://www.youtube.com/watch?v=dUIn5OAPS5s', yt_user_page)

-def test_robots_proxy_down(httpd):
+def test_proxy_down():
    '''
-    Test that exception fetching robots.txt bubbles up if proxy is down.
-    '''
-    url = 'http://localhost:%s/' % httpd.server_port
-    site = brozzler.Site(None, {'seed':url,'user_agent':'im/a/GoOdbot/yep'})
+    Test all fetching scenarios raise `brozzler.ProxyError` when proxy is down.

-    sock = socket.socket()
-    sock.bind(('127.0.0.1', 0))
-    not_listening_proxy = '127.0.0.1:%s' % sock.getsockname()[1]
-    with pytest.raises(requests.exceptions.ProxyError):
-        brozzler.is_permitted_by_robots(site, url, proxy=not_listening_proxy)
+    This test needs to cover every possible fetch through the proxy other than
+    fetches from the browser. For that, see test_brozzling.py.
+    '''
+    # nobody listens on port 4 :)
+    not_listening_proxy = '127.0.0.1:4'
+
+    ### binding and not listening produces another type of connection
+    ### error, which we could test, but it takes a while
+    # sock = socket.socket()
+    # sock.bind(('127.0.0.1', 0))
+    # not_listening_proxy = '127.0.0.1:%s' % sock.getsockname()[1]
+
+    worker = brozzler.BrozzlerWorker(frontier=None, proxy=not_listening_proxy)
+
+    site = brozzler.Site(None, {'seed':'http://example.com/'})
+    page = brozzler.Page(None, {'url': 'http://example.com/'})
+
+    # robots.txt fetch
+    with pytest.raises(brozzler.ProxyError):
+        brozzler.is_permitted_by_robots(
+                site, 'http://example.com/', proxy=not_listening_proxy)
+
+    # youtube-dl fetch
+    with tempfile.TemporaryDirectory(prefix='brzl-ydl-') as tempdir:
+        ydl = worker._youtube_dl(tempdir, site)
+        with pytest.raises(brozzler.ProxyError):
+            worker._try_youtube_dl(ydl, site, page)
+
+    # raw fetch
+    with pytest.raises(brozzler.ProxyError):
+        worker._fetch_url(site, page)

 def test_start_stop_backwards_compat():
    site = brozzler.Site(None, {'seed': 'http://example.com/'})