diff --git a/setup.py b/setup.py index d3df130..fb5b62a 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ def find_package_data(package): setuptools.setup( name='brozzler', - version='1.1b11.dev233', + version='1.1b11.dev234', description='Distributed web crawling with browsers', url='https://github.com/internetarchive/brozzler', author='Noah Levitt', diff --git a/tests/test_brozzling.py b/tests/test_brozzling.py index 3253954..ae6e8ee 100644 --- a/tests/test_brozzling.py +++ b/tests/test_brozzling.py @@ -27,6 +27,7 @@ import argparse import urllib import json import threading +import socket args = argparse.Namespace() args.log_level = logging.INFO @@ -192,23 +193,23 @@ def test_proxy_down(): Test that browsing raises `brozzler.ProxyError` when proxy is down. See also `test_proxy_down` in test_units.py. + + Tests two different kinds of connection error: + - nothing listening the port (nobody listens on on port 4 :)) + - port bound but not accepting connections ''' - site = brozzler.Site(None, {'seed':'http://example.com/'}) - page = brozzler.Page(None, {'url': 'http://example.com/'}) + sock = socket.socket() + sock.bind(('127.0.0.1', 0)) + for not_listening_proxy in ( + '127.0.0.1:4', '127.0.0.1:%s' % sock.getsockname()[1]): + site = brozzler.Site(None, {'seed':'http://example.com/'}) + page = brozzler.Page(None, {'url': 'http://example.com/'}) - # nobody listens on port 4 :) - not_listening_proxy = '127.0.0.1:4' + worker = brozzler.BrozzlerWorker( + frontier=None, proxy=not_listening_proxy) + chrome_exe = brozzler.suggest_default_chrome_exe() - ### binding and not listening produces another type of connection - ### error, which we could test, but it takes a while - # sock = socket.socket() - # sock.bind(('127.0.0.1', 0)) - # not_listening_proxy = '127.0.0.1:%s' % sock.getsockname()[1] - - worker = brozzler.BrozzlerWorker(frontier=None, proxy=not_listening_proxy) - chrome_exe = brozzler.suggest_default_chrome_exe() - - with brozzler.Browser(chrome_exe=chrome_exe) as browser: - with pytest.raises(brozzler.ProxyError): - worker.brozzle_page(browser, site, page) + with brozzler.Browser(chrome_exe=chrome_exe) as browser: + with pytest.raises(brozzler.ProxyError): + worker.brozzle_page(browser, site, page) diff --git a/tests/test_units.py b/tests/test_units.py index 142a664..d9825d5 100644 --- a/tests/test_units.py +++ b/tests/test_units.py @@ -28,6 +28,8 @@ import yaml import datetime import requests import tempfile +import uuid +import socket @pytest.fixture(scope='module') def httpd(request): @@ -114,35 +116,35 @@ def test_proxy_down(): This test needs to cover every possible fetch through the proxy other than fetches from the browser. For that, see test_brozzling.py. + + Tests two different kinds of connection error: + - nothing listening the port (nobody listens on on port 4 :)) + - port bound but not accepting connections ''' - # nobody listens on port 4 :) - not_listening_proxy = '127.0.0.1:4' + sock = socket.socket() + sock.bind(('127.0.0.1', 0)) + for not_listening_proxy in ( + '127.0.0.1:4', '127.0.0.1:%s' % sock.getsockname()[1]): + worker = brozzler.BrozzlerWorker( + frontier=None, proxy=not_listening_proxy) + site = brozzler.Site(None, { + 'id': str(uuid.uuid4()), 'seed': 'http://example.com/'}) + page = brozzler.Page(None, {'url': 'http://example.com/'}) - ### binding and not listening produces another type of connection - ### error, which we could test, but it takes a while - # sock = socket.socket() - # sock.bind(('127.0.0.1', 0)) - # not_listening_proxy = '127.0.0.1:%s' % sock.getsockname()[1] - - worker = brozzler.BrozzlerWorker(frontier=None, proxy=not_listening_proxy) - - site = brozzler.Site(None, {'seed':'http://example.com/'}) - page = brozzler.Page(None, {'url': 'http://example.com/'}) - - # robots.txt fetch - with pytest.raises(brozzler.ProxyError): - brozzler.is_permitted_by_robots( - site, 'http://example.com/', proxy=not_listening_proxy) - - # youtube-dl fetch - with tempfile.TemporaryDirectory(prefix='brzl-ydl-') as tempdir: - ydl = worker._youtube_dl(tempdir, site) + # robots.txt fetch with pytest.raises(brozzler.ProxyError): - worker._try_youtube_dl(ydl, site, page) + brozzler.is_permitted_by_robots( + site, 'http://example.com/', proxy=not_listening_proxy) - # raw fetch - with pytest.raises(brozzler.ProxyError): - worker._fetch_url(site, page) + # youtube-dl fetch + with tempfile.TemporaryDirectory(prefix='brzl-ydl-') as tempdir: + ydl = worker._youtube_dl(tempdir, site) + with pytest.raises(brozzler.ProxyError): + worker._try_youtube_dl(ydl, site, page) + + # raw fetch + with pytest.raises(brozzler.ProxyError): + worker._fetch_url(site, page) def test_start_stop_backwards_compat(): site = brozzler.Site(None, {'seed': 'http://example.com/'})