Patch summary (free text before the first file header; patch tools skip it).

brozzler/worker.py
  * Re-indents the continuation line of the self._browse_page(...) call.
  * Drops the early "return outlinks" from the PageInterstitialShown
    handler, so control falls through to whatever follows the if/else.

tests/test_brozzling.py
  * Adds a /401 route to the local httpd fixture that answers with
    "401 + WWW-Authenticate: Basic", i.e. an HTTP auth challenge.
  * Points test_page_interstitial_exception at that local route instead
    of an external host, so the test no longer needs network access.
  * The handler reads the request header with headers.get(...) and writes
    bytes. On Python 3 the request headers object has no getheader()
    method and wfile only accepts bytes, so the earlier form raised
    inside the handler after the status line had already been sent.

NOTE(review): leading whitespace of the hunk lines was reconstructed from
Python block structure (including which side of the whitespace-only
change in worker.py is the deeper one) -- verify against the tree, or
apply with whitespace tolerance. The index line of the test file still
carries the blob id of the originally submitted version.

diff --git a/brozzler/worker.py b/brozzler/worker.py
index dadc4be..b5f38ce 100644
--- a/brozzler/worker.py
+++ b/brozzler/worker.py
@@ -212,11 +212,10 @@ class BrozzlerWorker:
             self.logger.info('needs browsing: %s', page)
             try:
                 browser_outlinks = self._browse_page(
-                    browser, site, page, on_screenshot, on_request)
+                        browser, site, page, on_screenshot, on_request)
                 outlinks.update(browser_outlinks)
             except brozzler.PageInterstitialShown:
                 self.logger.info('page interstitial shown (http auth): %s', page)
-                return outlinks
         else:
             if not self._already_fetched(page, ydl_fetches):
                 self.logger.info('needs fetch: %s', page)
diff --git a/tests/test_brozzling.py b/tests/test_brozzling.py
index f4515de..0efe5a3 100644
--- a/tests/test_brozzling.py
+++ b/tests/test_brozzling.py
@@ -62,6 +62,13 @@ def httpd(request):
                 self.send_header('Content-Length', len(payload))
                 self.end_headers()
                 self.wfile.write(payload)
+            elif self.path == '/401':
+                self.send_response(401)
+                self.send_header('WWW-Authenticate', 'Basic realm=\"Test\"')
+                self.send_header('Content-type', 'text/html')
+                self.end_headers()
+                self.wfile.write(self.headers.get('Authorization', '').encode('latin-1'))
+                self.wfile.write(b'not authenticated')
             else:
                 super().do_GET()
 
@@ -111,11 +118,12 @@ def test_aw_snap_hes_dead_jim():
         with pytest.raises(brozzler.BrowsingException):
             browser.browse_page('chrome://crash')
 
-def test_page_interstitial_exception():
+def test_page_interstitial_exception(httpd):
     chrome_exe = brozzler.suggest_default_chrome_exe()
+    url = 'http://localhost:%s/401' % httpd.server_port
     with brozzler.Browser(chrome_exe=chrome_exe) as browser:
         with pytest.raises(brozzler.PageInterstitialShown):
-            browser.browse_page('https://monitor.archive.org/cgi-bin/nagios3/status.cgi?hostgroup=38.wbgrp')
+            browser.browse_page(url)
 
 def test_on_response(httpd):
     response_urls = []