handle http auth (#138)

abort brozzling on interstitial (auth dialog)

because we have no other recourse at this point. We are still waiting on auth challenge support via Network.requestIntercepted (it didn't work in our latest testing).
https://chromedevtools.github.io/devtools-protocol/tot/Network#type-AuthChallengeResponse
This commit is contained in:
Barbara Miller 2018-11-16 15:10:30 -08:00 committed by Noah Levitt
parent 15610fa990
commit e2b2542d4a
5 changed files with 30 additions and 6 deletions

View File

@ -30,6 +30,9 @@ class NothingToClaim(Exception):
class CrawlStopped(Exception):
pass
class PageInterstitialShown(Exception):
pass
class ProxyError(Exception):
pass

View File

@ -241,10 +241,12 @@ class WebsockReceiverThread(threading.Thread):
if self.on_request:
self.on_request(message)
elif message['method'] == 'Page.interstitialShown':
# for AITFIVE-1529: handle http auth
# for now, we should consider killing the browser when we receive Page.interstitialShown and
# consider the page finished—-first we should figure out when else that event might happen
self.logger.info('Page.interstitialShown received')
# AITFIVE-1529: handle http auth
# we should kill the browser when we receive Page.interstitialShown and
# consider the page finished, until this is fixed:
# https://bugs.chromium.org/p/chromium/issues/detail?id=764505
self.logger.info('Page.interstialShown (likely unsupported http auth request)')
brozzler.thread_raise(self.calling_thread, brozzler.PageInterstitialShown)
elif message['method'] == 'Inspector.targetCrashed':
self.logger.error(
'''chrome tab went "aw snap" or "he's dead jim"!''')

View File

@ -198,6 +198,8 @@ def brozzle_page(argv=None):
logging.info('outlinks: \n\t%s', '\n\t'.join(sorted(outlinks)))
except brozzler.ReachedLimit as e:
logging.error('reached limit %s', e)
except brozzler.PageInterstitialShown as e:
logging.error('page interstitial shown %s', e)
finally:
browser.stop()

View File

@ -210,9 +210,12 @@ class BrozzlerWorker:
if self._needs_browsing(page, ydl_fetches):
self.logger.info('needs browsing: %s', page)
browser_outlinks = self._browse_page(
try:
browser_outlinks = self._browse_page(
browser, site, page, on_screenshot, on_request)
outlinks.update(browser_outlinks)
outlinks.update(browser_outlinks)
except brozzler.PageInterstitialShown:
self.logger.info('page interstitial shown (http auth): %s', page)
else:
if not self._already_fetched(page, ydl_fetches):
self.logger.info('needs fetch: %s', page)

View File

@ -62,6 +62,13 @@ def httpd(request):
self.send_header('Content-Length', len(payload))
self.end_headers()
self.wfile.write(payload)
elif self.path == '/401':
self.send_response(401)
self.send_header('WWW-Authenticate', 'Basic realm=\"Test\"')
self.send_header('Content-type', 'text/html')
self.end_headers()
self.wfile.write(self.headers.getheader('Authorization'))
self.wfile.write('not authenticated')
else:
super().do_GET()
@ -111,6 +118,13 @@ def test_aw_snap_hes_dead_jim():
with pytest.raises(brozzler.BrowsingException):
browser.browse_page('chrome://crash')
def test_page_interstitial_exception(httpd):
chrome_exe = brozzler.suggest_default_chrome_exe()
url = 'http://localhost:%s/401' % httpd.server_port
with brozzler.Browser(chrome_exe=chrome_exe) as browser:
with pytest.raises(brozzler.PageInterstitialShown):
browser.browse_page(url)
def test_on_response(httpd):
response_urls = []
def on_response(msg):