mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-19 07:15:52 -04:00
handle http auth (#138)
Abort brozzling on interstitial (auth dialog) because we have no other recourse at this point. Waiting on Network.requestIntercepted auth challenge support (it didn't work in our latest testing): https://chromedevtools.github.io/devtools-protocol/tot/Network#type-AuthChallengeResponse
This commit is contained in:
parent
15610fa990
commit
e2b2542d4a
@ -30,6 +30,9 @@ class NothingToClaim(Exception):
|
||||
class CrawlStopped(Exception):
|
||||
pass
|
||||
|
||||
class PageInterstitialShown(Exception):
|
||||
pass
|
||||
|
||||
class ProxyError(Exception):
|
||||
pass
|
||||
|
||||
|
@ -241,10 +241,12 @@ class WebsockReceiverThread(threading.Thread):
|
||||
if self.on_request:
|
||||
self.on_request(message)
|
||||
elif message['method'] == 'Page.interstitialShown':
|
||||
# for AITFIVE-1529: handle http auth
|
||||
# for now, we should consider killing the browser when we receive Page.interstitialShown and
|
||||
# consider the page finished -- first we should figure out when else that event might happen
|
||||
self.logger.info('Page.interstitialShown received')
|
||||
# AITFIVE-1529: handle http auth
|
||||
# we should kill the browser when we receive Page.interstitialShown and
|
||||
# consider the page finished, until this is fixed:
|
||||
# https://bugs.chromium.org/p/chromium/issues/detail?id=764505
|
||||
self.logger.info('Page.interstialShown (likely unsupported http auth request)')
|
||||
brozzler.thread_raise(self.calling_thread, brozzler.PageInterstitialShown)
|
||||
elif message['method'] == 'Inspector.targetCrashed':
|
||||
self.logger.error(
|
||||
'''chrome tab went "aw snap" or "he's dead jim"!''')
|
||||
|
@ -198,6 +198,8 @@ def brozzle_page(argv=None):
|
||||
logging.info('outlinks: \n\t%s', '\n\t'.join(sorted(outlinks)))
|
||||
except brozzler.ReachedLimit as e:
|
||||
logging.error('reached limit %s', e)
|
||||
except brozzler.PageInterstitialShown as e:
|
||||
logging.error('page interstitial shown %s', e)
|
||||
finally:
|
||||
browser.stop()
|
||||
|
||||
|
@ -210,9 +210,12 @@ class BrozzlerWorker:
|
||||
|
||||
if self._needs_browsing(page, ydl_fetches):
|
||||
self.logger.info('needs browsing: %s', page)
|
||||
browser_outlinks = self._browse_page(
|
||||
try:
|
||||
browser_outlinks = self._browse_page(
|
||||
browser, site, page, on_screenshot, on_request)
|
||||
outlinks.update(browser_outlinks)
|
||||
outlinks.update(browser_outlinks)
|
||||
except brozzler.PageInterstitialShown:
|
||||
self.logger.info('page interstitial shown (http auth): %s', page)
|
||||
else:
|
||||
if not self._already_fetched(page, ydl_fetches):
|
||||
self.logger.info('needs fetch: %s', page)
|
||||
|
@ -62,6 +62,13 @@ def httpd(request):
|
||||
self.send_header('Content-Length', len(payload))
|
||||
self.end_headers()
|
||||
self.wfile.write(payload)
|
||||
elif self.path == '/401':
|
||||
self.send_response(401)
|
||||
self.send_header('WWW-Authenticate', 'Basic realm=\"Test\"')
|
||||
self.send_header('Content-type', 'text/html')
|
||||
self.end_headers()
|
||||
self.wfile.write(self.headers.getheader('Authorization'))
|
||||
self.wfile.write('not authenticated')
|
||||
else:
|
||||
super().do_GET()
|
||||
|
||||
@ -111,6 +118,13 @@ def test_aw_snap_hes_dead_jim():
|
||||
with pytest.raises(brozzler.BrowsingException):
|
||||
browser.browse_page('chrome://crash')
|
||||
|
||||
def test_page_interstitial_exception(httpd):
|
||||
chrome_exe = brozzler.suggest_default_chrome_exe()
|
||||
url = 'http://localhost:%s/401' % httpd.server_port
|
||||
with brozzler.Browser(chrome_exe=chrome_exe) as browser:
|
||||
with pytest.raises(brozzler.PageInterstitialShown):
|
||||
browser.browse_page(url)
|
||||
|
||||
def test_on_response(httpd):
|
||||
response_urls = []
|
||||
def on_response(msg):
|
||||
|
Loading…
x
Reference in New Issue
Block a user