tidier, better exception handling?

This commit is contained in:
Barbara Miller 2018-09-21 17:46:19 -07:00
parent 51d011c285
commit 156ec0caa1
3 changed files with 8 additions and 10 deletions

View File

@@ -243,12 +243,10 @@ class WebsockReceiverThread(threading.Thread):
elif message['method'] == 'Page.interstitialShown':
# AITFIVE-1529: handle http auth
# we should kill the browser when we receive Page.interstitialShown and
# consider the page finished, until this is fixed: https://bugs.chromium.org/p/chromium/issues/detail?id=764505
self.page_interstitial_shown = brozzler.PageInterstitialShown(
warcprox_meta=None)
self.logger.info('Page.interstialShown (likely http auth request) %s', self.page_interstitial_shown)
brozzler.thread_raise(
self.calling_thread, brozzler.PageInterstitialShown)
# consider the page finished, until this is fixed:
# https://bugs.chromium.org/p/chromium/issues/detail?id=764505
self.logger.info('Page.interstialShown (likely unsupported http auth request)')
brozzler.thread_raise(self.calling_thread, brozzler.PageInterstitialShown)
elif message['method'] == 'Inspector.targetCrashed':
self.logger.error(
'''chrome tab went "aw snap" or "he's dead jim"!''')

View File

@@ -198,6 +198,8 @@ def brozzle_page(argv=None):
logging.info('outlinks: \n\t%s', '\n\t'.join(sorted(outlinks)))
except brozzler.ReachedLimit as e:
logging.error('reached limit %s', e)
except brozzler.PageInterstitialShown as e:
logging.error('page interstitial shown %s', e)
finally:
browser.stop()

View File

@@ -191,8 +191,6 @@ class BrozzlerWorker:
ydl_fetches = ydl.do_youtube_dl(self, site, page)
except brozzler.ReachedLimit as e:
raise
except brozzler.PageInterstitialShown as e:
raise
except brozzler.ShutdownRequested:
raise
except brozzler.ProxyError:
@@ -371,14 +369,14 @@ class BrozzlerWorker:
self.logger.info("shutdown requested")
except brozzler.NothingToClaim:
self.logger.info("no pages left for site %s", site)
except brozzler.PageInterstitialShown:
pass
except brozzler.ReachedLimit as e:
self._frontier.reached_limit(site, e)
except brozzler.ReachedTimeLimit as e:
self._frontier.finished(site, "FINISHED_TIME_LIMIT")
except brozzler.CrawlStopped:
self._frontier.finished(site, "FINISHED_STOP_REQUESTED")
except brozzler.PageInterstitialShown:
self.logger.info("{} shut down after unsupported http auth request".format(browser))
# except brozzler.browser.BrowsingAborted:
# self.logger.info("{} shut down".format(browser))
except brozzler.ProxyError as e: