mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-15 01:35:49 -04:00
tidier, better exception handling?
This commit is contained in:
parent
51d011c285
commit
156ec0caa1
3 changed files with 8 additions and 10 deletions
|
@ -243,12 +243,10 @@ class WebsockReceiverThread(threading.Thread):
|
||||||
elif message['method'] == 'Page.interstitialShown':
|
elif message['method'] == 'Page.interstitialShown':
|
||||||
# AITFIVE-1529: handle http auth
|
# AITFIVE-1529: handle http auth
|
||||||
# we should kill the browser when we receive Page.interstitialShown and
|
# we should kill the browser when we receive Page.interstitialShown and
|
||||||
# consider the page finished, until this is fixed: https://bugs.chromium.org/p/chromium/issues/detail?id=764505
|
# consider the page finished, until this is fixed:
|
||||||
self.page_interstitial_shown = brozzler.PageInterstitialShown(
|
# https://bugs.chromium.org/p/chromium/issues/detail?id=764505
|
||||||
warcprox_meta=None)
|
self.logger.info('Page.interstialShown (likely unsupported http auth request)')
|
||||||
self.logger.info('Page.interstialShown (likely http auth request) %s', self.page_interstitial_shown)
|
brozzler.thread_raise(self.calling_thread, brozzler.PageInterstitialShown)
|
||||||
brozzler.thread_raise(
|
|
||||||
self.calling_thread, brozzler.PageInterstitialShown)
|
|
||||||
elif message['method'] == 'Inspector.targetCrashed':
|
elif message['method'] == 'Inspector.targetCrashed':
|
||||||
self.logger.error(
|
self.logger.error(
|
||||||
'''chrome tab went "aw snap" or "he's dead jim"!''')
|
'''chrome tab went "aw snap" or "he's dead jim"!''')
|
||||||
|
|
|
@ -198,6 +198,8 @@ def brozzle_page(argv=None):
|
||||||
logging.info('outlinks: \n\t%s', '\n\t'.join(sorted(outlinks)))
|
logging.info('outlinks: \n\t%s', '\n\t'.join(sorted(outlinks)))
|
||||||
except brozzler.ReachedLimit as e:
|
except brozzler.ReachedLimit as e:
|
||||||
logging.error('reached limit %s', e)
|
logging.error('reached limit %s', e)
|
||||||
|
except brozzler.PageInterstitialShown as e:
|
||||||
|
logging.error('page interstitial shown %s', e)
|
||||||
finally:
|
finally:
|
||||||
browser.stop()
|
browser.stop()
|
||||||
|
|
||||||
|
|
|
@ -191,8 +191,6 @@ class BrozzlerWorker:
|
||||||
ydl_fetches = ydl.do_youtube_dl(self, site, page)
|
ydl_fetches = ydl.do_youtube_dl(self, site, page)
|
||||||
except brozzler.ReachedLimit as e:
|
except brozzler.ReachedLimit as e:
|
||||||
raise
|
raise
|
||||||
except brozzler.PageInterstitialShown as e:
|
|
||||||
raise
|
|
||||||
except brozzler.ShutdownRequested:
|
except brozzler.ShutdownRequested:
|
||||||
raise
|
raise
|
||||||
except brozzler.ProxyError:
|
except brozzler.ProxyError:
|
||||||
|
@ -371,14 +369,14 @@ class BrozzlerWorker:
|
||||||
self.logger.info("shutdown requested")
|
self.logger.info("shutdown requested")
|
||||||
except brozzler.NothingToClaim:
|
except brozzler.NothingToClaim:
|
||||||
self.logger.info("no pages left for site %s", site)
|
self.logger.info("no pages left for site %s", site)
|
||||||
except brozzler.PageInterstitialShown:
|
|
||||||
pass
|
|
||||||
except brozzler.ReachedLimit as e:
|
except brozzler.ReachedLimit as e:
|
||||||
self._frontier.reached_limit(site, e)
|
self._frontier.reached_limit(site, e)
|
||||||
except brozzler.ReachedTimeLimit as e:
|
except brozzler.ReachedTimeLimit as e:
|
||||||
self._frontier.finished(site, "FINISHED_TIME_LIMIT")
|
self._frontier.finished(site, "FINISHED_TIME_LIMIT")
|
||||||
except brozzler.CrawlStopped:
|
except brozzler.CrawlStopped:
|
||||||
self._frontier.finished(site, "FINISHED_STOP_REQUESTED")
|
self._frontier.finished(site, "FINISHED_STOP_REQUESTED")
|
||||||
|
except brozzler.PageInterstitialShown:
|
||||||
|
self.logger.info("{} shut down after unsupported http auth request".format(browser))
|
||||||
# except brozzler.browser.BrowsingAborted:
|
# except brozzler.browser.BrowsingAborted:
|
||||||
# self.logger.info("{} shut down".format(browser))
|
# self.logger.info("{} shut down".format(browser))
|
||||||
except brozzler.ProxyError as e:
|
except brozzler.ProxyError as e:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue