mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-21 08:06:27 -04:00
Merge branch 'pageInterstitialShown' into qa
This commit is contained in:
commit
24fcca4919
@ -30,6 +30,9 @@ class NothingToClaim(Exception):
|
||||
class CrawlStopped(Exception):
|
||||
pass
|
||||
|
||||
class PageInterstitialShown(Exception):
|
||||
pass
|
||||
|
||||
class ProxyError(Exception):
|
||||
pass
|
||||
|
||||
@ -58,28 +61,6 @@ class ReachedLimit(Exception):
|
||||
def __str__(self):
|
||||
return self.__repr__()
|
||||
|
||||
class PageInterstitialShown(Exception):
|
||||
def __init__(self, http_error=None, warcprox_meta=None, http_payload=None):
|
||||
import json
|
||||
if http_error:
|
||||
if "warcprox-meta" in http_error.headers:
|
||||
self.warcprox_meta = json.loads(
|
||||
http_error.headers["warcprox-meta"])
|
||||
else:
|
||||
self.warcprox_meta = None
|
||||
self.http_payload = http_error.read()
|
||||
elif warcprox_meta:
|
||||
self.warcprox_meta = warcprox_meta
|
||||
self.http_payload = http_payload
|
||||
|
||||
def __repr__(self):
|
||||
return "PageInterstitialShown(warcprox_meta=%r,http_payload=%r)" % (
|
||||
self.warcprox_meta if hasattr(self, 'warcprox_meta') else None,
|
||||
self.http_payload if hasattr(self, 'http_payload') else None)
|
||||
|
||||
def __str__(self):
|
||||
return self.__repr__()
|
||||
|
||||
# monkey-patch log levels TRACE and NOTICE
|
||||
logging.TRACE = (logging.NOTSET + logging.DEBUG) // 2
|
||||
def _logger_trace(self, msg, *args, **kwargs):
|
||||
|
@ -212,10 +212,10 @@ class BrozzlerWorker:
|
||||
try:
|
||||
outlinks = self._browse_page(browser, site, page, on_screenshot,
|
||||
on_request)
|
||||
return outlinks
|
||||
except brozzler.PageInterstitialShown:
|
||||
outlinks = []
|
||||
self.logger.info('page interstitial shown (http auth): %s', page)
|
||||
return []
|
||||
return outlinks
|
||||
else:
|
||||
if not self._already_fetched(page, ydl_fetches):
|
||||
self.logger.info('needs fetch: %s', page)
|
||||
|
Loading…
x
Reference in New Issue
Block a user