Merge branch 'ytdlp_last' into qa

This commit is contained in:
Barbara Miller 2024-04-10 13:42:27 -07:00
commit 4a5283944d
2 changed files with 7 additions and 7 deletions

View File

@@ -255,6 +255,7 @@ class BrozzlerWorker:
if enable_youtube_dl and ydl.should_ytdlp(page, site):
try:
ydl_outlinks = ydl.do_youtube_dl(self, site, page)
outlinks.update(ydl_outlinks)
except brozzler.ReachedLimit as e:
raise
except brozzler.ShutdownRequested:
@@ -278,8 +279,6 @@ class BrozzlerWorker:
self.logger.error(
"youtube_dl raised exception on %s", page, exc_info=True
)
outlinks.update(ydl_outlinks)
return outlinks
def _browse_page(self, browser, site, page, on_screenshot=None, on_request=None):

View File

@@ -378,8 +378,9 @@ def _remember_videos(page, fetches, pushed_videos=None):
def _try_youtube_dl(worker, ydl, site, page):
ytdlp_url = page.redirect_url if page.redirect_url else page.url
try:
logging.info("trying yt-dlp on %s", page)
logging.info("trying yt-dlp on %s", ytdlp_url)
with brozzler.thread_accept_exceptions():
# we do whatwg canonicalization here to avoid "<urlopen error
@@ -387,7 +388,7 @@ def _try_youtube_dl(worker, ydl, site, page):
# needs automated test
# and yt-dlp needs sanitize_info for extract_info
ie_result = ydl.sanitize_info(
ydl.extract_info(str(urlcanon.whatwg(page.url)))
ydl.extract_info(str(urlcanon.whatwg(ytdlp_url)))
)
_remember_videos(page, ydl.fetch_spy.fetches, ydl.pushed_videos)
if worker._using_warcprox(site):
@@ -395,11 +396,11 @@ def _try_youtube_dl(worker, ydl, site, page):
logging.info(
"sending WARCPROX_WRITE_RECORD request to warcprox "
"with yt-dlp json for %s",
page,
ytdlp_url,
)
worker._warcprox_write_record(
warcprox_address=worker._proxy_for(site),
url="youtube-dl:%s" % str(urlcanon.semantic(page.url)),
url="youtube-dl:%s" % str(urlcanon.semantic(ytdlp_url)),
warc_type="metadata",
content_type="application/vnd.youtube-dl_formats+json;charset=utf-8",
payload=info_json.encode("utf-8"),
@@ -425,7 +426,7 @@ def _try_youtube_dl(worker, ydl, site, page):
):
# connection problem when using a proxy == proxy error (XXX?)
raise brozzler.ProxyError(
"yt-dlp hit apparent proxy error from " "%s" % page.url
"yt-dlp hit apparent proxy error from " "%s" % ytdlp_url
) from e
else:
raise