mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-07-26 00:05:42 -04:00
Merge branch 'ytdlp_last' into qa
This commit is contained in:
commit
4a5283944d
2 changed files with 7 additions and 7 deletions
|
@@ -255,6 +255,7 @@ class BrozzlerWorker:
|
||||||
if enable_youtube_dl and ydl.should_ytdlp(page, site):
|
if enable_youtube_dl and ydl.should_ytdlp(page, site):
|
||||||
try:
|
try:
|
||||||
ydl_outlinks = ydl.do_youtube_dl(self, site, page)
|
ydl_outlinks = ydl.do_youtube_dl(self, site, page)
|
||||||
|
outlinks.update(ydl_outlinks)
|
||||||
except brozzler.ReachedLimit as e:
|
except brozzler.ReachedLimit as e:
|
||||||
raise
|
raise
|
||||||
except brozzler.ShutdownRequested:
|
except brozzler.ShutdownRequested:
|
||||||
|
@@ -278,8 +279,6 @@ class BrozzlerWorker:
|
||||||
self.logger.error(
|
self.logger.error(
|
||||||
"youtube_dl raised exception on %s", page, exc_info=True
|
"youtube_dl raised exception on %s", page, exc_info=True
|
||||||
)
|
)
|
||||||
|
|
||||||
outlinks.update(ydl_outlinks)
|
|
||||||
return outlinks
|
return outlinks
|
||||||
|
|
||||||
def _browse_page(self, browser, site, page, on_screenshot=None, on_request=None):
|
def _browse_page(self, browser, site, page, on_screenshot=None, on_request=None):
|
||||||
|
|
|
@@ -378,8 +378,9 @@ def _remember_videos(page, fetches, pushed_videos=None):
|
||||||
|
|
||||||
|
|
||||||
def _try_youtube_dl(worker, ydl, site, page):
|
def _try_youtube_dl(worker, ydl, site, page):
|
||||||
|
ytdlp_url = page.redirect_url if page.redirect_url else page.url
|
||||||
try:
|
try:
|
||||||
logging.info("trying yt-dlp on %s", page)
|
logging.info("trying yt-dlp on %s", ytdlp_url)
|
||||||
|
|
||||||
with brozzler.thread_accept_exceptions():
|
with brozzler.thread_accept_exceptions():
|
||||||
# we do whatwg canonicalization here to avoid "<urlopen error
|
# we do whatwg canonicalization here to avoid "<urlopen error
|
||||||
|
@@ -387,7 +388,7 @@ def _try_youtube_dl(worker, ydl, site, page):
|
||||||
# needs automated test
|
# needs automated test
|
||||||
# and yt-dlp needs sanitize_info for extract_info
|
# and yt-dlp needs sanitize_info for extract_info
|
||||||
ie_result = ydl.sanitize_info(
|
ie_result = ydl.sanitize_info(
|
||||||
ydl.extract_info(str(urlcanon.whatwg(page.url)))
|
ydl.extract_info(str(urlcanon.whatwg(ytdlp_url)))
|
||||||
)
|
)
|
||||||
_remember_videos(page, ydl.fetch_spy.fetches, ydl.pushed_videos)
|
_remember_videos(page, ydl.fetch_spy.fetches, ydl.pushed_videos)
|
||||||
if worker._using_warcprox(site):
|
if worker._using_warcprox(site):
|
||||||
|
@@ -395,11 +396,11 @@ def _try_youtube_dl(worker, ydl, site, page):
|
||||||
logging.info(
|
logging.info(
|
||||||
"sending WARCPROX_WRITE_RECORD request to warcprox "
|
"sending WARCPROX_WRITE_RECORD request to warcprox "
|
||||||
"with yt-dlp json for %s",
|
"with yt-dlp json for %s",
|
||||||
page,
|
ytdlp_url,
|
||||||
)
|
)
|
||||||
worker._warcprox_write_record(
|
worker._warcprox_write_record(
|
||||||
warcprox_address=worker._proxy_for(site),
|
warcprox_address=worker._proxy_for(site),
|
||||||
url="youtube-dl:%s" % str(urlcanon.semantic(page.url)),
|
url="youtube-dl:%s" % str(urlcanon.semantic(ytdlp_url)),
|
||||||
warc_type="metadata",
|
warc_type="metadata",
|
||||||
content_type="application/vnd.youtube-dl_formats+json;charset=utf-8",
|
content_type="application/vnd.youtube-dl_formats+json;charset=utf-8",
|
||||||
payload=info_json.encode("utf-8"),
|
payload=info_json.encode("utf-8"),
|
||||||
|
@@ -425,7 +426,7 @@ def _try_youtube_dl(worker, ydl, site, page):
|
||||||
):
|
):
|
||||||
# connection problem when using a proxy == proxy error (XXX?)
|
# connection problem when using a proxy == proxy error (XXX?)
|
||||||
raise brozzler.ProxyError(
|
raise brozzler.ProxyError(
|
||||||
"yt-dlp hit apparent proxy error from " "%s" % page.url
|
"yt-dlp hit apparent proxy error from " "%s" % ytdlp_url
|
||||||
) from e
|
) from e
|
||||||
else:
|
else:
|
||||||
raise
|
raise
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue