mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-16 10:10:42 -04:00
Merge branch 'adam/patch-yt-dlp-infinite-loop-bug' into qa
This commit is contained in:
commit
15d448857f
1 changed files with 16 additions and 35 deletions
|
@ -117,44 +117,25 @@ def _build_youtube_dl(worker, destdir, site, page):
|
||||||
a yt-dlp `yt_dlp.YoutubeDL` instance
|
a yt-dlp `yt_dlp.YoutubeDL` instance
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Custom GenericIE to handle redirect loops with shared state
|
|
||||||
class CustomGenericIE(yt_dlp.extractor.generic.GenericIE):
|
|
||||||
"""Custom Generic Information Extractor to detect redirect loops."""
|
|
||||||
|
|
||||||
logger = logging.getLogger(__module__ + "." + __qualname__)
|
|
||||||
visited_redirect_urls = set()
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
super().__init__(*args, **kwargs)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
# self.visited_redirect_urls.clear()
|
|
||||||
self.logger.info(f"[CustomGenericIE] Extracting URL: {url}")
|
|
||||||
return super()._real_extract(url)
|
|
||||||
|
|
||||||
def report_following_redirect(self, new_url):
|
|
||||||
self.logger.info(
|
|
||||||
f"[CustomGenericIE] Following redirect URL: {new_url} "
|
|
||||||
f"redirect_count: {len(self.visited_redirect_urls)}"
|
|
||||||
)
|
|
||||||
if new_url in self.visited_redirect_urls:
|
|
||||||
raise ExtractorError(
|
|
||||||
f"Redirect loop detected for URL: {new_url}",
|
|
||||||
expected=True,
|
|
||||||
)
|
|
||||||
if len(self.visited_redirect_urls) > YTDLP_MAX_REDIRECTS:
|
|
||||||
raise ExtractorError(
|
|
||||||
f"Too many redirects for URL: {new_url}",
|
|
||||||
expected=True,
|
|
||||||
)
|
|
||||||
self.visited_redirect_urls.add(new_url)
|
|
||||||
return super().report_following_redirect(new_url)
|
|
||||||
|
|
||||||
yt_dlp.extractor.generic.GenericIE = CustomGenericIE
|
|
||||||
|
|
||||||
class _YoutubeDL(yt_dlp.YoutubeDL):
|
class _YoutubeDL(yt_dlp.YoutubeDL):
|
||||||
logger = logging.getLogger(__module__ + "." + __qualname__)
|
logger = logging.getLogger(__module__ + "." + __qualname__)
|
||||||
|
|
||||||
|
def process_ie_result(self, ie_result, download=True, extra_info=None):
|
||||||
|
if extra_info is None:
|
||||||
|
extra_info = {}
|
||||||
|
if "redirect_count" in extra_info:
|
||||||
|
self.logger.info(
|
||||||
|
f"Following redirect URL: {ie_result['url']} redirect_count: {extra_info['redirect_count']}"
|
||||||
|
)
|
||||||
|
extra_info["redirect_count"] = 1 + extra_info.get("redirect_count", 0)
|
||||||
|
if extra_info["redirect_count"] > YTDLP_MAX_REDIRECTS:
|
||||||
|
raise ExtractorError(
|
||||||
|
f"Too many redirects for URL: {ie_result['url']}",
|
||||||
|
expected=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
super().process_ie_result(ie_result, download, extra_info)
|
||||||
|
|
||||||
def add_default_extra_info(self, ie_result, ie, url):
|
def add_default_extra_info(self, ie_result, ie, url):
|
||||||
# hook in some logging
|
# hook in some logging
|
||||||
super().add_default_extra_info(ie_result, ie, url)
|
super().add_default_extra_info(ie_result, ie, url)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue