mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-23 08:09:48 -05:00
Merge pull request #312 from internetarchive/adam/patch-yt-dlp-infinite-loop-bug
feat: override yt-dlp generic extractor to add redirect loop detectio…
This commit is contained in:
commit
1e30b4f478
@ -18,7 +18,7 @@ limitations under the License.
|
||||
|
||||
import logging
|
||||
import yt_dlp
|
||||
from yt_dlp.utils import match_filter_func
|
||||
from yt_dlp.utils import match_filter_func, ExtractorError
|
||||
import brozzler
|
||||
import urllib.request
|
||||
import tempfile
|
||||
@ -37,6 +37,7 @@ thread_local = threading.local()
|
||||
YTDLP_PROXY = ""
|
||||
PROXY_ATTEMPTS = 4
|
||||
YTDLP_WAIT = 10
|
||||
YTDLP_MAX_REDIRECTS = 5
|
||||
|
||||
|
||||
def should_ytdlp(site, page, page_status, skip_av_seeds):
|
||||
@ -113,6 +114,28 @@ def _build_youtube_dl(worker, destdir, site, page):
|
||||
class _YoutubeDL(yt_dlp.YoutubeDL):
|
||||
logger = logging.getLogger(__module__ + "." + __qualname__)
|
||||
|
||||
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Count consecutive "url" / "url_transparent" results and refuse to
    follow the chain once it gets too long; otherwise hand off to the
    parent implementation.

    The hop count is kept in ``extra_info["extraction_depth"]``. It is
    incremented for every result whose ``_type`` is "url" or
    "url_transparent" and reset to 0 for any other result type (a missing
    ``_type`` is treated as "video").

    Raises:
        ExtractorError: (with ``expected=True``) when the hop count
            reaches ``YTDLP_MAX_REDIRECTS``.
    """
    if extra_info is None:
        extra_info = {}

    follows_url = ie_result.get("_type", "video") in ("url", "url_transparent")
    if not follows_url:
        # any non-redirect result restarts the count
        extra_info["extraction_depth"] = 0
    else:
        # only log once a count is already being carried in extra_info
        if "extraction_depth" in extra_info:
            self.logger.info(
                f"Following redirect URL: {ie_result['url']} extraction_depth: {extra_info['extraction_depth']}"
            )
        hops_so_far = extra_info.get("extraction_depth", 0)
        extra_info["extraction_depth"] = 1 + hops_so_far

    too_deep = extra_info["extraction_depth"] >= YTDLP_MAX_REDIRECTS
    if too_deep:
        raise ExtractorError(
            f"Too many hops for URL: {ie_result['url']}",
            expected=True,
        )

    return super().process_ie_result(ie_result, download, extra_info)
|
||||
|
||||
def add_default_extra_info(self, ie_result, ie, url):
|
||||
# hook in some logging
|
||||
super().add_default_extra_info(ie_result, ie, url)
|
||||
@ -361,6 +384,10 @@ def _try_youtube_dl(worker, ydl, site, page):
|
||||
and e.exc_info[1].code == 420
|
||||
):
|
||||
raise brozzler.ReachedLimit(e.exc_info[1])
|
||||
elif isinstance(e, yt_dlp.utils.DownloadError) and (
|
||||
"Redirect loop detected" in e.msg or "Too many redirects" in e.msg
|
||||
):
|
||||
raise brozzler.VideoExtractorError(e.msg)
|
||||
else:
|
||||
# todo: other errors to handle separately?
|
||||
# OSError('Tunnel connection failed: 464 Host Not Allowed') (caused by ProxyError...)
|
||||
|
Loading…
x
Reference in New Issue
Block a user