From 0a5b511c572996fb09481e9da63e14f978752d17 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Wed, 19 Jan 2022 16:53:46 -0800 Subject: [PATCH] minimal yt-dlp updates --- brozzler/ydl.py | 12 ++++++------ setup.py | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/brozzler/ydl.py b/brozzler/ydl.py index 9c25981..c8a2326 100644 --- a/brozzler/ydl.py +++ b/brozzler/ydl.py @@ -1,7 +1,7 @@ ''' brozzler/ydl.py - youtube-dl support for brozzler -Copyright (C) 2020 Internet Archive +Copyright (C) 2022 Internet Archive Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ limitations under the License. ''' import logging -import youtube_dl +import yt_dlp as youtube_dl import brozzler import urllib.request import tempfile @@ -44,7 +44,7 @@ def _finish_frag_download(ffd_self, ctx): return result youtube_dl.downloader.fragment.FragmentFD._finish_frag_download = _finish_frag_download -_orig_webpage_read_content = youtube_dl.extractor.generic.GenericIE._webpage_read_content +_orig_webpage_read_content = youtube_dl.extractor.GenericIE._webpage_read_content def _webpage_read_content(self, *args, **kwargs): content = _orig_webpage_read_content(self, *args, **kwargs) if len(content) > 20000000: @@ -53,7 +53,7 @@ def _webpage_read_content(self, *args, **kwargs): 'too large (%s characters)', len(content)) return '' return content -youtube_dl.extractor.generic.GenericIE._webpage_read_content = _webpage_read_content +youtube_dl.extractor.GenericIE._webpage_read_content = _webpage_read_content class ExtraHeaderAdder(urllib.request.BaseHandler): def __init__(self, extra_headers): @@ -251,7 +251,6 @@ def _build_youtube_dl(worker, destdir, site): "outtmpl": "{}/ydl%(autonumber)s.out".format(destdir), "retries": 1, "nocheckcertificate": True, - "hls_prefer_native": True, "noplaylist": True, "noprogress": True, "nopart": True, @@ -329,7 +328,8 @@ def _try_youtube_dl(worker, 
ydl, site, page): # we do whatwg canonicalization here to avoid "<urlopen error no host given>" resulting in ProxyError # needs automated test - ie_result = ydl.extract_info(str(urlcanon.whatwg(page.url))) + # and yt-dlp needs sanitize_info for extract_info + ie_result = ydl.sanitize_info(ydl.extract_info(str(urlcanon.whatwg(page.url)))) _remember_videos(page, ydl.fetch_spy.fetches, ydl.stitch_ups) if worker._using_warcprox(site): info_json = json.dumps(ie_result, sort_keys=True, indent=4) diff --git a/setup.py b/setup.py index e105941..41c26d4 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ ''' setup.py - brozzler setup script -Copyright (C) 2014-2021 Internet Archive +Copyright (C) 2014-2022 Internet Archive Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -65,7 +65,7 @@ setuptools.setup( }, install_requires=[ 'PyYAML>=5.1', - 'youtube-dl>=2018.7.21', + 'yt_dlp', 'reppy==0.3.4', 'requests>=2.21', 'websocket-client>=0.39.0,<=0.48.0',