minimal yt-dlp updates

This commit is contained in:
Barbara Miller 2022-01-19 16:53:46 -08:00
parent 040a942ef2
commit 0a5b511c57
2 changed files with 8 additions and 8 deletions

View File

@@ -1,7 +1,7 @@
'''
brozzler/ydl.py - youtube-dl support for brozzler
Copyright (C) 2020 Internet Archive
Copyright (C) 2022 Internet Archive
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@ limitations under the License.
'''
import logging
import youtube_dl
import yt_dlp as youtube_dl
import brozzler
import urllib.request
import tempfile
@@ -44,7 +44,7 @@ def _finish_frag_download(ffd_self, ctx):
return result
youtube_dl.downloader.fragment.FragmentFD._finish_frag_download = _finish_frag_download
_orig_webpage_read_content = youtube_dl.extractor.generic.GenericIE._webpage_read_content
_orig_webpage_read_content = youtube_dl.extractor.GenericIE._webpage_read_content
def _webpage_read_content(self, *args, **kwargs):
content = _orig_webpage_read_content(self, *args, **kwargs)
if len(content) > 20000000:
@@ -53,7 +53,7 @@ def _webpage_read_content(self, *args, **kwargs):
'too large (%s characters)', len(content))
return ''
return content
youtube_dl.extractor.generic.GenericIE._webpage_read_content = _webpage_read_content
youtube_dl.extractor.GenericIE._webpage_read_content = _webpage_read_content
class ExtraHeaderAdder(urllib.request.BaseHandler):
def __init__(self, extra_headers):
@@ -251,7 +251,6 @@ def _build_youtube_dl(worker, destdir, site):
"outtmpl": "{}/ydl%(autonumber)s.out".format(destdir),
"retries": 1,
"nocheckcertificate": True,
"hls_prefer_native": True,
"noplaylist": True,
"noprogress": True,
"nopart": True,
@@ -329,7 +328,8 @@ def _try_youtube_dl(worker, ydl, site, page):
# we do whatwg canonicalization here to avoid "<urlopen error
# no host given>" resulting in ProxyError
# needs automated test
ie_result = ydl.extract_info(str(urlcanon.whatwg(page.url)))
# and yt-dlp needs sanitize_info for extract_info
ie_result = ydl.sanitize_info(ydl.extract_info(str(urlcanon.whatwg(page.url))))
_remember_videos(page, ydl.fetch_spy.fetches, ydl.stitch_ups)
if worker._using_warcprox(site):
info_json = json.dumps(ie_result, sort_keys=True, indent=4)

View File

@@ -2,7 +2,7 @@
'''
setup.py - brozzler setup script
Copyright (C) 2014-2021 Internet Archive
Copyright (C) 2014-2022 Internet Archive
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -65,7 +65,7 @@ setuptools.setup(
},
install_requires=[
'PyYAML>=5.1',
'youtube-dl>=2018.7.21',
'yt_dlp',
'reppy==0.3.4',
'requests>=2.21',
'websocket-client>=0.39.0,<=0.48.0',