From 69b8c6639cce17aa235e1f5eddc9f1b222533b9f Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Fri, 20 Oct 2023 14:41:05 -0700 Subject: [PATCH 1/2] import yt-dlp directly, skip overriding urlopen --- brozzler/ydl.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/brozzler/ydl.py b/brozzler/ydl.py index 65f1902..9329b8d 100644 --- a/brozzler/ydl.py +++ b/brozzler/ydl.py @@ -17,7 +17,7 @@ limitations under the License. ''' import logging -import yt_dlp as youtube_dl +import yt_dlp from yt_dlp.utils import match_filter_func import brozzler import urllib.request @@ -93,7 +93,7 @@ def final_bounces(fetches, url): def _build_youtube_dl(worker, destdir, site, page): ''' - Builds a yt-dlp `youtube_dl.YoutubeDL` for brozzling `site` with `worker`. + Builds a yt-dlp `yt_dlp.YoutubeDL` for brozzling `site` with `worker`. The `YoutubeDL` instance does a few special brozzler-specific things: @@ -108,20 +108,12 @@ def _build_youtube_dl(worker, destdir, site, page): site (brozzler.Site): the site we are brozzling Returns: - a yt-dlp `youtube_dl.YoutubeDL` instance + a yt-dlp `yt_dlp.YoutubeDL` instance ''' - class _YoutubeDL(youtube_dl.YoutubeDL): + class _YoutubeDL(yt_dlp.YoutubeDL): logger = logging.getLogger(__module__ + "." + __qualname__) - def urlopen(self, req): - try: - url = req.full_url - except AttributeError: - url = req - self.logger.debug('fetching %r', url) - return super().urlopen(req) - def add_default_extra_info(self, ie_result, ie, url): # hook in some logging super().add_default_extra_info(ie_result, ie, url) @@ -260,8 +252,8 @@ def _build_youtube_dl(worker, destdir, site, page): # this looked like a problem with nsf-mounted homedir, shouldn't be a problem for brozzler on focal? "cache_dir": "/home/archiveit", - "logger": logging.getLogger("youtube_dl"), - "verbose": True, + "logger": logging.getLogger("yt_dlp"), + "verbose": False, "quiet": False, } From 8a6f7418630b26eeaf5cc7d8fa5f814e96669fb4 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Mon, 23 Oct 2023 14:43:01 -0700 Subject: [PATCH 2/2] better format selection --- brozzler/ydl.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/brozzler/ydl.py b/brozzler/ydl.py index 9329b8d..e21e6b5 100644 --- a/brozzler/ydl.py +++ b/brozzler/ydl.py @@ -233,16 +233,10 @@ def _build_youtube_dl(worker, destdir, site, page): # https://github.com/yt-dlp/yt-dlp#format-selection # "By default, yt-dlp tries to download the best available quality..." - # https://github.com/yt-dlp/yt-dlp#sorting-formats - # "You can change the criteria for being considered the best by using -S (--format-sort)...." - # "vext: Video Extension (mp4 > webm > flv > other). If --prefer-free-formats is used, webm is preferred." - # "aext: Audio Extension (m4a > aac > mp3 > ogg > opus > webm > other)." - # "If --prefer-free-formats is used, the order changes to opus > ogg > webm > m4a > mp3 > aac." - # "ext: Equivalent to vext,aext" # pre-v.2023.07.06: "format_sort": ["ext"], - # pre-v.2023.07.06: "format": "b/bv+ba" # v.2023.07.06 https://www.reddit.com/r/youtubedl/wiki/h264/?rdt=63577 - "format_sort": ["codec:h264"], + "format": "b/bv+ba", + "format_sort": ["res:720,vcodec:h264,acodec:aac"], # skip live streams "match_filter": match_filter_func("!is_live"),