From d67a05572d64fa4bcb44e55c74afe350d2b7980e Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Mon, 21 Mar 2022 13:28:08 -0700 Subject: [PATCH 1/3] prefer video+audio files, debug postprocessor hook --- brozzler/ydl.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/brozzler/ydl.py b/brozzler/ydl.py index 6c3cf61..c8e56db 100644 --- a/brozzler/ydl.py +++ b/brozzler/ydl.py @@ -224,7 +224,8 @@ def _build_youtube_dl(worker, destdir, site): def ydl_postprocess_hook(d): if d['status'] == 'finished': print('[ydl_postprocess_hook] Done postprocessing') - if worker._using_warcprox(site): + print('[ydl_postprocess_hook] {}'.format(d['postprocessor'])) + if d['postprocessor'] == 'ffmpeg' and worker._using_warcprox(site): _YoutubeDL._push_stitched_up_vid_to_warcprox(_YoutubeDL, site, d['info_dict']) ydl_opts = { @@ -247,6 +248,7 @@ def _build_youtube_dl(worker, destdir, site): # "If --prefer-free-formats is used, the order changes to opus > ogg > webm > m4a > mp3 > aac." # "ext: Equivalent to vext,aext" "format_sort": ["ext"], + "format": "b/bv+ba", # --cache-dir local or... "cache_dir": False, From c52b4af608bc51ecc530310c12b23e023e49cb85 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Mon, 21 Mar 2022 20:26:20 -0700 Subject: [PATCH 2/3] vimeo/M3u8 handling, better logging --- brozzler/ydl.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/brozzler/ydl.py b/brozzler/ydl.py index c8e56db..2a9fac8 100644 --- a/brozzler/ydl.py +++ b/brozzler/ydl.py @@ -223,9 +223,9 @@ def _build_youtube_dl(worker, destdir, site): def ydl_postprocess_hook(d): if d['status'] == 'finished': - print('[ydl_postprocess_hook] Done postprocessing') - print('[ydl_postprocess_hook] {}'.format(d['postprocessor'])) - if d['postprocessor'] == 'ffmpeg' and worker._using_warcprox(site): + worker.logger.info('[ydl_postprocess_hook] Finished postprocessing') + worker.logger.info('[ydl_postprocess_hook] postprocessor: {}'.format(d['postprocessor'])) + if d['postprocessor'] == 'FixupM3u8' and worker._using_warcprox(site): _YoutubeDL._push_stitched_up_vid_to_warcprox(_YoutubeDL, site, d['info_dict']) ydl_opts = { @@ -260,6 +260,9 @@ def _build_youtube_dl(worker, destdir, site): } if worker._proxy_for(site): ydl_opts["proxy"] = "http://{}".format(worker._proxy_for(site)) + if 'vimeo.com' in site.seed: + ydl_opts["format"] = "bv*+ba/b" + worker.logger.info("setting yt-dlp format to 'bv*+ba/b' for vimeo.com") ydl = _YoutubeDL(ydl_opts) if site.extra_headers(): ydl._opener.add_handler(ExtraHeaderAdder(site.extra_headers())) From d5e41bf9efdeb087873aa16a1becb0f050375287 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Tue, 22 Mar 2022 10:00:18 -0700 Subject: [PATCH 3/3] skip vimeo special case --- brozzler/ydl.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/brozzler/ydl.py b/brozzler/ydl.py index 2a9fac8..166172f 100644 --- a/brozzler/ydl.py +++ b/brozzler/ydl.py @@ -260,9 +260,6 @@ def _build_youtube_dl(worker, destdir, site): } if worker._proxy_for(site): ydl_opts["proxy"] = "http://{}".format(worker._proxy_for(site)) - if 'vimeo.com' in site.seed: - ydl_opts["format"] = "bv*+ba/b" - worker.logger.info("setting yt-dlp format to 'bv*+ba/b' for vimeo.com") ydl = _YoutubeDL(ydl_opts) if site.extra_headers(): ydl._opener.add_handler(ExtraHeaderAdder(site.extra_headers()))