From b04fe476f7dac2f783c7dd02ed4cb4392b1c73d4 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Mon, 9 Dec 2024 17:25:39 -0800 Subject: [PATCH 1/9] skip caps for non-constants --- brozzler/ydl.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/brozzler/ydl.py b/brozzler/ydl.py index 8955c8e..c66c50c 100644 --- a/brozzler/ydl.py +++ b/brozzler/ydl.py @@ -33,10 +33,11 @@ import time thread_local = threading.local() -YTDLP_PROXY = "" -MAX_YTDLP_ATTEMPTS = 1 -YTDLP_WAIT = 10 +ytdlp_proxy = "" +ytdlp_tmp = "/tmp" +ytdlp_wait = 10 +max_ytdlp_attempts = 1 def should_ytdlp(site, page, page_status, skip_av_seeds): # called only after we've passed needs_browsing() check @@ -284,12 +285,12 @@ def _build_youtube_dl(worker, destdir, site, page): ytdlp_url = page.redirect_url if page.redirect_url else page.url is_youtube_host = isyoutubehost(ytdlp_url) - if is_youtube_host and YTDLP_PROXY: - MAX_YTDLP_ATTEMPTS = 4 - ydl_opts["proxy"] = YTDLP_PROXY + if is_youtube_host and ytdlp_proxy: + max_ytdlp_attempts = 4 + ydl_opts["proxy"] = ytdlp_proxy # don't log proxy value secrets ytdlp_proxy_for_logs = ( - YTDLP_PROXY.split("@")[1] if "@" in YTDLP_PROXY else "@@@" + ytdlp_proxy.split("@")[1] if "@" in ytdlp_proxy else "@@@" ) logging.info("using yt-dlp proxy ... %s", ytdlp_proxy_for_logs) @@ -327,7 +328,7 @@ def _remember_videos(page, pushed_videos=None): def _try_youtube_dl(worker, ydl, site, page): attempt = 0 - while attempt < MAX_YTDLP_ATTEMPTS: + while attempt < max_ytdlp_attempts: try: logging.info("trying yt-dlp on %s", ydl.url) # should_download_vid = not ydl.is_youtube_host @@ -365,15 +366,15 @@ def _try_youtube_dl(worker, ydl, site, page): # OSError('Tunnel connection failed: 464 Host Not Allowed') (caused by ProxyError...) # and others... attempt += 1 - if attempt == MAX_YTDLP_ATTEMPTS: + if attempt == max_ytdlp_attempts: logging.warning( - "Failed after %s attempts. Error: %s", MAX_YTDLP_ATTEMPTS, e + "Failed after %s attempts. Error: %s", max_ytdlp_attempts, e ) raise brozzler.VideoExtractorError( "yt-dlp hit error extracting info for %s" % ydl.url ) else: - retry_wait = min(60, YTDLP_WAIT * (1.5 ** (attempt - 1))) + retry_wait = min(60, ytdlp_wait * (1.5 ** (attempt - 1))) logging.info( "Attempt %s failed. Retrying in %s seconds...", attempt, From 1af418c192054fcb7813c9e48f76866bc4531d1d Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Mon, 9 Dec 2024 17:43:37 -0800 Subject: [PATCH 2/9] black'd --- brozzler/ydl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/brozzler/ydl.py b/brozzler/ydl.py index c66c50c..38dfcd3 100644 --- a/brozzler/ydl.py +++ b/brozzler/ydl.py @@ -39,6 +39,7 @@ ytdlp_tmp = "/tmp" ytdlp_wait = 10 max_ytdlp_attempts = 1 + def should_ytdlp(site, page, page_status, skip_av_seeds): # called only after we've passed needs_browsing() check From 36d6a1e5abee33bc6806a64bb311a9a5ca7d6109 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Mon, 9 Dec 2024 18:45:25 -0800 Subject: [PATCH 3/9] define MAX_YTDLP_ATTEMPTS at head of file --- brozzler/ydl.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/brozzler/ydl.py b/brozzler/ydl.py index 38dfcd3..211676b 100644 --- a/brozzler/ydl.py +++ b/brozzler/ydl.py @@ -34,10 +34,10 @@ import time thread_local = threading.local() -ytdlp_proxy = "" -ytdlp_tmp = "/tmp" -ytdlp_wait = 10 -max_ytdlp_attempts = 1 +YTDLP_PROXY = "" +YTDLP_TMP = "/tmp" +YTDLP_WAIT = 10 +MAX_YTDLP_ATTEMPTS = 4 if YTDLP_PROXY else 1 def should_ytdlp(site, page, page_status, skip_av_seeds): @@ -286,12 +286,11 @@ def _build_youtube_dl(worker, destdir, site, page): ytdlp_url = page.redirect_url if page.redirect_url else page.url is_youtube_host = isyoutubehost(ytdlp_url) - if is_youtube_host and ytdlp_proxy: - max_ytdlp_attempts = 4 - ydl_opts["proxy"] = ytdlp_proxy + if is_youtube_host and YTDLP_PROXY: + ydl_opts["proxy"] = YTDLP_PROXY # don't log proxy value secrets ytdlp_proxy_for_logs = ( - ytdlp_proxy.split("@")[1] if "@" in ytdlp_proxy else "@@@" + YTDLP_PROXY.split("@")[1] if "@" in YTDLP_PROXY else "@@@" ) logging.info("using yt-dlp proxy ... %s", ytdlp_proxy_for_logs) @@ -329,7 +328,7 @@ def _remember_videos(page, pushed_videos=None): def _try_youtube_dl(worker, ydl, site, page): attempt = 0 - while attempt < max_ytdlp_attempts: + while attempt < MAX_YTDLP_ATTEMPTS: try: logging.info("trying yt-dlp on %s", ydl.url) # should_download_vid = not ydl.is_youtube_host @@ -367,15 +366,15 @@ def _try_youtube_dl(worker, ydl, site, page): # OSError('Tunnel connection failed: 464 Host Not Allowed') (caused by ProxyError...) # and others... attempt += 1 - if attempt == max_ytdlp_attempts: + if attempt == MAX_YTDLP_ATTEMPTS: logging.warning( - "Failed after %s attempts. Error: %s", max_ytdlp_attempts, e + "Failed after %s attempts. Error: %s", MAX_YTDLP_ATTEMPTS, e ) raise brozzler.VideoExtractorError( "yt-dlp hit error extracting info for %s" % ydl.url ) else: - retry_wait = min(60, ytdlp_wait * (1.5 ** (attempt - 1))) + retry_wait = min(60, YTDLP_WAIT * (1.5 ** (attempt - 1))) logging.info( "Attempt %s failed. Retrying in %s seconds...", attempt, @@ -423,7 +422,7 @@ def do_youtube_dl(worker, site, page): Returns: `list` of `str`: outlink urls """ - with tempfile.TemporaryDirectory(prefix="brzl-ydl-") as tempdir: + with tempfile.TemporaryDirectory(prefix="brzl-ydl-", dir=YTDLP_TMP) as tempdir: ydl = _build_youtube_dl(worker, tempdir, site, page) ie_result = _try_youtube_dl(worker, ydl, site, page) outlinks = set() From 8747bf9fd8ddb87a072f3ded8aea92329aa81cfa Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Mon, 9 Dec 2024 20:38:23 -0800 Subject: [PATCH 4/9] local var max_attempts --- brozzler/ydl.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/brozzler/ydl.py b/brozzler/ydl.py index 211676b..35c509f 100644 --- a/brozzler/ydl.py +++ b/brozzler/ydl.py @@ -37,7 +37,6 @@ thread_local = threading.local() YTDLP_PROXY = "" YTDLP_TMP = "/tmp" YTDLP_WAIT = 10 -MAX_YTDLP_ATTEMPTS = 4 if YTDLP_PROXY else 1 def should_ytdlp(site, page, page_status, skip_av_seeds): @@ -327,8 +326,9 @@ def _remember_videos(page, pushed_videos=None): def _try_youtube_dl(worker, ydl, site, page): + max_attempts = 4 if YTDLP_PROXY else 1 attempt = 0 - while attempt < MAX_YTDLP_ATTEMPTS: + while attempt < max_attempts: try: logging.info("trying yt-dlp on %s", ydl.url) # should_download_vid = not ydl.is_youtube_host @@ -366,9 +366,9 @@ def _try_youtube_dl(worker, ydl, site, page): # OSError('Tunnel connection failed: 464 Host Not Allowed') (caused by ProxyError...) # and others... attempt += 1 - if attempt == MAX_YTDLP_ATTEMPTS: + if attempt == max_attempts: logging.warning( - "Failed after %s attempts. Error: %s", MAX_YTDLP_ATTEMPTS, e + "Failed after %s attempts. Error: %s", max_attempts, e ) raise brozzler.VideoExtractorError( "yt-dlp hit error extracting info for %s" % ydl.url From 47d59970a7ddf922e9a2efe9a1e2b52472e36040 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Mon, 9 Dec 2024 20:41:44 -0800 Subject: [PATCH 5/9] ... if isyoutubehost --- brozzler/ydl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/brozzler/ydl.py b/brozzler/ydl.py index 35c509f..7a86fb8 100644 --- a/brozzler/ydl.py +++ b/brozzler/ydl.py @@ -326,7 +326,7 @@ def _remember_videos(page, pushed_videos=None): def _try_youtube_dl(worker, ydl, site, page): - max_attempts = 4 if YTDLP_PROXY else 1 + max_attempts = 4 if isyoutubehost(ydl.url) else 1 attempt = 0 while attempt < max_attempts: try: From a390c3fcac75d1d26686e9269d88ba9ae082749d Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Mon, 9 Dec 2024 20:48:30 -0800 Subject: [PATCH 6/9] if ydl.isyoutubehost --- brozzler/ydl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/brozzler/ydl.py b/brozzler/ydl.py index 7a86fb8..0a2037a 100644 --- a/brozzler/ydl.py +++ b/brozzler/ydl.py @@ -326,7 +326,7 @@ def _remember_videos(page, pushed_videos=None): def _try_youtube_dl(worker, ydl, site, page): - max_attempts = 4 if isyoutubehost(ydl.url) else 1 + max_attempts = 4 if ydl.isyoutubehost else 1 attempt = 0 while attempt < max_attempts: try: From d235b02abdf40de45e66796f635b6e79fc45aebb Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Tue, 10 Dec 2024 09:40:24 -0800 Subject: [PATCH 7/9] PROXY_ATTEMPTS constant --- brozzler/ydl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/brozzler/ydl.py b/brozzler/ydl.py index 0a2037a..96cbbc3 100644 --- a/brozzler/ydl.py +++ b/brozzler/ydl.py @@ -35,7 +35,7 @@ thread_local = threading.local() YTDLP_PROXY = "" -YTDLP_TMP = "/tmp" +PROXY_ATTEMPTS = 4 YTDLP_WAIT = 10 @@ -326,7 +326,7 @@ def _remember_videos(page, pushed_videos=None): def _try_youtube_dl(worker, ydl, site, page): - max_attempts = 4 if ydl.isyoutubehost else 1 + max_attempts = PROXY_ATTEMPTS if ydl.isyoutubehost else 1 attempt = 0 while attempt < max_attempts: try: From d8ebf2824ee6383577d70037701b54bf8e02fe06 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Thu, 12 Dec 2024 12:42:12 -0800 Subject: [PATCH 8/9] minor edits --- brozzler/ydl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/brozzler/ydl.py b/brozzler/ydl.py index 96cbbc3..a6190a1 100644 --- a/brozzler/ydl.py +++ b/brozzler/ydl.py @@ -368,7 +368,7 @@ def _try_youtube_dl(worker, ydl, site, page): attempt += 1 if attempt == max_attempts: logging.warning( - "Failed after %s attempts. Error: %s", max_attempts, e + "Failed after %s attempt(s). Error: %s", max_attempts, e ) raise brozzler.VideoExtractorError( "yt-dlp hit error extracting info for %s" % ydl.url @@ -422,7 +422,7 @@ def do_youtube_dl(worker, site, page): Returns: `list` of `str`: outlink urls """ - with tempfile.TemporaryDirectory(prefix="brzl-ydl-", dir=YTDLP_TMP) as tempdir: + with tempfile.TemporaryDirectory(prefix="brzl-ydl-") as tempdir: ydl = _build_youtube_dl(worker, tempdir, site, page) ie_result = _try_youtube_dl(worker, ydl, site, page) outlinks = set() From 5f82b8871dcb3de646c3c1eedeb05c5de1720c08 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Thu, 12 Dec 2024 12:54:57 -0800 Subject: [PATCH 9/9] if ydl.is_youtube_host --- brozzler/ydl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/brozzler/ydl.py b/brozzler/ydl.py index a6190a1..68fe133 100644 --- a/brozzler/ydl.py +++ b/brozzler/ydl.py @@ -326,7 +326,7 @@ def _remember_videos(page, pushed_videos=None): def _try_youtube_dl(worker, ydl, site, page): - max_attempts = PROXY_ATTEMPTS if ydl.isyoutubehost else 1 + max_attempts = PROXY_ATTEMPTS if ydl.is_youtube_host else 1 attempt = 0 while attempt < max_attempts: try: