add ytdlp_tmp, and more...

This commit is contained in:
Barbara Miller 2024-12-09 17:21:21 -08:00
parent 905c11d795
commit a49b978c60

View File

@@ -33,9 +33,10 @@ import time
thread_local = threading.local() thread_local = threading.local()
YTDLP_PROXY = "" ytdlp_proxy = ""
MAX_YTDLP_ATTEMPTS = 4 ytdlp_tmp = "/tmp"
YTDLP_WAIT = 10 ytdlp_wait = 10
max_ytdlp_attempts = 4
def should_ytdlp(site, page, page_status, skip_av_seeds): def should_ytdlp(site, page, page_status, skip_av_seeds):
@@ -284,11 +285,11 @@ def _build_youtube_dl(worker, destdir, site, page):
ytdlp_url = page.redirect_url if page.redirect_url else page.url ytdlp_url = page.redirect_url if page.redirect_url else page.url
is_youtube_host = isyoutubehost(ytdlp_url) is_youtube_host = isyoutubehost(ytdlp_url)
if is_youtube_host and YTDLP_PROXY: if is_youtube_host and ytdlp_proxy:
ydl_opts["proxy"] = YTDLP_PROXY ydl_opts["proxy"] = ytdlp_proxy
# don't log proxy value secrets # don't log proxy value secrets
ytdlp_proxy_for_logs = ( ytdlp_proxy_for_logs = (
YTDLP_PROXY.split("@")[1] if "@" in YTDLP_PROXY else "@@@" ytdlp_proxy.split("@")[1] if "@" in ytdlp_proxy else "@@@"
) )
logging.info("using yt-dlp proxy ... %s", ytdlp_proxy_for_logs) logging.info("using yt-dlp proxy ... %s", ytdlp_proxy_for_logs)
@@ -326,7 +327,7 @@ def _remember_videos(page, pushed_videos=None):
def _try_youtube_dl(worker, ydl, site, page): def _try_youtube_dl(worker, ydl, site, page):
attempt = 0 attempt = 0
while attempt < MAX_YTDLP_ATTEMPTS: while attempt < max_ytdlp_attempts:
try: try:
logging.info("trying yt-dlp on %s", ydl.url) logging.info("trying yt-dlp on %s", ydl.url)
# should_download_vid = not ydl.is_youtube_host # should_download_vid = not ydl.is_youtube_host
@@ -364,15 +365,15 @@ def _try_youtube_dl(worker, ydl, site, page):
# OSError('Tunnel connection failed: 464 Host Not Allowed') (caused by ProxyError...) # OSError('Tunnel connection failed: 464 Host Not Allowed') (caused by ProxyError...)
# and others... # and others...
attempt += 1 attempt += 1
if attempt == MAX_YTDLP_ATTEMPTS: if attempt == max_ytdlp_attempts:
logging.warning( logging.warning(
"Failed after %s attempts. Error: %s", MAX_YTDLP_ATTEMPTS, e "Failed after %s attempts. Error: %s", max_ytdlp_attempts, e
) )
raise brozzler.VideoExtractorError( raise brozzler.VideoExtractorError(
"yt-dlp hit error extracting info for %s" % ydl.url "yt-dlp hit error extracting info for %s" % ydl.url
) )
else: else:
retry_wait = min(60, YTDLP_WAIT * (1.5 ** (attempt - 1))) retry_wait = min(60, ytdlp_wait * (1.5 ** (attempt - 1)))
logging.info( logging.info(
"Attempt %s failed. Retrying in %s seconds...", "Attempt %s failed. Retrying in %s seconds...",
attempt, attempt,
@@ -420,7 +421,7 @@ def do_youtube_dl(worker, site, page):
Returns: Returns:
`list` of `str`: outlink urls `list` of `str`: outlink urls
""" """
with tempfile.TemporaryDirectory(prefix="brzl-ydl-", dir="/tmp") as tempdir: with tempfile.TemporaryDirectory(prefix="brzl-ydl-", dir=ytdlp_tmp) as tempdir:
ydl = _build_youtube_dl(worker, tempdir, site, page) ydl = _build_youtube_dl(worker, tempdir, site, page)
ie_result = _try_youtube_dl(worker, ydl, site, page) ie_result = _try_youtube_dl(worker, ydl, site, page)
outlinks = set() outlinks = set()