From 2aa17886372c9fb1ff3488ad17ada00b962a6782 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Wed, 18 Sep 2024 15:58:19 -0700 Subject: [PATCH] mostly black'd --- brozzler/ydl.py | 51 +++++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/brozzler/ydl.py b/brozzler/ydl.py index 7368534..bc59ffd 100644 --- a/brozzler/ydl.py +++ b/brozzler/ydl.py @@ -34,7 +34,7 @@ import time thread_local = threading.local() PROXYRACK_PROXY = "@@@" -MAX_YTDLP_ATTEMPTS = 3 +MAX_YTDLP_ATTEMPTS = 4 YTDLP_WAIT = 10 @@ -184,28 +184,31 @@ def _build_youtube_dl(worker, destdir, site, page): worker._proxy_for(site), url, ) - with open(info_dict["filepath"], "rb") as f: - # include content-length header to avoid chunked - # transfer, which warcprox currently rejects - extra_headers = dict(site.extra_headers()) - extra_headers["content-length"] = size - request, response = worker._warcprox_write_record( - warcprox_address=worker._proxy_for(site), - url=url, - warc_type="resource", - content_type=mimetype, - payload=f, - extra_headers=extra_headers, + try: + with open(info_dict["filepath"], "rb") as f: + # include content-length header to avoid chunked + # transfer, which warcprox currently rejects + extra_headers = dict(site.extra_headers()) + extra_headers["content-length"] = size + request, response = worker._warcprox_write_record( + warcprox_address=worker._proxy_for(site), + url=url, + warc_type="resource", + content_type=mimetype, + payload=f, + extra_headers=extra_headers, + ) + # consulted by _remember_videos() + ydl.pushed_videos.append( + { + "url": url, + "response_code": response.code, + "content-type": mimetype, + "content-length": size, + } ) - # consulted by _remember_videos() - ydl.pushed_videos.append( - { - "url": url, - "response_code": response.code, - "content-type": mimetype, - "content-length": size, - } - ) + except: + traceback.print_exc() def maybe_heartbeat_site_last_claimed(*args, **kwargs): # in case yt-dlp takes a long time, heartbeat site.last_claimed @@ -306,7 +309,9 @@ def _remember_videos(page, pushed_videos=None): def _try_youtube_dl(worker, ydl, site, page): ytdlp_url = page.redirect_url if page.redirect_url else page.url - youtube_host = "youtube.com" in ytdlp_url.split("//")[-1].split("/")[0].split("?")[0] + youtube_host = ( + "youtube.com" in ytdlp_url.split("//")[-1].split("/")[0].split("?")[0] + ) attempt = 0 while attempt < MAX_YTDLP_ATTEMPTS: try: