check captured_youtube_watch_pages not None

This commit is contained in:
Barbara Miller 2025-07-28 15:41:07 -07:00
parent 4a73b1e8d8
commit de56dc3d13

View file

@@ -635,27 +635,35 @@ def do_youtube_dl(worker, site, page, ytdlp_proxy_endpoints):
site["metadata"]["ait_seed_id"], site["metadata"]["ait_seed_id"],
) )
try: try:
captured_youtube_watch_pages = set() captured_youtube_watch_pages = (
captured_youtube_watch_pages.update(
worker._video_data.get_video_captures(site, source="youtube") worker._video_data.get_video_captures(site, source="youtube")
) )
uncaptured_youtube_watch_pages = [] if captured_youtube_watch_pages:
for e in ie_result.get("entries_no_dl", []): logger.info(
# note: http needed for match "found %s previously captured youtube watch pages for account %s, seed_id %s",
youtube_watch_url = str( len(captured_youtube_watch_pages),
urlcanon.aggressive( site["metadata"]["ait_account_id"],
f"http://www.youtube.com/watch?v={e['id']}" site["metadata"]["ait_seed_id"],
)
) )
if youtube_watch_url in captured_youtube_watch_pages: captured_watch_pages = set()
logger.info( captured_watch_pages.update(captured_youtube_watch_pages)
"skipping adding %s to yt-dlp outlinks", uncaptured_watch_pages = []
youtube_watch_url, for e in ie_result.get("entries_no_dl", []):
# note: http matches, not https
youtube_watch_url = str(
urlcanon.aggressive(
f"http://www.youtube.com/watch?v={e['id']}"
)
)
if youtube_watch_url in captured_watch_pages:
logger.info(
"skipping adding %s to yt-dlp outlinks",
youtube_watch_url,
)
continue
uncaptured_watch_pages.append(
f"https://www.youtube.com/watch?v={e['id']}"
) )
continue
uncaptured_youtube_watch_pages.append(
f"https://www.youtube.com/watch?v={e['id']}"
)
except Exception as e: except Exception as e:
logger.warning("hit exception processing worker._video_data: %s", e) logger.warning("hit exception processing worker._video_data: %s", e)
if uncaptured_youtube_watch_pages: if uncaptured_youtube_watch_pages: