mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-10 15:30:18 -04:00
check captured_youtube_watch_pages not None
This commit is contained in:
parent
4a73b1e8d8
commit
de56dc3d13
1 changed file with 25 additions and 17 deletions
|
@@ -635,27 +635,35 @@ def do_youtube_dl(worker, site, page, ytdlp_proxy_endpoints):
|
||||||
site["metadata"]["ait_seed_id"],
|
site["metadata"]["ait_seed_id"],
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
captured_youtube_watch_pages = set()
|
captured_youtube_watch_pages = (
|
||||||
captured_youtube_watch_pages.update(
|
|
||||||
worker._video_data.get_video_captures(site, source="youtube")
|
worker._video_data.get_video_captures(site, source="youtube")
|
||||||
)
|
)
|
||||||
uncaptured_youtube_watch_pages = []
|
if captured_youtube_watch_pages:
|
||||||
for e in ie_result.get("entries_no_dl", []):
|
logger.info(
|
||||||
# note: http needed for match
|
"found %s previously captured youtube watch pages for account %s, seed_id %s",
|
||||||
youtube_watch_url = str(
|
len(captured_youtube_watch_pages),
|
||||||
urlcanon.aggressive(
|
site["metadata"]["ait_account_id"],
|
||||||
f"http://www.youtube.com/watch?v={e['id']}"
|
site["metadata"]["ait_seed_id"],
|
||||||
)
|
|
||||||
)
|
)
|
||||||
if youtube_watch_url in captured_youtube_watch_pages:
|
captured_watch_pages = set()
|
||||||
logger.info(
|
captured_watch_pages.update(captured_youtube_watch_pages)
|
||||||
"skipping adding %s to yt-dlp outlinks",
|
uncaptured_watch_pages = []
|
||||||
youtube_watch_url,
|
for e in ie_result.get("entries_no_dl", []):
|
||||||
|
# note: http matches, not https
|
||||||
|
youtube_watch_url = str(
|
||||||
|
urlcanon.aggressive(
|
||||||
|
f"http://www.youtube.com/watch?v={e['id']}"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if youtube_watch_url in captured_watch_pages:
|
||||||
|
logger.info(
|
||||||
|
"skipping adding %s to yt-dlp outlinks",
|
||||||
|
youtube_watch_url,
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
uncaptured_watch_pages.append(
|
||||||
|
f"https://www.youtube.com/watch?v={e['id']}"
|
||||||
)
|
)
|
||||||
continue
|
|
||||||
uncaptured_youtube_watch_pages.append(
|
|
||||||
f"https://www.youtube.com/watch?v={e['id']}"
|
|
||||||
)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("hit exception processing worker._video_data: %s", e)
|
logger.warning("hit exception processing worker._video_data: %s", e)
|
||||||
if uncaptured_youtube_watch_pages:
|
if uncaptured_youtube_watch_pages:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue