initial interface update

This commit is contained in:
Barbara Miller 2025-06-23 19:43:38 -07:00
parent 667feae559
commit f21d312ca9

View file

@@ -421,41 +421,41 @@ def _try_youtube_dl(worker, ydl, site, page):
return ie_result return ie_result
def get_video_captures(site, source="youtube") -> List[str]: class VideoDataClient:
if not VIDEO_DATA_SOURCE: import psycopg
return [] from psycopg_pool import ConnectionPool
if VIDEO_DATA_SOURCE and VIDEO_DATA_SOURCE.startswith("postgresql"): def __init__(self, site=None):
import psycopg if VIDEO_DATA_SOURCE and VIDEO_DATA_SOURCE.startswith("postgresql"):
self.pool = ConnectionPool(VIDEO_DATA_SOURCE, min_size=1, max_size=9)
self.account_id = site.account_id if site.account_id else None
self.seed = site.metadata.ait_seed_id if site.metadata.ait_seed_id else None
account_id = site.account_id if site.account_id else None def get_video_captures_from_db(self, source="youtube") -> List[str]:
seed = site.metadata.ait_seed_id if site.metadata.ait_seed_id else None
if source == "youtube": if source == "youtube":
containing_page_url_pattern = "http://youtube.com/watch%" # yes, video data canonicalization uses "http" containing_page_url_pattern = "http://youtube.com/watch%" # yes, video data canonicalization uses "http"
# support other sources here # support other sources here
else: else:
containing_page_url_pattern = None containing_page_url_pattern = None
if account_id and seed and source: if self.account_id and self.seed and source:
pg_query = ( pg_query = (
"SELECT distinct(containing_page_url) from video where account_id = %s and seed = %s and containing_page_url like %s", "SELECT distinct(containing_page_url) from video where account_id = %s and seed = %s and containing_page_url like %s",
( (
account_id, self.account_id,
seed, self.seed,
containing_page_url_pattern, containing_page_url_pattern,
), ),
) )
elif seed and source: elif self.seed and source:
pg_query = ( pg_query = (
"SELECT containing_page_url from video where seed = %s and containing_page_url like %s", "SELECT containing_page_url from video where seed = %s and containing_page_url like %s",
(seed, containing_page_url_pattern), (self.seed, containing_page_url_pattern),
) )
else:
return [] with self.pool.connection() as conn:
with psycopg.connect(VIDEO_DATA_SOURCE) as conn:
with conn.cursor(row_factory=psycopg.rows.scalar_row) as cur: with conn.cursor(row_factory=psycopg.rows.scalar_row) as cur:
cur.execute(pg_query) cur.execute(pg_query)
return cur.fetchall() return cur.fetchall()
return []
@metrics.brozzler_ytdlp_duration_seconds.time() @metrics.brozzler_ytdlp_duration_seconds.time()
@@ -485,7 +485,9 @@ def do_youtube_dl(worker, site, page, ytdlp_proxy_endpoints):
or ie_result.get("extractor") == "youtube:tab" or ie_result.get("extractor") == "youtube:tab"
): ):
captured_youtube_watch_pages = set() captured_youtube_watch_pages = set()
captured_youtube_watch_pages.add(get_video_captures(site, source="youtube")) captured_youtube_watch_pages.add(
VideoDataClient.get_video_captures(site, source="youtube")
)
uncaptured_youtube_watch_pages = [] uncaptured_youtube_watch_pages = []
for e in ie_result.get("entries_no_dl", []): for e in ie_result.get("entries_no_dl", []):
youtube_watch_url = f"https://www.youtube.com/watch?v={e['id']}" youtube_watch_url = f"https://www.youtube.com/watch?v={e['id']}"