From 7d58a9ae3b7708dc42c874a0ef72f9cec216e4fc Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Mon, 30 Jun 2025 14:38:21 -0700 Subject: [PATCH] keep it simple for now --- brozzler/ydl.py | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/brozzler/ydl.py b/brozzler/ydl.py index 423dfe8..a5541f0 100644 --- a/brozzler/ydl.py +++ b/brozzler/ydl.py @@ -72,13 +72,14 @@ class VideoDataClient: return cur.fetchone() if fetchall: return cur.fetchall() + return None def get_pg_video_captures(self, site=None, source=None) -> List[str]: account_id = site.account_id if site.account_id else None seed = site.metadata.ait_seed_id if site.metadata.ait_seed_id else None # TODO: generalize, maybe make variable? - containing_page_timestamp_pattern = "2025%" + containing_page_timestamp_pattern = "2025%" # for future pre-dup additions if source == "youtube": containing_page_url_pattern = "http://youtube.com/watch%" # yes, video data canonicalization uses "http" @@ -93,34 +94,18 @@ class VideoDataClient: containing_page_url_pattern, ), ) - elif account_id and seed: - pg_query = ( - "SELECT distinct(containing_page_url) from video where account_id = %s and seed = %s and containing_page_timestamp like %s", - ( - account_id, - seed, - containing_page_timestamp_pattern, - ), - ) - elif seed and source: + elif seed and source: # account_id should usually be present pg_query = ( "SELECT distinct(containing_page_url) from video where seed = %s and containing_page_url like %s", (seed, containing_page_url_pattern), ) - elif seed: - pg_query = ( - "SELECT distinct(containing_page_url) from video where seed = %s and containing_page_timestamp like %s", - ( - seed, - containing_page_timestamp_pattern, - ), - ) try: results = self._execute_query( pg_query, row_factory=psycopg.rows.scalar_row, fetchall=True ) except Exception as e: logger.warn("postgres query failed: %s", e) + results = [] return results