keep it simple for now

This commit is contained in:
Barbara Miller 2025-06-30 14:38:21 -07:00
parent 046db4b6cc
commit 7d58a9ae3b

View file

@@ -72,13 +72,14 @@ class VideoDataClient:
return cur.fetchone() return cur.fetchone()
if fetchall: if fetchall:
return cur.fetchall() return cur.fetchall()
return None
def get_pg_video_captures(self, site=None, source=None) -> List[str]: def get_pg_video_captures(self, site=None, source=None) -> List[str]:
account_id = site.account_id if site.account_id else None account_id = site.account_id if site.account_id else None
seed = site.metadata.ait_seed_id if site.metadata.ait_seed_id else None seed = site.metadata.ait_seed_id if site.metadata.ait_seed_id else None
# TODO: generalize, maybe make variable? # TODO: generalize, maybe make variable?
containing_page_timestamp_pattern = "2025%" containing_page_timestamp_pattern = "2025%" # for future pre-dup additions
if source == "youtube": if source == "youtube":
containing_page_url_pattern = "http://youtube.com/watch%" # yes, video data canonicalization uses "http" containing_page_url_pattern = "http://youtube.com/watch%" # yes, video data canonicalization uses "http"
@@ -93,34 +94,18 @@ class VideoDataClient:
containing_page_url_pattern, containing_page_url_pattern,
), ),
) )
elif account_id and seed: elif seed and source: # account_id should usually be present
pg_query = (
"SELECT distinct(containing_page_url) from video where account_id = %s and seed = %s and containing_page_timestamp like %s",
(
account_id,
seed,
containing_page_timestamp_pattern,
),
)
elif seed and source:
pg_query = ( pg_query = (
"SELECT distinct(containing_page_url) from video where seed = %s and containing_page_url like %s", "SELECT distinct(containing_page_url) from video where seed = %s and containing_page_url like %s",
(seed, containing_page_url_pattern), (seed, containing_page_url_pattern),
) )
elif seed:
pg_query = (
"SELECT distinct(containing_page_url) from video where seed = %s and containing_page_timestamp like %s",
(
seed,
containing_page_timestamp_pattern,
),
)
try: try:
results = self._execute_query( results = self._execute_query(
pg_query, row_factory=psycopg.rows.scalar_row, fetchall=True pg_query, row_factory=psycopg.rows.scalar_row, fetchall=True
) )
except Exception as e: except Exception as e:
logger.warn("postgres query failed: %s", e) logger.warn("postgres query failed: %s", e)
results = []
return results return results