VIDEO_DATA_SOURCE

This commit is contained in:
Barbara Miller 2025-06-23 16:46:37 -07:00
parent f925660eb4
commit fe5ad0c31d

View file

@@ -42,7 +42,7 @@ PROXY_ATTEMPTS = 4
YTDLP_WAIT = 10
YTDLP_MAX_REDIRECTS = 5
VIDEO_DATA = ""
VIDEO_DATA_SOURCE = os.getenv("VIDEO_DATA_SOURCE")
logger = structlog.get_logger(logger_name=__name__)
@@ -421,23 +421,22 @@ def _try_youtube_dl(worker, ydl, site, page):
def get_video_captures(site, source="youtube"):
if not VIDEO_DATA:
if not VIDEO_DATA_SOURCE:
return None
if VIDEO_DATA and VIDEO_DATA.startswith("postgresql"):
if VIDEO_DATA_SOURCE and VIDEO_DATA_SOURCE.startswith("postgresql"):
import psycopg
pg_url = VIDEO_DATA
account_id = site.account_id if site.account_id else None
seed = site.metadata.ait_seed_id if site.metadata.ait_seed_id else None
if source == "youtube":
containing_page_url_pattern = "http://youtube.com/watch"
containing_page_url_pattern = "http://youtube.com/watch" # yes, video data canonicalization uses "http"
# support other sources here
else:
containing_page_url_pattern = None
if account_id and seed and source:
pg_query = (
"SELECT containing_page_url from video where account_id = %s and seed = %s and containing_page_url like %s",
"SELECT distinct(containing_page_url) from video where account_id = %s and seed = %s and containing_page_url like %s",
(
account_id,
seed,
@@ -451,10 +450,11 @@ def get_video_captures(site, source="youtube"):
)
else:
return None
with psycopg.connect(pg_url) as conn:
with psycopg.connect(VIDEO_DATA_SOURCE) as conn:
with conn.cursor(row_factory=psycopg.rows.scalar_row) as cur:
cur.execute(pg_query)
return cur.fetchall()
return None
@metrics.brozzler_ytdlp_duration_seconds.time()