def _like_contains_pattern(fragment):
    """Return a SQL LIKE pattern matching values that contain *fragment* literally.

    ``%`` and ``_`` are LIKE wildcards and backslash is PostgreSQL's default
    LIKE escape character, so all three are escaped before the fragment is
    wrapped in ``%...%``. ``None`` is passed through unchanged (it becomes SQL
    NULL when bound as a parameter).
    """
    if fragment is None:
        return None
    escaped = (
        fragment.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    )
    return "%" + escaped + "%"


def get_video_captures(site, source=None):
    """Look up already-captured video page URLs for a site's seed.

    Args:
        site: object exposing ``account_id`` and ``metadata.ait_seed_id``.
        source: optional substring that ``containing_page_url`` must contain.

    Returns:
        ``None`` when the site has no seed id (no query is issued); otherwise
        the result of ``cursor.fetchall()`` for the query, i.e. the matching
        rows with a single ``containing_page_url`` column each.
    """
    account_id = site.account_id or None
    seed = site.metadata.ait_seed_id or None
    if not seed:
        # Both query variants need a seed; without one there is nothing to ask.
        return None

    # Build the LIKE pattern in Python and bind it as a single parameter.
    # The previous SQL text ('%'+%s+'%') left bare '%' characters in a
    # parameterized query and used '+' for concatenation, neither of which
    # is valid for psycopg / PostgreSQL.
    # NOTE(review): when source is None the pattern is SQL NULL, so the LIKE
    # predicate matches no rows -- confirm whether a missing source should
    # instead drop the URL filter entirely.
    pattern = _like_contains_pattern(source)

    if account_id and source:
        query = (
            "SELECT containing_page_url FROM video "
            "WHERE account_id = %s AND seed = %s "
            "AND containing_page_url LIKE %s"
        )
        params = (account_id, seed, pattern)
    else:
        query = (
            "SELECT containing_page_url FROM video "
            "WHERE seed = %s AND containing_page_url LIKE %s"
        )
        params = (seed, pattern)

    # Imported lazily so the early-return path above needs neither the
    # driver nor the environment.
    import os

    import psycopg

    # NOTE(review): the fallback URL embeds credentials in source control;
    # prefer setting BROZZLER_VIDEO_PG_URL and removing the default.
    pg_url = os.environ.get("BROZZLER_VIDEO_PG_URL") or (
        "postgresql://ait_crawling:archive-it-crawling@db.qa-archive-it.org/ait_crawling"
    )

    with psycopg.connect(pg_url) as conn:
        # NOTE(review): the statement that creates `cur` sits between two diff
        # hunks and is not visible here; a cursor context manager is assumed.
        with conn.cursor() as cur:
            # Query text and parameters must be passed as separate arguments.
            cur.execute(query, params)
            return cur.fetchall()