mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-08 06:22:23 -04:00
VIDEO_DATA_SOURCE
This commit is contained in:
parent
f925660eb4
commit
fe5ad0c31d
1 changed files with 7 additions and 7 deletions
|
@ -42,7 +42,7 @@ PROXY_ATTEMPTS = 4
|
|||
YTDLP_WAIT = 10
|
||||
YTDLP_MAX_REDIRECTS = 5
|
||||
|
||||
VIDEO_DATA = ""
|
||||
VIDEO_DATA_SOURCE = os.getenv("VIDEO_DATA_SOURCE")
|
||||
|
||||
|
||||
logger = structlog.get_logger(logger_name=__name__)
|
||||
|
@ -421,23 +421,22 @@ def _try_youtube_dl(worker, ydl, site, page):
|
|||
|
||||
|
||||
def get_video_captures(site, source="youtube"):
|
||||
if not VIDEO_DATA:
|
||||
if not VIDEO_DATA_SOURCE:
|
||||
return None
|
||||
|
||||
if VIDEO_DATA and VIDEO_DATA.startswith("postgresql"):
|
||||
if VIDEO_DATA_SOURCE and VIDEO_DATA_SOURCE.startswith("postgresql"):
|
||||
import psycopg
|
||||
|
||||
pg_url = VIDEO_DATA
|
||||
account_id = site.account_id if site.account_id else None
|
||||
seed = site.metadata.ait_seed_id if site.metadata.ait_seed_id else None
|
||||
if source == "youtube":
|
||||
containing_page_url_pattern = "http://youtube.com/watch"
|
||||
containing_page_url_pattern = "http://youtube.com/watch" # yes, video data canonicalization uses "http"
|
||||
# support other sources here
|
||||
else:
|
||||
containing_page_url_pattern = None
|
||||
if account_id and seed and source:
|
||||
pg_query = (
|
||||
"SELECT containing_page_url from video where account_id = %s and seed = %s and containing_page_url like %s",
|
||||
"SELECT distinct(containing_page_url) from video where account_id = %s and seed = %s and containing_page_url like %s",
|
||||
(
|
||||
account_id,
|
||||
seed,
|
||||
|
@ -451,10 +450,11 @@ def get_video_captures(site, source="youtube"):
|
|||
)
|
||||
else:
|
||||
return None
|
||||
with psycopg.connect(pg_url) as conn:
|
||||
with psycopg.connect(VIDEO_DATA_SOURCE) as conn:
|
||||
with conn.cursor(row_factory=psycopg.rows.scalar_row) as cur:
|
||||
cur.execute(pg_query)
|
||||
return cur.fetchall()
|
||||
return None
|
||||
|
||||
|
||||
@metrics.brozzler_ytdlp_duration_seconds.time()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue