mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-08 14:32:23 -04:00
VIDEO_DATA_SOURCE
This commit is contained in:
parent
f925660eb4
commit
fe5ad0c31d
1 changed file with 7 additions and 7 deletions
|
@ -42,7 +42,7 @@ PROXY_ATTEMPTS = 4
|
||||||
YTDLP_WAIT = 10
|
YTDLP_WAIT = 10
|
||||||
YTDLP_MAX_REDIRECTS = 5
|
YTDLP_MAX_REDIRECTS = 5
|
||||||
|
|
||||||
VIDEO_DATA = ""
|
VIDEO_DATA_SOURCE = os.getenv("VIDEO_DATA_SOURCE")
|
||||||
|
|
||||||
|
|
||||||
logger = structlog.get_logger(logger_name=__name__)
|
logger = structlog.get_logger(logger_name=__name__)
|
||||||
|
@ -421,23 +421,22 @@ def _try_youtube_dl(worker, ydl, site, page):
|
||||||
|
|
||||||
|
|
||||||
def get_video_captures(site, source="youtube"):
|
def get_video_captures(site, source="youtube"):
|
||||||
if not VIDEO_DATA:
|
if not VIDEO_DATA_SOURCE:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if VIDEO_DATA and VIDEO_DATA.startswith("postgresql"):
|
if VIDEO_DATA_SOURCE and VIDEO_DATA_SOURCE.startswith("postgresql"):
|
||||||
import psycopg
|
import psycopg
|
||||||
|
|
||||||
pg_url = VIDEO_DATA
|
|
||||||
account_id = site.account_id if site.account_id else None
|
account_id = site.account_id if site.account_id else None
|
||||||
seed = site.metadata.ait_seed_id if site.metadata.ait_seed_id else None
|
seed = site.metadata.ait_seed_id if site.metadata.ait_seed_id else None
|
||||||
if source == "youtube":
|
if source == "youtube":
|
||||||
containing_page_url_pattern = "http://youtube.com/watch"
|
containing_page_url_pattern = "http://youtube.com/watch" # yes, video data canonicalization uses "http"
|
||||||
# support other sources here
|
# support other sources here
|
||||||
else:
|
else:
|
||||||
containing_page_url_pattern = None
|
containing_page_url_pattern = None
|
||||||
if account_id and seed and source:
|
if account_id and seed and source:
|
||||||
pg_query = (
|
pg_query = (
|
||||||
"SELECT containing_page_url from video where account_id = %s and seed = %s and containing_page_url like %s",
|
"SELECT distinct(containing_page_url) from video where account_id = %s and seed = %s and containing_page_url like %s",
|
||||||
(
|
(
|
||||||
account_id,
|
account_id,
|
||||||
seed,
|
seed,
|
||||||
|
@ -451,10 +450,11 @@ def get_video_captures(site, source="youtube"):
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
with psycopg.connect(pg_url) as conn:
|
with psycopg.connect(VIDEO_DATA_SOURCE) as conn:
|
||||||
with conn.cursor(row_factory=psycopg.rows.scalar_row) as cur:
|
with conn.cursor(row_factory=psycopg.rows.scalar_row) as cur:
|
||||||
cur.execute(pg_query)
|
cur.execute(pg_query)
|
||||||
return cur.fetchall()
|
return cur.fetchall()
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
@metrics.brozzler_ytdlp_duration_seconds.time()
|
@metrics.brozzler_ytdlp_duration_seconds.time()
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue