mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-09 15:02:28 -04:00
keep it simple for now
This commit is contained in:
parent
046db4b6cc
commit
7d58a9ae3b
1 changed file with 4 additions and 19 deletions
|
@@ -72,13 +72,14 @@ class VideoDataClient:
|
||||||
return cur.fetchone()
|
return cur.fetchone()
|
||||||
if fetchall:
|
if fetchall:
|
||||||
return cur.fetchall()
|
return cur.fetchall()
|
||||||
|
return None
|
||||||
|
|
||||||
def get_pg_video_captures(self, site=None, source=None) -> List[str]:
|
def get_pg_video_captures(self, site=None, source=None) -> List[str]:
|
||||||
account_id = site.account_id if site.account_id else None
|
account_id = site.account_id if site.account_id else None
|
||||||
seed = site.metadata.ait_seed_id if site.metadata.ait_seed_id else None
|
seed = site.metadata.ait_seed_id if site.metadata.ait_seed_id else None
|
||||||
|
|
||||||
# TODO: generalize, maybe make variable?
|
# TODO: generalize, maybe make variable?
|
||||||
containing_page_timestamp_pattern = "2025%"
|
containing_page_timestamp_pattern = "2025%" # for future pre-dup additions
|
||||||
|
|
||||||
if source == "youtube":
|
if source == "youtube":
|
||||||
containing_page_url_pattern = "http://youtube.com/watch%" # yes, video data canonicalization uses "http"
|
containing_page_url_pattern = "http://youtube.com/watch%" # yes, video data canonicalization uses "http"
|
||||||
|
@@ -93,34 +94,18 @@ class VideoDataClient:
|
||||||
containing_page_url_pattern,
|
containing_page_url_pattern,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
elif account_id and seed:
|
elif seed and source: # account_id should usually be present
|
||||||
pg_query = (
|
|
||||||
"SELECT distinct(containing_page_url) from video where account_id = %s and seed = %s and containing_page_timestamp like %s",
|
|
||||||
(
|
|
||||||
account_id,
|
|
||||||
seed,
|
|
||||||
containing_page_timestamp_pattern,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
elif seed and source:
|
|
||||||
pg_query = (
|
pg_query = (
|
||||||
"SELECT distinct(containing_page_url) from video where seed = %s and containing_page_url like %s",
|
"SELECT distinct(containing_page_url) from video where seed = %s and containing_page_url like %s",
|
||||||
(seed, containing_page_url_pattern),
|
(seed, containing_page_url_pattern),
|
||||||
)
|
)
|
||||||
elif seed:
|
|
||||||
pg_query = (
|
|
||||||
"SELECT distinct(containing_page_url) from video where seed = %s and containing_page_timestamp like %s",
|
|
||||||
(
|
|
||||||
seed,
|
|
||||||
containing_page_timestamp_pattern,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
try:
|
try:
|
||||||
results = self._execute_query(
|
results = self._execute_query(
|
||||||
pg_query, row_factory=psycopg.rows.scalar_row, fetchall=True
|
pg_query, row_factory=psycopg.rows.scalar_row, fetchall=True
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warn("postgres query failed: %s", e)
|
logger.warn("postgres query failed: %s", e)
|
||||||
|
results = []
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue