Merge branch 'video_predup' into qa

This commit is contained in:
Barbara Miller 2024-04-05 11:28:57 -07:00
commit c669d67539
3 changed files with 6 additions and 4 deletions

View File

@@ -252,7 +252,7 @@ class BrozzlerWorker:
except brozzler.PageInterstitialShown:
self.logger.info("page interstitial shown (http auth): %s", page)
if enable_youtube_dl and ydl.should_ytdlp(page, site):
if enable_youtube_dl and ydl.should_ytdlp(self, page, site):
try:
ydl_outlinks = ydl.do_youtube_dl(self, site, page)
except brozzler.ReachedLimit as e:

View File

@@ -62,9 +62,10 @@ def _timestamp4datetime(timestamp):
int(timestamp[-2:])
)
def should_ytdlp(page, site):
def should_ytdlp(worker, page, site):
ytdlp_url = page.redirect_url if page.redirect_url else page.url
ytdlp_seed = site.get("warcprox-meta", {}).get("metadata", {}).get("ait_seed_id", "")
ytdlp_seed = site.seed_id if site.seed_id else None
# ytdlp_seed = site.get(site.id).pluck("metadata", "ait_seed_id").default(None) if site.rr else None ???
logging.info("ytdlp_seed: %s", ytdlp_seed)
if ytdlp_seed and "youtube.com/watch?v" in ytdlp_url:

View File

@@ -34,7 +34,7 @@ def find_package_data(package):
setuptools.setup(
name="brozzler",
version="1.5.47a1",
version="1.5.47a2",
description="Distributed web crawling with browsers",
url="https://github.com/internetarchive/brozzler",
author="Noah Levitt",
@@ -67,6 +67,7 @@ setuptools.setup(
install_requires=[
"PyYAML>=5.1",
"yt_dlp<2023.11.16",
"cassandra-driver==3.29.1"
"reppy==0.3.4",
"requests>=2.21",
"websocket-client>=0.39.0,<=0.48.0",