mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 16:49:56 -05:00
Merge branch 'video_predup' into qa
This commit is contained in:
commit
c669d67539
@ -252,7 +252,7 @@ class BrozzlerWorker:
|
||||
except brozzler.PageInterstitialShown:
|
||||
self.logger.info("page interstitial shown (http auth): %s", page)
|
||||
|
||||
if enable_youtube_dl and ydl.should_ytdlp(page, site):
|
||||
if enable_youtube_dl and ydl.should_ytdlp(self, page, site):
|
||||
try:
|
||||
ydl_outlinks = ydl.do_youtube_dl(self, site, page)
|
||||
except brozzler.ReachedLimit as e:
|
||||
|
@ -62,9 +62,10 @@ def _timestamp4datetime(timestamp):
|
||||
int(timestamp[-2:])
|
||||
)
|
||||
|
||||
def should_ytdlp(page, site):
|
||||
def should_ytdlp(worker, page, site):
|
||||
ytdlp_url = page.redirect_url if page.redirect_url else page.url
|
||||
ytdlp_seed = site.get("warcprox-meta", {}).get("metadata", {}).get("ait_seed_id", "")
|
||||
ytdlp_seed = site.seed_id if site.seed_id else None
|
||||
# ytdlp_seed = site.get(site.id).pluck("metadata", "ait_seed_id").default(None) if site.rr else None ???
|
||||
logging.info("ytdlp_seed: %s", ytdlp_seed)
|
||||
|
||||
if ytdlp_seed and "youtube.com/watch?v" in ytdlp_url:
|
||||
|
3
setup.py
3
setup.py
@ -34,7 +34,7 @@ def find_package_data(package):
|
||||
|
||||
setuptools.setup(
|
||||
name="brozzler",
|
||||
version="1.5.47a1",
|
||||
version="1.5.47a2",
|
||||
description="Distributed web crawling with browsers",
|
||||
url="https://github.com/internetarchive/brozzler",
|
||||
author="Noah Levitt",
|
||||
@ -67,6 +67,7 @@ setuptools.setup(
|
||||
install_requires=[
|
||||
"PyYAML>=5.1",
|
||||
"yt_dlp<2023.11.16",
|
||||
"cassandra-driver==3.29.1",
|
||||
"reppy==0.3.4",
|
||||
"requests>=2.21",
|
||||
"websocket-client>=0.39.0,<=0.48.0",
|
||||
|
Loading…
x
Reference in New Issue
Block a user