mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-07-23 15:00:36 -04:00
Merge branch 'video_predup' into qa
This commit is contained in:
commit
c669d67539
3 changed files with 6 additions and 4 deletions
|
@@ -252,7 +252,7 @@ class BrozzlerWorker:
|
||||||
except brozzler.PageInterstitialShown:
|
except brozzler.PageInterstitialShown:
|
||||||
self.logger.info("page interstitial shown (http auth): %s", page)
|
self.logger.info("page interstitial shown (http auth): %s", page)
|
||||||
|
|
||||||
if enable_youtube_dl and ydl.should_ytdlp(page, site):
|
if enable_youtube_dl and ydl.should_ytdlp(self, page, site):
|
||||||
try:
|
try:
|
||||||
ydl_outlinks = ydl.do_youtube_dl(self, site, page)
|
ydl_outlinks = ydl.do_youtube_dl(self, site, page)
|
||||||
except brozzler.ReachedLimit as e:
|
except brozzler.ReachedLimit as e:
|
||||||
|
|
|
@@ -62,9 +62,10 @@ def _timestamp4datetime(timestamp):
|
||||||
int(timestamp[-2:])
|
int(timestamp[-2:])
|
||||||
)
|
)
|
||||||
|
|
||||||
def should_ytdlp(page, site):
|
def should_ytdlp(worker, page, site):
|
||||||
ytdlp_url = page.redirect_url if page.redirect_url else page.url
|
ytdlp_url = page.redirect_url if page.redirect_url else page.url
|
||||||
ytdlp_seed = site.get("warcprox-meta", {}).get("metadata", {}).get("ait_seed_id", "")
|
ytdlp_seed = site.seed_id if site.seed_id else None
|
||||||
|
# ytdlp_seed = site.get(site.id).pluck("metadata", "ait_seed_id").default(None) if site.rr else None ???
|
||||||
logging.info("ytdlp_seed: %s", ytdlp_seed)
|
logging.info("ytdlp_seed: %s", ytdlp_seed)
|
||||||
|
|
||||||
if ytdlp_seed and "youtube.com/watch?v" in ytdlp_url:
|
if ytdlp_seed and "youtube.com/watch?v" in ytdlp_url:
|
||||||
|
|
3
setup.py
3
setup.py
|
@@ -34,7 +34,7 @@ def find_package_data(package):
|
||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name="brozzler",
|
name="brozzler",
|
||||||
version="1.5.47a1",
|
version="1.5.47a2",
|
||||||
description="Distributed web crawling with browsers",
|
description="Distributed web crawling with browsers",
|
||||||
url="https://github.com/internetarchive/brozzler",
|
url="https://github.com/internetarchive/brozzler",
|
||||||
author="Noah Levitt",
|
author="Noah Levitt",
|
||||||
|
@@ -67,6 +67,7 @@ setuptools.setup(
|
||||||
install_requires=[
|
install_requires=[
|
||||||
"PyYAML>=5.1",
|
"PyYAML>=5.1",
|
||||||
"yt_dlp<2023.11.16",
|
"yt_dlp<2023.11.16",
|
||||||
|
"cassandra-driver==3.29.1"
|
||||||
"reppy==0.3.4",
|
"reppy==0.3.4",
|
||||||
"requests>=2.21",
|
"requests>=2.21",
|
||||||
"websocket-client>=0.39.0,<=0.48.0",
|
"websocket-client>=0.39.0,<=0.48.0",
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue