mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-20 23:56:34 -04:00
ytdlp_seed = site["metadata"]["ait_seed_id"] (?!)
This commit is contained in:
parent
dfd4d7da84
commit
8c4f774504
@ -111,7 +111,6 @@ def new_job(frontier, job_conf):
|
||||
merged_conf["seed"] = merged_conf.pop("url")
|
||||
site = brozzler.Site(frontier.rr, merged_conf)
|
||||
site.id = str(uuid.uuid4())
|
||||
site.seed_id = seed_conf['metadata']['ait_seed_id']
|
||||
sites.append(site)
|
||||
pages.append(new_seed_page(frontier, site))
|
||||
|
||||
|
@ -64,11 +64,11 @@ def _timestamp4datetime(timestamp):
|
||||
|
||||
def should_ytdlp(worker, page, site):
|
||||
ytdlp_url = page.redirect_url if page.redirect_url else page.url
|
||||
ytdlp_seed = site.seed_id if site.seed_id else None
|
||||
# ytdlp_seed = site.get(site.id).pluck("metadata", "ait_seed_id").default(None) if site.rr else None ???
|
||||
logging.info("ytdlp_seed: %s", ytdlp_seed)
|
||||
ytdlp_seed = site["metadata"]["ait_seed_id"]
|
||||
logging.info("checking containing page %r, site %r", ytdlp_url, ytdlp_seed)
|
||||
|
||||
if ytdlp_seed and "youtube.com/watch?v" in ytdlp_url:
|
||||
logging.info("found youtube watch page %r", ytdlp_url)
|
||||
# connect to bmiller-dev cluster, keyspace video; we can modify default timeout in cassandra.yaml
|
||||
cluster = Cluster(["207.241.235.189"], protocol_version=5)
|
||||
session = cluster.connect("video")
|
||||
|
Loading…
x
Reference in New Issue
Block a user