diff --git a/brozzler/model.py b/brozzler/model.py index bc05099..b0f216d 100644 --- a/brozzler/model.py +++ b/brozzler/model.py @@ -111,7 +111,6 @@ def new_job(frontier, job_conf): merged_conf["seed"] = merged_conf.pop("url") site = brozzler.Site(frontier.rr, merged_conf) site.id = str(uuid.uuid4()) - site.seed_id = seed_conf['metadata']['ait_seed_id'] sites.append(site) pages.append(new_seed_page(frontier, site)) diff --git a/brozzler/ydl.py b/brozzler/ydl.py index e3fa21a..133b46e 100644 --- a/brozzler/ydl.py +++ b/brozzler/ydl.py @@ -64,11 +64,11 @@ def _timestamp4datetime(timestamp): def should_ytdlp(worker, page, site): ytdlp_url = page.redirect_url if page.redirect_url else page.url - ytdlp_seed = site.seed_id if site.seed_id else None - # ytdlp_seed = site.get(site.id).pluck("metadata", "ait_seed_id").default(None) if site.rr else None ??? - logging.info("ytdlp_seed: %s", ytdlp_seed) + ytdlp_seed = site["metadata"]["ait_seed_id"] + logging.info("checking containing page %r, site %r", ytdlp_url, ytdlp_seed) if ytdlp_seed and "youtube.com/watch?v" in ytdlp_url: + logging.info("found youtube watch page %r", ytdlp_url) # connect to bmiller-dev cluster, keyspace video; we can modify default timeout in cassandra.yaml cluster = Cluster(["207.241.235.189"], protocol_version=5) session = cluster.connect("video")