Summary of this patch (free-text preamble; patch tools skip text before the
first "diff --git" line):

  * brozzler/frontier.py: after a row is inserted into the "sites" table,
    site.id is taken from result["generated_keys"][0] only when site.id is
    not already set (falsy); an existing id is left untouched.
  * brozzler/job.py: new_job() no longer calls frontier.new_job(job) right
    after constructing the Job. It now calls new_site() for every collected
    site first and calls frontier.new_job(job) afterwards. The Job(...)
    constructor call is also wrapped onto two lines.

NOTE(review): the Job is built with id=job_conf.get("id"). If
frontier.new_job() is what assigns job.id when the conf carries no "id"
(not visible in this patch), code in the elided part of new_job() that reads
job.id while building sites would now run before that assignment - confirm.

NOTE(review): line breaks and leading indentation were reconstructed from a
whitespace-collapsed copy of this patch. Hunk line counts match the @@
headers, but verify context-line indentation against the tree or apply with
whitespace tolerance (e.g. "git apply --ignore-whitespace").

diff --git a/brozzler/frontier.py b/brozzler/frontier.py
index 9a95a53..dd3ceb3 100644
--- a/brozzler/frontier.py
+++ b/brozzler/frontier.py
@@ -63,7 +63,9 @@ class RethinkDbFrontier:
         self.logger.info("inserting into 'sites' table %s", site)
         result = self.r.table("sites").insert(site.to_dict()).run()
         self._vet_result(result, inserted=1)
-        site.id = result["generated_keys"][0]
+        if not site.id:
+            # only if "id" has not already been set
+            site.id = result["generated_keys"][0]
 
     def update_job(self, job):
         self.logger.debug("updating 'jobs' table entry %s", job)
diff --git a/brozzler/job.py b/brozzler/job.py
index e36f401..276f924 100644
--- a/brozzler/job.py
+++ b/brozzler/job.py
@@ -25,8 +25,8 @@ def new_job_file(frontier, job_conf_file):
     new_job(frontier, job_conf)
 
 def new_job(frontier, job_conf):
-    job = Job(id=job_conf.get("id"), conf=job_conf, status="ACTIVE", started=datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"))
-    frontier.new_job(job)
+    job = Job(id=job_conf.get("id"), conf=job_conf, status="ACTIVE",
+            started=datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"))
 
     sites = []
     for seed_conf in job_conf["seeds"]:
@@ -48,9 +48,12 @@ def new_job(frontier, job_conf):
                 extra_headers=extra_headers)
         sites.append(site)
 
+    # insert all the sites into database before the job
     for site in sites:
         new_site(frontier, site)
 
+    frontier.new_job(job)
+
 def new_site(frontier, site):
     site.id = str(uuid.uuid4())
     logging.info("new site {}".format(site))