Also, when adding a new job, insert all of its sites before the job itself, to prevent brozzler workers from thinking the job is finished before all the sites are in the db

This commit is contained in:
Noah Levitt 2015-11-14 03:10:58 +00:00
parent 3260fe4e9e
commit ca0053e3be
2 changed files with 8 additions and 3 deletions

View File

@@ -63,7 +63,9 @@ class RethinkDbFrontier:
self.logger.info("inserting into 'sites' table %s", site)
result = self.r.table("sites").insert(site.to_dict()).run()
self._vet_result(result, inserted=1)
site.id = result["generated_keys"][0]
if not site.id:
# only if "id" has not already been set
site.id = result["generated_keys"][0]
def update_job(self, job):
self.logger.debug("updating 'jobs' table entry %s", job)

View File

@@ -25,8 +25,8 @@ def new_job_file(frontier, job_conf_file):
new_job(frontier, job_conf)
def new_job(frontier, job_conf):
job = Job(id=job_conf.get("id"), conf=job_conf, status="ACTIVE", started=datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"))
frontier.new_job(job)
job = Job(id=job_conf.get("id"), conf=job_conf, status="ACTIVE",
started=datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"))
sites = []
for seed_conf in job_conf["seeds"]:
@@ -48,9 +48,12 @@ def new_job(frontier, job_conf):
extra_headers=extra_headers)
sites.append(site)
# insert all the sites into database before the job
for site in sites:
new_site(frontier, site)
frontier.new_job(job)
def new_site(frontier, site):
site.id = str(uuid.uuid4())
logging.info("new site {}".format(site))