diff --git a/brozzler/job.py b/brozzler/job.py index dd329e0..225becd 100644 --- a/brozzler/job.py +++ b/brozzler/job.py @@ -26,8 +26,9 @@ def new_job_file(frontier, job_conf_file): new_job(frontier, job_conf) def new_job(frontier, job_conf): - job = Job(id=job_conf.get("id"), conf=job_conf, status="ACTIVE", - started=rethinkstuff.utcnow()) + job = Job( + id=job_conf.get("id"), conf=job_conf, status="ACTIVE", + started=rethinkstuff.utcnow()) sites = [] for seed_conf in job_conf["seeds"]: @@ -36,7 +37,8 @@ def new_job(frontier, job_conf): extra_headers = None if "warcprox_meta" in merged_conf: - warcprox_meta = json.dumps(merged_conf["warcprox_meta"], separators=(',', ':')) + warcprox_meta = json.dumps( + merged_conf["warcprox_meta"], separators=(',', ':')) extra_headers = {"Warcprox-Meta":warcprox_meta} site = brozzler.Site(job_id=job.id, @@ -45,8 +47,10 @@ def new_job(frontier, job_conf): time_limit=merged_conf.get("time_limit"), proxy=merged_conf.get("proxy"), ignore_robots=merged_conf.get("ignore_robots"), - enable_warcprox_features=merged_conf.get("enable_warcprox_features"), - extra_headers=extra_headers) + enable_warcprox_features=merged_conf.get( + "enable_warcprox_features"), + extra_headers=extra_headers, + metadata=merged_conf.get("metadata")) sites.append(site) # insert all the sites into database before the job diff --git a/brozzler/site.py b/brozzler/site.py index 8ea3ab4..c4538ce 100644 --- a/brozzler/site.py +++ b/brozzler/site.py @@ -19,7 +19,7 @@ class Site(brozzler.BaseDictable): enable_warcprox_features=False, reached_limit=None, status="ACTIVE", claimed=False, start_time=None, last_disclaimed=_EPOCH_UTC, last_claimed_by=None, - last_claimed=_EPOCH_UTC): + last_claimed=_EPOCH_UTC, metadata={}): self.seed = seed self.id = id @@ -36,6 +36,7 @@ class Site(brozzler.BaseDictable): self.start_time = start_time or rethinkstuff.utcnow() self.last_disclaimed = last_disclaimed self.last_claimed = last_claimed + self.metadata = metadata self.scope = scope or {} if not "surt" in self.scope: