add "metadata" field to site object

This commit is contained in:
Noah Levitt 2016-04-25 17:01:22 +00:00
parent 8d9fc7d3e3
commit 2c7c713f00
2 changed files with 11 additions and 6 deletions

View File

@@ -26,8 +26,9 @@ def new_job_file(frontier, job_conf_file):
new_job(frontier, job_conf)
def new_job(frontier, job_conf):
job = Job(id=job_conf.get("id"), conf=job_conf, status="ACTIVE",
started=rethinkstuff.utcnow())
job = Job(
id=job_conf.get("id"), conf=job_conf, status="ACTIVE",
started=rethinkstuff.utcnow())
sites = []
for seed_conf in job_conf["seeds"]:
@@ -36,7 +37,8 @@ def new_job(frontier, job_conf):
extra_headers = None
if "warcprox_meta" in merged_conf:
warcprox_meta = json.dumps(merged_conf["warcprox_meta"], separators=(',', ':'))
warcprox_meta = json.dumps(
merged_conf["warcprox_meta"], separators=(',', ':'))
extra_headers = {"Warcprox-Meta":warcprox_meta}
site = brozzler.Site(job_id=job.id,
@@ -45,8 +47,10 @@ def new_job(frontier, job_conf):
time_limit=merged_conf.get("time_limit"),
proxy=merged_conf.get("proxy"),
ignore_robots=merged_conf.get("ignore_robots"),
enable_warcprox_features=merged_conf.get("enable_warcprox_features"),
extra_headers=extra_headers)
enable_warcprox_features=merged_conf.get(
"enable_warcprox_features"),
extra_headers=extra_headers,
metadata=merged_conf.get("metadata"))
sites.append(site)
# insert all the sites into database before the job

View File

@@ -19,7 +19,7 @@ class Site(brozzler.BaseDictable):
enable_warcprox_features=False, reached_limit=None,
status="ACTIVE", claimed=False, start_time=None,
last_disclaimed=_EPOCH_UTC, last_claimed_by=None,
last_claimed=_EPOCH_UTC):
last_claimed=_EPOCH_UTC, metadata={}):
self.seed = seed
self.id = id
@@ -36,6 +36,7 @@ class Site(brozzler.BaseDictable):
self.start_time = start_time or rethinkstuff.utcnow()
self.last_disclaimed = last_disclaimed
self.last_claimed = last_claimed
self.metadata = metadata
self.scope = scope or {}
if not "surt" in self.scope: