mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 08:39:59 -05:00
add "metadata" field to site object
This commit is contained in:
parent
8d9fc7d3e3
commit
2c7c713f00
@ -26,8 +26,9 @@ def new_job_file(frontier, job_conf_file):
|
||||
new_job(frontier, job_conf)
|
||||
|
||||
def new_job(frontier, job_conf):
|
||||
job = Job(id=job_conf.get("id"), conf=job_conf, status="ACTIVE",
|
||||
started=rethinkstuff.utcnow())
|
||||
job = Job(
|
||||
id=job_conf.get("id"), conf=job_conf, status="ACTIVE",
|
||||
started=rethinkstuff.utcnow())
|
||||
|
||||
sites = []
|
||||
for seed_conf in job_conf["seeds"]:
|
||||
@ -36,7 +37,8 @@ def new_job(frontier, job_conf):
|
||||
|
||||
extra_headers = None
|
||||
if "warcprox_meta" in merged_conf:
|
||||
warcprox_meta = json.dumps(merged_conf["warcprox_meta"], separators=(',', ':'))
|
||||
warcprox_meta = json.dumps(
|
||||
merged_conf["warcprox_meta"], separators=(',', ':'))
|
||||
extra_headers = {"Warcprox-Meta":warcprox_meta}
|
||||
|
||||
site = brozzler.Site(job_id=job.id,
|
||||
@ -45,8 +47,10 @@ def new_job(frontier, job_conf):
|
||||
time_limit=merged_conf.get("time_limit"),
|
||||
proxy=merged_conf.get("proxy"),
|
||||
ignore_robots=merged_conf.get("ignore_robots"),
|
||||
enable_warcprox_features=merged_conf.get("enable_warcprox_features"),
|
||||
extra_headers=extra_headers)
|
||||
enable_warcprox_features=merged_conf.get(
|
||||
"enable_warcprox_features"),
|
||||
extra_headers=extra_headers,
|
||||
metadata=merged_conf.get("metadata"))
|
||||
sites.append(site)
|
||||
|
||||
# insert all the sites into database before the job
|
||||
|
@ -19,7 +19,7 @@ class Site(brozzler.BaseDictable):
|
||||
enable_warcprox_features=False, reached_limit=None,
|
||||
status="ACTIVE", claimed=False, start_time=None,
|
||||
last_disclaimed=_EPOCH_UTC, last_claimed_by=None,
|
||||
last_claimed=_EPOCH_UTC):
|
||||
last_claimed=_EPOCH_UTC, metadata={}):
|
||||
|
||||
self.seed = seed
|
||||
self.id = id
|
||||
@ -36,6 +36,7 @@ class Site(brozzler.BaseDictable):
|
||||
self.start_time = start_time or rethinkstuff.utcnow()
|
||||
self.last_disclaimed = last_disclaimed
|
||||
self.last_claimed = last_claimed
|
||||
self.metadata = metadata
|
||||
|
||||
self.scope = scope or {}
|
||||
if not "surt" in self.scope:
|
||||
|
Loading…
x
Reference in New Issue
Block a user