mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-10 23:40:30 -04:00
add "metadata" field to site object
This commit is contained in:
parent
8d9fc7d3e3
commit
2c7c713f00
2 changed files with 11 additions and 6 deletions
|
@@ -26,7 +26,8 @@ def new_job_file(frontier, job_conf_file):
|
||||||
new_job(frontier, job_conf)
|
new_job(frontier, job_conf)
|
||||||
|
|
||||||
def new_job(frontier, job_conf):
|
def new_job(frontier, job_conf):
|
||||||
job = Job(id=job_conf.get("id"), conf=job_conf, status="ACTIVE",
|
job = Job(
|
||||||
|
id=job_conf.get("id"), conf=job_conf, status="ACTIVE",
|
||||||
started=rethinkstuff.utcnow())
|
started=rethinkstuff.utcnow())
|
||||||
|
|
||||||
sites = []
|
sites = []
|
||||||
|
@@ -36,7 +37,8 @@ def new_job(frontier, job_conf):
|
||||||
|
|
||||||
extra_headers = None
|
extra_headers = None
|
||||||
if "warcprox_meta" in merged_conf:
|
if "warcprox_meta" in merged_conf:
|
||||||
warcprox_meta = json.dumps(merged_conf["warcprox_meta"], separators=(',', ':'))
|
warcprox_meta = json.dumps(
|
||||||
|
merged_conf["warcprox_meta"], separators=(',', ':'))
|
||||||
extra_headers = {"Warcprox-Meta":warcprox_meta}
|
extra_headers = {"Warcprox-Meta":warcprox_meta}
|
||||||
|
|
||||||
site = brozzler.Site(job_id=job.id,
|
site = brozzler.Site(job_id=job.id,
|
||||||
|
@@ -45,8 +47,10 @@ def new_job(frontier, job_conf):
|
||||||
time_limit=merged_conf.get("time_limit"),
|
time_limit=merged_conf.get("time_limit"),
|
||||||
proxy=merged_conf.get("proxy"),
|
proxy=merged_conf.get("proxy"),
|
||||||
ignore_robots=merged_conf.get("ignore_robots"),
|
ignore_robots=merged_conf.get("ignore_robots"),
|
||||||
enable_warcprox_features=merged_conf.get("enable_warcprox_features"),
|
enable_warcprox_features=merged_conf.get(
|
||||||
extra_headers=extra_headers)
|
"enable_warcprox_features"),
|
||||||
|
extra_headers=extra_headers,
|
||||||
|
metadata=merged_conf.get("metadata"))
|
||||||
sites.append(site)
|
sites.append(site)
|
||||||
|
|
||||||
# insert all the sites into database before the job
|
# insert all the sites into database before the job
|
||||||
|
|
|
@@ -19,7 +19,7 @@ class Site(brozzler.BaseDictable):
|
||||||
enable_warcprox_features=False, reached_limit=None,
|
enable_warcprox_features=False, reached_limit=None,
|
||||||
status="ACTIVE", claimed=False, start_time=None,
|
status="ACTIVE", claimed=False, start_time=None,
|
||||||
last_disclaimed=_EPOCH_UTC, last_claimed_by=None,
|
last_disclaimed=_EPOCH_UTC, last_claimed_by=None,
|
||||||
last_claimed=_EPOCH_UTC):
|
last_claimed=_EPOCH_UTC, metadata={}):
|
||||||
|
|
||||||
self.seed = seed
|
self.seed = seed
|
||||||
self.id = id
|
self.id = id
|
||||||
|
@@ -36,6 +36,7 @@ class Site(brozzler.BaseDictable):
|
||||||
self.start_time = start_time or rethinkstuff.utcnow()
|
self.start_time = start_time or rethinkstuff.utcnow()
|
||||||
self.last_disclaimed = last_disclaimed
|
self.last_disclaimed = last_disclaimed
|
||||||
self.last_claimed = last_claimed
|
self.last_claimed = last_claimed
|
||||||
|
self.metadata = metadata
|
||||||
|
|
||||||
self.scope = scope or {}
|
self.scope = scope or {}
|
||||||
if not "surt" in self.scope:
|
if not "surt" in self.scope:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue