mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-19 03:28:04 -04:00
If a site is marked "claimed" in RethinkDB but its last_disclaimed is more than 2 hours ago, claim it anyway and log a warning.
This commit is contained in:
parent
4874eaccbb
commit
ca9e62f5cf
1 changed file with 13 additions and 3 deletions
|
@ -27,7 +27,7 @@ class RethinkDbFrontier:
|
||||||
if not "sites" in tables:
|
if not "sites" in tables:
|
||||||
self.logger.info("creating rethinkdb table 'sites' in database %s", repr(self.r.dbname))
|
self.logger.info("creating rethinkdb table 'sites' in database %s", repr(self.r.dbname))
|
||||||
self.r.table_create("sites", shards=self.shards, replicas=self.replicas).run()
|
self.r.table_create("sites", shards=self.shards, replicas=self.replicas).run()
|
||||||
self.r.table("sites").index_create("sites_last_disclaimed", [self.r.row["status"], self.r.row["claimed"], self.r.row["last_disclaimed"]]).run()
|
self.r.table("sites").index_create("sites_last_disclaimed", [self.r.row["status"], self.r.row["last_disclaimed"]]).run()
|
||||||
self.r.table("sites").index_create("job_id").run()
|
self.r.table("sites").index_create("job_id").run()
|
||||||
if not "pages" in tables:
|
if not "pages" in tables:
|
||||||
self.logger.info("creating rethinkdb table 'pages' in database %s", repr(self.r.dbname))
|
self.logger.info("creating rethinkdb table 'pages' in database %s", repr(self.r.dbname))
|
||||||
|
@ -93,14 +93,24 @@ class RethinkDbFrontier:
|
||||||
while True:
|
while True:
|
||||||
result = (self.r.table("sites")
|
result = (self.r.table("sites")
|
||||||
.between(
|
.between(
|
||||||
["ACTIVE",False,rethinkdb.minval],
|
["ACTIVE",rethinkdb.minval],
|
||||||
["ACTIVE",False,rethinkdb.maxval],
|
["ACTIVE",rethinkdb.maxval],
|
||||||
index="sites_last_disclaimed")
|
index="sites_last_disclaimed")
|
||||||
.order_by(index="sites_last_disclaimed").limit(1)
|
.order_by(index="sites_last_disclaimed").limit(1)
|
||||||
|
.filter(
|
||||||
|
(rethinkdb.row["claimed"] != True) |
|
||||||
|
(rethinkdb.row["last_disclaimed"]
|
||||||
|
< rethinkdb.now() - 2*60*60))
|
||||||
.update({"claimed":True,"last_claimed_by":worker_id},
|
.update({"claimed":True,"last_claimed_by":worker_id},
|
||||||
return_changes=True)).run()
|
return_changes=True)).run()
|
||||||
self._vet_result(result, replaced=[0,1], unchanged=[0,1])
|
self._vet_result(result, replaced=[0,1], unchanged=[0,1])
|
||||||
if result["replaced"] == 1:
|
if result["replaced"] == 1:
|
||||||
|
if result["changes"][0]["old_val"]["claimed"]:
|
||||||
|
self.logger.warn(
|
||||||
|
"re-claimed site that was still marked 'claimed' "
|
||||||
|
"because it was last disclaimed a long time ago "
|
||||||
|
"at %s",
|
||||||
|
result["changes"][0]["old_val"]["last_disclaimed"])
|
||||||
site = brozzler.Site(**result["changes"][0]["new_val"])
|
site = brozzler.Site(**result["changes"][0]["new_val"])
|
||||||
else:
|
else:
|
||||||
raise brozzler.NothingToClaim
|
raise brozzler.NothingToClaim
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue