mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 08:39:59 -05:00
Merge branch 'master' of github.com:nlevitt/brozzler
* 'master' of github.com:nlevitt/brozzler:
  fix bug preventing brozzler from simultaneously working on more than one site from the same job
This commit is contained in:
commit
ed0ea24de6
@@ -96,11 +96,12 @@ class RethinkDbFrontier:
|
|||||||
["ACTIVE",rethinkdb.minval],
|
["ACTIVE",rethinkdb.minval],
|
||||||
["ACTIVE",rethinkdb.maxval],
|
["ACTIVE",rethinkdb.maxval],
|
||||||
index="sites_last_disclaimed")
|
index="sites_last_disclaimed")
|
||||||
.order_by(index="sites_last_disclaimed").limit(1)
|
.order_by(index="sites_last_disclaimed")
|
||||||
.filter(
|
.filter(
|
||||||
(rethinkdb.row["claimed"] != True) |
|
(rethinkdb.row["claimed"] != True) |
|
||||||
(rethinkdb.row["last_disclaimed"]
|
(rethinkdb.row["last_disclaimed"]
|
||||||
< rethinkdb.now() - 2*60*60))
|
< rethinkdb.now() - 2*60*60))
|
||||||
|
.limit(1)
|
||||||
.update({"claimed":True,"last_claimed_by":worker_id},
|
.update({"claimed":True,"last_claimed_by":worker_id},
|
||||||
return_changes=True)).run()
|
return_changes=True)).run()
|
||||||
self._vet_result(result, replaced=[0,1], unchanged=[0,1])
|
self._vet_result(result, replaced=[0,1], unchanged=[0,1])
|
||||||
|
@@ -43,6 +43,9 @@ class Site(brozzler.BaseDictable):
|
|||||||
repr(self.proxy), self.enable_warcprox_features,
|
repr(self.proxy), self.enable_warcprox_features,
|
||||||
self.ignore_robots, self.extra_headers, self.reached_limit)
|
self.ignore_robots, self.extra_headers, self.reached_limit)
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return "site-%s-%s" % (self.id, self.seed)
|
||||||
|
|
||||||
def _to_surt(self, url):
|
def _to_surt(self, url):
|
||||||
hurl = surt.handyurl.parse(url)
|
hurl = surt.handyurl.parse(url)
|
||||||
surt.GoogleURLCanonicalizer.canonicalize(hurl)
|
surt.GoogleURLCanonicalizer.canonicalize(hurl)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user