mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 08:39:59 -05:00
Merge branch 'master' of github.com:nlevitt/brozzler
* 'master' of github.com:nlevitt/brozzler: fix bug preventing brozzler from simultaneously working on more than one site from the same job
This commit is contained in:
commit
ed0ea24de6
@@ -96,11 +96,12 @@ class RethinkDbFrontier:
|
||||
["ACTIVE",rethinkdb.minval],
|
||||
["ACTIVE",rethinkdb.maxval],
|
||||
index="sites_last_disclaimed")
|
||||
.order_by(index="sites_last_disclaimed").limit(1)
|
||||
.order_by(index="sites_last_disclaimed")
|
||||
.filter(
|
||||
(rethinkdb.row["claimed"] != True) |
|
||||
(rethinkdb.row["last_disclaimed"]
|
||||
< rethinkdb.now() - 2*60*60))
|
||||
.limit(1)
|
||||
.update({"claimed":True,"last_claimed_by":worker_id},
|
||||
return_changes=True)).run()
|
||||
self._vet_result(result, replaced=[0,1], unchanged=[0,1])
|
||||
|
@@ -43,6 +43,9 @@ class Site(brozzler.BaseDictable):
|
||||
repr(self.proxy), self.enable_warcprox_features,
|
||||
self.ignore_robots, self.extra_headers, self.reached_limit)
|
||||
|
||||
def __str__(self):
|
||||
return "site-%s-%s" % (self.id, self.seed)
|
||||
|
||||
def _to_surt(self, url):
|
||||
hurl = surt.handyurl.parse(url)
|
||||
surt.GoogleURLCanonicalizer.canonicalize(hurl)
|
||||
|
Loading…
x
Reference in New Issue
Block a user