# Patch summary (free-text preamble; everything from "diff --git" onward is unchanged).
#
# brozzler/frontier.py, class RethinkDbFrontier:
#   In the query chain over index "sites_last_disclaimed", `.limit(1)` is removed
#   from the `.order_by(...)` step and re-added after `.filter(...)`, directly
#   before `.update({"claimed":True,"last_claimed_by":worker_id}, ...)`.
#   Before: the first ordered row was selected and only then tested by the filter.
#   After: rows are filtered first (claimed != True, or last_disclaimed older than
#   now() - 2*60*60) and the first matching row is the one updated.
#   NOTE(review): 2*60*60 is presumably seconds (two hours) -- confirm against
#   RethinkDB time arithmetic.
#
# brozzler/site.py, class Site:
#   Adds __str__, which returns "site-%s-%s" % (self.id, self.seed).
#
# NOTE(review): the patch appears to have had its line breaks collapsed onto a
# single line; the original newlines would need to be restored before a patch
# tool could apply it.
diff --git a/brozzler/frontier.py b/brozzler/frontier.py index c9e0e9b..9f06115 100644 --- a/brozzler/frontier.py +++ b/brozzler/frontier.py @@ -96,11 +96,12 @@ class RethinkDbFrontier: ["ACTIVE",rethinkdb.minval], ["ACTIVE",rethinkdb.maxval], index="sites_last_disclaimed") - .order_by(index="sites_last_disclaimed").limit(1) + .order_by(index="sites_last_disclaimed") .filter( (rethinkdb.row["claimed"] != True) | (rethinkdb.row["last_disclaimed"] < rethinkdb.now() - 2*60*60)) + .limit(1) .update({"claimed":True,"last_claimed_by":worker_id}, return_changes=True)).run() self._vet_result(result, replaced=[0,1], unchanged=[0,1]) diff --git a/brozzler/site.py b/brozzler/site.py index af030da..5fd2396 100644 --- a/brozzler/site.py +++ b/brozzler/site.py @@ -43,6 +43,9 @@ class Site(brozzler.BaseDictable): repr(self.proxy), self.enable_warcprox_features, self.ignore_robots, self.extra_headers, self.reached_limit) + def __str__(self): + return "site-%s-%s" % (self.id, self.seed) + def _to_surt(self, url): hurl = surt.handyurl.parse(url) surt.GoogleURLCanonicalizer.canonicalize(hurl)