mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 08:39:59 -05:00
Uncommented the BrozzlerWorker and Browser/BrowserPool imports in the package __init__; Frontier.finished no longer requires site.job_id to be set
This commit is contained in:
parent
d1aebb0258
commit
20bde1c482
@ -43,9 +43,9 @@ class BaseDictable:
|
|||||||
return "{}(**{})".format(self.__class__.__name__, self.to_dict())
|
return "{}(**{})".format(self.__class__.__name__, self.to_dict())
|
||||||
|
|
||||||
from brozzler.site import Page, Site
|
from brozzler.site import Page, Site
|
||||||
# from brozzler.worker import BrozzlerWorker
|
from brozzler.worker import BrozzlerWorker
|
||||||
from brozzler.robots import is_permitted_by_robots
|
from brozzler.robots import is_permitted_by_robots
|
||||||
from brozzler.frontier import RethinkDbFrontier
|
from brozzler.frontier import RethinkDbFrontier
|
||||||
# from brozzler.browser import Browser, BrowserPool
|
from brozzler.browser import Browser, BrowserPool
|
||||||
from brozzler.job import new_job, new_site, Job
|
from brozzler.job import new_job, new_site, Job
|
||||||
|
|
||||||
|
@ -11,10 +11,10 @@ class UnexpectedDbResult(Exception):
|
|||||||
class RethinkDbFrontier:
|
class RethinkDbFrontier:
|
||||||
logger = logging.getLogger(__module__ + "." + __qualname__)
|
logger = logging.getLogger(__module__ + "." + __qualname__)
|
||||||
|
|
||||||
def __init__(self, r, shards=3, replicas=3):
|
def __init__(self, r, shards=None, replicas=None):
|
||||||
self.r = r
|
self.r = r
|
||||||
self.shards = shards
|
self.shards = shards or len(r.servers)
|
||||||
self.replicas = replicas
|
self.replicas = replicas or min(len(r.servers), 3)
|
||||||
self._ensure_db()
|
self._ensure_db()
|
||||||
|
|
||||||
def _ensure_db(self):
|
def _ensure_db(self):
|
||||||
@ -185,6 +185,7 @@ class RethinkDbFrontier:
|
|||||||
self.logger.info("%s %s", site, status)
|
self.logger.info("%s %s", site, status)
|
||||||
site.status = status
|
site.status = status
|
||||||
self.update_site(site)
|
self.update_site(site)
|
||||||
|
if site.job_id:
|
||||||
self._maybe_finish_job(site.job_id)
|
self._maybe_finish_job(site.job_id)
|
||||||
|
|
||||||
def disclaim_site(self, site, page=None):
|
def disclaim_site(self, site, page=None):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user