brozzler/brozzler/__init__.py

52 lines
1.6 KiB
Python

import json as _json
import logging as _logging
from pkg_resources import get_distribution as _get_distribution
# Package version string, read at import time from the metadata of the
# installed 'brozzler' distribution via pkg_resources.get_distribution.
__version__ = _get_distribution('brozzler').version
class ShutdownRequested(Exception):
    """Marker exception named for a requested shutdown.

    Adds no state or behavior on top of ``Exception``.
    """
class NothingToClaim(Exception):
    """Marker exception named for the case where there is nothing to claim.

    Adds no state or behavior on top of ``Exception``.
    """
class ReachedLimit(Exception):
    """Exception carrying a ``warcprox-meta`` value and an HTTP payload.

    The instance can be built either from an HTTP error object or from
    explicitly supplied values.

    Args:
        http_error: Optional object exposing ``headers`` (supporting ``in``
            and item lookup) and a ``read()`` method. When truthy it takes
            precedence over the other arguments: the ``warcprox-meta``
            header, if present, is parsed as JSON into ``warcprox_meta``
            (otherwise ``warcprox_meta`` is None), and the result of
            ``read()`` becomes ``http_payload``.
        warcprox_meta: Value stored as ``warcprox_meta`` when ``http_error``
            is not given.
        http_payload: Value stored as ``http_payload`` when ``http_error``
            is not given.

    Raises:
        ValueError: If the ``warcprox-meta`` header is present but is not
            valid JSON (propagated from ``json.loads``).
    """

    def __init__(self, http_error=None, warcprox_meta=None, http_payload=None):
        if http_error:
            if "warcprox-meta" in http_error.headers:
                self.warcprox_meta = _json.loads(
                    http_error.headers["warcprox-meta"])
            else:
                self.warcprox_meta = None
            self.http_payload = http_error.read()
        else:
            # Always define both attributes, even when nothing (or only a
            # payload) was supplied; otherwise __repr__/__str__ would raise
            # AttributeError on a bare ReachedLimit().
            self.warcprox_meta = warcprox_meta
            self.http_payload = http_payload

    def __repr__(self):
        return "ReachedLimit(warcprox_meta={!r},http_payload={!r})".format(
            self.warcprox_meta, self.http_payload)

    def __str__(self):
        # The string form is deliberately the same as the repr.
        return self.__repr__()
class BaseDictable:
    """Base class that exposes an instance's attributes as a dict or JSON.

    Only attributes whose names do not start with an underscore and whose
    values are not None are included.
    """

    def to_dict(self):
        """Return a new dict of the instance's public, non-None attributes.

        Attribute order follows ``vars(self)``.
        """
        return {
            name: value
            for name, value in vars(self).items()
            if not name.startswith("_") and value is not None
        }

    def to_json(self):
        """Return ``to_dict()`` serialized as compact JSON (no whitespace)."""
        # The json module is imported in this file under the alias `_json`;
        # the bare name `json` is not defined here.
        return _json.dumps(self.to_dict(), separators=(',', ':'))

    def __repr__(self):
        return "{}(**{})".format(self.__class__.__name__, self.to_dict())
# Re-export the package's main names at the top level.
# NOTE(review): these imports sit after the class definitions above,
# presumably so the submodules can refer to those names while this package is
# still being imported — confirm before moving them to the top of the file.
from brozzler.site import Page, Site
from brozzler.worker import BrozzlerWorker
from brozzler.robots import is_permitted_by_robots
from brozzler.frontier import RethinkDbFrontier
from brozzler.browser import Browser, BrowserPool
from brozzler.job import new_job, new_site, Job