mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-25 00:59:52 -05:00
52 lines
1.6 KiB
Python
52 lines
1.6 KiB
Python
import json as _json
|
|
import logging as _logging
|
|
from pkg_resources import get_distribution as _get_distribution
|
|
|
|
# Package version string, read from the metadata of the installed
# 'brozzler' distribution via pkg_resources.
__version__ = _get_distribution('brozzler').version
|
|
|
|
class ShutdownRequested(Exception):
    """Exception type used to signal that a shutdown was requested.

    NOTE(review): the raising and handling sites are not in this module;
    confirm the exact usage against the callers.
    """
|
|
|
|
class NothingToClaim(Exception):
    """Exception type used to signal that there was nothing to claim.

    NOTE(review): presumably raised when no unit of work is available to
    be claimed; the raising site is not in this module -- confirm.
    """
|
|
|
|
class ReachedLimit(Exception):
    """Exception carrying the warcprox metadata and HTTP payload of a limit hit.

    The instance can be built in two ways:

    * from ``http_error``: an object exposing a ``headers`` mapping and a
      ``read()`` method. The ``warcprox-meta`` header, when present, is
      parsed as JSON into ``warcprox_meta`` (otherwise ``None``), and the
      result of ``read()`` is stored as ``http_payload``.
    * from ``warcprox_meta`` / ``http_payload`` given directly, which are
      stored unchanged.

    Attributes:
        warcprox_meta: parsed ``warcprox-meta`` value, or ``None``.
        http_payload: payload associated with the error, or ``None``.
    """

    def __init__(self, http_error=None, warcprox_meta=None, http_payload=None):
        """Populate ``warcprox_meta`` and ``http_payload``.

        Args:
            http_error: optional error object with ``headers`` and
                ``read()``; takes precedence over the other arguments.
            warcprox_meta: already-parsed metadata, used when
                ``http_error`` is not given.
            http_payload: payload, used when ``http_error`` is not given.
        """
        if http_error:
            if "warcprox-meta" in http_error.headers:
                self.warcprox_meta = _json.loads(
                    http_error.headers["warcprox-meta"])
            else:
                self.warcprox_meta = None
            self.http_payload = http_error.read()
        else:
            # Always define both attributes, even when no arguments (or
            # only falsy ones) are supplied, so that __repr__/__str__
            # never fail with AttributeError.
            self.warcprox_meta = warcprox_meta
            self.http_payload = http_payload

    def __repr__(self):
        """Return a constructor-like description of the stored values."""
        return "ReachedLimit(warcprox_meta={},http_payload={})".format(
            repr(self.warcprox_meta), repr(self.http_payload))

    def __str__(self):
        """Return the same text as ``repr()``."""
        return self.__repr__()
|
|
|
|
class BaseDictable:
    """Mixin giving an object dict, JSON and repr views of its attributes.

    Only instance attributes (``vars(self)``) are considered. Attributes
    whose name starts with an underscore, and attributes whose value is
    ``None``, are left out.
    """

    def to_dict(self):
        """Return a new dict of the public, non-``None`` instance attributes."""
        return {
            name: value
            for name, value in vars(self).items()
            if not name.startswith("_") and value is not None
        }

    def to_json(self):
        """Return ``to_dict()`` serialized as compact JSON (no whitespace)."""
        # The json module is imported in this file under the alias `_json`;
        # the bare name `json` is not defined here.
        return _json.dumps(self.to_dict(), separators=(',', ':'))

    def __repr__(self):
        """Return ``ClassName(**{...})`` built from ``to_dict()``."""
        return "{}(**{})".format(self.__class__.__name__, self.to_dict())
|
|
|
|
from brozzler.site import Page, Site
|
|
from brozzler.worker import BrozzlerWorker
|
|
from brozzler.robots import is_permitted_by_robots
|
|
from brozzler.frontier import RethinkDbFrontier
|
|
from brozzler.browser import Browser, BrowserPool
|
|
from brozzler.job import new_job, new_site, Job
|
|
|