mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-05-02 14:46:18 -04:00
some additional logging and error handling to avoid mysterious messages
This commit is contained in:
parent
1e56bc8686
commit
b5cb94fc8b
5 changed files with 20 additions and 11 deletions
|
@@ -36,8 +36,8 @@ class Site:
|
|||
self._robots_cache = reppy.cache.RobotsCache(session=req_sesh)
|
||||
|
||||
def __repr__(self):
|
||||
return """Site(seed="{}",scope_surt="{}",proxy="{}",enable_warcprox_features={},ignore_robots={},extra_headers={})""".format(
|
||||
self.seed, self.scope_surt, self.proxy, self.enable_warcprox_features, self.ignore_robots, self.extra_headers)
|
||||
return """Site(seed={},scope_surt={},proxy={},enable_warcprox_features={},ignore_robots={},extra_headers={})""".format(
|
||||
repr(self.seed), repr(self.scope_surt), repr(self.proxy), self.enable_warcprox_features, self.ignore_robots, self.extra_headers)
|
||||
|
||||
def note_seed_redirect(self, url):
|
||||
new_scope_surt = surt.surt(url, canonicalizer=surt.GoogleURLCanonicalizer, trailing_comma=True)
|
||||
|
@@ -77,8 +77,8 @@ class Page:
|
|||
self.redirect_url = redirect_url
|
||||
|
||||
def __repr__(self):
|
||||
return """Page(url="{}",site_id={},hops_from_seed={})""".format(
|
||||
self.url, self.site_id, self.hops_from_seed)
|
||||
return """Page(url={},site_id={},hops_from_seed={})""".format(
|
||||
repr(self.url), self.site_id, self.hops_from_seed)
|
||||
|
||||
def note_redirect(self, url):
|
||||
self.redirect_url = url
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue