handle exceptions in robots.txt fetching/parsing

Noah Levitt 2015-07-22 00:54:49 +00:00
parent f00571f7bd
commit 6a09f2095c


@@ -46,7 +46,11 @@ class Site:
         self.scope_surt = new_scope_surt
 
     def is_permitted_by_robots(self, url):
-        return self.ignore_robots or self._robots_cache.allowed(url, "brozzler")
+        try:
+            return self.ignore_robots or self._robots_cache.allowed(url, "brozzler")
+        except BaseException as e:
+            self.logger.error("problem with robots.txt for {}: {}".format(url, e))
+            return False
 
     def is_in_scope(self, url):
         try:
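
Below is a minimal, self-contained sketch (not part of the commit) of the effect of the new try/except: when the robots cache raises during fetching or parsing, is_permitted_by_robots logs the error and fails closed by returning False instead of letting the exception propagate to the caller. The FailingRobotsCache and SiteStub names are hypothetical stand-ins for brozzler's real robots cache and Site class.

import logging

class FailingRobotsCache:
    def allowed(self, url, agent):
        # Simulate a robots.txt fetch/parse failure
        raise OSError("connection refused")

class SiteStub:
    logger = logging.getLogger("brozzler.site")
    ignore_robots = False
    _robots_cache = FailingRobotsCache()

    def is_permitted_by_robots(self, url):
        try:
            return self.ignore_robots or self._robots_cache.allowed(url, "brozzler")
        except BaseException as e:
            self.logger.error("problem with robots.txt for {}: {}".format(url, e))
            return False

if __name__ == "__main__":
    logging.basicConfig(level=logging.ERROR)
    site = SiteStub()
    # The robots.txt failure no longer propagates; the URL is treated as disallowed.
    print(site.is_permitted_by_robots("http://example.com/"))  # False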