Restore scoping out of URLs with unsupported schemes

This commit is contained in:
Noah Levitt 2016-04-21 11:40:08 -07:00
parent fee008266f
commit 1e52d1cf98

View File

@ -65,7 +65,11 @@ class Site(brozzler.BaseDictable):
self.scope["surt"] = new_scope_surt self.scope["surt"] = new_scope_surt
def is_in_scope(self, surt_, parent_page=None): def is_in_scope(self, surt_, parent_page=None):
if (parent_page and "max_hops" in self.scope if not surt_.startswith("http://") and not surt_.startswith("https://"):
# XXX doesn't belong here maybe (where? worker ignores unknown
# schemes?)
return False
elif (parent_page and "max_hops" in self.scope
and parent_page.hops_from_seed >= self.scope["max_hops"]): and parent_page.hops_from_seed >= self.scope["max_hops"]):
return False return False
elif surt_.startswith(self.scope["surt"]): elif surt_.startswith(self.scope["surt"]):