Mirror of https://github.com/internetarchive/brozzler.git (synced 2025-02-24 08:39:59 -05:00)
Restore scoping out of URLs with unsupported schemes
This commit is contained in:
parent
fee008266f
commit
1e52d1cf98
@ -65,7 +65,11 @@ class Site(brozzler.BaseDictable):
|
||||
self.scope["surt"] = new_scope_surt
|
||||
|
||||
def is_in_scope(self, surt_, parent_page=None):
|
||||
if (parent_page and "max_hops" in self.scope
|
||||
if not surt_.startswith("http://") and not surt_.startswith("https://"):
|
||||
# XXX doesn't belong here maybe (where? worker ignores unknown
|
||||
# schemes?)
|
||||
return False
|
||||
elif (parent_page and "max_hops" in self.scope
|
||||
and parent_page.hops_from_seed >= self.scope["max_hops"]):
|
||||
return False
|
||||
elif surt_.startswith(self.scope["surt"]):
|
||||
|
Loading…
x
Reference in New Issue
Block a user