mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 16:49:56 -05:00
restore scoping out of urls with unsupported schemes
This commit is contained in:
parent
fee008266f
commit
1e52d1cf98
@ -65,7 +65,11 @@ class Site(brozzler.BaseDictable):
|
|||||||
self.scope["surt"] = new_scope_surt
|
self.scope["surt"] = new_scope_surt
|
||||||
|
|
||||||
def is_in_scope(self, surt_, parent_page=None):
|
def is_in_scope(self, surt_, parent_page=None):
|
||||||
if (parent_page and "max_hops" in self.scope
|
if not surt_.startswith("http://") and not surt_.startswith("https://"):
|
||||||
|
# XXX doesn't belong here maybe (where? worker ignores unknown
|
||||||
|
# schemes?)
|
||||||
|
return False
|
||||||
|
elif (parent_page and "max_hops" in self.scope
|
||||||
and parent_page.hops_from_seed >= self.scope["max_hops"]):
|
and parent_page.hops_from_seed >= self.scope["max_hops"]):
|
||||||
return False
|
return False
|
||||||
elif surt_.startswith(self.scope["surt"]):
|
elif surt_.startswith(self.scope["surt"]):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user