mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-19 03:28:04 -04:00
Restore scoping-out of URLs with unsupported schemes
This commit is contained in:
parent
fee008266f
commit
1e52d1cf98
1 changed file with 5 additions and 1 deletion
|
@@ -65,7 +65,11 @@ class Site(brozzler.BaseDictable):
|
|||
self.scope["surt"] = new_scope_surt
|
||||
|
||||
def is_in_scope(self, surt_, parent_page=None):
|
||||
if (parent_page and "max_hops" in self.scope
|
||||
if not surt_.startswith("http://") and not surt_.startswith("https://"):
|
||||
# XXX doesn't belong here maybe (where? worker ignores unknown
|
||||
# schemes?)
|
||||
return False
|
||||
elif (parent_page and "max_hops" in self.scope
|
||||
and parent_page.hops_from_seed >= self.scope["max_hops"]):
|
||||
return False
|
||||
elif surt_.startswith(self.scope["surt"]):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue