mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-20 23:56:34 -04:00
Merge pull request #146 from nlevitt/https-redirect
least surprise on http/https seed redirects
This commit is contained in:
commit
e1ceb87ca2
@ -242,8 +242,19 @@ class Site(doublethink.Document, ElapsedMixIn):
|
||||
self.scope["accepts"].append({"ssurt": ssurt})
|
||||
|
||||
def note_seed_redirect(self, url):
    """Offer extra scope rules after the seed redirected to ``url``.

    Both the seed and the redirect target are canonicalized with
    ``brozzler.site_surt_canon``. If they share an authority but differ
    in scheme, the seed's own ssurt, rewritten to use the redirect's
    scheme, is offered first. The ssurt of the redirect target is then
    offered as well. ``self._accept_ssurt_if_not_redundant`` decides
    whether each offered ssurt is actually added to the scope.

    Args:
        url: the url the seed redirected to.
    """
    canon_seed_redirect = brozzler.site_surt_canon(url)
    canon_seed = brozzler.site_surt_canon(self.seed)

    # if http://foo.com/ redirects to https://foo.com/a/b/c let's also
    # put all of https://foo.com/ in scope
    if (canon_seed_redirect.authority == canon_seed.authority
            and canon_seed_redirect.scheme != canon_seed.scheme):
        canon_seed.scheme = canon_seed_redirect.scheme
        self._accept_ssurt_if_not_redundant(
                canon_seed.ssurt().decode('ascii'))

    # reuse the already-canonicalized redirect instead of canonicalizing
    # `url` a second time
    self._accept_ssurt_if_not_redundant(
            canon_seed_redirect.ssurt().decode('ascii'))
|
||||
|
||||
def extra_headers(self):
|
||||
hdrs = {}
|
||||
|
@ -420,3 +420,22 @@ def test_needs_browsing():
|
||||
assert not brozzler.worker.BrozzlerWorker._needs_browsing(
|
||||
None, page, spy.fetches)
|
||||
|
||||
def test_seed_redirect():
    """Check the scope produced by ``Site.note_seed_redirect``.

    Each case is (seed url, url the seed redirected to, ssurts expected
    in ``site.scope['accepts']``, in order).
    """
    cases = [
        # same host, http -> https: the https site root is accepted too
        ('http://foo.com/', 'https://foo.com/a/b/c',
         ['com,foo,//http:/', 'com,foo,//https:/']),
        # same host, https -> http: the http site root is accepted too
        ('https://foo.com/', 'http://foo.com/a/b/c',
         ['com,foo,//https:/', 'com,foo,//http:/']),
        # different host: only the redirect target itself is accepted
        ('http://foo.com/', 'https://bar.com/a/b/c',
         ['com,foo,//http:/', 'com,bar,//https:/a/b/c']),
    ]

    for seed, redirect, expected_ssurts in cases:
        site = brozzler.Site(None, {'seed': seed})
        site.note_seed_redirect(redirect)
        assert site.scope == {
            'accepts': [{'ssurt': ssurt} for ssurt in expected_ssurts]}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user