Merge pull request #146 from nlevitt/https-redirect

least surprise on http/https seed redirects
This commit is contained in:
Barbara Miller 2018-12-21 15:26:04 -08:00 committed by GitHub
commit e1ceb87ca2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 31 additions and 1 deletion

View File

@@ -242,8 +242,19 @@ class Site(doublethink.Document, ElapsedMixIn):
self.scope["accepts"].append({"ssurt": ssurt})
def note_seed_redirect(self, url):
    """
    Bring the target of a seed redirect into this site's scope.

    The canonicalized ssurt of `url` is always offered to
    `_accept_ssurt_if_not_redundant`. When `url` has the same authority
    as the seed but a different scheme, the seed's own ssurt with the
    scheme swapped to the redirect's is offered first.

    Args:
        url: the url that the seed redirected to
    """
    canon_seed_redirect = brozzler.site_surt_canon(url)
    canon_seed = brozzler.site_surt_canon(self.seed)

    # if http://foo.com/ redirects to https://foo.com/a/b/c let's also
    # put all of https://foo.com/ in scope
    if (canon_seed_redirect.authority == canon_seed.authority
            and canon_seed_redirect.scheme != canon_seed.scheme):
        canon_seed.scheme = canon_seed_redirect.scheme
        self._accept_ssurt_if_not_redundant(
                canon_seed.ssurt().decode('ascii'))

    # reuse the canonicalized redirect computed above instead of
    # canonicalizing `url` a second time
    self._accept_ssurt_if_not_redundant(
            canon_seed_redirect.ssurt().decode('ascii'))
def extra_headers(self):
hdrs = {}

View File

@@ -420,3 +420,22 @@ def test_needs_browsing():
assert not brozzler.worker.BrozzlerWorker._needs_browsing(
None, page, spy.fetches)
def test_seed_redirect():
    """
    Check which ssurts are in a site's scope after a seed redirect.

    Each case is (seed url, url the seed redirected to, expected accepts).
    """
    cases = (
        # same host, http -> https: both schemes accepted host-wide
        ('http://foo.com/', 'https://foo.com/a/b/c', [
            {'ssurt': 'com,foo,//http:/',},
            {'ssurt': 'com,foo,//https:/',}]),
        # same host, https -> http: both schemes accepted host-wide
        ('https://foo.com/', 'http://foo.com/a/b/c', [
            {'ssurt': 'com,foo,//https:/',},
            {'ssurt': 'com,foo,//http:/',}]),
        # different host: only the redirect url itself is added
        ('http://foo.com/', 'https://bar.com/a/b/c', [
            {'ssurt': 'com,foo,//http:/',},
            {'ssurt': 'com,bar,//https:/a/b/c',}]),
    )

    for seed_url, redirect_url, expected_accepts in cases:
        site = brozzler.Site(None, {'seed': seed_url})
        site.note_seed_redirect(redirect_url)
        assert site.scope == {'accepts': expected_accepts}