least surprise on http/https seed redirects

If http://foo.com/ redirects to https://foo.com/a/b/c, let's also
put all of https://foo.com/ in scope.
This commit is contained in:
Noah Levitt 2018-12-21 15:17:31 -08:00
parent 6b8e597a43
commit a74f46dc53
2 changed files with 31 additions and 1 deletions

View file

@ -420,3 +420,22 @@ def test_needs_browsing():
assert not brozzler.worker.BrozzlerWorker._needs_browsing(
None, page, spy.fetches)
def test_seed_redirect():
    """Check how ``Site.note_seed_redirect`` extends ``site.scope``.

    Each scenario builds a fresh site from a seed url, reports a redirect
    of that seed, and compares the resulting scope's ``accepts`` list with
    the expected ssurt entries, in order.
    """
    scenarios = [
        # http seed redirected to https on the same host: the whole https
        # host is expected in scope, not just the redirect target's path
        ('http://foo.com/', 'https://foo.com/a/b/c',
         ['com,foo,//http:/', 'com,foo,//https:/']),
        # same expectation in the opposite direction, https -> http
        ('https://foo.com/', 'http://foo.com/a/b/c',
         ['com,foo,//https:/', 'com,foo,//http:/']),
        # redirect to a different host: only the redirect target's own
        # path is expected to be added
        ('http://foo.com/', 'https://bar.com/a/b/c',
         ['com,foo,//http:/', 'com,bar,//https:/a/b/c']),
    ]
    for seed_url, redirect_url, expected_ssurts in scenarios:
        site = brozzler.Site(None, {'seed': seed_url})
        site.note_seed_redirect(redirect_url)
        expected_scope = {
            'accepts': [{'ssurt': ssurt} for ssurt in expected_ssurts]}
        assert site.scope == expected_scope