mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-07-19 21:18:46 -04:00
least surprise on http/https seed redirects
If http://foo.com/ redirects to https://foo.com/a/b/c, also put all of https://foo.com/ in scope.
This commit is contained in:
parent
6b8e597a43
commit
a74f46dc53
2 changed files with 31 additions and 1 deletions
|
@ -420,3 +420,22 @@ def test_needs_browsing():
|
|||
assert not brozzler.worker.BrozzlerWorker._needs_browsing(
|
||||
None, page, spy.fetches)
|
||||
|
||||
def test_seed_redirect():
    """Check the scope a site ends up with after a seed redirect is noted.

    Each scenario builds a fresh ``brozzler.Site`` from a seed url, reports a
    redirect through ``note_seed_redirect``, and compares ``site.scope``
    against the expected ``accepts`` list.
    """
    # (seed url, redirect target, ssurts expected in scope['accepts'], in order)
    scenarios = (
        # http seed redirected to https on the same host: the whole https
        # host is expected in scope, not just the redirect path
        ('http://foo.com/', 'https://foo.com/a/b/c',
         ('com,foo,//http:/', 'com,foo,//https:/')),
        # same expectation in the other direction, https seed -> http
        ('https://foo.com/', 'http://foo.com/a/b/c',
         ('com,foo,//https:/', 'com,foo,//http:/')),
        # redirect to a different host: the expected entry keeps the full
        # redirect path
        ('http://foo.com/', 'https://bar.com/a/b/c',
         ('com,foo,//http:/', 'com,bar,//https:/a/b/c')),
    )

    for seed_url, redirect_url, expected_ssurts in scenarios:
        site = brozzler.Site(None, {'seed': seed_url})
        site.note_seed_redirect(redirect_url)
        expected_scope = {
            'accepts': [{'ssurt': ssurt} for ssurt in expected_ssurts]}
        assert site.scope == expected_scope
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue