fix more tests for new approach sans scope['surt']

This commit is contained in:
Noah Levitt 2018-03-23 10:43:08 -07:00
parent 85a4757527
commit 05f8ab3495
2 changed files with 20 additions and 17 deletions

View File

@@ -448,13 +448,14 @@ def test_login(httpd):
assert ('WARCPROX_WRITE_RECORD thumbnail:http://localhost:%s/site2/login.html' % httpd.server_port) in meth_url assert ('WARCPROX_WRITE_RECORD thumbnail:http://localhost:%s/site2/login.html' % httpd.server_port) in meth_url
def test_seed_redirect(httpd): def test_seed_redirect(httpd):
test_id = 'test_login-%s' % datetime.datetime.utcnow().isoformat() test_id = 'test_seed_redirect-%s' % datetime.datetime.utcnow().isoformat()
rr = doublethink.Rethinker('localhost', db='brozzler') rr = doublethink.Rethinker('localhost', db='brozzler')
seed_url = 'http://localhost:%s/site5/redirect/' % httpd.server_port seed_url = 'http://localhost:%s/site5/redirect/' % httpd.server_port
site = brozzler.Site(rr, { site = brozzler.Site(rr, {
'seed': 'http://localhost:%s/site5/redirect/' % httpd.server_port, 'seed': 'http://localhost:%s/site5/redirect/' % httpd.server_port,
'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}}) 'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}})
assert site.scope['surt'] == 'http://(localhost:%s,)/site5/redirect/' % httpd.server_port assert site.scope == {
'accepts': [{'ssurt': b'localhost,//%s:http:/site5/redirect/' % httpd.server_port}]}
frontier = brozzler.RethinkDbFrontier(rr) frontier = brozzler.RethinkDbFrontier(rr)
brozzler.new_site(frontier, site) brozzler.new_site(frontier, site)
@@ -478,7 +479,9 @@ def test_seed_redirect(httpd):
assert pages[1].url == 'http://localhost:%s/site5/destination/page2.html' % httpd.server_port assert pages[1].url == 'http://localhost:%s/site5/destination/page2.html' % httpd.server_port
# check that scope has been updated properly # check that scope has been updated properly
assert site.scope['surt'] == 'http://(localhost:%s,)/site5/destination/' % httpd.server_port assert site.scope == {'accepts': [
{'ssurt': b'localhost,//%s:http:/site5/redirect/' % httpd.server_port},
{'ssurt': b'localhost,//%s:http:/site5/destination/' % httpd.server_port}]}
def test_hashtags(httpd): def test_hashtags(httpd):
test_id = 'test_hashtags-%s' % datetime.datetime.utcnow().isoformat() test_id = 'test_hashtags-%s' % datetime.datetime.utcnow().isoformat()

View File

@@ -94,28 +94,28 @@ blocks:
'url': 'http://example.com/foo/bar?baz=quux#monkey', 'url': 'http://example.com/foo/bar?baz=quux#monkey',
'site_id': site.id}) 'site_id': site.id})
assert site.is_in_scope('http://example.com/foo/bar', page) assert site.accept_reject_or_neither('http://example.com/foo/bar', page) is True
assert not site.is_in_scope('http://example.com/foo/baz', page) assert site.accept_reject_or_neither('http://example.com/foo/baz', page) is None
assert not site.is_in_scope('http://foo.com/some.mp3', page) assert site.accept_reject_or_neither('http://foo.com/some.mp3', page) is None
assert site.is_in_scope('http://foo.com/blah/audio_file/some.mp3', page) assert site.accept_reject_or_neither('http://foo.com/blah/audio_file/some.mp3', page) is True
assert site.is_in_scope('http://a.b.vimeocdn.com/blahblah', page) assert site.accept_reject_or_neither('http://a.b.vimeocdn.com/blahblah', page) is True
assert not site.is_in_scope('https://a.b.vimeocdn.com/blahblah', page) assert site.accept_reject_or_neither('https://a.b.vimeocdn.com/blahblah', page) is None
assert site.is_in_scope('https://twitter.com/twit', page) assert site.accept_reject_or_neither('https://twitter.com/twit', page) is True
assert site.is_in_scope('https://twitter.com/twit?lang=en', page) assert site.accept_reject_or_neither('https://twitter.com/twit?lang=en', page) is True
assert not site.is_in_scope('https://twitter.com/twit?lang=es', page) assert site.accept_reject_or_neither('https://twitter.com/twit?lang=es', page) is False
assert site.is_in_scope('https://www.facebook.com/whatevz', page) assert site.accept_reject_or_neither('https://www.facebook.com/whatevz', page) is True
assert not site.is_in_scope( assert site.accept_reject_or_neither(
'https://www.youtube.com/watch?v=dUIn5OAPS5s', page) 'https://www.youtube.com/watch?v=dUIn5OAPS5s', page) is None
yt_user_page = brozzler.Page(None, { yt_user_page = brozzler.Page(None, {
'url': 'https://www.youtube.com/user/SonoraSantaneraVEVO', 'url': 'https://www.youtube.com/user/SonoraSantaneraVEVO',
'site_id': site.id, 'hops_from_seed': 10}) 'site_id': site.id, 'hops_from_seed': 10})
assert site.is_in_scope( assert site.accept_reject_or_neither(
'https://www.youtube.com/watch?v=dUIn5OAPS5s', yt_user_page) 'https://www.youtube.com/watch?v=dUIn5OAPS5s', yt_user_page) is True
def test_proxy_down(): def test_proxy_down():
''' '''