mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-20 23:56:34 -04:00
remove temporary trace logging
This commit is contained in:
parent
b01227b986
commit
16bdffba50
@ -313,9 +313,6 @@ class RethinkDbFrontier:
|
||||
representing the same url but with possibly different metadata.
|
||||
'''
|
||||
existing_page.priority += fresh_page.priority
|
||||
self.logger.trace(
|
||||
'adding hashtags %r to existing hashtags %r for page %s',
|
||||
existing_page.hashtags, fresh_page.hashtags, fresh_page.url)
|
||||
existing_page.hashtags = list(set(
|
||||
(existing_page.hashtags or []) + (fresh_page.hashtags or [])))
|
||||
existing_page.hops_off = min(
|
||||
@ -371,8 +368,6 @@ class RethinkDbFrontier:
|
||||
# get existing pages from rethinkdb
|
||||
results = self.rr.table('pages').get_all(*fresh_pages.keys()).run()
|
||||
pages = {doc['id']: brozzler.Page(self.rr, doc) for doc in results}
|
||||
self.logger.trace('fresh_pages.keys()=%r', fresh_pages.keys())
|
||||
self.logger.trace('existing pages.keys()=%r', pages.keys())
|
||||
|
||||
# build list of pages to save, consisting of new pages, and existing
|
||||
# pages updated with higher priority and new hashtags
|
||||
@ -422,10 +417,6 @@ class RethinkDbFrontier:
|
||||
counts['added'], counts['updated'], counts['rejected'],
|
||||
counts['blocked'], parent_page)
|
||||
|
||||
for page_id in pages:
|
||||
page = self.rr.table('pages').get(page_id).run()
|
||||
self.logger.trace('retrieved page after save: %r', page)
|
||||
|
||||
def reached_limit(self, site, e):
|
||||
self.logger.info("reached_limit site=%s e=%s", site, e)
|
||||
assert isinstance(e, brozzler.ReachedLimit)
|
||||
|
@ -510,10 +510,7 @@ def test_seed_redirect(httpd):
|
||||
{'ssurt': '%s//%s:http:/site5/redirect/' % (local_address, httpd.server_port)},
|
||||
{'ssurt': '%s//%s:http:/site5/destination/' % (local_address, httpd.server_port)}]}
|
||||
|
||||
def test_hashtags(httpd, caplog):
|
||||
caplog.set_level(0) # https://docs.pytest.org/en/latest/logging.html
|
||||
logging.trace('here we are in test_hashtags')
|
||||
|
||||
def test_hashtags(httpd):
|
||||
test_id = 'test_hashtags-%s' % datetime.datetime.utcnow().isoformat()
|
||||
rr = doublethink.Rethinker('localhost', db='brozzler')
|
||||
seed_url = make_url(httpd, '/site7/')
|
||||
@ -532,8 +529,6 @@ def test_hashtags(httpd, caplog):
|
||||
site.refresh()
|
||||
assert site.status == 'FINISHED'
|
||||
|
||||
print(datetime.datetime.utcnow().isoformat() + ' finished brozzling site, loading pages from rethinkdb')
|
||||
|
||||
# check that we got the pages we expected
|
||||
pages = sorted(list(frontier.site_pages(site.id)), key=lambda p: p.url)
|
||||
assert len(pages) == 2
|
||||
|
@ -732,9 +732,7 @@ def test_hashtag_seed():
|
||||
assert pages[0].url == 'http://example.org/'
|
||||
assert pages[0].hashtags == ['#hash',]
|
||||
|
||||
def test_hashtag_links(caplog):
|
||||
caplog.set_level(0) # https://docs.pytest.org/en/latest/logging.html
|
||||
|
||||
def test_hashtag_links():
|
||||
rr = doublethink.Rethinker('localhost', db='test_hashtag_links')
|
||||
frontier = brozzler.RethinkDbFrontier(rr)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user