tests expect outlinks to be a set

This commit is contained in:
Noah Levitt 2018-10-12 11:03:54 -07:00
parent 054ba6d7a0
commit 7497b7e5ac
3 changed files with 9 additions and 12 deletions

View File

@ -186,10 +186,10 @@ class BrozzlerWorker:
on_request=None, enable_youtube_dl=True): on_request=None, enable_youtube_dl=True):
self.logger.info("brozzling {}".format(page)) self.logger.info("brozzling {}".format(page))
ydl_fetches = None ydl_fetches = None
ydl_outlinks = [] outlinks = set()
if enable_youtube_dl: if enable_youtube_dl:
try: try:
ydl_fetches, ydl_outlinks = ydl.do_youtube_dl(self, site, page) ydl_fetches, outlinks = ydl.do_youtube_dl(self, site, page)
except brozzler.ReachedLimit as e: except brozzler.ReachedLimit as e:
raise raise
except brozzler.ShutdownRequested: except brozzler.ShutdownRequested:
@ -208,11 +208,11 @@ class BrozzlerWorker:
'youtube_dl raised exception on %s', page, 'youtube_dl raised exception on %s', page,
exc_info=True) exc_info=True)
browser_outlinks = []
if self._needs_browsing(page, ydl_fetches): if self._needs_browsing(page, ydl_fetches):
self.logger.info('needs browsing: %s', page) self.logger.info('needs browsing: %s', page)
browser_outlinks = self._browse_page( browser_outlinks = self._browse_page(
browser, site, page, on_screenshot, on_request) browser, site, page, on_screenshot, on_request)
outlinks.update(browser_outlinks)
else: else:
if not self._already_fetched(page, ydl_fetches): if not self._already_fetched(page, ydl_fetches):
self.logger.info('needs fetch: %s', page) self.logger.info('needs fetch: %s', page)
@ -220,10 +220,7 @@ class BrozzlerWorker:
else: else:
self.logger.info('already fetched: %s', page) self.logger.info('already fetched: %s', page)
outlinks = set() return outlinks
outlinks.update(ydl_outlinks)
outlinks.update(browser_outlinks)
return list(outlinks)
def _browse_page(self, browser, site, page, on_screenshot=None, on_request=None): def _browse_page(self, browser, site, page, on_screenshot=None, on_request=None):
def _on_screenshot(screenshot_png): def _on_screenshot(screenshot_png):

View File

@ -379,10 +379,10 @@ def do_youtube_dl(worker, site, page):
with tempfile.TemporaryDirectory(prefix='brzl-ydl-') as tempdir: with tempfile.TemporaryDirectory(prefix='brzl-ydl-') as tempdir:
ydl = _build_youtube_dl(worker, tempdir, site) ydl = _build_youtube_dl(worker, tempdir, site)
ie_result = _try_youtube_dl(worker, ydl, site, page) ie_result = _try_youtube_dl(worker, ydl, site, page)
outlinks = [] outlinks = set()
if ie_result['extractor'] == 'youtube:playlist': if ie_result and ie_result.get('extractor') == 'youtube:playlist':
# youtube watch pages as outlinks # youtube watch pages as outlinks
outlinks = ['https://www.youtube.com/watch?v=%s' % e['id'] outlinks = {'https://www.youtube.com/watch?v=%s' % e['id']
for e in ie_result.get('entries_no_dl', [])] for e in ie_result.get('entries_no_dl', [])}
# any outlinks for other cases? # any outlinks for other cases?
return ydl.fetch_spy.fetches, outlinks return ydl.fetch_spy.fetches, outlinks

View File

@ -2,7 +2,7 @@
''' '''
test_brozzling.py - XXX explain test_brozzling.py - XXX explain
Copyright (C) 2016-2017 Internet Archive Copyright (C) 2016-2018 Internet Archive
Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.