initialize page.videos correctly in all cases

This commit is contained in:
Noah Levitt 2017-03-21 11:10:57 -07:00
parent eeee523b18
commit 95ba334b89
2 changed files with 6 additions and 5 deletions

View File

@ -200,7 +200,8 @@ class BrozzlerWorker:
e.getcode(), e.info()) e.getcode(), e.info())
def _remember_videos(self, page, ydl_spy): def _remember_videos(self, page, ydl_spy):
videos = [] if not 'videos' in page:
page.videos = []
for txn in ydl_spy.transactions: for txn in ydl_spy.transactions:
if (txn['response_headers'].get_content_type().startswith('video/') if (txn['response_headers'].get_content_type().startswith('video/')
and txn['method'] == 'GET' and txn['method'] == 'GET'
@ -218,14 +219,12 @@ class BrozzlerWorker:
video['content-range'] = txn[ video['content-range'] = txn[
'response_headers']['content-range'] 'response_headers']['content-range']
logging.debug('embedded video %s', video) logging.debug('embedded video %s', video)
videos.append(video) page.videos.append(video)
page.videos = videos
def _try_youtube_dl(self, ydl, site, page): def _try_youtube_dl(self, ydl, site, page):
try: try:
self.logger.info("trying youtube-dl on {}".format(page)) self.logger.info("trying youtube-dl on {}".format(page))
info = ydl.extract_info(page.url) info = ydl.extract_info(page.url)
self._remember_videos(page, ydl.brozzler_spy)
# logging.info('XXX %s', json.dumps(info)) # logging.info('XXX %s', json.dumps(info))
if self._proxy(site) and self._enable_warcprox_features(site): if self._proxy(site) and self._enable_warcprox_features(site):
info_json = json.dumps(info, sort_keys=True, indent=4) info_json = json.dumps(info, sort_keys=True, indent=4)
@ -344,6 +343,8 @@ class BrozzlerWorker:
if 'content-range' in response_headers: if 'content-range' in response_headers:
video['content-range'] = response_headers['content-range'] video['content-range'] = response_headers['content-range']
logging.debug('embedded video %s', video) logging.debug('embedded video %s', video)
if not 'videos' in page:
page.videos = []
page.videos.append(video) page.videos.append(video)
if not browser.is_running(): if not browser.is_running():

View File

@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup( setuptools.setup(
name='brozzler', name='brozzler',
version='1.1b9.dev212', version='1.1b9.dev213',
description='Distributed web crawling with browsers', description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler', url='https://github.com/internetarchive/brozzler',
author='Noah Levitt', author='Noah Levitt',