Merge branch 'master' into qa

* master:
  pass canonicalized url to youtube-dl
This commit is contained in:
Noah Levitt 2018-01-22 12:48:28 -08:00
commit 190a159188
2 changed files with 6 additions and 2 deletions

View File

@@ -291,8 +291,12 @@ class BrozzlerWorker:
def _try_youtube_dl(self, ydl, site, page):
try:
self.logger.info("trying youtube-dl on {}".format(page))
with brozzler.thread_accept_exceptions():
info = ydl.extract_info(page.url)
# we do whatwg canonicalization here to avoid "<urlopen error
# no host given>" resulting in ProxyError
# needs automated test
info = ydl.extract_info(urlcanon.whatwg(page.url))
self._remember_videos(page, ydl.brozzler_spy)
# logging.info('XXX %s', json.dumps(info))
if self._using_warcprox(site):

View File

@@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup(
name='brozzler',
version='1.1b12.dev277',
version='1.1b12.dev278',
description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler',
author='Noah Levitt',