mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-09-24 22:48:33 -04:00
Merge branch 'master' into qa
* master: pass canonicalized url to youtube-dl
This commit is contained in:
commit
190a159188
2 changed files with 6 additions and 2 deletions
|
@@ -291,8 +291,12 @@ class BrozzlerWorker:
|
|||
def _try_youtube_dl(self, ydl, site, page):
|
||||
try:
|
||||
self.logger.info("trying youtube-dl on {}".format(page))
|
||||
|
||||
with brozzler.thread_accept_exceptions():
|
||||
info = ydl.extract_info(page.url)
|
||||
# we do whatwg canonicalization here to avoid "<urlopen error
|
||||
# no host given>" resulting in ProxyError
|
||||
# needs automated test
|
||||
info = ydl.extract_info(urlcanon.whatwg(page.url))
|
||||
self._remember_videos(page, ydl.brozzler_spy)
|
||||
# logging.info('XXX %s', json.dumps(info))
|
||||
if self._using_warcprox(site):
|
||||
|
|
2
setup.py
2
setup.py
|
@@ -32,7 +32,7 @@ def find_package_data(package):
|
|||
|
||||
setuptools.setup(
|
||||
name='brozzler',
|
||||
version='1.1b12.dev277',
|
||||
version='1.1b12.dev278',
|
||||
description='Distributed web crawling with browsers',
|
||||
url='https://github.com/internetarchive/brozzler',
|
||||
author='Noah Levitt',
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue