mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-21 16:16:28 -04:00
pass canonicalized url to youtube-dl
This avoids the following kind of error, where a URL with a malformed scheme separator (here `https:/www.laphil.com/press1718`, with a single slash) is handed to youtube-dl, fails with "no host given", and is then misreported as a proxy error:
wbgrp-svc294 2018-01-19 21:04:43,973 648 ERROR BrozzlingThread:39295 youtube_dl.to_stderr(YoutubeDL.py:514) ERROR: Unable to download webpage: <urlopen error no host given> (caused by URLError('no host given',))
wbgrp-svc294 2018-01-19 21:04:43,973 648 ERROR BrozzlingThread:39295 root.brozzle_site(worker.py:521) proxy error (site.proxy=wbgrp-svc400.us.archive.org:8002), will try to choose a healthy instance next time site is brozzled: youtube-dl hit apparent proxy error from https:/www.laphil.com/press1718
This commit is contained in:
parent
c22e81341a
commit
4ddd76f542
@ -290,8 +290,12 @@ class BrozzlerWorker:
|
||||
def _try_youtube_dl(self, ydl, site, page):
|
||||
try:
|
||||
self.logger.info("trying youtube-dl on {}".format(page))
|
||||
|
||||
with brozzler.thread_accept_exceptions():
|
||||
info = ydl.extract_info(page.url)
|
||||
# we do whatwg canonicalization here to avoid "<urlopen error
|
||||
# no host given>" resulting in ProxyError
|
||||
# needs automated test
|
||||
info = ydl.extract_info(urlcanon.whatwg(page.url))
|
||||
self._remember_videos(page, ydl.brozzler_spy)
|
||||
# logging.info('XXX %s', json.dumps(info))
|
||||
if self._using_warcprox(site):
|
||||
|
Loading…
x
Reference in New Issue
Block a user