diff --git a/brozzler/worker.py b/brozzler/worker.py index 8805a25..ba93bec 100644 --- a/brozzler/worker.py +++ b/brozzler/worker.py @@ -291,8 +291,12 @@ class BrozzlerWorker: def _try_youtube_dl(self, ydl, site, page): try: self.logger.info("trying youtube-dl on {}".format(page)) + with brozzler.thread_accept_exceptions(): - info = ydl.extract_info(page.url) + # we do whatwg canonicalization here to avoid "" resulting in ProxyError + # needs automated test + info = ydl.extract_info(urlcanon.whatwg(page.url)) self._remember_videos(page, ydl.brozzler_spy) # logging.info('XXX %s', json.dumps(info)) if self._using_warcprox(site): diff --git a/setup.py b/setup.py index 3164919..f21dbef 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ def find_package_data(package): setuptools.setup( name='brozzler', - version='1.1b12.dev277', + version='1.1b12.dev278', description='Distributed web crawling with browsers', url='https://github.com/internetarchive/brozzler', author='Noah Levitt',