diff --git a/brozzler/worker.py b/brozzler/worker.py index 907ed61..6399040 100644 --- a/brozzler/worker.py +++ b/brozzler/worker.py @@ -198,7 +198,7 @@ class BrozzlerWorker: self.logger.info("brozzling {}".format(page)) ydl_fetches = None outlinks = set() - if enable_youtube_dl: + if enable_youtube_dl and not page.url.lower().endswith(".pdf"): try: ydl_fetches, outlinks = ydl.do_youtube_dl(self, site, page) except brozzler.ReachedLimit as e: diff --git a/setup.py b/setup.py index e176497..a1b89d8 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ def find_package_data(package): setuptools.setup( name='brozzler', - version='1.5.42', + version='1.5.43', description='Distributed web crawling with browsers', url='https://github.com/internetarchive/brozzler', author='Noah Levitt', @@ -65,14 +65,14 @@ setuptools.setup( }, install_requires=[ 'PyYAML>=5.1', - 'yt_dlp', + 'yt_dlp<2023.11.16', 'reppy==0.3.4', 'requests>=2.21', 'websocket-client>=0.39.0,<=0.48.0', 'pillow>=5.2.0', 'urlcanon>=0.1.dev23', 'doublethink @ git+https://github.com/internetarchive/doublethink.git@Py311', - 'rethinkdb>=2.4.9,<2.5', + 'rethinkdb<2.4.10', 'cerberus>=1.0.1', 'jinja2>=2.10', 'cryptography>=2.3',