Merge pull request #268 from galgeek/skip_ytdlp4pdf

@avdempsey approved elsewhere, for the record
This commit is contained in:
Barbara Miller 2023-12-12 11:39:11 -08:00 committed by GitHub
commit 8afe9b5014
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 4 additions and 4 deletions

View File

@@ -198,7 +198,7 @@ class BrozzlerWorker:
self.logger.info("brozzling {}".format(page))
ydl_fetches = None
outlinks = set()
-if enable_youtube_dl:
+if enable_youtube_dl and not page.url.lower().endswith(".pdf"):
try:
ydl_fetches, outlinks = ydl.do_youtube_dl(self, site, page)
except brozzler.ReachedLimit as e:

View File

@@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup(
name='brozzler',
-version='1.5.42',
+version='1.5.43',
description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler',
author='Noah Levitt',
@@ -65,14 +65,14 @@ setuptools.setup(
},
install_requires=[
'PyYAML>=5.1',
-'yt_dlp',
+'yt_dlp<2023.11.16',
'reppy==0.3.4',
'requests>=2.21',
'websocket-client>=0.39.0,<=0.48.0',
'pillow>=5.2.0',
'urlcanon>=0.1.dev23',
'doublethink @ git+https://github.com/internetarchive/doublethink.git@Py311',
-'rethinkdb>=2.4.9,<2.5',
+'rethinkdb<2.4.10',
'cerberus>=1.0.1',
'jinja2>=2.10',
'cryptography>=2.3',