mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-23 16:19:49 -05:00
Merge pull request #268 from galgeek/skip_ytdlp4pdf
@avdempsey approved elsewhere, for the record
This commit is contained in:
commit
8afe9b5014
@@ -198,7 +198,7 @@ class BrozzlerWorker:
|
||||
self.logger.info("brozzling {}".format(page))
|
||||
ydl_fetches = None
|
||||
outlinks = set()
|
||||
if enable_youtube_dl:
|
||||
if enable_youtube_dl and not page.url.lower().endswith(".pdf"):
|
||||
try:
|
||||
ydl_fetches, outlinks = ydl.do_youtube_dl(self, site, page)
|
||||
except brozzler.ReachedLimit as e:
|
||||
|
6
setup.py
6
setup.py
@@ -32,7 +32,7 @@ def find_package_data(package):
|
||||
|
||||
setuptools.setup(
|
||||
name='brozzler',
|
||||
version='1.5.42',
|
||||
version='1.5.43',
|
||||
description='Distributed web crawling with browsers',
|
||||
url='https://github.com/internetarchive/brozzler',
|
||||
author='Noah Levitt',
|
||||
@@ -65,14 +65,14 @@ setuptools.setup(
|
||||
},
|
||||
install_requires=[
|
||||
'PyYAML>=5.1',
|
||||
'yt_dlp',
|
||||
'yt_dlp<2023.11.16',
|
||||
'reppy==0.3.4',
|
||||
'requests>=2.21',
|
||||
'websocket-client>=0.39.0,<=0.48.0',
|
||||
'pillow>=5.2.0',
|
||||
'urlcanon>=0.1.dev23',
|
||||
'doublethink @ git+https://github.com/internetarchive/doublethink.git@Py311',
|
||||
'rethinkdb>=2.4.9,<2.5',
|
||||
'rethinkdb<2.4.10',
|
||||
'cerberus>=1.0.1',
|
||||
'jinja2>=2.10',
|
||||
'cryptography>=2.3',
|
||||
|
Loading…
x
Reference in New Issue
Block a user