mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 00:29:53 -05:00
Merge pull request #268 from galgeek/skip_ytdlp4pdf
@avdempsey approved elsewhere, for the record
This commit is contained in:
commit
8afe9b5014
@ -198,7 +198,7 @@ class BrozzlerWorker:
|
|||||||
self.logger.info("brozzling {}".format(page))
|
self.logger.info("brozzling {}".format(page))
|
||||||
ydl_fetches = None
|
ydl_fetches = None
|
||||||
outlinks = set()
|
outlinks = set()
|
||||||
if enable_youtube_dl:
|
if enable_youtube_dl and not page.url.lower().endswith(".pdf"):
|
||||||
try:
|
try:
|
||||||
ydl_fetches, outlinks = ydl.do_youtube_dl(self, site, page)
|
ydl_fetches, outlinks = ydl.do_youtube_dl(self, site, page)
|
||||||
except brozzler.ReachedLimit as e:
|
except brozzler.ReachedLimit as e:
|
||||||
|
6
setup.py
6
setup.py
@ -32,7 +32,7 @@ def find_package_data(package):
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.5.42',
|
version='1.5.43',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
@ -65,14 +65,14 @@ setuptools.setup(
|
|||||||
},
|
},
|
||||||
install_requires=[
|
install_requires=[
|
||||||
'PyYAML>=5.1',
|
'PyYAML>=5.1',
|
||||||
'yt_dlp',
|
'yt_dlp<2023.11.16',
|
||||||
'reppy==0.3.4',
|
'reppy==0.3.4',
|
||||||
'requests>=2.21',
|
'requests>=2.21',
|
||||||
'websocket-client>=0.39.0,<=0.48.0',
|
'websocket-client>=0.39.0,<=0.48.0',
|
||||||
'pillow>=5.2.0',
|
'pillow>=5.2.0',
|
||||||
'urlcanon>=0.1.dev23',
|
'urlcanon>=0.1.dev23',
|
||||||
'doublethink @ git+https://github.com/internetarchive/doublethink.git@Py311',
|
'doublethink @ git+https://github.com/internetarchive/doublethink.git@Py311',
|
||||||
'rethinkdb>=2.4.9,<2.5',
|
'rethinkdb<2.4.10',
|
||||||
'cerberus>=1.0.1',
|
'cerberus>=1.0.1',
|
||||||
'jinja2>=2.10',
|
'jinja2>=2.10',
|
||||||
'cryptography>=2.3',
|
'cryptography>=2.3',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user