brozzler/setup.py

104 lines
3.9 KiB
Python
Raw Normal View History

#!/usr/bin/env python
'''
setup.py - brozzler setup script

Copyright (C) 2014-2018 Internet Archive
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
2016-04-25 20:02:11 +00:00
import setuptools
2016-07-29 12:56:10 -05:00
import os
def find_package_data(package):
    '''
    Collect the data files that live under a package's directory.

    The dotted `package` name is turned into a directory path relative to
    the current working directory and walked with `os.walk`. Every file in
    a directory that does not contain an `__init__.py` is reported;
    directories that do contain one are skipped (their subdirectories are
    still visited).

    Args:
        package: dotted package name, e.g. 'brozzler.dashboard'

    Returns:
        list of file paths relative to the package directory, in sorted
        traversal order; empty if the directory does not exist
    '''
    package_dir = os.path.join(*package.split('.'))
    pkg_data = []
    for dirpath, dirnames, filenames in os.walk(package_dir):
        # sorting in place makes os.walk descend in a stable order, so the
        # result does not depend on filesystem enumeration order
        dirnames.sort()
        if os.path.exists(os.path.join(dirpath, '__init__.py')):
            continue
        relpath = os.path.relpath(dirpath, package_dir)
        if relpath == os.curdir:
            # files sitting directly in the package directory: there is no
            # intermediate path component to prepend
            pkg_data.extend(sorted(filenames))
        else:
            pkg_data.extend(
                    os.path.join(relpath, f) for f in sorted(filenames))
    return pkg_data
2014-01-21 06:41:46 +00:00
# Read the README as bytes and decode explicitly so the text does not depend
# on the platform's default encoding; the `with` block closes the file.
with open('README.rst', mode='rb') as readme_file:
    long_description = readme_file.read().decode('UTF-8')

setuptools.setup(
    name='brozzler',
    version='1.5.dev308',
    description='Distributed web crawling with browsers',
    url='https://github.com/internetarchive/brozzler',
    author='Noah Levitt',
    author_email='nlevitt@archive.org',
    long_description=long_description,
    license='Apache License 2.0',
    packages=['brozzler', 'brozzler.dashboard'],
    # non-python files shipped inside each package
    package_data={
        'brozzler': [
            'js-templates/*.js*', 'behaviors.yaml', 'job_schema.yaml'],
        'brozzler.dashboard': find_package_data('brozzler.dashboard'),
    },
    # command line tools, each given as 'command=module:function'
    entry_points={
        'console_scripts': [
            'brozzle-page=brozzler.cli:brozzle_page',
            'brozzler-new-job=brozzler.cli:brozzler_new_job',
            'brozzler-new-site=brozzler.cli:brozzler_new_site',
            'brozzler-worker=brozzler.cli:brozzler_worker',
            'brozzler-ensure-tables=brozzler.cli:brozzler_ensure_tables',
            'brozzler-list-captures=brozzler.cli:brozzler_list_captures',
            'brozzler-list-jobs=brozzler.cli:brozzler_list_jobs',
            'brozzler-list-sites=brozzler.cli:brozzler_list_sites',
            'brozzler-list-pages=brozzler.cli:brozzler_list_pages',
            'brozzler-stop-crawl=brozzler.cli:brozzler_stop_crawl',
            'brozzler-purge=brozzler.cli:brozzler_purge',
            'brozzler-dashboard=brozzler.dashboard:main',
            'brozzler-easy=brozzler.easy:main',
            'brozzler-wayback=brozzler.pywb:main',
        ],
    },
    install_requires=[
        'PyYAML>=3.12',
        'youtube-dl>=2018.7.21',
        'reppy==0.3.4',
        'requests>=2.18.4',
        'websocket-client>=0.39.0,<=0.48.0',
        'pillow>=5.2.0',
        'urlcanon>=0.1.dev23',
        'doublethink>=0.2.0.dev90',
        'rethinkdb>=2.3',
        'cerberus>=1.0.1',
        'jinja2>=2.10',
        'cryptography>=2.3',
        'python-magic>=0.4.15',
    ],
    # optional dependency sets, installed with e.g. `brozzler[dashboard]`
    extras_require={
        'dashboard': [
            'flask>=0.11',
            'gunicorn>=19.8.1',
        ],
        'easy': [
            'warcprox>=2.4b2.dev173',
            'pywb>=0.33.2,<2',
            'flask>=0.11',
            'gunicorn>=19.8.1',
        ],
    },
    zip_safe=False,
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Environment :: Console',
        'License :: OSI Approved :: Apache Software License',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Topic :: Internet :: WWW/HTTP',
        'Topic :: System :: Archiving',
    ])