convert command-line executables to entry_points console_scripts, best practice according to Python Packaging Authority (eases testing, etc)

This commit is contained in:
Noah Levitt 2016-06-27 14:12:56 -05:00
parent e4f8efe376
commit 89474cb430
7 changed files with 291 additions and 330 deletions

View file

@ -1,79 +0,0 @@
#!/usr/bin/env python
#
# brozzle-page - command line utility for brozzling a single page, i.e. opening
# it in a browser, running some javascript behaviors, and printing outlinks
#
# Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import argparse
import os
import sys
import logging
import brozzler
import re
import warnings
import requests
import string
import datetime
arg_parser = argparse.ArgumentParser(prog=os.path.basename(__file__),
description="brozzle-page - brozzle a single page",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
arg_parser.add_argument('url', metavar='URL', help='page url')
arg_parser.add_argument('-e', '--executable', dest='chrome_exe', default='chromium-browser',
help='executable to use to invoke chrome')
arg_parser.add_argument("--proxy", dest="proxy", default=None, help="http proxy for this site")
arg_parser.add_argument('--enable-warcprox-features', dest='enable_warcprox_features',
action='store_true', help='enable special features for this site that assume the configured proxy is warcprox')
arg_parser.add_argument("-v", "--verbose", dest="log_level",
action="store_const", default=logging.INFO, const=logging.DEBUG)
arg_parser.add_argument("--version", action="version",
version="brozzler {} - {}".format(brozzler.__version__, os.path.basename(__file__)))
args = arg_parser.parse_args(args=sys.argv[1:])
logging.basicConfig(stream=sys.stdout, level=args.log_level,
format="%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s")
logging.getLogger("requests.packages.urllib3").setLevel(logging.WARN)
warnings.simplefilter("ignore", category=requests.packages.urllib3.exceptions.InsecureRequestWarning)
warnings.simplefilter("ignore", category=requests.packages.urllib3.exceptions.InsecurePlatformWarning)
site = brozzler.Site(
id=-1, seed=args.url, proxy=args.proxy,
enable_warcprox_features=args.enable_warcprox_features)
page = brozzler.Page(url=args.url, site_id=site.id)
worker = brozzler.BrozzlerWorker(frontier=None)
ydl = worker._youtube_dl(site)
def on_screenshot(screenshot_png):
OK_CHARS = (string.ascii_letters + string.digits)
filename = "/tmp/{}-{:%Y%m%d%H%M%S}.png".format(
"".join(ch if ch in OK_CHARS else "_" for ch in args.url),
datetime.datetime.now())
# logging.info("len(screenshot_png)=%s", len(screenshot_png))
with open(filename, 'wb') as f:
f.write(screenshot_png)
logging.info("wrote screenshot to %s", filename)
browser = brozzler.Browser(chrome_exe=args.chrome_exe)
browser.start(proxy=site.proxy)
try:
outlinks = worker.brozzle_page(
browser, ydl, site, page, on_screenshot=on_screenshot)
logging.info("outlinks: \n\t%s", "\n\t".join(sorted(outlinks)))
except brozzler.ReachedLimit as e:
logging.error("reached limit %s", e)
finally:
browser.stop()

View file

@ -1,56 +0,0 @@
#!/usr/bin/env python
#
# brozzler-new-job - takes a yaml brozzler job configuration file, creates
# job, sites, and pages objects in rethinkdb, which brozzler-workers will look
# at and start crawling
#
# Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import argparse
import os
import sys
import logging
import brozzler
import yaml
import json
import rethinkstuff
import warnings
import requests
arg_parser = argparse.ArgumentParser(prog=os.path.basename(__file__),
description="brozzler-new-job - queue new job with brozzler",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
arg_parser.add_argument('job_conf_file', metavar='JOB_CONF_FILE', help='brozzler job configuration file in yaml')
arg_parser.add_argument('--rethinkdb-servers', dest='rethinkdb_servers', default="localhost",
help='rethinkdb servers, e.g. db0.foo.org,db0.foo.org:38015,db1.foo.org')
arg_parser.add_argument('--rethinkdb-db', dest='rethinkdb_db', default="brozzler",
help='rethinkdb database name')
arg_parser.add_argument("-v", "--verbose", dest="log_level",
action="store_const", default=logging.INFO, const=logging.DEBUG)
arg_parser.add_argument("--version", action="version",
version="brozzler {} - {}".format(brozzler.__version__, os.path.basename(__file__)))
args = arg_parser.parse_args(args=sys.argv[1:])
logging.basicConfig(stream=sys.stdout, level=args.log_level,
format="%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s")
logging.getLogger("requests.packages.urllib3").setLevel(logging.WARN)
warnings.simplefilter("ignore", category=requests.packages.urllib3.exceptions.InsecureRequestWarning)
warnings.simplefilter("ignore", category=requests.packages.urllib3.exceptions.InsecurePlatformWarning)
r = rethinkstuff.Rethinker(args.rethinkdb_servers.split(","), args.rethinkdb_db)
frontier = brozzler.RethinkDbFrontier(r)
brozzler.job.new_job_file(frontier, args.job_conf_file)

View file

@ -1,72 +0,0 @@
#!/usr/bin/env python
#
# brozzler-new-site - takes a seed url and creates a site and page object in
# rethinkdb, which brozzler-workers will look at and start crawling
#
# Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import argparse
import os
import sys
import logging
import brozzler
import re
import rethinkstuff
import warnings
import requests
import json
arg_parser = argparse.ArgumentParser(prog=os.path.basename(__file__),
description="brozzler-new-site - register site to brozzle",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
arg_parser.add_argument('seed', metavar='SEED', help='seed url')
arg_parser.add_argument('--rethinkdb-servers', dest='rethinkdb_servers', default="localhost",
help='rethinkdb servers, e.g. db0.foo.org,db0.foo.org:38015,db1.foo.org')
arg_parser.add_argument('--rethinkdb-db', dest='rethinkdb_db', default="brozzler",
help='rethinkdb database name')
arg_parser.add_argument("--proxy", dest="proxy", default=None, help="http proxy for this site")
arg_parser.add_argument("--time-limit", dest="time_limit", default=None, help="time limit in seconds for this site")
arg_parser.add_argument("--ignore-robots", dest="ignore_robots",
action="store_true", help="ignore robots.txt for this site")
arg_parser.add_argument('--enable-warcprox-features', dest='enable_warcprox_features',
action='store_true', help='enable special features for this site that assume the configured proxy is warcprox')
arg_parser.add_argument(
'--warcprox-meta', dest='warcprox_meta',
help='Warcprox-Meta http request header to send with each request; '
'must be a json blob, ignored unless warcprox features are enabled')
arg_parser.add_argument("-v", "--verbose", dest="log_level",
action="store_const", default=logging.INFO, const=logging.DEBUG)
arg_parser.add_argument("--version", action="version",
version="brozzler {} - {}".format(brozzler.__version__, os.path.basename(__file__)))
args = arg_parser.parse_args(args=sys.argv[1:])
logging.basicConfig(stream=sys.stdout, level=args.log_level,
format="%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s")
logging.getLogger("requests.packages.urllib3").setLevel(logging.WARN)
warnings.simplefilter("ignore", category=requests.packages.urllib3.exceptions.InsecureRequestWarning)
warnings.simplefilter("ignore", category=requests.packages.urllib3.exceptions.InsecurePlatformWarning)
site = brozzler.Site(
seed=args.seed, proxy=args.proxy,
time_limit=int(args.time_limit) if args.time_limit else None,
ignore_robots=args.ignore_robots,
enable_warcprox_features=args.enable_warcprox_features,
warcprox_meta=json.loads(args.warcprox_meta) if args.warcprox_meta else None)
r = rethinkstuff.Rethinker(args.rethinkdb_servers.split(","), args.rethinkdb_db)
frontier = brozzler.RethinkDbFrontier(r)
brozzler.new_site(frontier, site)

View file

@ -1,101 +0,0 @@
#!/usr/bin/env python
#
# brozzler-worker - main entrypoint for brozzler, gets sites and pages to
# brozzle from rethinkdb, brozzles them
#
# Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import argparse
import os
import sys
import logging
import brozzler
import brozzler.worker
import threading
import time
import signal
import pprint
import traceback
import rethinkstuff
import warnings
import requests
arg_parser = argparse.ArgumentParser(prog=os.path.basename(__file__),
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
arg_parser.add_argument('--rethinkdb-servers', dest='rethinkdb_servers', default="localhost",
help='rethinkdb servers, e.g. db0.foo.org,db0.foo.org:38015,db1.foo.org')
arg_parser.add_argument('--rethinkdb-db', dest='rethinkdb_db', default="brozzler",
help='rethinkdb database name')
arg_parser.add_argument('-e', '--executable', dest='chrome_exe', default='chromium-browser',
help='executable to use to invoke chrome')
arg_parser.add_argument('-n', '--max-browsers', dest='max_browsers', default='1',
help='max number of chrome instances simultaneously browsing pages')
arg_parser.add_argument('-v', '--verbose', dest='log_level',
action="store_const", default=logging.INFO, const=logging.DEBUG)
arg_parser.add_argument('--version', action='version',
version="brozzler {} - {}".format(brozzler.__version__, os.path.basename(__file__)))
args = arg_parser.parse_args(args=sys.argv[1:])
logging.basicConfig(stream=sys.stdout, level=args.log_level,
format='%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s')
logging.getLogger("requests.packages.urllib3").setLevel(logging.WARN)
warnings.simplefilter("ignore", category=requests.packages.urllib3.exceptions.InsecureRequestWarning)
warnings.simplefilter("ignore", category=requests.packages.urllib3.exceptions.InsecurePlatformWarning)
def sigterm(signum, frame):
raise brozzler.ShutdownRequested('shutdown requested (caught SIGTERM)')
def sigint(signum, frame):
raise brozzler.ShutdownRequested('shutdown requested (caught SIGINT)')
def dump_state(signum, frame):
pp = pprint.PrettyPrinter(indent=4)
state_strs = []
for th in threading.enumerate():
state_strs.append(str(th))
stack = traceback.format_stack(sys._current_frames()[th.ident])
state_strs.append("".join(stack))
logging.warn("dumping state (caught signal {})\n{}".format(
signum, "\n".join(state_strs)))
signal.signal(signal.SIGQUIT, dump_state)
signal.signal(signal.SIGTERM, sigterm)
signal.signal(signal.SIGINT, sigint)
r = rethinkstuff.Rethinker(args.rethinkdb_servers.split(","), args.rethinkdb_db)
frontier = brozzler.RethinkDbFrontier(r)
service_registry = rethinkstuff.ServiceRegistry(r)
worker = brozzler.worker.BrozzlerWorker(
frontier, service_registry, max_browsers=int(args.max_browsers),
chrome_exe=args.chrome_exe)
worker_thread = worker.start()
try:
while worker_thread.is_alive():
time.sleep(0.5)
logging.critical("worker thread has died, shutting down")
except brozzler.ShutdownRequested as e:
pass
finally:
worker.shutdown_now()
for th in threading.enumerate():
if th != threading.current_thread():
th.join()
logging.info("all done, exiting")

266
brozzler/cli.py Normal file
View file

@ -0,0 +1,266 @@
#!/usr/bin/env python
'''
brozzler/cli.py - brozzler command line executables
Copyright (C) 2014-2016 Internet Archive
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
import argparse
import brozzler
import brozzler.worker
import datetime
import json
import logging
import os
import pprint
import re
import requests
import rethinkstuff
import signal
import string
import sys
import threading
import time
import traceback
import warnings
import yaml
def _add_common_options(arg_parser):
arg_parser.add_argument(
'-v', '--verbose', dest='log_level',
action='store_const', default=logging.INFO, const=logging.DEBUG)
arg_parser.add_argument(
'--version', action='version',
version='brozzler %s - %s' % (
brozzler.__version__, os.path.basename(sys.argv[0])))
def _add_rethinkdb_options(arg_parser):
arg_parser.add_argument(
'--rethinkdb-servers', dest='rethinkdb_servers',
default='localhost', help=(
'rethinkdb servers, e.g. '
'db0.foo.org,db0.foo.org:38015,db1.foo.org'))
arg_parser.add_argument(
'--rethinkdb-db', dest='rethinkdb_db', default='brozzler',
help='rethinkdb database name')
def _add_proxy_options(arg_parser):
arg_parser.add_argument(
'--proxy', dest='proxy', default=None, help='http proxy')
arg_parser.add_argument(
'--enable-warcprox-features', dest='enable_warcprox_features',
action='store_true', help=(
'enable special features that assume the configured proxy is '
'warcprox'))
def _configure_logging(args):
logging.basicConfig(
stream=sys.stderr, level=args.log_level,
format=(
'%(asctime)s %(process)d %(levelname)s %(threadName)s '
'%(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s'))
logging.getLogger('requests.packages.urllib3').setLevel(logging.WARN)
warnings.simplefilter(
'ignore', category=requests.packages.urllib3.exceptions.InsecureRequestWarning)
warnings.simplefilter(
'ignore', category=requests.packages.urllib3.exceptions.InsecurePlatformWarning)
def brozzle_page():
'''
Command line utility entry point for brozzling a single page. Opens url in
a browser, running some javascript behaviors, and prints outlinks.
'''
arg_parser = argparse.ArgumentParser(
prog=os.path.basename(sys.argv[0]),
description='brozzle-page - brozzle a single page',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
arg_parser.add_argument('url', metavar='URL', help='page url')
arg_parser.add_argument(
'-e', '--executable', dest='chrome_exe', default='chromium-browser',
help='executable to use to invoke chrome')
arg_parser.add_argument(
'--proxy', dest='proxy', default=None,
help='http proxy')
arg_parser.add_argument(
'--enable-warcprox-features', dest='enable_warcprox_features',
action='store_true', help=(
'enable special features that assume the configured proxy '
'is warcprox'))
_add_common_options(arg_parser)
args = arg_parser.parse_args(args=sys.argv[1:])
_configure_logging(args)
site = brozzler.Site(
id=-1, seed=args.url, proxy=args.proxy,
enable_warcprox_features=args.enable_warcprox_features)
page = brozzler.Page(url=args.url, site_id=site.id)
worker = brozzler.BrozzlerWorker(frontier=None)
ydl = worker._youtube_dl(site)
def on_screenshot(screenshot_png):
OK_CHARS = (string.ascii_letters + string.digits)
filename = '/tmp/{}-{:%Y%m%d%H%M%S}.png'.format(
''.join(ch if ch in OK_CHARS else '_' for ch in args.url),
datetime.datetime.now())
# logging.info('len(screenshot_png)=%s', len(screenshot_png))
with open(filename, 'wb') as f:
f.write(screenshot_png)
logging.info('wrote screenshot to %s', filename)
browser = brozzler.Browser(chrome_exe=args.chrome_exe)
browser.start(proxy=site.proxy)
try:
outlinks = worker.brozzle_page(
browser, ydl, site, page, on_screenshot=on_screenshot)
logging.info('outlinks: \n\t%s', '\n\t'.join(sorted(outlinks)))
except brozzler.ReachedLimit as e:
logging.error('reached limit %s', e)
finally:
browser.stop()
def brozzler_new_job():
'''
Command line utility entry point for queuing a new brozzler job. Takes a
yaml brozzler job configuration file, creates job, sites, and pages objects
in rethinkdb, which brozzler-workers will look at and start crawling.
'''
arg_parser = argparse.ArgumentParser(
prog=os.path.basename(sys.argv[0]),
description='brozzler-new-job - queue new job with brozzler',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
arg_parser.add_argument(
'job_conf_file', metavar='JOB_CONF_FILE',
help='brozzler job configuration file in yaml')
_add_rethinkdb_options(arg_parser)
_add_common_options(arg_parser)
args = arg_parser.parse_args(args=sys.argv[1:])
_configure_logging(args)
r = rethinkstuff.Rethinker(
args.rethinkdb_servers.split(','), args.rethinkdb_db)
frontier = brozzler.RethinkDbFrontier(r)
brozzler.job.new_job_file(frontier, args.job_conf_file)
def brozzler_new_site():
'''
Command line utility entry point for queuing a new brozzler site.
Takes a seed url and creates a site and page object in rethinkdb, which
brozzler-workers will look at and start crawling.
'''
arg_parser = argparse.ArgumentParser(
prog=os.path.basename(sys.argv[0]),
description='brozzler-new-site - register site to brozzle',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
arg_parser.add_argument('seed', metavar='SEED', help='seed url')
_add_rethinkdb_options(arg_parser)
_add_proxy_options(arg_parser)
arg_parser.add_argument(
'--time-limit', dest='time_limit', default=None,
help='time limit in seconds for this site')
arg_parser.add_argument(
'--ignore-robots', dest='ignore_robots', action='store_true',
help='ignore robots.txt for this site')
arg_parser.add_argument(
'--warcprox-meta', dest='warcprox_meta',
help=(
'Warcprox-Meta http request header to send with each request; '
'must be a json blob, ignored unless warcprox features are '
'enabled'))
_add_common_options(arg_parser)
args = arg_parser.parse_args(args=sys.argv[1:])
_configure_logging(args)
site = brozzler.Site(
seed=args.seed, proxy=args.proxy,
time_limit=int(args.time_limit) if args.time_limit else None,
ignore_robots=args.ignore_robots,
enable_warcprox_features=args.enable_warcprox_features,
warcprox_meta=(
json.loads(args.warcprox_meta) if args.warcprox_meta else None))
r = rethinkstuff.Rethinker(
args.rethinkdb_servers.split(","), args.rethinkdb_db)
frontier = brozzler.RethinkDbFrontier(r)
brozzler.new_site(frontier, site)
def brozzler_worker():
'''
Main entrypoint for brozzler, gets sites and pages to brozzle from
rethinkdb, brozzles them.
'''
arg_parser = argparse.ArgumentParser(
prog=os.path.basename(__file__),
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
_add_rethinkdb_options(arg_parser)
arg_parser.add_argument(
'-e', '--executable', dest='chrome_exe', default='chromium-browser',
help='executable to use to invoke chrome')
arg_parser.add_argument(
'-n', '--max-browsers', dest='max_browsers', default='1',
help='max number of chrome instances simultaneously browsing pages')
_add_common_options(arg_parser)
args = arg_parser.parse_args(args=sys.argv[1:])
_configure_logging(args)
def sigterm(signum, frame):
raise brozzler.ShutdownRequested('shutdown requested (caught SIGTERM)')
def sigint(signum, frame):
raise brozzler.ShutdownRequested('shutdown requested (caught SIGINT)')
def dump_state(signum, frame):
pp = pprint.PrettyPrinter(indent=4)
state_strs = []
for th in threading.enumerate():
state_strs.append(str(th))
stack = traceback.format_stack(sys._current_frames()[th.ident])
state_strs.append("".join(stack))
logging.warn("dumping state (caught signal {})\n{}".format(
signum, "\n".join(state_strs)))
signal.signal(signal.SIGQUIT, dump_state)
signal.signal(signal.SIGTERM, sigterm)
signal.signal(signal.SIGINT, sigint)
r = rethinkstuff.Rethinker(
args.rethinkdb_servers.split(","), args.rethinkdb_db)
frontier = brozzler.RethinkDbFrontier(r)
service_registry = rethinkstuff.ServiceRegistry(r)
worker = brozzler.worker.BrozzlerWorker(
frontier, service_registry, max_browsers=int(args.max_browsers),
chrome_exe=args.chrome_exe)
worker_thread = worker.start()
try:
while worker_thread.is_alive():
time.sleep(0.5)
logging.critical("worker thread has died, shutting down")
except brozzler.ShutdownRequested as e:
pass
finally:
worker.shutdown_now()
for th in threading.enumerate():
if th != threading.current_thread():
th.join()
logging.info("brozzler-worker is all done, exiting")

View file

@ -1,5 +1,5 @@
''' '''
brozzler-webconsole/__init__.py - flask app for brozzler web console, defines brozzler/webconsole/__init__.py - flask app for brozzler web console, defines
api endspoints etc api endspoints etc
Copyright (C) 2014-2016 Internet Archive Copyright (C) 2014-2016 Internet Archive

View file

@ -1,27 +1,27 @@
# #!/usr/bin/env python
# setup.py - brozzler setup script '''
# setup.py - brozzler setup script
# Copyright (C) 2014-2016 Internet Archive
# Copyright (C) 2014-2016 Internet Archive
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. Licensed under the Apache License, Version 2.0 (the "License");
# You may obtain a copy of the License at you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, Unless required by applicable law or agreed to in writing, software
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# limitations under the License. See the License for the specific language governing permissions and
# limitations under the License.
'''
import setuptools import setuptools
import glob
setuptools.setup( setuptools.setup(
name='brozzler', name='brozzler',
version='1.1.dev21', version='1.1.dev22',
description='Distributed web crawling with browsers', description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler', url='https://github.com/internetarchive/brozzler',
author='Noah Levitt', author='Noah Levitt',
@ -30,10 +30,13 @@ setuptools.setup(
license='Apache License 2.0', license='Apache License 2.0',
packages=['brozzler'], packages=['brozzler'],
package_data={'brozzler': ['behaviors.d/*.js*', 'behaviors.yaml']}, package_data={'brozzler': ['behaviors.d/*.js*', 'behaviors.yaml']},
scripts=glob.glob('bin/*'),
entry_points={ entry_points={
'console_scripts': [ 'console_scripts': [
'brozzler-webconsole = brozzler.webconsole:run', 'brozzle-page=brozzler.cli:brozzle_page',
'brozzler-new-job=brozzler.cli:brozzler_new_job',
'brozzler-new-site=brozzler.cli:brozzler_new_site',
'brozzler-worker=brozzler.cli:brozzler_worker',
'brozzler-webconsole=brozzler.webconsole:run',
], ],
}, },
install_requires=[ install_requires=[