parameterize command line entry points and add tests of --version, a rudimentary check that the commands at least run

This commit is contained in:
Noah Levitt 2017-04-14 11:46:26 -07:00
parent b3cf746f53
commit fae60e9960
5 changed files with 121 additions and 49 deletions

View File

@ -39,7 +39,8 @@ import shutil
import base64 import base64
import rethinkdb as r import rethinkdb as r
def add_common_options(arg_parser): def add_common_options(arg_parser, argv=None):
argv = argv or sys.argv
arg_parser.add_argument( arg_parser.add_argument(
'-q', '--quiet', dest='log_level', action='store_const', '-q', '--quiet', dest='log_level', action='store_const',
default=logging.INFO, const=logging.WARN, help=( default=logging.INFO, const=logging.WARN, help=(
@ -58,7 +59,7 @@ def add_common_options(arg_parser):
arg_parser.add_argument( arg_parser.add_argument(
'--version', action='version', '--version', action='version',
version='brozzler %s - %s' % ( version='brozzler %s - %s' % (
brozzler.__version__, os.path.basename(sys.argv[0]))) brozzler.__version__, os.path.basename(argv[0])))
def add_rethinkdb_options(arg_parser): def add_rethinkdb_options(arg_parser):
arg_parser.add_argument( arg_parser.add_argument(
@ -124,13 +125,14 @@ class BetterArgumentDefaultsHelpFormatter(
else: else:
return super()._get_help_string(action) return super()._get_help_string(action)
def brozzle_page(): def brozzle_page(argv=None):
''' '''
Command line utility entry point for brozzling a single page. Opens url in Command line utility entry point for brozzling a single page. Opens url in
a browser, running some javascript behaviors, and prints outlinks. a browser, running some javascript behaviors, and prints outlinks.
''' '''
argv = argv or sys.argv
arg_parser = argparse.ArgumentParser( arg_parser = argparse.ArgumentParser(
prog=os.path.basename(sys.argv[0]), prog=os.path.basename(argv[0]),
description='brozzle-page - brozzle a single page', description='brozzle-page - brozzle a single page',
formatter_class=BetterArgumentDefaultsHelpFormatter) formatter_class=BetterArgumentDefaultsHelpFormatter)
arg_parser.add_argument('url', metavar='URL', help='page url') arg_parser.add_argument('url', metavar='URL', help='page url')
@ -152,9 +154,9 @@ def brozzle_page():
help='use this password to try to log in if a login form is found') help='use this password to try to log in if a login form is found')
arg_parser.add_argument( arg_parser.add_argument(
'--proxy', dest='proxy', default=None, help='http proxy') '--proxy', dest='proxy', default=None, help='http proxy')
add_common_options(arg_parser) add_common_options(arg_parser, argv)
args = arg_parser.parse_args(args=sys.argv[1:]) args = arg_parser.parse_args(args=argv[1:])
configure_logging(args) configure_logging(args)
behavior_parameters = {} behavior_parameters = {}
@ -187,23 +189,24 @@ def brozzle_page():
finally: finally:
browser.stop() browser.stop()
def brozzler_new_job(): def brozzler_new_job(argv=None):
''' '''
Command line utility entry point for queuing a new brozzler job. Takes a Command line utility entry point for queuing a new brozzler job. Takes a
yaml brozzler job configuration file, creates job, sites, and pages objects yaml brozzler job configuration file, creates job, sites, and pages objects
in rethinkdb, which brozzler-workers will look at and start crawling. in rethinkdb, which brozzler-workers will look at and start crawling.
''' '''
argv = argv or sys.argv
arg_parser = argparse.ArgumentParser( arg_parser = argparse.ArgumentParser(
prog=os.path.basename(sys.argv[0]), prog=os.path.basename(argv[0]),
description='brozzler-new-job - queue new job with brozzler', description='brozzler-new-job - queue new job with brozzler',
formatter_class=BetterArgumentDefaultsHelpFormatter) formatter_class=BetterArgumentDefaultsHelpFormatter)
arg_parser.add_argument( arg_parser.add_argument(
'job_conf_file', metavar='JOB_CONF_FILE', 'job_conf_file', metavar='JOB_CONF_FILE',
help='brozzler job configuration file in yaml') help='brozzler job configuration file in yaml')
add_rethinkdb_options(arg_parser) add_rethinkdb_options(arg_parser)
add_common_options(arg_parser) add_common_options(arg_parser, argv)
args = arg_parser.parse_args(args=sys.argv[1:]) args = arg_parser.parse_args(args=argv[1:])
configure_logging(args) configure_logging(args)
rr = rethinker(args) rr = rethinker(args)
@ -215,14 +218,15 @@ def brozzler_new_job():
print(' ' + yaml.dump(e.errors).rstrip().replace('\n', '\n '), file=sys.stderr) print(' ' + yaml.dump(e.errors).rstrip().replace('\n', '\n '), file=sys.stderr)
sys.exit(1) sys.exit(1)
def brozzler_new_site(): def brozzler_new_site(argv=None):
''' '''
Command line utility entry point for queuing a new brozzler site. Command line utility entry point for queuing a new brozzler site.
Takes a seed url and creates a site and page object in rethinkdb, which Takes a seed url and creates a site and page object in rethinkdb, which
brozzler-workers will look at and start crawling. brozzler-workers will look at and start crawling.
''' '''
argv = argv or sys.argv
arg_parser = argparse.ArgumentParser( arg_parser = argparse.ArgumentParser(
prog=os.path.basename(sys.argv[0]), prog=os.path.basename(argv[0]),
description='brozzler-new-site - register site to brozzle', description='brozzler-new-site - register site to brozzle',
formatter_class=BetterArgumentDefaultsHelpFormatter) formatter_class=BetterArgumentDefaultsHelpFormatter)
arg_parser.add_argument('seed', metavar='SEED', help='seed url') arg_parser.add_argument('seed', metavar='SEED', help='seed url')
@ -251,9 +255,9 @@ def brozzler_new_site():
arg_parser.add_argument( arg_parser.add_argument(
'--password', dest='password', default=None, '--password', dest='password', default=None,
help='use this password to try to log in if a login form is found') help='use this password to try to log in if a login form is found')
add_common_options(arg_parser) add_common_options(arg_parser, argv)
args = arg_parser.parse_args(args=sys.argv[1:]) args = arg_parser.parse_args(args=argv[1:])
configure_logging(args) configure_logging(args)
rr = rethinker(args) rr = rethinker(args)
@ -271,13 +275,14 @@ def brozzler_new_site():
frontier = brozzler.RethinkDbFrontier(rr) frontier = brozzler.RethinkDbFrontier(rr)
brozzler.new_site(frontier, site) brozzler.new_site(frontier, site)
def brozzler_worker(): def brozzler_worker(argv=None):
''' '''
Main entry point for brozzler, gets sites and pages to brozzle from Main entry point for brozzler, gets sites and pages to brozzle from
rethinkdb, brozzles them. rethinkdb, brozzles them.
''' '''
argv = argv or sys.argv
arg_parser = argparse.ArgumentParser( arg_parser = argparse.ArgumentParser(
prog=os.path.basename(sys.argv[0]), prog=os.path.basename(argv[0]),
formatter_class=BetterArgumentDefaultsHelpFormatter) formatter_class=BetterArgumentDefaultsHelpFormatter)
add_rethinkdb_options(arg_parser) add_rethinkdb_options(arg_parser)
arg_parser.add_argument( arg_parser.add_argument(
@ -294,9 +299,9 @@ def brozzler_worker():
help=( help=(
'when needed, choose an available instance of warcprox from ' 'when needed, choose an available instance of warcprox from '
'the rethinkdb service registry')) 'the rethinkdb service registry'))
add_common_options(arg_parser) add_common_options(arg_parser, argv)
args = arg_parser.parse_args(args=sys.argv[1:]) args = arg_parser.parse_args(args=argv[1:])
configure_logging(args) configure_logging(args)
def sigterm(signum, frame): def sigterm(signum, frame):
@ -341,7 +346,7 @@ def brozzler_worker():
logging.info('brozzler-worker is all done, exiting') logging.info('brozzler-worker is all done, exiting')
def brozzler_ensure_tables(): def brozzler_ensure_tables(argv=None):
''' '''
Creates rethinkdb tables if they don't already exist. Brozzler Creates rethinkdb tables if they don't already exist. Brozzler
(brozzler-worker, brozzler-new-job, etc) normally creates the tables it (brozzler-worker, brozzler-new-job, etc) normally creates the tables it
@ -349,13 +354,14 @@ def brozzler_ensure_tables():
the same time, you can end up with duplicate broken tables. So it's a good the same time, you can end up with duplicate broken tables. So it's a good
idea to use this utility at an early step when spinning up a cluster. idea to use this utility at an early step when spinning up a cluster.
''' '''
argv = argv or sys.argv
arg_parser = argparse.ArgumentParser( arg_parser = argparse.ArgumentParser(
prog=os.path.basename(sys.argv[0]), prog=os.path.basename(argv[0]),
formatter_class=BetterArgumentDefaultsHelpFormatter) formatter_class=BetterArgumentDefaultsHelpFormatter)
add_rethinkdb_options(arg_parser) add_rethinkdb_options(arg_parser)
add_common_options(arg_parser) add_common_options(arg_parser, argv)
args = arg_parser.parse_args(args=sys.argv[1:]) args = arg_parser.parse_args(args=argv[1:])
configure_logging(args) configure_logging(args)
rr = rethinker(args) rr = rethinker(args)
@ -375,9 +381,10 @@ class Jsonner(json.JSONEncoder):
else: else:
return json.JSONEncoder.default(self, o) return json.JSONEncoder.default(self, o)
def brozzler_list_jobs(): def brozzler_list_jobs(argv=None):
argv = argv or sys.argv
arg_parser = argparse.ArgumentParser( arg_parser = argparse.ArgumentParser(
prog=os.path.basename(sys.argv[0]), prog=os.path.basename(argv[0]),
formatter_class=BetterArgumentDefaultsHelpFormatter) formatter_class=BetterArgumentDefaultsHelpFormatter)
arg_parser.add_argument( arg_parser.add_argument(
'--yaml', dest='yaml', action='store_true', help=( '--yaml', dest='yaml', action='store_true', help=(
@ -393,9 +400,9 @@ def brozzler_list_jobs():
'--job', dest='job', metavar='JOB_ID', help=( '--job', dest='job', metavar='JOB_ID', help=(
'list only the specified job')) 'list only the specified job'))
add_rethinkdb_options(arg_parser) add_rethinkdb_options(arg_parser)
add_common_options(arg_parser) add_common_options(arg_parser, argv)
args = arg_parser.parse_args(args=sys.argv[1:]) args = arg_parser.parse_args(args=argv[1:])
configure_logging(args) configure_logging(args)
rr = rethinker(args) rr = rethinker(args)
@ -426,9 +433,10 @@ def brozzler_list_jobs():
for result in results: for result in results:
print(json.dumps(result, cls=Jsonner, indent=2)) print(json.dumps(result, cls=Jsonner, indent=2))
def brozzler_list_sites(): def brozzler_list_sites(argv=None):
argv = argv or sys.argv
arg_parser = argparse.ArgumentParser( arg_parser = argparse.ArgumentParser(
prog=os.path.basename(sys.argv[0]), prog=os.path.basename(argv[0]),
formatter_class=BetterArgumentDefaultsHelpFormatter) formatter_class=BetterArgumentDefaultsHelpFormatter)
arg_parser.add_argument( arg_parser.add_argument(
'--yaml', dest='yaml', action='store_true', help=( '--yaml', dest='yaml', action='store_true', help=(
@ -450,9 +458,9 @@ def brozzler_list_sites():
'--all', dest='all', action='store_true', help=( '--all', dest='all', action='store_true', help=(
'list all sites')) 'list all sites'))
add_rethinkdb_options(arg_parser) add_rethinkdb_options(arg_parser)
add_common_options(arg_parser) add_common_options(arg_parser, argv)
args = arg_parser.parse_args(args=sys.argv[1:]) args = arg_parser.parse_args(args=argv[1:])
configure_logging(args) configure_logging(args)
rr = rethinker(args) rr = rethinker(args)
@ -478,9 +486,10 @@ def brozzler_list_sites():
for result in results: for result in results:
print(json.dumps(result, cls=Jsonner, indent=2)) print(json.dumps(result, cls=Jsonner, indent=2))
def brozzler_list_pages(): def brozzler_list_pages(argv=None):
argv = argv or sys.argv
arg_parser = argparse.ArgumentParser( arg_parser = argparse.ArgumentParser(
prog=os.path.basename(sys.argv[0]), prog=os.path.basename(argv[0]),
formatter_class=BetterArgumentDefaultsHelpFormatter) formatter_class=BetterArgumentDefaultsHelpFormatter)
arg_parser.add_argument( arg_parser.add_argument(
'--yaml', dest='yaml', action='store_true', help=( '--yaml', dest='yaml', action='store_true', help=(
@ -507,9 +516,9 @@ def brozzler_list_pages():
'limit to pages that are currently claimed by a brozzler ' 'limit to pages that are currently claimed by a brozzler '
'worker')) 'worker'))
add_rethinkdb_options(arg_parser) add_rethinkdb_options(arg_parser)
add_common_options(arg_parser) add_common_options(arg_parser, argv)
args = arg_parser.parse_args(args=sys.argv[1:]) args = arg_parser.parse_args(args=argv[1:])
configure_logging(args) configure_logging(args)
rr = rethinker(args) rr = rethinker(args)
@ -554,15 +563,16 @@ def brozzler_list_pages():
for result in results: for result in results:
print(json.dumps(result, cls=Jsonner, indent=2)) print(json.dumps(result, cls=Jsonner, indent=2))
def brozzler_list_captures(): def brozzler_list_captures(argv=None):
''' '''
Handy utility for looking up entries in the rethinkdb "captures" table by Handy utility for looking up entries in the rethinkdb "captures" table by
url or sha1. url or sha1.
''' '''
import urlcanon import urlcanon
argv = argv or sys.argv
arg_parser = argparse.ArgumentParser( arg_parser = argparse.ArgumentParser(
prog=os.path.basename(sys.argv[0]), prog=os.path.basename(argv[0]),
formatter_class=BetterArgumentDefaultsHelpFormatter) formatter_class=BetterArgumentDefaultsHelpFormatter)
arg_parser.add_argument( arg_parser.add_argument(
'-p', '--prefix', dest='prefix', action='store_true', help=( '-p', '--prefix', dest='prefix', action='store_true', help=(
@ -573,12 +583,12 @@ def brozzler_list_captures():
'--yaml', dest='yaml', action='store_true', help=( '--yaml', dest='yaml', action='store_true', help=(
'yaml output (default is json)')) 'yaml output (default is json)'))
add_rethinkdb_options(arg_parser) add_rethinkdb_options(arg_parser)
add_common_options(arg_parser) add_common_options(arg_parser, argv)
arg_parser.add_argument( arg_parser.add_argument(
'url_or_sha1', metavar='URL_or_SHA1', 'url_or_sha1', metavar='URL_or_SHA1',
help='url or sha1 to look up in captures table') help='url or sha1 to look up in captures table')
args = arg_parser.parse_args(args=sys.argv[1:]) args = arg_parser.parse_args(args=argv[1:])
configure_logging(args) configure_logging(args)
rr = rethinker(args) rr = rethinker(args)

View File

@ -270,11 +270,12 @@ except ImportError:
logging.info("running brozzler-dashboard using simple flask app.run") logging.info("running brozzler-dashboard using simple flask app.run")
app.run() app.run()
def main(): def main(argv=None):
import argparse import argparse
import brozzler.cli import brozzler.cli
argv = argv or sys.argv
arg_parser = argparse.ArgumentParser( arg_parser = argparse.ArgumentParser(
prog=os.path.basename(sys.argv[0]), prog=os.path.basename(argv[0]),
formatter_class=argparse.RawDescriptionHelpFormatter, formatter_class=argparse.RawDescriptionHelpFormatter,
description=( description=(
'brozzler-dashboard - web application for viewing brozzler ' 'brozzler-dashboard - web application for viewing brozzler '
@ -289,8 +290,8 @@ def main():
'(default: brozzler)\n' '(default: brozzler)\n'
' WAYBACK_BASEURL base url for constructing wayback ' ' WAYBACK_BASEURL base url for constructing wayback '
'links (default http://localhost:8880/brozzler)')) 'links (default http://localhost:8880/brozzler)'))
brozzler.cli.add_common_options(arg_parser) brozzler.cli.add_common_options(arg_parser, argv)
args = arg_parser.parse_args(args=sys.argv[1:]) args = arg_parser.parse_args(args=argv[1:])
brozzler.cli.configure_logging(args) brozzler.cli.configure_logging(args)
run() run()

View File

@ -46,10 +46,11 @@ import doublethink
import traceback import traceback
import socketserver import socketserver
def _build_arg_parser(prog=os.path.basename(sys.argv[0])): def _build_arg_parser(argv=None):
argv = argv or sys.argv
arg_parser = argparse.ArgumentParser( arg_parser = argparse.ArgumentParser(
formatter_class=brozzler.cli.BetterArgumentDefaultsHelpFormatter, formatter_class=brozzler.cli.BetterArgumentDefaultsHelpFormatter,
prog=prog, description=( prog=os.path.basename(argv[0]), description=(
'brozzler-easy - easy deployment of brozzler, with ' 'brozzler-easy - easy deployment of brozzler, with '
'brozzler-worker, warcprox, pywb, and brozzler-dashboard all ' 'brozzler-worker, warcprox, pywb, and brozzler-dashboard all '
'running in a single process')) 'running in a single process'))
@ -107,7 +108,7 @@ def _build_arg_parser(prog=os.path.basename(sys.argv[0])):
type=int, default=8881, help='brozzler dashboard port') type=int, default=8881, help='brozzler dashboard port')
# common at the bottom args # common at the bottom args
brozzler.cli.add_common_options(arg_parser) brozzler.cli.add_common_options(arg_parser, argv)
return arg_parser return arg_parser
@ -264,9 +265,10 @@ class BrozzlerEasyController:
logging.warn('dumping state (caught signal {})\n{}'.format( logging.warn('dumping state (caught signal {})\n{}'.format(
signum, '\n'.join(state_strs))) signum, '\n'.join(state_strs)))
def main(): def main(argv=None):
arg_parser = _build_arg_parser() argv = argv or sys.argv
args = arg_parser.parse_args(args=sys.argv[1:]) arg_parser = _build_arg_parser(argv)
args = arg_parser.parse_args(args=argv[1:])
brozzler.cli.configure_logging(args) brozzler.cli.configure_logging(args)
controller = BrozzlerEasyController(args) controller = BrozzlerEasyController(args)

View File

@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup( setuptools.setup(
name='brozzler', name='brozzler',
version='1.1b11.dev227', version='1.1b11.dev228',
description='Distributed web crawling with browsers', description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler', url='https://github.com/internetarchive/brozzler',
author='Noah Levitt', author='Noah Levitt',

59
tests/test_cli.py Normal file
View File

@ -0,0 +1,59 @@
#!/usr/bin/env python
'''
test_cli.py - test brozzler commands
Copyright (C) 2017 Internet Archive
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
import brozzler.cli
import pkg_resources
import pytest
import subprocess
def cli_commands():
    '''
    Returns the names of the brozzler console scripts that can be exercised
    in this environment, as a sorted list.

    'brozzler-wayback' is always left out. 'brozzler-dashboard' is left out
    if gunicorn cannot be imported, and 'brozzler-easy' is left out if pywb
    cannot be imported.

    The result feeds pytest.mark.parametrize, so it is sorted to keep test
    collection order the same from run to run (iteration order of a set of
    strings is not stable across interpreter invocations).
    '''
    commands = set(
            pkg_resources.get_entry_map('brozzler')['console_scripts'])
    commands.remove('brozzler-wayback')
    try:
        # imported only to find out whether the optional dependency exists
        import gunicorn
    except ImportError:
        commands.remove('brozzler-dashboard')
    try:
        # imported only to find out whether the optional dependency exists
        import pywb
    except ImportError:
        commands.remove('brozzler-easy')
    return sorted(commands)
@pytest.mark.parametrize('cmd', cli_commands())
def test_call_entrypoint(capsys, cmd):
    '''
    Calls the python function behind console script `cmd` in-process with
    `--version`, and checks that it exits cleanly after printing the
    expected version string on stdout and nothing on stderr.
    '''
    entrypoint = pkg_resources.get_entry_map(
            'brozzler')['console_scripts'][cmd]
    # named entry_func rather than "callable" to avoid shadowing the builtin
    entry_func = entrypoint.resolve()
    # argv[0] is a made-up path; the assertion below expects only its
    # basename to show up in the version string
    with pytest.raises(SystemExit) as excinfo:
        entry_func(['/whatever/bin/%s' % cmd, '--version'])
    # --version must exit with success status, not merely exit
    assert excinfo.value.code in (0, None)
    out, err = capsys.readouterr()
    assert out == 'brozzler %s - %s\n' % (brozzler.__version__, cmd)
    assert err == ''
@pytest.mark.parametrize('cmd', cli_commands())
def test_run_command(capsys, cmd):
    '''
    Runs console script `cmd` as a subprocess with `--version`, and checks
    that it exits with status 0 after printing the expected version string
    on stdout and nothing on stderr.
    '''
    proc = subprocess.Popen(
            [cmd, '--version'], stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
    # communicate() waits for the process to finish, so returncode is set
    # by the time it returns
    out, err = proc.communicate()
    expected = 'brozzler %s - %s\n' % (brozzler.__version__, cmd)
    assert out == expected.encode('ascii')
    assert err == b''
    assert proc.returncode == 0