diff --git a/brozzler/cli.py b/brozzler/cli.py index f5f9dc1..fbd274e 100644 --- a/brozzler/cli.py +++ b/brozzler/cli.py @@ -39,7 +39,8 @@ import shutil import base64 import rethinkdb as r -def add_common_options(arg_parser): +def add_common_options(arg_parser, argv=None): + argv = argv or sys.argv arg_parser.add_argument( '-q', '--quiet', dest='log_level', action='store_const', default=logging.INFO, const=logging.WARN, help=( @@ -58,7 +59,7 @@ def add_common_options(arg_parser): arg_parser.add_argument( '--version', action='version', version='brozzler %s - %s' % ( - brozzler.__version__, os.path.basename(sys.argv[0]))) + brozzler.__version__, os.path.basename(argv[0]))) def add_rethinkdb_options(arg_parser): arg_parser.add_argument( @@ -124,13 +125,14 @@ class BetterArgumentDefaultsHelpFormatter( else: return super()._get_help_string(action) -def brozzle_page(): +def brozzle_page(argv=None): ''' Command line utility entry point for brozzling a single page. Opens url in a browser, running some javascript behaviors, and prints outlinks. ''' + argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(sys.argv[0]), + prog=os.path.basename(argv[0]), description='brozzle-page - brozzle a single page', formatter_class=BetterArgumentDefaultsHelpFormatter) arg_parser.add_argument('url', metavar='URL', help='page url') @@ -152,9 +154,9 @@ def brozzle_page(): help='use this password to try to log in if a login form is found') arg_parser.add_argument( '--proxy', dest='proxy', default=None, help='http proxy') - add_common_options(arg_parser) + add_common_options(arg_parser, argv) - args = arg_parser.parse_args(args=sys.argv[1:]) + args = arg_parser.parse_args(args=argv[1:]) configure_logging(args) behavior_parameters = {} @@ -187,23 +189,24 @@ def brozzle_page(): finally: browser.stop() -def brozzler_new_job(): +def brozzler_new_job(argv=None): ''' Command line utility entry point for queuing a new brozzler job. 
Takes a yaml brozzler job configuration file, creates job, sites, and pages objects in rethinkdb, which brozzler-workers will look at and start crawling. ''' + argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(sys.argv[0]), + prog=os.path.basename(argv[0]), description='brozzler-new-job - queue new job with brozzler', formatter_class=BetterArgumentDefaultsHelpFormatter) arg_parser.add_argument( 'job_conf_file', metavar='JOB_CONF_FILE', help='brozzler job configuration file in yaml') add_rethinkdb_options(arg_parser) - add_common_options(arg_parser) + add_common_options(arg_parser, argv) - args = arg_parser.parse_args(args=sys.argv[1:]) + args = arg_parser.parse_args(args=argv[1:]) configure_logging(args) rr = rethinker(args) @@ -215,14 +218,15 @@ def brozzler_new_job(): print(' ' + yaml.dump(e.errors).rstrip().replace('\n', '\n '), file=sys.stderr) sys.exit(1) -def brozzler_new_site(): +def brozzler_new_site(argv=None): ''' Command line utility entry point for queuing a new brozzler site. Takes a seed url and creates a site and page object in rethinkdb, which brozzler-workers will look at and start crawling. 
''' + argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(sys.argv[0]), + prog=os.path.basename(argv[0]), description='brozzler-new-site - register site to brozzle', formatter_class=BetterArgumentDefaultsHelpFormatter) arg_parser.add_argument('seed', metavar='SEED', help='seed url') @@ -251,9 +255,9 @@ def brozzler_new_site(): arg_parser.add_argument( '--password', dest='password', default=None, help='use this password to try to log in if a login form is found') - add_common_options(arg_parser) + add_common_options(arg_parser, argv) - args = arg_parser.parse_args(args=sys.argv[1:]) + args = arg_parser.parse_args(args=argv[1:]) configure_logging(args) rr = rethinker(args) @@ -271,13 +275,14 @@ def brozzler_new_site(): frontier = brozzler.RethinkDbFrontier(rr) brozzler.new_site(frontier, site) -def brozzler_worker(): +def brozzler_worker(argv=None): ''' Main entry point for brozzler, gets sites and pages to brozzle from rethinkdb, brozzles them. ''' + argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(sys.argv[0]), + prog=os.path.basename(argv[0]), formatter_class=BetterArgumentDefaultsHelpFormatter) add_rethinkdb_options(arg_parser) arg_parser.add_argument( @@ -294,9 +299,9 @@ def brozzler_worker(): help=( 'when needed, choose an available instance of warcprox from ' 'the rethinkdb service registry')) - add_common_options(arg_parser) + add_common_options(arg_parser, argv) - args = arg_parser.parse_args(args=sys.argv[1:]) + args = arg_parser.parse_args(args=argv[1:]) configure_logging(args) def sigterm(signum, frame): @@ -341,7 +346,7 @@ def brozzler_worker(): logging.info('brozzler-worker is all done, exiting') -def brozzler_ensure_tables(): +def brozzler_ensure_tables(argv=None): ''' Creates rethinkdb tables if they don't already exist. 
Brozzler (brozzler-worker, brozzler-new-job, etc) normally creates the tables it @@ -349,13 +354,14 @@ def brozzler_ensure_tables(): the same time, you can end up with duplicate broken tables. So it's a good idea to use this utility at an early step when spinning up a cluster. ''' + argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(sys.argv[0]), + prog=os.path.basename(argv[0]), formatter_class=BetterArgumentDefaultsHelpFormatter) add_rethinkdb_options(arg_parser) - add_common_options(arg_parser) + add_common_options(arg_parser, argv) - args = arg_parser.parse_args(args=sys.argv[1:]) + args = arg_parser.parse_args(args=argv[1:]) configure_logging(args) rr = rethinker(args) @@ -375,9 +381,10 @@ class Jsonner(json.JSONEncoder): else: return json.JSONEncoder.default(self, o) -def brozzler_list_jobs(): +def brozzler_list_jobs(argv=None): + argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(sys.argv[0]), + prog=os.path.basename(argv[0]), formatter_class=BetterArgumentDefaultsHelpFormatter) arg_parser.add_argument( '--yaml', dest='yaml', action='store_true', help=( @@ -393,9 +400,9 @@ def brozzler_list_jobs(): '--job', dest='job', metavar='JOB_ID', help=( 'list only the specified job')) add_rethinkdb_options(arg_parser) - add_common_options(arg_parser) + add_common_options(arg_parser, argv) - args = arg_parser.parse_args(args=sys.argv[1:]) + args = arg_parser.parse_args(args=argv[1:]) configure_logging(args) rr = rethinker(args) @@ -426,9 +433,10 @@ def brozzler_list_jobs(): for result in results: print(json.dumps(result, cls=Jsonner, indent=2)) -def brozzler_list_sites(): +def brozzler_list_sites(argv=None): + argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(sys.argv[0]), + prog=os.path.basename(argv[0]), formatter_class=BetterArgumentDefaultsHelpFormatter) arg_parser.add_argument( '--yaml', dest='yaml', action='store_true', help=( @@ -450,9 +458,9 @@ def 
brozzler_list_sites(): '--all', dest='all', action='store_true', help=( 'list all sites')) add_rethinkdb_options(arg_parser) - add_common_options(arg_parser) + add_common_options(arg_parser, argv) - args = arg_parser.parse_args(args=sys.argv[1:]) + args = arg_parser.parse_args(args=argv[1:]) configure_logging(args) rr = rethinker(args) @@ -478,9 +486,10 @@ def brozzler_list_sites(): for result in results: print(json.dumps(result, cls=Jsonner, indent=2)) -def brozzler_list_pages(): +def brozzler_list_pages(argv=None): + argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(sys.argv[0]), + prog=os.path.basename(argv[0]), formatter_class=BetterArgumentDefaultsHelpFormatter) arg_parser.add_argument( '--yaml', dest='yaml', action='store_true', help=( @@ -507,9 +516,9 @@ def brozzler_list_pages(): 'limit to pages that are currently claimed by a brozzler ' 'worker')) add_rethinkdb_options(arg_parser) - add_common_options(arg_parser) + add_common_options(arg_parser, argv) - args = arg_parser.parse_args(args=sys.argv[1:]) + args = arg_parser.parse_args(args=argv[1:]) configure_logging(args) rr = rethinker(args) @@ -554,15 +563,16 @@ def brozzler_list_pages(): for result in results: print(json.dumps(result, cls=Jsonner, indent=2)) -def brozzler_list_captures(): +def brozzler_list_captures(argv=None): ''' Handy utility for looking up entries in the rethinkdb "captures" table by url or sha1. 
''' import urlcanon + argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(sys.argv[0]), + prog=os.path.basename(argv[0]), formatter_class=BetterArgumentDefaultsHelpFormatter) arg_parser.add_argument( '-p', '--prefix', dest='prefix', action='store_true', help=( @@ -573,12 +583,12 @@ def brozzler_list_captures(): '--yaml', dest='yaml', action='store_true', help=( 'yaml output (default is json)')) add_rethinkdb_options(arg_parser) - add_common_options(arg_parser) + add_common_options(arg_parser, argv) arg_parser.add_argument( 'url_or_sha1', metavar='URL_or_SHA1', help='url or sha1 to look up in captures table') - args = arg_parser.parse_args(args=sys.argv[1:]) + args = arg_parser.parse_args(args=argv[1:]) configure_logging(args) rr = rethinker(args) diff --git a/brozzler/dashboard/__init__.py b/brozzler/dashboard/__init__.py index 9efa8dc..54e74ec 100644 --- a/brozzler/dashboard/__init__.py +++ b/brozzler/dashboard/__init__.py @@ -270,11 +270,12 @@ except ImportError: logging.info("running brozzler-dashboard using simple flask app.run") app.run() -def main(): +def main(argv=None): import argparse import brozzler.cli + argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(sys.argv[0]), + prog=os.path.basename(argv[0]), formatter_class=argparse.RawDescriptionHelpFormatter, description=( 'brozzler-dashboard - web application for viewing brozzler ' @@ -289,8 +290,8 @@ def main(): '(default: brozzler)\n' ' WAYBACK_BASEURL base url for constructing wayback ' 'links (default http://localhost:8880/brozzler)')) - brozzler.cli.add_common_options(arg_parser) - args = arg_parser.parse_args(args=sys.argv[1:]) + brozzler.cli.add_common_options(arg_parser, argv) + args = arg_parser.parse_args(args=argv[1:]) brozzler.cli.configure_logging(args) run() diff --git a/brozzler/easy.py b/brozzler/easy.py index 6ba406b..e41f013 100644 --- a/brozzler/easy.py +++ b/brozzler/easy.py @@ -46,10 +46,11 @@ import doublethink import 
traceback import socketserver -def _build_arg_parser(prog=os.path.basename(sys.argv[0])): +def _build_arg_parser(argv=None): + argv = argv or sys.argv arg_parser = argparse.ArgumentParser( formatter_class=brozzler.cli.BetterArgumentDefaultsHelpFormatter, - prog=prog, description=( + prog=os.path.basename(argv[0]), description=( 'brozzler-easy - easy deployment of brozzler, with ' 'brozzler-worker, warcprox, pywb, and brozzler-dashboard all ' 'running in a single process')) @@ -107,7 +108,7 @@ def _build_arg_parser(prog=os.path.basename(sys.argv[0])): type=int, default=8881, help='brozzler dashboard port') # common at the bottom args - brozzler.cli.add_common_options(arg_parser) + brozzler.cli.add_common_options(arg_parser, argv) return arg_parser @@ -264,9 +265,10 @@ class BrozzlerEasyController: logging.warn('dumping state (caught signal {})\n{}'.format( signum, '\n'.join(state_strs))) -def main(): - arg_parser = _build_arg_parser() - args = arg_parser.parse_args(args=sys.argv[1:]) +def main(argv=None): + argv = argv or sys.argv + arg_parser = _build_arg_parser(argv) + args = arg_parser.parse_args(args=argv[1:]) brozzler.cli.configure_logging(args) controller = BrozzlerEasyController(args) diff --git a/setup.py b/setup.py index 26f1f18..e28e7c3 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ def find_package_data(package): setuptools.setup( name='brozzler', - version='1.1b11.dev227', + version='1.1b11.dev228', description='Distributed web crawling with browsers', url='https://github.com/internetarchive/brozzler', author='Noah Levitt', diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..5eec62a --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +''' +test_cli.py - test brozzler commands + +Copyright (C) 2017 Internet Archive + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+import brozzler.cli
+import pkg_resources
+import pytest
+import subprocess
+
+def cli_commands():
+    '''Returns sorted names of the brozzler console scripts to test.'''
+    commands = set(pkg_resources.get_entry_map('brozzler')['console_scripts'])
+    commands.remove('brozzler-wayback')  # always excluded from these tests
+    try:
+        import gunicorn
+    except ImportError:  # excluded whenever gunicorn is not importable
+        commands.remove('brozzler-dashboard')
+    try:
+        import pywb
+    except ImportError:  # excluded whenever pywb is not importable
+        commands.remove('brozzler-easy')
+    return sorted(commands)  # sets iterate in hash order; keep ids stable
+
+
+@pytest.mark.parametrize('cmd', cli_commands())
+def test_call_entrypoint(capsys, cmd):
+    '''Calls the entry point in-process; --version must print and exit.'''
+    entry = pkg_resources.get_entry_map('brozzler')['console_scripts'][cmd]
+    main_func = entry.resolve()  # avoid shadowing the builtin `callable`
+    with pytest.raises(SystemExit):
+        main_func(['/whatever/bin/%s' % cmd, '--version'])
+    out, err = capsys.readouterr()
+    assert out == 'brozzler %s - %s\n' % (brozzler.__version__, cmd)
+    assert err == ''
+
+@pytest.mark.parametrize('cmd', cli_commands())
+def test_run_command(capsys, cmd):
+    '''Runs the command by name in a subprocess with --version.'''
+    proc = subprocess.Popen(
+        [cmd, '--version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    out, err = proc.communicate()
+    expected = 'brozzler %s - %s\n' % (brozzler.__version__, cmd)
+    assert out == expected.encode('ascii')
+    assert err == b''