diff --git a/brozzler/cli.py b/brozzler/cli.py index f33104b..f5f9dc1 100644 --- a/brozzler/cli.py +++ b/brozzler/cli.py @@ -58,7 +58,7 @@ def add_common_options(arg_parser): arg_parser.add_argument( '--version', action='version', version='brozzler %s - %s' % ( - brozzler.__version__, os.path.basename(argv[0]))) + brozzler.__version__, os.path.basename(sys.argv[0]))) def add_rethinkdb_options(arg_parser): arg_parser.add_argument( @@ -124,14 +124,13 @@ class BetterArgumentDefaultsHelpFormatter( else: return super()._get_help_string(action) -def brozzle_page(argv=None): +def brozzle_page(): ''' Command line utility entry point for brozzling a single page. Opens url in a browser, running some javascript behaviors, and prints outlinks. ''' - argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(argv[0]), + prog=os.path.basename(sys.argv[0]), description='brozzle-page - brozzle a single page', formatter_class=BetterArgumentDefaultsHelpFormatter) arg_parser.add_argument('url', metavar='URL', help='page url') @@ -155,7 +154,7 @@ def brozzle_page(argv=None): '--proxy', dest='proxy', default=None, help='http proxy') add_common_options(arg_parser) - args = arg_parser.parse_args(args=argv[1:]) + args = arg_parser.parse_args(args=sys.argv[1:]) configure_logging(args) behavior_parameters = {} @@ -188,15 +187,14 @@ def brozzle_page(argv=None): finally: browser.stop() -def brozzler_new_job(argv=None): +def brozzler_new_job(): ''' Command line utility entry point for queuing a new brozzler job. Takes a yaml brozzler job configuration file, creates job, sites, and pages objects in rethinkdb, which brozzler-workers will look at and start crawling. ''' - argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(argv[0]), + prog=os.path.basename(sys.argv[0]), description='brozzler-new-job - queue new job with brozzler', formatter_class=BetterArgumentDefaultsHelpFormatter) arg_parser.add_argument( @@ -205,7 +203,7 @@ def brozzler_new_job(argv=None): add_rethinkdb_options(arg_parser) add_common_options(arg_parser) - args = arg_parser.parse_args(args=argv[1:]) + args = arg_parser.parse_args(args=sys.argv[1:]) configure_logging(args) rr = rethinker(args) @@ -217,15 +215,14 @@ def brozzler_new_job(argv=None): print(' ' + yaml.dump(e.errors).rstrip().replace('\n', '\n '), file=sys.stderr) sys.exit(1) -def brozzler_new_site(argv=None): +def brozzler_new_site(): ''' Command line utility entry point for queuing a new brozzler site. Takes a seed url and creates a site and page object in rethinkdb, which brozzler-workers will look at and start crawling. ''' - argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(argv[0]), + prog=os.path.basename(sys.argv[0]), description='brozzler-new-site - register site to brozzle', formatter_class=BetterArgumentDefaultsHelpFormatter) arg_parser.add_argument('seed', metavar='SEED', help='seed url') @@ -256,7 +253,7 @@ def brozzler_new_site(argv=None): help='use this password to try to log in if a login form is found') add_common_options(arg_parser) - args = arg_parser.parse_args(args=argv[1:]) + args = arg_parser.parse_args(args=sys.argv[1:]) configure_logging(args) rr = rethinker(args) @@ -274,14 +271,13 @@ def brozzler_new_site(argv=None): frontier = brozzler.RethinkDbFrontier(rr) brozzler.new_site(frontier, site) -def brozzler_worker(argv=None): +def brozzler_worker(): ''' Main entry point for brozzler, gets sites and pages to brozzle from rethinkdb, brozzles them. ''' - argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(argv[0]), + prog=os.path.basename(sys.argv[0]), formatter_class=BetterArgumentDefaultsHelpFormatter) add_rethinkdb_options(arg_parser) arg_parser.add_argument( @@ -300,7 +296,7 @@ def brozzler_worker(argv=None): 'the rethinkdb service registry')) add_common_options(arg_parser) - args = arg_parser.parse_args(args=argv[1:]) + args = arg_parser.parse_args(args=sys.argv[1:]) configure_logging(args) def sigterm(signum, frame): @@ -345,7 +341,7 @@ def brozzler_worker(argv=None): logging.info('brozzler-worker is all done, exiting') -def brozzler_ensure_tables(argv=None): +def brozzler_ensure_tables(): ''' Creates rethinkdb tables if they don't already exist. Brozzler (brozzler-worker, brozzler-new-job, etc) normally creates the tables it @@ -353,14 +349,13 @@ def brozzler_ensure_tables(argv=None): the same time, you can end up with duplicate broken tables. So it's a good idea to use this utility at an early step when spinning up a cluster. ''' - argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(argv[0]), + prog=os.path.basename(sys.argv[0]), formatter_class=BetterArgumentDefaultsHelpFormatter) add_rethinkdb_options(arg_parser) add_common_options(arg_parser) - args = arg_parser.parse_args(args=argv[1:]) + args = arg_parser.parse_args(args=sys.argv[1:]) configure_logging(args) rr = rethinker(args) @@ -380,10 +375,9 @@ class Jsonner(json.JSONEncoder): else: return json.JSONEncoder.default(self, o) -def brozzler_list_jobs(argv=None): - argv = argv or sys.argv +def brozzler_list_jobs(): arg_parser = argparse.ArgumentParser( - prog=os.path.basename(argv[0]), + prog=os.path.basename(sys.argv[0]), formatter_class=BetterArgumentDefaultsHelpFormatter) arg_parser.add_argument( '--yaml', dest='yaml', action='store_true', help=( @@ -401,7 +395,7 @@ def brozzler_list_jobs(argv=None): add_rethinkdb_options(arg_parser) add_common_options(arg_parser) - args = arg_parser.parse_args(args=argv[1:]) + args = arg_parser.parse_args(args=sys.argv[1:]) configure_logging(args) rr = rethinker(args) @@ -432,10 +426,9 @@ def brozzler_list_jobs(argv=None): for result in results: print(json.dumps(result, cls=Jsonner, indent=2)) -def brozzler_list_sites(argv=None): - argv = argv or sys.argv +def brozzler_list_sites(): arg_parser = argparse.ArgumentParser( - prog=os.path.basename(argv[0]), + prog=os.path.basename(sys.argv[0]), formatter_class=BetterArgumentDefaultsHelpFormatter) arg_parser.add_argument( '--yaml', dest='yaml', action='store_true', help=( @@ -459,7 +452,7 @@ def brozzler_list_sites(argv=None): add_rethinkdb_options(arg_parser) add_common_options(arg_parser) - args = arg_parser.parse_args(args=argv[1:]) + args = arg_parser.parse_args(args=sys.argv[1:]) configure_logging(args) rr = rethinker(args) @@ -485,10 +478,9 @@ def brozzler_list_sites(argv=None): for result in results: print(json.dumps(result, cls=Jsonner, indent=2)) -def brozzler_list_pages(argv=None): - argv = argv or sys.argv +def brozzler_list_pages(): arg_parser = argparse.ArgumentParser( - prog=os.path.basename(argv[0]), + prog=os.path.basename(sys.argv[0]), formatter_class=BetterArgumentDefaultsHelpFormatter) arg_parser.add_argument( '--yaml', dest='yaml', action='store_true', help=( @@ -517,7 +509,7 @@ def brozzler_list_pages(argv=None): add_rethinkdb_options(arg_parser) add_common_options(arg_parser) - args = arg_parser.parse_args(args=argv[1:]) + args = arg_parser.parse_args(args=sys.argv[1:]) configure_logging(args) rr = rethinker(args) @@ -562,16 +554,15 @@ def brozzler_list_pages(argv=None): for result in results: print(json.dumps(result, cls=Jsonner, indent=2)) -def brozzler_list_captures(argv=None): +def brozzler_list_captures(): ''' Handy utility for looking up entries in the rethinkdb "captures" table by url or sha1. ''' import urlcanon - argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(argv[0]), + prog=os.path.basename(sys.argv[0]), formatter_class=BetterArgumentDefaultsHelpFormatter) arg_parser.add_argument( '-p', '--prefix', dest='prefix', action='store_true', help=( @@ -587,7 +578,7 @@ def brozzler_list_captures(argv=None): 'url_or_sha1', metavar='URL_or_SHA1', help='url or sha1 to look up in captures table') - args = arg_parser.parse_args(args=argv[1:]) + args = arg_parser.parse_args(args=sys.argv[1:]) configure_logging(args) rr = rethinker(args)