From d192fc269eddeb8b06888e95bb6e4a6639e34415 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Wed, 5 Apr 2017 16:15:24 -0700 Subject: [PATCH] bump version number for last pull request --- brozzler/cli.py | 65 ++++++++++++++++++++++++++++--------------------- setup.py | 2 +- 2 files changed, 38 insertions(+), 29 deletions(-) diff --git a/brozzler/cli.py b/brozzler/cli.py index f5f9dc1..f33104b 100644 --- a/brozzler/cli.py +++ b/brozzler/cli.py @@ -58,7 +58,7 @@ def add_common_options(arg_parser): arg_parser.add_argument( '--version', action='version', version='brozzler %s - %s' % ( - brozzler.__version__, os.path.basename(sys.argv[0]))) + brozzler.__version__, os.path.basename(argv[0]))) def add_rethinkdb_options(arg_parser): arg_parser.add_argument( @@ -124,13 +124,14 @@ class BetterArgumentDefaultsHelpFormatter( else: return super()._get_help_string(action) -def brozzle_page(): +def brozzle_page(argv=None): ''' Command line utility entry point for brozzling a single page. Opens url in a browser, running some javascript behaviors, and prints outlinks. ''' + argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(sys.argv[0]), + prog=os.path.basename(argv[0]), description='brozzle-page - brozzle a single page', formatter_class=BetterArgumentDefaultsHelpFormatter) arg_parser.add_argument('url', metavar='URL', help='page url') @@ -154,7 +155,7 @@ def brozzle_page(): '--proxy', dest='proxy', default=None, help='http proxy') add_common_options(arg_parser) - args = arg_parser.parse_args(args=sys.argv[1:]) + args = arg_parser.parse_args(args=argv[1:]) configure_logging(args) behavior_parameters = {} @@ -187,14 +188,15 @@ def brozzle_page(): finally: browser.stop() -def brozzler_new_job(): +def brozzler_new_job(argv=None): ''' Command line utility entry point for queuing a new brozzler job. Takes a yaml brozzler job configuration file, creates job, sites, and pages objects in rethinkdb, which brozzler-workers will look at and start crawling. ''' + argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(sys.argv[0]), + prog=os.path.basename(argv[0]), description='brozzler-new-job - queue new job with brozzler', formatter_class=BetterArgumentDefaultsHelpFormatter) arg_parser.add_argument( @@ -203,7 +205,7 @@ def brozzler_new_job(): add_rethinkdb_options(arg_parser) add_common_options(arg_parser) - args = arg_parser.parse_args(args=sys.argv[1:]) + args = arg_parser.parse_args(args=argv[1:]) configure_logging(args) rr = rethinker(args) @@ -215,14 +217,15 @@ def brozzler_new_job(): print(' ' + yaml.dump(e.errors).rstrip().replace('\n', '\n '), file=sys.stderr) sys.exit(1) -def brozzler_new_site(): +def brozzler_new_site(argv=None): ''' Command line utility entry point for queuing a new brozzler site. Takes a seed url and creates a site and page object in rethinkdb, which brozzler-workers will look at and start crawling. ''' + argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(sys.argv[0]), + prog=os.path.basename(argv[0]), description='brozzler-new-site - register site to brozzle', formatter_class=BetterArgumentDefaultsHelpFormatter) arg_parser.add_argument('seed', metavar='SEED', help='seed url') @@ -253,7 +256,7 @@ def brozzler_new_site(): help='use this password to try to log in if a login form is found') add_common_options(arg_parser) - args = arg_parser.parse_args(args=sys.argv[1:]) + args = arg_parser.parse_args(args=argv[1:]) configure_logging(args) rr = rethinker(args) @@ -271,13 +274,14 @@ def brozzler_new_site(): frontier = brozzler.RethinkDbFrontier(rr) brozzler.new_site(frontier, site) -def brozzler_worker(): +def brozzler_worker(argv=None): ''' Main entry point for brozzler, gets sites and pages to brozzle from rethinkdb, brozzles them. ''' + argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(sys.argv[0]), + prog=os.path.basename(argv[0]), formatter_class=BetterArgumentDefaultsHelpFormatter) add_rethinkdb_options(arg_parser) arg_parser.add_argument( @@ -296,7 +300,7 @@ def brozzler_worker(): 'the rethinkdb service registry')) add_common_options(arg_parser) - args = arg_parser.parse_args(args=sys.argv[1:]) + args = arg_parser.parse_args(args=argv[1:]) configure_logging(args) def sigterm(signum, frame): @@ -341,7 +345,7 @@ def brozzler_worker(): logging.info('brozzler-worker is all done, exiting') -def brozzler_ensure_tables(): +def brozzler_ensure_tables(argv=None): ''' Creates rethinkdb tables if they don't already exist. Brozzler (brozzler-worker, brozzler-new-job, etc) normally creates the tables it @@ -349,13 +353,14 @@ def brozzler_ensure_tables(): the same time, you can end up with duplicate broken tables. So it's a good idea to use this utility at an early step when spinning up a cluster. ''' + argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(sys.argv[0]), + prog=os.path.basename(argv[0]), formatter_class=BetterArgumentDefaultsHelpFormatter) add_rethinkdb_options(arg_parser) add_common_options(arg_parser) - args = arg_parser.parse_args(args=sys.argv[1:]) + args = arg_parser.parse_args(args=argv[1:]) configure_logging(args) rr = rethinker(args) @@ -375,9 +380,10 @@ class Jsonner(json.JSONEncoder): else: return json.JSONEncoder.default(self, o) -def brozzler_list_jobs(): +def brozzler_list_jobs(argv=None): + argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(sys.argv[0]), + prog=os.path.basename(argv[0]), formatter_class=BetterArgumentDefaultsHelpFormatter) arg_parser.add_argument( '--yaml', dest='yaml', action='store_true', help=( @@ -395,7 +401,7 @@ def brozzler_list_jobs(): add_rethinkdb_options(arg_parser) add_common_options(arg_parser) - args = arg_parser.parse_args(args=sys.argv[1:]) + args = arg_parser.parse_args(args=argv[1:]) configure_logging(args) rr = rethinker(args) @@ -426,9 +432,10 @@ def brozzler_list_jobs(): for result in results: print(json.dumps(result, cls=Jsonner, indent=2)) -def brozzler_list_sites(): +def brozzler_list_sites(argv=None): + argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(sys.argv[0]), + prog=os.path.basename(argv[0]), formatter_class=BetterArgumentDefaultsHelpFormatter) arg_parser.add_argument( '--yaml', dest='yaml', action='store_true', help=( @@ -452,7 +459,7 @@ def brozzler_list_sites(): add_rethinkdb_options(arg_parser) add_common_options(arg_parser) - args = arg_parser.parse_args(args=sys.argv[1:]) + args = arg_parser.parse_args(args=argv[1:]) configure_logging(args) rr = rethinker(args) @@ -478,9 +485,10 @@ def brozzler_list_sites(): for result in results: print(json.dumps(result, cls=Jsonner, indent=2)) -def brozzler_list_pages(): +def brozzler_list_pages(argv=None): + argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(sys.argv[0]), + prog=os.path.basename(argv[0]), formatter_class=BetterArgumentDefaultsHelpFormatter) arg_parser.add_argument( '--yaml', dest='yaml', action='store_true', help=( @@ -509,7 +517,7 @@ def brozzler_list_pages(): add_rethinkdb_options(arg_parser) add_common_options(arg_parser) - args = arg_parser.parse_args(args=sys.argv[1:]) + args = arg_parser.parse_args(args=argv[1:]) configure_logging(args) rr = rethinker(args) @@ -554,15 +562,16 @@ def brozzler_list_pages(): for result in results: print(json.dumps(result, cls=Jsonner, indent=2)) -def brozzler_list_captures(): +def brozzler_list_captures(argv=None): ''' Handy utility for looking up entries in the rethinkdb "captures" table by url or sha1. ''' import urlcanon + argv = argv or sys.argv arg_parser = argparse.ArgumentParser( - prog=os.path.basename(sys.argv[0]), + prog=os.path.basename(argv[0]), formatter_class=BetterArgumentDefaultsHelpFormatter) arg_parser.add_argument( '-p', '--prefix', dest='prefix', action='store_true', help=( @@ -578,7 +587,7 @@ def brozzler_list_captures(): 'url_or_sha1', metavar='URL_or_SHA1', help='url or sha1 to look up in captures table') - args = arg_parser.parse_args(args=sys.argv[1:]) + args = arg_parser.parse_args(args=argv[1:]) configure_logging(args) rr = rethinker(args) diff --git a/setup.py b/setup.py index 2182db2..26f1f18 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ def find_package_data(package): setuptools.setup( name='brozzler', - version='1.1b11.dev226', + version='1.1b11.dev227', description='Distributed web crawling with browsers', url='https://github.com/internetarchive/brozzler', author='Noah Levitt',