From 371590b578d6748001948cda21bb735087cd307c Mon Sep 17 00:00:00 2001
From: Noah Levitt
Date: Thu, 30 Jun 2016 15:16:04 -0500
Subject: [PATCH] command line utility brozzler-ensure-tables, creates
 rethinkdb tables if they don't already exist... brozzler normally creates
 them on demand at startup, but if multiple instances are starting up at the
 same time, you can end up with duplicate broken tables, so it's a good idea
 to use this utility when spinning up a cluster

---
NOTE(review): the line breaks and indentation in this copy were reconstructed
from a whitespace-collapsed version of the patch. Line breaks follow the hunk
headers and diffstat counts; indentation (4-space bodies, 8-space continuation
lines) is a best guess -- confirm against commit 371590b before running
`git am` / `git apply`. The From: header in the collapsed copy carried no
e-mail address, so none is shown here.

 brozzler/cli.py | 33 +++++++++++++++++++++++++++++++++
 setup.py        |  3 ++-
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/brozzler/cli.py b/brozzler/cli.py
index 9d4cca4..972d521 100644
--- a/brozzler/cli.py
+++ b/brozzler/cli.py
@@ -38,12 +38,18 @@ import warnings
 import yaml
 
 def _add_common_options(arg_parser):
+    arg_parser.add_argument(
+            '-q', '--quiet', dest='log_level',
+            action='store_const', default=logging.INFO, const=logging.WARN)
     arg_parser.add_argument(
             '-v', '--verbose', dest='log_level',
             action='store_const', default=logging.INFO, const=logging.DEBUG)
     arg_parser.add_argument(
             '--trace', dest='log_level',
             action='store_const', default=logging.INFO, const=brozzler.TRACE)
+    # arg_parser.add_argument(
+    #         '-s', '--silent', dest='log_level',
+    #         action='store_const', default=logging.INFO, const=logging.CRITICAL)
     arg_parser.add_argument(
             '--version', action='version',
             version='brozzler %s - %s' % (
@@ -266,3 +272,30 @@ def brozzler_worker():
     th.join()
 
     logging.info("brozzler-worker is all done, exiting")
+
+def brozzler_ensure_tables():
+    '''
+    Creates rethinkdb tables if they don't already exist. Brozzler
+    (brozzler-worker, brozzler-new-job, etc) normally creates the tables it
+    needs on demand at startup, but if multiple instances are starting up at
+    the same time, you can end up with duplicate broken tables. So it's a good
+    idea to use this utility at an early step when spinning up a cluster.
+    '''
+    arg_parser = argparse.ArgumentParser(
+            prog=os.path.basename(sys.argv[0]),
+            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    _add_rethinkdb_options(arg_parser)
+    _add_common_options(arg_parser)
+
+    args = arg_parser.parse_args(args=sys.argv[1:])
+    _configure_logging(args)
+
+    r = rethinkstuff.Rethinker(
+            args.rethinkdb_servers.split(','), args.rethinkdb_db)
+
+    # services table
+    rethinkstuff.ServiceRegistry(r)
+
+    # sites, pages, jobs tables
+    brozzler.frontier.RethinkDbFrontier(r)
+
diff --git a/setup.py b/setup.py
index 93d3cfb..0607dcb 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@ import setuptools
 
 setuptools.setup(
         name='brozzler',
-        version='1.1.dev38',
+        version='1.1.dev39',
         description='Distributed web crawling with browsers',
         url='https://github.com/internetarchive/brozzler',
         author='Noah Levitt',
@@ -36,6 +36,7 @@ setuptools.setup(
                 'brozzler-new-job=brozzler.cli:brozzler_new_job',
                 'brozzler-new-site=brozzler.cli:brozzler_new_site',
                 'brozzler-worker=brozzler.cli:brozzler_worker',
+                'brozzler-ensure-tables=brozzler.cli:brozzler_ensure_tables',
                 'brozzler-webconsole=brozzler.webconsole:run',
             ],
         },