Add command line utility brozzler-ensure-tables, which creates rethinkdb tables if they don't already exist. Brozzler normally creates them on demand at startup, but if multiple instances are starting up at the same time, you can end up with duplicate broken tables, so it's a good idea to use this utility when spinning up a cluster.

This commit is contained in:
Noah Levitt 2016-06-30 15:16:04 -05:00
parent 9fd78fdbe8
commit 371590b578
2 changed files with 35 additions and 1 deletions

View File

@ -38,12 +38,18 @@ import warnings
import yaml import yaml
def _add_common_options(arg_parser): def _add_common_options(arg_parser):
arg_parser.add_argument(
'-q', '--quiet', dest='log_level',
action='store_const', default=logging.INFO, const=logging.WARN)
arg_parser.add_argument( arg_parser.add_argument(
'-v', '--verbose', dest='log_level', '-v', '--verbose', dest='log_level',
action='store_const', default=logging.INFO, const=logging.DEBUG) action='store_const', default=logging.INFO, const=logging.DEBUG)
arg_parser.add_argument( arg_parser.add_argument(
'--trace', dest='log_level', '--trace', dest='log_level',
action='store_const', default=logging.INFO, const=brozzler.TRACE) action='store_const', default=logging.INFO, const=brozzler.TRACE)
# arg_parser.add_argument(
# '-s', '--silent', dest='log_level',
# action='store_const', default=logging.INFO, const=logging.CRITICAL)
arg_parser.add_argument( arg_parser.add_argument(
'--version', action='version', '--version', action='version',
version='brozzler %s - %s' % ( version='brozzler %s - %s' % (
@ -266,3 +272,30 @@ def brozzler_worker():
th.join() th.join()
logging.info("brozzler-worker is all done, exiting") logging.info("brozzler-worker is all done, exiting")
def brozzler_ensure_tables():
    '''
    Command line entry point that makes sure the rethinkdb tables brozzler
    relies on exist, creating any that are missing.

    The other brozzler programs (brozzler-worker, brozzler-new-job, etc)
    create their tables lazily when they start. When several of them start
    simultaneously, that can leave duplicate, broken tables behind, so run
    this once, early, while bringing up a cluster.

    Takes its options from sys.argv; returns nothing.
    '''
    parser = argparse.ArgumentParser(
            prog=os.path.basename(sys.argv[0]),
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    _add_rethinkdb_options(parser)
    _add_common_options(parser)

    cli_args = parser.parse_args(args=sys.argv[1:])
    _configure_logging(cli_args)

    # --rethinkdb-servers is a single comma-separated string
    servers = cli_args.rethinkdb_servers.split(',')
    rethinker = rethinkstuff.Rethinker(servers, cli_args.rethinkdb_db)

    # The objects below are built only for the table setup done by their
    # constructors; the instances themselves are thrown away.

    # services table
    rethinkstuff.ServiceRegistry(rethinker)

    # sites, pages, jobs tables
    brozzler.frontier.RethinkDbFrontier(rethinker)

View File

@ -21,7 +21,7 @@ import setuptools
setuptools.setup( setuptools.setup(
name='brozzler', name='brozzler',
version='1.1.dev38', version='1.1.dev39',
description='Distributed web crawling with browsers', description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler', url='https://github.com/internetarchive/brozzler',
author='Noah Levitt', author='Noah Levitt',
@ -36,6 +36,7 @@ setuptools.setup(
'brozzler-new-job=brozzler.cli:brozzler_new_job', 'brozzler-new-job=brozzler.cli:brozzler_new_job',
'brozzler-new-site=brozzler.cli:brozzler_new_site', 'brozzler-new-site=brozzler.cli:brozzler_new_site',
'brozzler-worker=brozzler.cli:brozzler_worker', 'brozzler-worker=brozzler.cli:brozzler_worker',
'brozzler-ensure-tables=brozzler.cli:brozzler_ensure_tables',
'brozzler-webconsole=brozzler.webconsole:run', 'brozzler-webconsole=brozzler.webconsole:run',
], ],
}, },