mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 00:29:53 -05:00
Add command line utility brozzler-ensure-tables, which creates the rethinkdb tables if they don't already exist. Brozzler normally creates them on demand at startup, but if multiple instances start up at the same time, you can end up with duplicate, broken tables, so it's a good idea to use this utility when spinning up a cluster.
This commit is contained in:
parent
9fd78fdbe8
commit
371590b578
@ -38,12 +38,18 @@ import warnings
|
|||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
def _add_common_options(arg_parser):
|
def _add_common_options(arg_parser):
|
||||||
|
arg_parser.add_argument(
|
||||||
|
'-q', '--quiet', dest='log_level',
|
||||||
|
action='store_const', default=logging.INFO, const=logging.WARN)
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
'-v', '--verbose', dest='log_level',
|
'-v', '--verbose', dest='log_level',
|
||||||
action='store_const', default=logging.INFO, const=logging.DEBUG)
|
action='store_const', default=logging.INFO, const=logging.DEBUG)
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
'--trace', dest='log_level',
|
'--trace', dest='log_level',
|
||||||
action='store_const', default=logging.INFO, const=brozzler.TRACE)
|
action='store_const', default=logging.INFO, const=brozzler.TRACE)
|
||||||
|
# arg_parser.add_argument(
|
||||||
|
# '-s', '--silent', dest='log_level',
|
||||||
|
# action='store_const', default=logging.INFO, const=logging.CRITICAL)
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
'--version', action='version',
|
'--version', action='version',
|
||||||
version='brozzler %s - %s' % (
|
version='brozzler %s - %s' % (
|
||||||
@ -266,3 +272,30 @@ def brozzler_worker():
|
|||||||
th.join()
|
th.join()
|
||||||
|
|
||||||
logging.info("brozzler-worker is all done, exiting")
|
logging.info("brozzler-worker is all done, exiting")
|
||||||
|
|
||||||
|
def brozzler_ensure_tables():
    '''
    Command line entry point that creates the rethinkdb tables brozzler
    uses, if they are not there yet.

    The brozzler tools (brozzler-worker, brozzler-new-job, etc) normally
    create the tables they need on demand at startup. When several instances
    start at the same moment, that can leave duplicate, broken tables behind,
    so it is a good idea to run this utility as an early step when spinning
    up a cluster.

    Reads its options from sys.argv; takes no parameters and returns nothing.
    '''
    parser = argparse.ArgumentParser(
            prog=os.path.basename(sys.argv[0]),
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    for add_options in (_add_rethinkdb_options, _add_common_options):
        add_options(parser)

    args = parser.parse_args(args=sys.argv[1:])
    _configure_logging(args)

    # --rethinkdb-servers is a comma-separated list of servers
    servers = args.rethinkdb_servers.split(',')
    r = rethinkstuff.Rethinker(servers, args.rethinkdb_db)

    # The objects below are constructed only for their table-creating side
    # effect; the instances themselves are discarded.

    # services table
    rethinkstuff.ServiceRegistry(r)

    # sites, pages, jobs tables
    brozzler.frontier.RethinkDbFrontier(r)
|
||||||
|
|
||||||
|
3
setup.py
3
setup.py
@ -21,7 +21,7 @@ import setuptools
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.1.dev38',
|
version='1.1.dev39',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
@ -36,6 +36,7 @@ setuptools.setup(
|
|||||||
'brozzler-new-job=brozzler.cli:brozzler_new_job',
|
'brozzler-new-job=brozzler.cli:brozzler_new_job',
|
||||||
'brozzler-new-site=brozzler.cli:brozzler_new_site',
|
'brozzler-new-site=brozzler.cli:brozzler_new_site',
|
||||||
'brozzler-worker=brozzler.cli:brozzler_worker',
|
'brozzler-worker=brozzler.cli:brozzler_worker',
|
||||||
|
'brozzler-ensure-tables=brozzler.cli:brozzler_ensure_tables',
|
||||||
'brozzler-webconsole=brozzler.webconsole:run',
|
'brozzler-webconsole=brozzler.webconsole:run',
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
Loading…
x
Reference in New Issue
Block a user