make brozzler-list-* a little more intuitive, maybe

This commit is contained in:
Noah Levitt 2017-03-16 13:01:41 -07:00
parent 6c81b40e28
commit 701f7654a8
2 changed files with 34 additions and 18 deletions

View File

@ -391,12 +391,19 @@ def brozzler_list_jobs():
arg_parser = argparse.ArgumentParser( arg_parser = argparse.ArgumentParser(
prog=os.path.basename(sys.argv[0]), prog=os.path.basename(sys.argv[0]),
formatter_class=BetterArgumentDefaultsHelpFormatter) formatter_class=BetterArgumentDefaultsHelpFormatter)
arg_parser.add_argument(
'-a', '--all', dest='all', action='store_true', help=(
'list all jobs (by default, only active jobs are listed)'))
arg_parser.add_argument( arg_parser.add_argument(
'--yaml', dest='yaml', action='store_true', help=( '--yaml', dest='yaml', action='store_true', help=(
'yaml output (default is json)')) 'yaml output (default is json)'))
group = arg_parser.add_mutually_exclusive_group(required=True)
group.add_argument(
'--active', dest='active', action='store_true', help=(
'list active jobs'))
group.add_argument(
'--all', dest='all', action='store_true', help=(
'list all jobs'))
group.add_argument(
'--job', dest='job', metavar='JOB_ID', help=(
'list only the specified job'))
add_rethinkdb_options(arg_parser) add_rethinkdb_options(arg_parser)
add_common_options(arg_parser) add_common_options(arg_parser)
@ -421,19 +428,25 @@ def brozzler_list_sites():
arg_parser = argparse.ArgumentParser( arg_parser = argparse.ArgumentParser(
prog=os.path.basename(sys.argv[0]), prog=os.path.basename(sys.argv[0]),
formatter_class=BetterArgumentDefaultsHelpFormatter) formatter_class=BetterArgumentDefaultsHelpFormatter)
arg_parser.add_argument(
'-a', '--all', dest='all', action='store_true', help=(
'list all sites (by default, only active sites are listed)'))
arg_parser.add_argument( arg_parser.add_argument(
'--yaml', dest='yaml', action='store_true', help=( '--yaml', dest='yaml', action='store_true', help=(
'yaml output (default is json)')) 'yaml output (default is json)'))
group = arg_parser.add_mutually_exclusive_group() group = arg_parser.add_mutually_exclusive_group(required=True)
group.add_argument( group.add_argument(
'--jobless', dest='jobless', action='store_true', help=( '--active', dest='active', action='store_true', help=(
'list only jobless sites')) 'list all active sites'))
group.add_argument( group.add_argument(
'--job', dest='job', metavar='JOB_ID', help=( '--job', dest='job', metavar='JOB_ID', help=(
'list only sites for the supplied job')) 'list sites for a particular job'))
group.add_argument(
'--jobless', dest='jobless', action='store_true', help=(
'list all jobless sites'))
group.add_argument(
'--site', dest='site', metavar='SITE_ID', help=(
'list only the specified site'))
group.add_argument(
'--all', dest='all', action='store_true', help=(
'list all sites'))
add_rethinkdb_options(arg_parser) add_rethinkdb_options(arg_parser)
add_common_options(arg_parser) add_common_options(arg_parser)
@ -451,7 +464,7 @@ def brozzler_list_sites():
reql = reql.get_all(job_id, index='job_id') reql = reql.get_all(job_id, index='job_id')
elif args.jobless: elif args.jobless:
reql = reql.filter(~r.row.has_fields('job_id')) reql = reql.filter(~r.row.has_fields('job_id'))
if not args.all: elif args.active:
reql = reql.filter({'status': 'ACTIVE'}) reql = reql.filter({'status': 'ACTIVE'})
logging.debug('querying rethinkdb: %s', reql) logging.debug('querying rethinkdb: %s', reql)
results = reql.run() results = reql.run()
@ -473,20 +486,23 @@ def brozzler_list_pages():
group = arg_parser.add_mutually_exclusive_group(required=True) group = arg_parser.add_mutually_exclusive_group(required=True)
group.add_argument( group.add_argument(
'--job', dest='job', metavar='JOB_ID', help=( '--job', dest='job', metavar='JOB_ID', help=(
'list pages for all sites of the supplied job')) 'list pages for all sites of a particular job'))
group.add_argument( group.add_argument(
'--site', dest='site', metavar='SITE_ID', help=( '--site', dest='site', metavar='SITE_ID', help=(
'list pages of the supplied site')) 'list pages for the specified site'))
# group.add_argument(
# '--page', dest='page', metavar='PAGE_ID', help=(
# 'list only the specified page'))
group = arg_parser.add_mutually_exclusive_group() group = arg_parser.add_mutually_exclusive_group()
group.add_argument( group.add_argument(
'--queued', dest='queued', action='store_true', help=( '--queued', dest='queued', action='store_true', help=(
'limit only queued pages')) 'limit to queued pages'))
group.add_argument( group.add_argument(
'--brozzled', dest='brozzled', action='store_true', help=( '--brozzled', dest='brozzled', action='store_true', help=(
'limit only pages that have already been brozzled')) 'limit to pages that have already been brozzled'))
group.add_argument( group.add_argument(
'--claimed', dest='claimed', action='store_true', help=( '--claimed', dest='claimed', action='store_true', help=(
'limit only pages that are currently claimed by a brozzler ' 'limit to pages that are currently claimed by a brozzler '
'worker')) 'worker'))
add_rethinkdb_options(arg_parser) add_rethinkdb_options(arg_parser)
add_common_options(arg_parser) add_common_options(arg_parser)
@ -503,7 +519,7 @@ def brozzler_list_pages():
reql = rr.table('sites').get_all(job_id, index='job_id')['id'] reql = rr.table('sites').get_all(job_id, index='job_id')['id']
logging.debug('querying rethinkb: %s', reql) logging.debug('querying rethinkb: %s', reql)
site_ids = reql.run() site_ids = reql.run()
else: elif args.site:
try: try:
site_ids = [int(args.site)] site_ids = [int(args.site)]
except ValueError: except ValueError:

View File

@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup( setuptools.setup(
name='brozzler', name='brozzler',
version='1.1b9.dev205', version='1.1b9.dev206',
description='Distributed web crawling with browsers', description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler', url='https://github.com/internetarchive/brozzler',
author='Noah Levitt', author='Noah Levitt',