Revert "bump version number for last pull request"

This reverts commit d192fc269eddeb8b06888e95bb6e4a6639e34415.
This commit is contained in:
Noah Levitt 2017-04-05 17:01:06 -07:00
parent d192fc269e
commit 62917a6f1a
2 changed files with 29 additions and 38 deletions

View File

@@ -58,7 +58,7 @@ def add_common_options(arg_parser):
arg_parser.add_argument( arg_parser.add_argument(
'--version', action='version', '--version', action='version',
version='brozzler %s - %s' % ( version='brozzler %s - %s' % (
brozzler.__version__, os.path.basename(argv[0]))) brozzler.__version__, os.path.basename(sys.argv[0])))
def add_rethinkdb_options(arg_parser): def add_rethinkdb_options(arg_parser):
arg_parser.add_argument( arg_parser.add_argument(
@@ -124,14 +124,13 @@ class BetterArgumentDefaultsHelpFormatter(
else: else:
return super()._get_help_string(action) return super()._get_help_string(action)
def brozzle_page(argv=None): def brozzle_page():
''' '''
Command line utility entry point for brozzling a single page. Opens url in Command line utility entry point for brozzling a single page. Opens url in
a browser, running some javascript behaviors, and prints outlinks. a browser, running some javascript behaviors, and prints outlinks.
''' '''
argv = argv or sys.argv
arg_parser = argparse.ArgumentParser( arg_parser = argparse.ArgumentParser(
prog=os.path.basename(argv[0]), prog=os.path.basename(sys.argv[0]),
description='brozzle-page - brozzle a single page', description='brozzle-page - brozzle a single page',
formatter_class=BetterArgumentDefaultsHelpFormatter) formatter_class=BetterArgumentDefaultsHelpFormatter)
arg_parser.add_argument('url', metavar='URL', help='page url') arg_parser.add_argument('url', metavar='URL', help='page url')
@@ -155,7 +154,7 @@ def brozzle_page(argv=None):
'--proxy', dest='proxy', default=None, help='http proxy') '--proxy', dest='proxy', default=None, help='http proxy')
add_common_options(arg_parser) add_common_options(arg_parser)
args = arg_parser.parse_args(args=argv[1:]) args = arg_parser.parse_args(args=sys.argv[1:])
configure_logging(args) configure_logging(args)
behavior_parameters = {} behavior_parameters = {}
@@ -188,15 +187,14 @@ def brozzle_page(argv=None):
finally: finally:
browser.stop() browser.stop()
def brozzler_new_job(argv=None): def brozzler_new_job():
''' '''
Command line utility entry point for queuing a new brozzler job. Takes a Command line utility entry point for queuing a new brozzler job. Takes a
yaml brozzler job configuration file, creates job, sites, and pages objects yaml brozzler job configuration file, creates job, sites, and pages objects
in rethinkdb, which brozzler-workers will look at and start crawling. in rethinkdb, which brozzler-workers will look at and start crawling.
''' '''
argv = argv or sys.argv
arg_parser = argparse.ArgumentParser( arg_parser = argparse.ArgumentParser(
prog=os.path.basename(argv[0]), prog=os.path.basename(sys.argv[0]),
description='brozzler-new-job - queue new job with brozzler', description='brozzler-new-job - queue new job with brozzler',
formatter_class=BetterArgumentDefaultsHelpFormatter) formatter_class=BetterArgumentDefaultsHelpFormatter)
arg_parser.add_argument( arg_parser.add_argument(
@@ -205,7 +203,7 @@ def brozzler_new_job(argv=None):
add_rethinkdb_options(arg_parser) add_rethinkdb_options(arg_parser)
add_common_options(arg_parser) add_common_options(arg_parser)
args = arg_parser.parse_args(args=argv[1:]) args = arg_parser.parse_args(args=sys.argv[1:])
configure_logging(args) configure_logging(args)
rr = rethinker(args) rr = rethinker(args)
@@ -217,15 +215,14 @@ def brozzler_new_job(argv=None):
print(' ' + yaml.dump(e.errors).rstrip().replace('\n', '\n '), file=sys.stderr) print(' ' + yaml.dump(e.errors).rstrip().replace('\n', '\n '), file=sys.stderr)
sys.exit(1) sys.exit(1)
def brozzler_new_site(argv=None): def brozzler_new_site():
''' '''
Command line utility entry point for queuing a new brozzler site. Command line utility entry point for queuing a new brozzler site.
Takes a seed url and creates a site and page object in rethinkdb, which Takes a seed url and creates a site and page object in rethinkdb, which
brozzler-workers will look at and start crawling. brozzler-workers will look at and start crawling.
''' '''
argv = argv or sys.argv
arg_parser = argparse.ArgumentParser( arg_parser = argparse.ArgumentParser(
prog=os.path.basename(argv[0]), prog=os.path.basename(sys.argv[0]),
description='brozzler-new-site - register site to brozzle', description='brozzler-new-site - register site to brozzle',
formatter_class=BetterArgumentDefaultsHelpFormatter) formatter_class=BetterArgumentDefaultsHelpFormatter)
arg_parser.add_argument('seed', metavar='SEED', help='seed url') arg_parser.add_argument('seed', metavar='SEED', help='seed url')
@@ -256,7 +253,7 @@ def brozzler_new_site(argv=None):
help='use this password to try to log in if a login form is found') help='use this password to try to log in if a login form is found')
add_common_options(arg_parser) add_common_options(arg_parser)
args = arg_parser.parse_args(args=argv[1:]) args = arg_parser.parse_args(args=sys.argv[1:])
configure_logging(args) configure_logging(args)
rr = rethinker(args) rr = rethinker(args)
@@ -274,14 +271,13 @@ def brozzler_new_site(argv=None):
frontier = brozzler.RethinkDbFrontier(rr) frontier = brozzler.RethinkDbFrontier(rr)
brozzler.new_site(frontier, site) brozzler.new_site(frontier, site)
def brozzler_worker(argv=None): def brozzler_worker():
''' '''
Main entry point for brozzler, gets sites and pages to brozzle from Main entry point for brozzler, gets sites and pages to brozzle from
rethinkdb, brozzles them. rethinkdb, brozzles them.
''' '''
argv = argv or sys.argv
arg_parser = argparse.ArgumentParser( arg_parser = argparse.ArgumentParser(
prog=os.path.basename(argv[0]), prog=os.path.basename(sys.argv[0]),
formatter_class=BetterArgumentDefaultsHelpFormatter) formatter_class=BetterArgumentDefaultsHelpFormatter)
add_rethinkdb_options(arg_parser) add_rethinkdb_options(arg_parser)
arg_parser.add_argument( arg_parser.add_argument(
@@ -300,7 +296,7 @@ def brozzler_worker(argv=None):
'the rethinkdb service registry')) 'the rethinkdb service registry'))
add_common_options(arg_parser) add_common_options(arg_parser)
args = arg_parser.parse_args(args=argv[1:]) args = arg_parser.parse_args(args=sys.argv[1:])
configure_logging(args) configure_logging(args)
def sigterm(signum, frame): def sigterm(signum, frame):
@@ -345,7 +341,7 @@ def brozzler_worker(argv=None):
logging.info('brozzler-worker is all done, exiting') logging.info('brozzler-worker is all done, exiting')
def brozzler_ensure_tables(argv=None): def brozzler_ensure_tables():
''' '''
Creates rethinkdb tables if they don't already exist. Brozzler Creates rethinkdb tables if they don't already exist. Brozzler
(brozzler-worker, brozzler-new-job, etc) normally creates the tables it (brozzler-worker, brozzler-new-job, etc) normally creates the tables it
@@ -353,14 +349,13 @@ def brozzler_ensure_tables(argv=None):
the same time, you can end up with duplicate broken tables. So it's a good the same time, you can end up with duplicate broken tables. So it's a good
idea to use this utility at an early step when spinning up a cluster. idea to use this utility at an early step when spinning up a cluster.
''' '''
argv = argv or sys.argv
arg_parser = argparse.ArgumentParser( arg_parser = argparse.ArgumentParser(
prog=os.path.basename(argv[0]), prog=os.path.basename(sys.argv[0]),
formatter_class=BetterArgumentDefaultsHelpFormatter) formatter_class=BetterArgumentDefaultsHelpFormatter)
add_rethinkdb_options(arg_parser) add_rethinkdb_options(arg_parser)
add_common_options(arg_parser) add_common_options(arg_parser)
args = arg_parser.parse_args(args=argv[1:]) args = arg_parser.parse_args(args=sys.argv[1:])
configure_logging(args) configure_logging(args)
rr = rethinker(args) rr = rethinker(args)
@@ -380,10 +375,9 @@ class Jsonner(json.JSONEncoder):
else: else:
return json.JSONEncoder.default(self, o) return json.JSONEncoder.default(self, o)
def brozzler_list_jobs(argv=None): def brozzler_list_jobs():
argv = argv or sys.argv
arg_parser = argparse.ArgumentParser( arg_parser = argparse.ArgumentParser(
prog=os.path.basename(argv[0]), prog=os.path.basename(sys.argv[0]),
formatter_class=BetterArgumentDefaultsHelpFormatter) formatter_class=BetterArgumentDefaultsHelpFormatter)
arg_parser.add_argument( arg_parser.add_argument(
'--yaml', dest='yaml', action='store_true', help=( '--yaml', dest='yaml', action='store_true', help=(
@@ -401,7 +395,7 @@ def brozzler_list_jobs(argv=None):
add_rethinkdb_options(arg_parser) add_rethinkdb_options(arg_parser)
add_common_options(arg_parser) add_common_options(arg_parser)
args = arg_parser.parse_args(args=argv[1:]) args = arg_parser.parse_args(args=sys.argv[1:])
configure_logging(args) configure_logging(args)
rr = rethinker(args) rr = rethinker(args)
@@ -432,10 +426,9 @@ def brozzler_list_jobs(argv=None):
for result in results: for result in results:
print(json.dumps(result, cls=Jsonner, indent=2)) print(json.dumps(result, cls=Jsonner, indent=2))
def brozzler_list_sites(argv=None): def brozzler_list_sites():
argv = argv or sys.argv
arg_parser = argparse.ArgumentParser( arg_parser = argparse.ArgumentParser(
prog=os.path.basename(argv[0]), prog=os.path.basename(sys.argv[0]),
formatter_class=BetterArgumentDefaultsHelpFormatter) formatter_class=BetterArgumentDefaultsHelpFormatter)
arg_parser.add_argument( arg_parser.add_argument(
'--yaml', dest='yaml', action='store_true', help=( '--yaml', dest='yaml', action='store_true', help=(
@@ -459,7 +452,7 @@ def brozzler_list_sites(argv=None):
add_rethinkdb_options(arg_parser) add_rethinkdb_options(arg_parser)
add_common_options(arg_parser) add_common_options(arg_parser)
args = arg_parser.parse_args(args=argv[1:]) args = arg_parser.parse_args(args=sys.argv[1:])
configure_logging(args) configure_logging(args)
rr = rethinker(args) rr = rethinker(args)
@@ -485,10 +478,9 @@ def brozzler_list_sites(argv=None):
for result in results: for result in results:
print(json.dumps(result, cls=Jsonner, indent=2)) print(json.dumps(result, cls=Jsonner, indent=2))
def brozzler_list_pages(argv=None): def brozzler_list_pages():
argv = argv or sys.argv
arg_parser = argparse.ArgumentParser( arg_parser = argparse.ArgumentParser(
prog=os.path.basename(argv[0]), prog=os.path.basename(sys.argv[0]),
formatter_class=BetterArgumentDefaultsHelpFormatter) formatter_class=BetterArgumentDefaultsHelpFormatter)
arg_parser.add_argument( arg_parser.add_argument(
'--yaml', dest='yaml', action='store_true', help=( '--yaml', dest='yaml', action='store_true', help=(
@@ -517,7 +509,7 @@ def brozzler_list_pages(argv=None):
add_rethinkdb_options(arg_parser) add_rethinkdb_options(arg_parser)
add_common_options(arg_parser) add_common_options(arg_parser)
args = arg_parser.parse_args(args=argv[1:]) args = arg_parser.parse_args(args=sys.argv[1:])
configure_logging(args) configure_logging(args)
rr = rethinker(args) rr = rethinker(args)
@@ -562,16 +554,15 @@ def brozzler_list_pages(argv=None):
for result in results: for result in results:
print(json.dumps(result, cls=Jsonner, indent=2)) print(json.dumps(result, cls=Jsonner, indent=2))
def brozzler_list_captures(argv=None): def brozzler_list_captures():
''' '''
Handy utility for looking up entries in the rethinkdb "captures" table by Handy utility for looking up entries in the rethinkdb "captures" table by
url or sha1. url or sha1.
''' '''
import urlcanon import urlcanon
argv = argv or sys.argv
arg_parser = argparse.ArgumentParser( arg_parser = argparse.ArgumentParser(
prog=os.path.basename(argv[0]), prog=os.path.basename(sys.argv[0]),
formatter_class=BetterArgumentDefaultsHelpFormatter) formatter_class=BetterArgumentDefaultsHelpFormatter)
arg_parser.add_argument( arg_parser.add_argument(
'-p', '--prefix', dest='prefix', action='store_true', help=( '-p', '--prefix', dest='prefix', action='store_true', help=(
@@ -587,7 +578,7 @@ def brozzler_list_captures(argv=None):
'url_or_sha1', metavar='URL_or_SHA1', 'url_or_sha1', metavar='URL_or_SHA1',
help='url or sha1 to look up in captures table') help='url or sha1 to look up in captures table')
args = arg_parser.parse_args(args=argv[1:]) args = arg_parser.parse_args(args=sys.argv[1:])
configure_logging(args) configure_logging(args)
rr = rethinker(args) rr = rethinker(args)

View File

@@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup( setuptools.setup(
name='brozzler', name='brozzler',
version='1.1b11.dev227', version='1.1b11.dev226',
description='Distributed web crawling with browsers', description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler', url='https://github.com/internetarchive/brozzler',
author='Noah Levitt', author='Noah Levitt',