diff --git a/brozzler/cli.py b/brozzler/cli.py index 4b0bd76..2db020c 100644 --- a/brozzler/cli.py +++ b/brozzler/cli.py @@ -2,7 +2,7 @@ ''' brozzler/cli.py - brozzler command line executables -Copyright (C) 2014-2017 Internet Archive +Copyright (C) 2014-2019 Internet Archive Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -605,6 +605,10 @@ def brozzler_purge(argv=None): '--site', dest='site', metavar='SITE_ID', help=( 'purge crawl state from rethinkdb for a site, including all ' 'pages')) + group.add_argument( + '--finished-before', dest='finished_before', metavar='YYYY-MM-DD', + help=('purge crawl state from rethinkdb for a jobs that ended ' + 'before this date')) arg_parser.add_argument( '--force', dest='force', action='store_true', help=( 'purge even if job or site is still has status ACTIVE')) @@ -653,6 +657,20 @@ def brozzler_purge(argv=None): '(override with --force)', site_id) sys.exit(1) _purge_site(rr, site_id) + elif args.finished_before: + finished_before = datetime.datetime.strptime( + args.finished_before, '%Y-%m-%d').replace( + tzinfo=doublethink.UTC) + reql = rr.table('jobs').filter( + r.row['finished'].default(r.maxval).lt(finished_before).or_( + r.row['starts_and_stops'].nth(-1)['stop'].default(r.maxval).lt(finished_before))) + logging.debug( + 'retrieving jobs older than %s: %s', finished_before, reql) + for job in reql.run(): + # logging.info('job %s finished=%s starts_and_stops[-1]["stop"]=%s', + # job['id'], job.get('finished'), + # job.get('starts_and_stops', [{'stop':None}])[-1]['stop']) + _purge_job(rr, job['id']) def _purge_site(rr, site_id): reql = rr.table('pages').between( diff --git a/setup.py b/setup.py index c474fa2..43cef28 100644 --- a/setup.py +++ b/setup.py @@ -64,10 +64,10 @@ setuptools.setup( ], }, install_requires=[ - 'PyYAML>=3.12', + 'PyYAML>=5.8', 'youtube-dl>=2018.7.21', 'reppy==0.3.4', - 'requests>=2.18.4', + 'requests>=2.21', 'websocket-client>=0.39.0,<=0.48.0', 'pillow>=5.2.0', 'urlcanon>=0.1.dev23', @@ -80,13 +80,13 @@ setuptools.setup( ], extras_require={ 'dashboard': [ - 'flask>=0.11', + 'flask>=1.0', 'gunicorn>=19.8.1' ], 'easy': [ 'warcprox>=2.4b2.dev173', 'pywb>=0.33.2,<2', - 'flask>=0.11', + 'flask>=1.0', 'gunicorn>=19.8.1' ], },