mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-19 15:25:59 -04:00
commit
42ddfba923
@ -2,7 +2,7 @@
|
||||
'''
|
||||
brozzler/cli.py - brozzler command line executables
|
||||
|
||||
Copyright (C) 2014-2017 Internet Archive
|
||||
Copyright (C) 2014-2019 Internet Archive
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@ -605,6 +605,10 @@ def brozzler_purge(argv=None):
|
||||
'--site', dest='site', metavar='SITE_ID', help=(
|
||||
'purge crawl state from rethinkdb for a site, including all '
|
||||
'pages'))
|
||||
group.add_argument(
|
||||
'--finished-before', dest='finished_before', metavar='YYYY-MM-DD',
|
||||
help=('purge crawl state from rethinkdb for a jobs that ended '
|
||||
'before this date'))
|
||||
arg_parser.add_argument(
|
||||
'--force', dest='force', action='store_true', help=(
|
||||
'purge even if job or site is still has status ACTIVE'))
|
||||
@ -653,6 +657,20 @@ def brozzler_purge(argv=None):
|
||||
'(override with --force)', site_id)
|
||||
sys.exit(1)
|
||||
_purge_site(rr, site_id)
|
||||
elif args.finished_before:
|
||||
finished_before = datetime.datetime.strptime(
|
||||
args.finished_before, '%Y-%m-%d').replace(
|
||||
tzinfo=doublethink.UTC)
|
||||
reql = rr.table('jobs').filter(
|
||||
r.row['finished'].default(r.maxval).lt(finished_before).or_(
|
||||
r.row['starts_and_stops'].nth(-1)['stop'].default(r.maxval).lt(finished_before)))
|
||||
logging.debug(
|
||||
'retrieving jobs older than %s: %s', finished_before, reql)
|
||||
for job in reql.run():
|
||||
# logging.info('job %s finished=%s starts_and_stops[-1]["stop"]=%s',
|
||||
# job['id'], job.get('finished'),
|
||||
# job.get('starts_and_stops', [{'stop':None}])[-1]['stop'])
|
||||
_purge_job(rr, job['id'])
|
||||
|
||||
def _purge_site(rr, site_id):
|
||||
reql = rr.table('pages').between(
|
||||
|
8
setup.py
8
setup.py
@ -64,10 +64,10 @@ setuptools.setup(
|
||||
],
|
||||
},
|
||||
install_requires=[
|
||||
'PyYAML>=3.12',
|
||||
'PyYAML>=5.8',
|
||||
'youtube-dl>=2018.7.21',
|
||||
'reppy==0.3.4',
|
||||
'requests>=2.18.4',
|
||||
'requests>=2.21',
|
||||
'websocket-client>=0.39.0,<=0.48.0',
|
||||
'pillow>=5.2.0',
|
||||
'urlcanon>=0.1.dev23',
|
||||
@ -80,13 +80,13 @@ setuptools.setup(
|
||||
],
|
||||
extras_require={
|
||||
'dashboard': [
|
||||
'flask>=0.11',
|
||||
'flask>=1.0',
|
||||
'gunicorn>=19.8.1'
|
||||
],
|
||||
'easy': [
|
||||
'warcprox>=2.4b2.dev173',
|
||||
'pywb>=0.33.2,<2',
|
||||
'flask>=0.11',
|
||||
'flask>=1.0',
|
||||
'gunicorn>=19.8.1'
|
||||
],
|
||||
},
|
||||
|
Loading…
x
Reference in New Issue
Block a user