mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-19 23:35:54 -04:00
replace vagrant-brozzler-new-site with python script that fills in default options and passes through others
This commit is contained in:
parent
cc9517cb45
commit
2462efc4ed
2
setup.py
2
setup.py
@ -32,7 +32,7 @@ def find_package_data(package):
|
||||
|
||||
setuptools.setup(
|
||||
name='brozzler',
|
||||
version='1.1b6.dev84',
|
||||
version='1.1b6.dev85',
|
||||
description='Distributed web crawling with browsers',
|
||||
url='https://github.com/internetarchive/brozzler',
|
||||
author='Noah Levitt',
|
||||
|
86
vagrant/vagrant-brozzler-new-site.py
Executable file
86
vagrant/vagrant-brozzler-new-site.py
Executable file
@ -0,0 +1,86 @@
|
||||
#!/usr/bin/env python
|
||||
'''
|
||||
vagrant-brozzler-new-site.py - runs brozzler-new-site inside the vagrant vm to
|
||||
queue a site for your vagrant brozzler deployment.
|
||||
|
||||
Fills in the --proxy and --enable-warcprox-features options automatically.
Some other options are passed
|
||||
through.
|
||||
|
||||
This is a standalone script with no dependencies other than python, and should
|
||||
work with python 2.7 or python 3.2+. The only reason it's not a bash script is
|
||||
so we can use the argparse library.
|
||||
|
||||
Copyright (C) 2014-2016 Internet Archive
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
'''
|
||||
|
||||
import sys
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
try:
    # shlex.quote is the documented spelling on newer python 3
    from shlex import quote
except ImportError:
    # older interpreters (python 2.7, 3.2) only provide pipes.quote; catch
    # ImportError specifically so unrelated failures are not masked
    from pipes import quote
|
||||
|
||||
def main(argv=None):
    '''
    Queue a seed url by running brozzler-new-site inside the vagrant vm.

    Parses the command line, rebuilds the subset of options that
    brozzler-new-site understands, adds --proxy=localhost:8000 and
    --enable-warcprox-features, and runs the result through "vagrant ssh".

    Args:
        argv: full argument vector with the program name first, like
            sys.argv. Defaults to sys.argv when None. An empty list is
            treated as "no arguments" instead of raising IndexError.

    Returns:
        The exit status of the "vagrant ssh" subprocess.

    Raises:
        SystemExit: raised by argparse for --help or invalid arguments.
    '''
    if argv is None:
        argv = sys.argv
    if argv:
        prog = os.path.basename(argv[0])
    else:
        prog = 'vagrant-brozzler-new-site.py'

    arg_parser = argparse.ArgumentParser(prog=prog)
    arg_parser.add_argument('seed', metavar='SEED', help='seed url')
    arg_parser.add_argument(
            '--time-limit', dest='time_limit', default=None,
            help='time limit in seconds for this site')
    arg_parser.add_argument(
            '--ignore-robots', dest='ignore_robots', action='store_true',
            help='ignore robots.txt for this site')
    arg_parser.add_argument(
            '--warcprox-meta', dest='warcprox_meta',
            help=(
                'Warcprox-Meta http request header to send with each request; '
                'must be a json blob, ignored unless warcprox features are '
                'enabled'))
    arg_parser.add_argument(
            '-q', '--quiet', dest='quiet', action='store_true')
    arg_parser.add_argument(
            '-v', '--verbose', dest='verbose', action='store_true')

    args = arg_parser.parse_args(args=argv[1:])

    # Everything below ends up in a single string that the shell inside the
    # vm interprets, so every user-supplied value has to be shell-quoted.
    options = []
    if args.time_limit:
        options.append('--time-limit=%s' % quote(args.time_limit))
    if args.ignore_robots:
        options.append('--ignore-robots')
    if args.warcprox_meta:
        options.append('--warcprox-meta=%s' % quote(args.warcprox_meta))
    if args.quiet:
        options.append('--quiet')
    if args.verbose:
        options.append('--verbose')

    # cd to path with Vagrantfile so "vagrant ssh" knows what to do; abspath
    # because dirname() of a bare filename is '' and os.chdir('') raises
    os.chdir(os.path.dirname(os.path.abspath(__file__)))

    remote_words = [
        'PYTHONPATH=/home/vagrant/brozzler-ve34/lib/python3.4/site-packages',
        '/home/vagrant/brozzler-ve34/bin/python',
        '/home/vagrant/brozzler-ve34/bin/brozzler-new-site',
        '--proxy=localhost:8000',
        '--enable-warcprox-features',
    ]
    remote_words.extend(options)
    # urls routinely contain & ? ; which the remote shell would act on
    remote_words.append(quote(args.seed))
    cmd = ' '.join(remote_words)

    return subprocess.call(['vagrant', 'ssh', '--', cmd])


if __name__ == '__main__':
    # propagate the status of "vagrant ssh" to the calling shell
    sys.exit(main(sys.argv))
|
||||
|
@ -1,14 +0,0 @@
|
||||
#!/bin/bash
#
# vagrant-brozzler-new-site.sh - run brozzler-new-site inside the vagrant vm to
# queue a job for your vagrant brozzler deployment
#
# All command line arguments are handed to brozzler-new-site via "$@".
#

# cd to path with Vagrantfile so "vagrant ssh" knows what to do
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# quoted so a checkout path containing spaces works; exit instead of running
# "vagrant ssh" from the wrong directory if the cd fails
cd "$script_dir" || exit 1

vagrant ssh -- \
    PYTHONPATH=/home/vagrant/brozzler-ve34/lib/python3.4/site-packages \
    /home/vagrant/brozzler-ve34/bin/python \
    /home/vagrant/brozzler-ve34/bin/brozzler-new-site "$@"
|
Loading…
x
Reference in New Issue
Block a user