mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-02 19:46:36 -04:00
mount warcs dir with sshfs; start-dead to start only services that aren't already running
This commit is contained in:
parent
196e52ac0a
commit
80f963591f
1 changed file with 139 additions and 100 deletions
|
@ -63,39 +63,33 @@ _reset() {
|
|||
tstamp=$(date +"%Y%m%d%H%M%S")
|
||||
echo "renaming rethinkdb database archiveit_brozzler to archiveit_brozzler_$tstamp"
|
||||
PYTHONPATH=/home/nlevitt/workspace/brozzler/brozzler-ve34/lib/python3.4/site-packages python3.4 <<EOF
|
||||
import rethinkdb as r
|
||||
with r.connect("wbgrp-svc035") as conn:
|
||||
import rethinkdb as r
|
||||
with r.connect("wbgrp-svc035") as conn:
|
||||
r.db("archiveit_brozzler").config().update({"name":"archiveit_brozzler_$tstamp"}).run(conn)
|
||||
EOF
|
||||
EOF
|
||||
mysql -hwbgrp-svc107 -P6306 -uarchiveit -parchiveit archiveit3 -e "update CrawlJob set status='FINISHED_ABNORMAL', endDate=now() where status='ACTIVE'"
|
||||
|
||||
set -e
|
||||
sudo umount /1/brzl/warcs
|
||||
mv -v /1/brzl /tmp/brzl.$tstamp
|
||||
mkdir -vp /1/brzl/{warcs,logs}
|
||||
chgrp -v archiveit /1/brzl/warcs/ && chmod g+w /1/brzl/warcs
|
||||
|
||||
ssh wbgrp-svc111 rm -vf /1/brzl/warcs/*.warc*
|
||||
# chgrp -v archiveit /1/brzl/warcs/ && chmod g+w /1/brzl/warcs
|
||||
ssh wbgrp-svc111 mv -v "/1/brzl/warcs /tmp/brzl-warcs.$tstamp && mkdir -vp /1/brzl/warcs"
|
||||
sudo -H -u archiveit sshfs wbgrp-svc111:/1/brzl/warcs /1/brzl/warcs -o nonempty,ro,allow_other
|
||||
}
|
||||
|
||||
_start() {
|
||||
if _status > /dev/null ; then
|
||||
echo "$0: can't start because something's still running"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
set -e
|
||||
|
||||
start_warcprox() {
|
||||
echo $0: starting warcprox
|
||||
ssh -fn wbgrp-svc111 'PYTHONPATH=/home/nlevitt/workspace/warcprox/warcprox-ve34/lib/python3.4/site-packages nice /home/nlevitt/workspace/warcprox/warcprox-ve34/bin/warcprox --dir=/1/brzl/warcs --rethinkdb-servers=wbgrp-svc020,wbgrp-svc035,wbgrp-svc036 --rethinkdb-db=archiveit_brozzler --rethinkdb-big-table --cacert=/1/brzl/warcprox-ca.pem --certs-dir=/1/brzl/certs --address=0.0.0.0 --base32 --gzip --rollover-idle-time=180 --kafka-broker-list=qa-archive-it.org:6092 --kafka-capture-feed-topic=ait-brozzler-captures' &>>/1/brzl/logs/warcprox.out &
|
||||
}
|
||||
|
||||
sleep 5
|
||||
|
||||
start_brozzler_boss() {
|
||||
echo $0: starting ait-brozzler-boss.py
|
||||
venv=/home/nlevitt/workspace/ait5/ait5-ve34
|
||||
PYTHONPATH=$venv/lib/python3.4/site-packages $venv/bin/python /home/nlevitt/workspace/ait5/scripts/ait-brozzler-boss.py &>> /1/brzl/logs/ait-brozzler-boss.out &
|
||||
}
|
||||
|
||||
sleep 5
|
||||
|
||||
start_brozzler_workers() {
|
||||
echo $0: starting brozzler-workers
|
||||
for node in aidata{400,400-bu,401,401-bu} ; do
|
||||
(
|
||||
|
@ -106,22 +100,65 @@ _start() {
|
|||
sleep 5
|
||||
)
|
||||
done
|
||||
}
|
||||
|
||||
start_pywayback() {
|
||||
echo $0: starting pywayback
|
||||
PYTHONPATH=/home/nlevitt/workspace/pygwb/pygwb-ve27/lib/python2.7/site-packages WAYBACK_CONFIG=/home/nlevitt/workspace/pygwb/gwb.yaml PATH=/home/nlevitt/workspace/pygwb/pygwb-ve27/bin:/usr/bin:/bin /home/nlevitt/workspace/pygwb/start-gwb.sh &>> /1/brzl/logs/pywayback.out &
|
||||
}
|
||||
|
||||
start_ait5() {
|
||||
echo $0: starting ait5 partner webapp
|
||||
PYTHONPATH=/home/nlevitt/workspace/ait5/ait5-ve34/lib/python3.4/site-packages python3.4 /home/nlevitt/workspace/ait5/manage.py runserver_plus 0.0.0.0:8888 &>> /1/brzl/logs/ait5.out &
|
||||
}
|
||||
|
||||
start_brozzler_console() {
|
||||
echo $0: starting brozzler web console
|
||||
PYTHONPATH=/home/nlevitt/workspace/brozzler/webconsole/brozzler-webconsole-ve34/lib/python3.4/site-packages /home/nlevitt/workspace/brozzler/webconsole/brozzler-webconsole-ve34/bin/flask --debug --app=/home/nlevitt/workspace/brozzler/webconsole/brozzler-webconsole.py run --host=0.0.0.0 --port=8081 &>> /1/brzl/logs/brozzler-console.out &
|
||||
}
|
||||
|
||||
start_dead() {
|
||||
warcprox_pids=( $(pgrep -f /home/nlevitt/workspace/warcprox/warcprox-ve34/bin/warcprox) )
|
||||
worker_pids=( $(pgrep -f 'ssh .* docker run .* internetarchive/brozzler-worker .* brozzler-worker') )
|
||||
pywayback_pids=( $(pgrep -f /home/nlevitt/workspace/pygwb/pygwb-ve27/bin/gunicorn) )
|
||||
ait_brozzler_boss=( $(pgrep -f /home/nlevitt/workspace/ait5/scripts/ait-brozzler-boss.py) )
|
||||
ait5_pids=( $(pgrep -f 0.0.0.0:8888) )
|
||||
console_pids=( $(pgrep -f app=.*brozzler-webconsole.py) )
|
||||
|
||||
[ -z "${warcprox_pids[*]}" ] && start_warcprox
|
||||
[ -z "${worker_pids[*]}" ] && start_brozzler_workers
|
||||
[ -z "${pywayback_pids[*]}" ] && start_pywayback
|
||||
[ -z "${ait_brozzler_boss[*]}" ] && start_brozzler_boss
|
||||
[ -z "${ait5_pids[*]}" ] && start_ait5
|
||||
[ -z "${console_pids[*]}" ] && start_brozzler_console
|
||||
}
|
||||
|
||||
_start() {
|
||||
if _status > /dev/null ; then
|
||||
echo "$0: can't start because something's still running"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
set -e
|
||||
start_warcprox
|
||||
sleep 5
|
||||
start_brozzler_boss
|
||||
sleep 5
|
||||
start_brozzler_workers
|
||||
start_pywayback
|
||||
start_ait5
|
||||
start_brozzler_console
|
||||
|
||||
echo $0: logs are in /1/brzl/logs
|
||||
echo $0: warcs are in /1/brzl/warcs
|
||||
}
|
||||
|
||||
usage() {
|
||||
echo "Usage: $0 status|start|stop|restart|reset|start-dead"
|
||||
}
|
||||
|
||||
if [ $# != 1 ] ; then
|
||||
echo "Usage: $0 status|start|stop|restart|reset"
|
||||
usage
|
||||
exit 1
|
||||
elif [ $1 = 'status' ] ; then
|
||||
_status
|
||||
|
@ -134,8 +171,10 @@ elif [ $1 = 'restart' ] ; then
|
|||
_start
|
||||
elif [ $1 = 'reset' ] ; then
|
||||
_reset
|
||||
elif [ $1 = 'start-dead' ] ; then
|
||||
start_dead
|
||||
else
|
||||
echo "Usage: $0 status|start|stop|restart|reset"
|
||||
usage
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue