mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 08:39:59 -05:00
smart waiting for tables and indexes to be ready
This commit is contained in:
parent
90fad87f7e
commit
8c422534a5
@ -47,7 +47,9 @@ _stop() {
|
||||
fi
|
||||
|
||||
set -x
|
||||
ssh wbgrp-svc111 pkill -f /home/nlevitt/workspace/warcprox/warcprox-ve34/bin/warcprox
|
||||
for node in wbgrp-svc{110,111} ; do
|
||||
ssh $node pkill -f /home/nlevitt/workspace/warcprox/warcprox-ve34/bin/warcprox
|
||||
done
|
||||
set +x
|
||||
|
||||
if _status > /dev/null ; then
|
||||
@ -72,25 +74,66 @@ with r.connect("wbgrp-svc035") as conn:
|
||||
EOF
|
||||
mysql -hwbgrp-svc107 -P6306 -uarchiveit -parchiveit archiveit3 -e "update CrawlJob set status='FINISHED_ABNORMAL', endDate=now() where status='ACTIVE'"
|
||||
|
||||
for node in wbgrp-svc{110,111} ; do
|
||||
sudo umount /1/brzl/warcs/$node
|
||||
done
|
||||
set -e
|
||||
sudo umount /1/brzl/warcs
|
||||
mv -v /1/brzl /tmp/brzl.$tstamp
|
||||
mkdir -vp /1/brzl/{logs,warcs}
|
||||
sudo chown -v archiveit /1/brzl/warcs
|
||||
# chgrp -v archiveit /1/brzl/warcs/ && chmod g+w /1/brzl/warcs
|
||||
ssh wbgrp-svc111 mv -v "/1/brzl/warcs /tmp/brzl-warcs.$tstamp && mkdir -vp /1/brzl/warcs"
|
||||
sudo -H -u archiveit sshfs wbgrp-svc111:/1/brzl/warcs /1/brzl/warcs -o nonempty,ro,allow_other
|
||||
for node in wbgrp-svc{110,111} ; do
|
||||
ssh $node mv -v "/1/brzl/warcs /tmp/brzl-warcs.$tstamp && mkdir -vp /1/brzl/warcs"
|
||||
sudo -H -u archiveit mkdir -vp /1/brzl/warcs/$node
|
||||
sudo -H -u archiveit sshfs $node:/1/brzl/warcs /1/brzl/warcs/$node -o nonempty,ro,allow_other
|
||||
done
|
||||
}
|
||||
|
||||
rethinkdb_tables_ready() {
|
||||
PYTHONPATH=/home/nlevitt/workspace/brozzler/brozzler-ve34/lib/python3.4/site-packages python3.4 <<EOF
|
||||
import rethinkdb as r
|
||||
import sys
|
||||
for server in ["wbgrp-svc020","wbgrp-svc035","wbgrp-svc036"]:
|
||||
with r.connect(server) as conn:
|
||||
for table in "$@".split():
|
||||
table_name, n_indexes = table.split(":")
|
||||
try:
|
||||
status = r.db("archiveit_brozzler").table(table_name).status().run(conn)
|
||||
if not status["status"]["ready_for_writes"]:
|
||||
# print("table %s not ready for writes" % (table_name))
|
||||
sys.exit(2)
|
||||
|
||||
indexes_statuses = r.db("archiveit_brozzler").table(table_name).index_status().run(conn)
|
||||
# print("indexes_statuses=%s" % indexes_statuses)
|
||||
if len(indexes_statuses) != int(n_indexes):
|
||||
# print("table %s has only %s indexes (expected %s)" % (table_name, len(indexes_statuses), n_indexes))
|
||||
sys.exit(3)
|
||||
for index_status in indexes_statuses:
|
||||
if not index_status["ready"]:
|
||||
# print("table %s index %s is not ready" % (table_name, index_status["index"]))
|
||||
sys.exit(4)
|
||||
# print("table %s ready with %s indexes" % (table_name, n_indexes))
|
||||
except BaseException as e:
|
||||
# print("table %s: exception %s" % (table_name, e))
|
||||
sys.exit(1)
|
||||
EOF
|
||||
}
|
||||
|
||||
start_warcprox() {
|
||||
echo $0: starting warcprox
|
||||
ssh -fn wbgrp-svc111 'PYTHONPATH=/home/nlevitt/workspace/warcprox/warcprox-ve34/lib/python3.4/site-packages nice /home/nlevitt/workspace/warcprox/warcprox-ve34/bin/warcprox --dir=/1/brzl/warcs --rethinkdb-servers=wbgrp-svc020,wbgrp-svc035,wbgrp-svc036 --rethinkdb-db=archiveit_brozzler --rethinkdb-big-table --cacert=/1/brzl/warcprox-ca.pem --certs-dir=/1/brzl/certs --address=0.0.0.0 --base32 --gzip --rollover-idle-time=180 --kafka-broker-list=qa-archive-it.org:6092 --kafka-capture-feed-topic=ait-brozzler-captures' &>>/1/brzl/logs/warcprox.out &
|
||||
for node in wbgrp-svc{110,111} ; do
|
||||
ssh -fn $node 'PYTHONPATH=/home/nlevitt/workspace/warcprox/warcprox-ve34/lib/python3.4/site-packages nice /home/nlevitt/workspace/warcprox/warcprox-ve34/bin/warcprox --dir=/1/brzl/warcs --rethinkdb-servers=wbgrp-svc020,wbgrp-svc035,wbgrp-svc036 --rethinkdb-db=archiveit_brozzler --rethinkdb-big-table --cacert=/1/brzl/warcprox-ca.pem --certs-dir=/1/brzl/certs --address=0.0.0.0 --base32 --gzip --rollover-idle-time=180 --kafka-broker-list=qa-archive-it.org:6092 --kafka-capture-feed-topic=ait-brozzler-captures' &>>/1/brzl/logs/warcprox-$node.out &
|
||||
while ! rethinkdb_tables_ready stats:0 captures:2 services:0 ; do
|
||||
sleep 1 ;
|
||||
done
|
||||
done
|
||||
}
|
||||
|
||||
start_brozzler_boss() {
|
||||
echo $0: starting ait-brozzler-boss.py
|
||||
venv=/home/nlevitt/workspace/ait5/ait5-ve34
|
||||
PYTHONPATH=$venv/lib/python3.4/site-packages $venv/bin/python /home/nlevitt/workspace/ait5/scripts/ait-brozzler-boss.py &>> /1/brzl/logs/ait-brozzler-boss.out &
|
||||
while ! rethinkdb_tables_ready stats:0 services:0 jobs:0 pages:1 sites:2 ; do sleep 1 ; done
|
||||
}
|
||||
|
||||
start_brozzler_workers() {
|
||||
@ -101,14 +144,15 @@ start_brozzler_workers() {
|
||||
ssh $node "docker --version || curl -sSL https://get.docker.com/ | sh && sudo usermod -aG docker $USER"
|
||||
ssh $node 'docker build -t internetarchive/brozzler-worker /home/nlevitt/workspace/brozzler/docker'
|
||||
ssh -fn $node 'docker run -t --rm -p 8901:8901 -p 5901:5901 internetarchive/brozzler-worker /sbin/my_init -- setuser brozzler bash -c "DISPLAY=:1 brozzler-worker --rethinkdb-servers=wbgrp-svc036,wbgrp-svc020,wbgrp-svc035 --rethinkdb-db=archiveit_brozzler --max-browsers=10"' &>> /1/brzl/logs/brozzler-worker-$node.out
|
||||
sleep 5
|
||||
)
|
||||
while ! rethinkdb_tables_ready jobs:0 pages:1 sites:2 ; do sleep 1 ; done
|
||||
done
|
||||
}
|
||||
|
||||
start_pywayback() {
|
||||
echo $0: starting pywayback
|
||||
PYTHONPATH=/home/nlevitt/workspace/pygwb/pygwb-ve27/lib/python2.7/site-packages WAYBACK_CONFIG=/home/nlevitt/workspace/pygwb/gwb.yaml PATH=/home/nlevitt/workspace/pygwb/pygwb-ve27/bin:/usr/bin:/bin /home/nlevitt/workspace/pygwb/start-gwb.sh &>> /1/brzl/logs/pywayback.out &
|
||||
while ! rethinkdb_tables_ready captures:2 ; do sleep 1 ; done
|
||||
}
|
||||
|
||||
start_ait5() {
|
||||
@ -145,9 +189,7 @@ _start() {
|
||||
|
||||
set -e
|
||||
start_warcprox
|
||||
sleep 5
|
||||
start_brozzler_boss
|
||||
sleep 5
|
||||
start_brozzler_workers
|
||||
start_pywayback
|
||||
start_ait5
|
||||
|
Loading…
x
Reference in New Issue
Block a user