move ansible directory to top level; generalize formerly vagrant-specific ansible configuration; let upstart manage logging with "console log"

This commit is contained in:
Noah Levitt 2016-10-13 17:21:55 -07:00
parent 56e651baeb
commit 3627209be1
34 changed files with 194 additions and 145 deletions

23
ansible/hosts-vagrant Normal file
View File

@ -0,0 +1,23 @@
# Inventory for the local vagrant VM: every service group points at the single
# guest, 10.9.9.9.
[rethinkdb]
10.9.9.9
[warcprox]
10.9.9.9
[brozzler-worker]
10.9.9.9
[brozzler-webconsole]
10.9.9.9
[pywb]
10.9.9.9
# In an INI inventory a bare line outside a ":vars" section is a host entry,
# so variables must be declared in ":vars" sections to be read as variables.
[all:vars]
ansible_ssh_private_key_file=.vagrant/machines/10.9.9.9/virtualbox/private_key
warcs_dir=/vagrant/warcs
brozzler_pip_name='-e git+file:///brozzler'
### possible values for a prod deployment
# brozzler_pip_name=brozzler # get it from pypi
# brozzler_pip_name=git+https://github.com/internetarchive/brozzler.git#egg=brozzler
# Applies only to hosts in the warcprox group.
[warcprox:vars]
work_dir=/vagrant

View File

@ -1,9 +1,4 @@
--- ---
- name: apply common configuration to all nodes
hosts: all
roles:
- common
- name: deploy rethinkdb - name: deploy rethinkdb
hosts: rethinkdb hosts: rethinkdb
roles: roles:
@ -27,4 +22,4 @@
- name: deploy pywb - name: deploy pywb
hosts: pywb hosts: pywb
roles: roles:
- pywb - pywb

View File

@ -0,0 +1,3 @@
---
# Role dependencies: ansible runs the roles listed here before this role's
# own tasks.
dependencies:
  - role: common

View File

@ -1,9 +1,15 @@
--- ---
- name: mkdir {{venv_root}}/brozzler-webconsole-ve34
file: path={{venv_root}}/brozzler-webconsole-ve34 state=directory
owner={{user}}
become: true
- name: install brozzler[webconsole] in virtualenv - name: install brozzler[webconsole] in virtualenv
pip: name='-e /brozzler[webconsole]' pip: name='{{brozzler_pip_name}}[webconsole]'
virtualenv=/home/vagrant/brozzler-webconsole-ve34 virtualenv={{venv_root}}/brozzler-webconsole-ve34
virtualenv_python=python3.4 virtualenv_python=python3.4
extra_args='--no-input --upgrade --pre' extra_args='--no-input --upgrade --pre'
become: true
become_user: '{{user}}'
notify: notify:
- restart brozzler-webconsole - restart brozzler-webconsole
- name: install upstart config /etc/init/brozzler-webconsole.conf - name: install upstart config /etc/init/brozzler-webconsole.conf

View File

@ -3,16 +3,16 @@ description "brozzler-webconsole"
start on runlevel [2345] start on runlevel [2345]
stop on runlevel [!2345] stop on runlevel [!2345]
env PYTHONPATH=/home/vagrant/brozzler-webconsole-ve34/lib/python3.4/site-packages env PYTHONPATH={{venv_root}}/brozzler-webconsole-ve34/lib/python3.4/site-packages
env PATH=/home/vagrant/brozzler-webconsole-ve34/bin:/usr/bin:/bin env PATH={{venv_root}}/brozzler-webconsole-ve34/bin:/usr/bin:/bin
env LC_ALL=C.UTF-8 env LC_ALL=C.UTF-8
env WAYBACK_BASEURL=http://{{groups['pywb'][0]}}:8880/brozzler env WAYBACK_BASEURL=http://{{groups['pywb'][0]}}:8880/brozzler
env RETHINKDB_SERVERS={{groups['rethinkdb'] | join(',')}} env RETHINKDB_SERVERS={{groups['rethinkdb'] | join(',')}}
env RETHINKDB_DB=brozzler env RETHINKDB_DB=brozzler
setuid vagrant setuid {{user}}
# console log console log
exec gunicorn --bind=0.0.0.0:8881 brozzler.webconsole:app >>/vagrant/logs/brozzler-webconsole.log 2>&1 exec gunicorn --bind=0.0.0.0:8881 brozzler.webconsole:app

View File

@ -0,0 +1,3 @@
---
# Role dependencies: ansible runs the roles listed here before this role's
# own tasks.
dependencies:
  - role: common

View File

@ -7,56 +7,66 @@
become: true become: true
apt: name={{item}} state=present apt: name={{item}} state=present
with_items: with_items:
- python-virtualenv - python-virtualenv
- vnc4server - vnc4server
- chromium-browser - chromium-browser
- xfonts-base - xfonts-base
- fonts-arphic-bkai00mp - fonts-arphic-bkai00mp
- fonts-arphic-bsmi00lp - fonts-arphic-bsmi00lp
- fonts-arphic-gbsn00lp - fonts-arphic-gbsn00lp
- fonts-arphic-gkai00mp - fonts-arphic-gkai00mp
- fonts-arphic-ukai - fonts-arphic-ukai
- fonts-farsiweb - fonts-farsiweb
- fonts-nafees - fonts-nafees
- fonts-sil-abyssinica - fonts-sil-abyssinica
- fonts-sil-ezra - fonts-sil-ezra
- fonts-sil-padauk - fonts-sil-padauk
- fonts-unfonts-extra - fonts-unfonts-extra
- fonts-unfonts-core - fonts-unfonts-core
- ttf-indic-fonts - ttf-indic-fonts
- fonts-thai-tlwg - fonts-thai-tlwg
- fonts-lklug-sinhala - fonts-lklug-sinhala
- git - git
- libjpeg-turbo8-dev - libjpeg-turbo8-dev
- zlib1g-dev - zlib1g-dev
- gcc - gcc
- libpython3.4-dev - libpython3.4-dev
- adobe-flashplugin - adobe-flashplugin
- name: install Xvnc upstart config /etc/init/Xvnc.conf - name: install Xvnc upstart config /etc/init/Xvnc.conf
template: src=templates/Xvnc.conf.j2 dest=/etc/init/Xvnc.conf template: src=templates/Xvnc.conf.j2 dest=/etc/init/Xvnc.conf
become: true become: true
notify: notify:
- restart Xvnc - restart Xvnc
- name: mkdir {{venv_root}}/websockify-ve34
become: true
file: path={{venv_root}}/websockify-ve34 state=directory owner={{user}}
- name: install websockify in virtualenv - name: install websockify in virtualenv
pip: name=git+https://github.com/kanaka/websockify.git#egg=websockify pip: name=git+https://github.com/kanaka/websockify.git#egg=websockify
virtualenv=/home/vagrant/websockify-ve34 virtualenv={{venv_root}}/websockify-ve34
virtualenv_python=python3.4 virtualenv_python=python3.4
extra_args='--no-input --upgrade --pre' extra_args='--no-input --upgrade --pre'
become: true
become_user: '{{user}}'
- name: install vnc-websock upstart config /etc/init/vnc-websock.conf - name: install vnc-websock upstart config /etc/init/vnc-websock.conf
template: src=templates/vnc-websock.conf.j2 dest=/etc/init/vnc-websock.conf template: src=templates/vnc-websock.conf.j2 dest=/etc/init/vnc-websock.conf
become: true become: true
notify: notify:
- restart vnc-websock - restart vnc-websock
- name: mkdir {{venv_root}}/brozzler-ve34
become: true
file: path={{venv_root}}/brozzler-ve34 state=directory owner={{user}}
- name: install brozzler in virtualenv - name: install brozzler in virtualenv
pip: # name=git+https://github.com/internetarchive/brozzler.git#egg=brozzler pip: # name=git+https://github.com/internetarchive/brozzler.git#egg=brozzler
name='-e /brozzler' name='{{brozzler_pip_name}}'
virtualenv=/home/vagrant/brozzler-ve34 virtualenv={{venv_root}}/brozzler-ve34
virtualenv_python=python3.4 virtualenv_python=python3.4
extra_args='--no-input --upgrade --pre' extra_args='--no-input --upgrade --pre'
become: true
become_user: '{{user}}'
notify: notify:
- restart brozzler-worker - restart brozzler-worker
- name: install brozzler-worker upstart config /etc/init/brozzler-worker.conf - name: install brozzler-worker upstart config /etc/init/brozzler-worker.conf
template: src=templates/brozzler-worker.conf.j2 dest=/etc/init/brozzler-worker.conf template: src=templates/brozzler-worker.conf.j2 dest=/etc/init/brozzler-worker.conf
become: true become: true
notify: notify:
- restart brozzler-worker - restart brozzler-worker

View File

@ -3,11 +3,11 @@ description "Xvnc"
start on runlevel [2345] start on runlevel [2345]
stop on runlevel [!2345] stop on runlevel [!2345]
setuid vagrant setuid {{user}}
console log console log
exec nice Xvnc4 :1 -auth /tmp/Xauthority.vagrant \ exec nice Xvnc4 :1 -auth /tmp/Xauthority.{{user}} \
-geometry 1600x1000 -depth 24 -rfbwait 0 -nolisten tcp -rfbport 5901 \ -geometry 1600x1000 -depth 24 -rfbwait 0 -nolisten tcp -rfbport 5901 \
-SecurityTypes None -pn -fp /usr/share/fonts/X11/misc/ -co /etc/X11/rgb \ -SecurityTypes None -pn -fp /usr/share/fonts/X11/misc/ -co /etc/X11/rgb \
AcceptCutText=0 AcceptPointerEvents=0 AcceptKeyEvents=0 AcceptCutText=0 AcceptPointerEvents=0 AcceptKeyEvents=0

View File

@ -4,13 +4,13 @@ start on runlevel [2345]
stop on runlevel [!2345] stop on runlevel [!2345]
env DISPLAY=:1 env DISPLAY=:1
env PATH=/home/vagrant/brozzler-ve34/bin:/usr/bin:/bin env PATH={{venv_root}}/brozzler-ve34/bin:/usr/bin:/bin
env PYTHONPATH=/home/vagrant/brozzler-ve34/lib/python3.4/site-packages env PYTHONPATH={{venv_root}}/brozzler-ve34/lib/python3.4/site-packages
env LANG=C.UTF-8 env LANG=C.UTF-8
setuid vagrant setuid {{user}}
# console log console log
# depends on vnc server # depends on vnc server
start on started Xvnc start on started Xvnc
@ -20,4 +20,4 @@ kill timeout 60
exec nice brozzler-worker \ exec nice brozzler-worker \
--rethinkdb-servers={{groups['rethinkdb'] | join(',')}} \ --rethinkdb-servers={{groups['rethinkdb'] | join(',')}} \
--max-browsers=4 >>/vagrant/logs/brozzler-worker.log 2>&1 --max-browsers=4

View File

@ -3,12 +3,12 @@ description "vnc-websock"
start on runlevel [2345] start on runlevel [2345]
stop on runlevel [!2345] stop on runlevel [!2345]
setuid vagrant setuid {{user}}
console log console log
env PYTHONPATH=/home/vagrant/websockify-ve34/lib/python3.4/site-packages env PYTHONPATH={{venv_root}}/websockify-ve34/lib/python3.4/site-packages
env PATH=/home/vagrant/websockify-ve34/bin:/usr/bin:/bin env PATH={{venv_root}}/websockify-ve34/bin:/usr/bin:/bin
# port 8901 is hard-coded in brozzler/webconsole/static/partials/workers.html # port 8901 is hard-coded in brozzler/webconsole/static/partials/workers.html
exec nice websockify 0.0.0.0:8901 localhost:5901 exec nice websockify 0.0.0.0:8901 localhost:5901

View File

@ -0,0 +1,7 @@
---
# Default values for the variables used by the roles. Override them in the
# ansible inventory or anywhere else with higher variable precedence.

# account that owns the virtualenv directories and that the upstart jobs
# setuid to
user: brozzler
# parent directory of the per-service virtualenvs
venv_root: /opt
# passed to warcprox as --dir and to pywb as archive_paths
warcs_dir: /var/tmp/warcs
# pip requirement used to install brozzler; the default gets it from pypi
brozzler_pip_name: brozzler

View File

@ -19,6 +19,7 @@
## command: python3 setup.py install chdir=/tmp/pip-8.1.2 ## command: python3 setup.py install chdir=/tmp/pip-8.1.2
## creates=/usr/local/lib/python2.7/dist-packages/pip-8.1.2-py2.7.egg/pip/__init__.py ## creates=/usr/local/lib/python2.7/dist-packages/pip-8.1.2-py2.7.egg/pip/__init__.py
## become: true ## become: true
- name: mkdir /vagrant/logs - name: ensure service user {{user}} exists
file: path=/vagrant/logs state=directory user: name={{user}} system=yes createhome=no home=/nonexistent
shell=/usr/sbin/nologin
become: true become: true

View File

@ -0,0 +1,3 @@
---
# Role dependencies: ansible runs the roles listed here before this role's
# own tasks.
dependencies:
  - role: common

View File

@ -1,16 +1,24 @@
--- ---
- name: mkdir {{venv_root}}/pywb-ve34
file: path={{venv_root}}/pywb-ve34 state=directory
owner={{user}}
become: true
- name: install pywb in virtualenv - name: install pywb in virtualenv
pip: name=pywb pip: name=pywb
virtualenv=/home/vagrant/pywb-ve34 virtualenv={{venv_root}}/pywb-ve34
virtualenv_python=python3.4 virtualenv_python=python3.4
extra_args='--no-input --upgrade --pre' extra_args='--no-input --upgrade --pre'
become: true
become_user: '{{user}}'
notify: notify:
- restart pywb - restart pywb
- name: install brozzler in pywb virtualenv - name: install brozzler in pywb virtualenv
pip: name='-e /brozzler' pip: name='{{brozzler_pip_name}}'
virtualenv=/home/vagrant/pywb-ve34 virtualenv={{venv_root}}/pywb-ve34
virtualenv_python=python3.4 virtualenv_python=python3.4
extra_args='--no-input --upgrade --pre' extra_args='--no-input --upgrade --pre'
become: true
become_user: '{{user}}'
notify: notify:
- restart pywb - restart pywb
- name: pywb config file /etc/pywb.yml - name: pywb config file /etc/pywb.yml

View File

@ -0,0 +1,14 @@
# upstart job for pywb; this file is a template rendered by ansible
description "pywb"
start on runlevel [2345]
stop on runlevel [!2345]
# point the job at the pywb virtualenv (no activate script is sourced)
env PYTHONPATH={{venv_root}}/pywb-ve34/lib/python3.4/site-packages
env PATH={{venv_root}}/pywb-ve34/bin:/usr/bin:/bin
env PYWB_CONFIG_FILE=/etc/pywb.yml
setuid {{user}}
# let upstart manage logging of the job's output
console log
exec nice brozzler-wayback

View File

@ -1,9 +1,12 @@
archive_paths: /vagrant/warcs/ archive_paths: {{warcs_dir}}
collections: collections:
brozzler: brozzler:
index_paths: !!python/object:brozzler.pywb.RethinkCDXSource index_paths: !!python/object:brozzler.pywb.RethinkCDXSource
db: brozzler db: brozzler
servers: [localhost] servers:
{% for node in groups['rethinkdb'] %}
- {{node}}
{% endfor %}
table: captures table: captures
enable_auto_colls: false enable_auto_colls: false
enable_cdx_api: true enable_cdx_api: true

View File

@ -11,13 +11,17 @@
become: true become: true
notify: notify:
- restart rethinkdb - restart rethinkdb
# XXX rethinkdb fails to start in spite of this, I think because /vagrant
# gets mounted too late, and it tries to log there
- name: ensure rethinkdb starts on reboot - name: ensure rethinkdb starts on reboot
service: name=rethinkdb enabled=yes service: name=rethinkdb enabled=yes
- stat: path=/var/log/rethinkdb.log
register: p
- name: ensure user rethinkdb owns /var/log/rethinkdb.log
file: path=/var/log/rethinkdb.log owner=rethinkdb state=touch mode=0644
when: not p.stat.exists
become: true
- name: ensure rethinkdb instance config file is installed - name: ensure rethinkdb instance config file is installed
template: src=templates/rethinkdb-brozzler-vagrant-1.conf.j2 template: src=templates/rethinkdb-brozzler.conf.j2
dest=/etc/rethinkdb/instances.d/rethinkdb-brozzler-vagrant-1.conf dest=/etc/rethinkdb/instances.d/rethinkdb-brozzler.conf
become: true become: true
notify: notify:
- restart rethinkdb - restart rethinkdb

View File

@ -0,0 +1,6 @@
# rethinkdb instance configuration; this file is a template rendered by ansible
bind=0.0.0.0
# directory=/var/lib/rethinkdb
log-file=/var/log/rethinkdb.log
# one join= line is emitted per host in the "rethinkdb" inventory group
{% for node in groups['rethinkdb'] %}
join={{node}}:29015
{% endfor %}

View File

@ -0,0 +1,2 @@
---
# default working directory for warcprox; the warcprox upstart job does
# "chdir" to this path before starting the process
work_dir: /var/tmp

View File

@ -0,0 +1,4 @@
---
# Handler triggered by tasks that "notify: restart warcprox".
- name: restart warcprox
  become: true
  service:
    name: warcprox
    state: restarted

View File

@ -0,0 +1,3 @@
---
# Role dependencies: ansible runs the roles listed here before this role's
# own tasks.
dependencies:
  - role: common

View File

@ -11,11 +11,16 @@
- libssl-dev - libssl-dev
- tor - tor
- git - git
- name: mkdir {{venv_root}}/warcprox-ve34
become: true
file: path={{venv_root}}/warcprox-ve34 state=directory owner={{user}}
- name: install warcprox in virtualenv - name: install warcprox in virtualenv
pip: name=git+https://github.com/internetarchive/warcprox.git@2.x#egg=warcprox pip: name=git+https://github.com/internetarchive/warcprox.git@2.x#egg=warcprox
virtualenv=/home/vagrant/warcprox-ve34 virtualenv={{venv_root}}/warcprox-ve34
virtualenv_python=python3.4 virtualenv_python=python3.4
extra_args='--no-input --upgrade --pre' extra_args='--no-input --upgrade --pre'
become: true
become_user: '{{user}}'
notify: notify:
- restart warcprox - restart warcprox
- name: install upstart config /etc/init/warcprox.conf - name: install upstart config /etc/init/warcprox.conf

View File

@ -0,0 +1,24 @@
# upstart job for warcprox; this file is a template rendered by ansible
description "warcprox"
start on runlevel [2345]
stop on runlevel [!2345]
# point the job at the warcprox virtualenv (no activate script is sourced)
env PYTHONPATH={{venv_root}}/warcprox-ve34/lib/python3.4/site-packages
env PATH={{venv_root}}/warcprox-ve34/bin:/usr/bin:/bin
# by default warcprox creates some files/dirs relative to cwd
chdir {{work_dir}}
setuid {{user}}
# let upstart manage logging of the job's output
console log
# --profile
# warcs are written under warcs_dir; the rethinkdb server list is the
# comma-joined "rethinkdb" inventory group
exec nice warcprox \
--dir={{warcs_dir}} \
--base32 \
--gzip \
--rollover-idle-time=180 \
--onion-tor-socks-proxy=localhost:9050 \
--rethinkdb-servers={{groups['rethinkdb']|join(',')}} \
--rethinkdb-db=brozzler \
--rethinkdb-big-table

View File

@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup( setuptools.setup(
name='brozzler', name='brozzler',
version='1.1b7.dev96', version='1.1b7.dev97',
description='Distributed web crawling with browsers', description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler', url='https://github.com/internetarchive/brozzler',
author='Noah Levitt', author='Noah Levitt',

4
vagrant/Vagrantfile vendored
View File

@ -7,7 +7,7 @@ Vagrant.configure(2) do |config|
config.vm.synced_folder "..", "/brozzler" config.vm.synced_folder "..", "/brozzler"
config.vm.provision "ansible" do |ansible| config.vm.provision "ansible" do |ansible|
ansible.inventory_path = "ansible/hosts" ansible.inventory_path = "../ansible/hosts-vagrant"
ansible.playbook = "ansible/playbook.yml" ansible.playbook = "../ansible/playbook.yml"
end end
end end

View File

@ -1,16 +0,0 @@
ansible_ssh_private_key_file=.vagrant/machines/10.9.9.9/virtualbox/private_key
[rethinkdb]
10.9.9.9
[warcprox]
10.9.9.9
[brozzler-worker]
10.9.9.9
[brozzler-webconsole]
10.9.9.9
[pywb]
10.9.9.9

View File

@ -1,14 +0,0 @@
description "pywb"
start on runlevel [2345]
stop on runlevel [!2345]
env PYTHONPATH=/home/vagrant/pywb-ve34/lib/python3.4/site-packages
env PATH=/home/vagrant/pywb-ve34/bin:/usr/bin:/bin
env PYWB_CONFIG_FILE=/etc/pywb.yml
setuid vagrant
# console log
exec nice brozzler-wayback >>/vagrant/logs/pywb.log 2>&1

View File

@ -1,5 +0,0 @@
runuser=vagrant
bind=0.0.0.0
# directory=/var/lib/rethinkdb
# log-file=/var/log/rethinkdb.log
log-file=/vagrant/logs/rethinkdb.log # synced dir

View File

@ -1,14 +0,0 @@
---
# - name: start warcprox
# environment:
# PYTHONPATH: /home/vagrant/warcprox-ve34/lib/python3.4/site-packages
# PATH: /home/vagrant/warcprox-ve34/bin:/usr/bin:/bin
# args:
# executable: /bin/bash
# shell: nice warcprox --dir=/vagrant/warcs --base32 --gzip
# --rollover-idle-time=180 --cacert=/vagrant/warcprox-ca.pem
# --onion-tor-socks-proxy=localhost:9050 --rethinkdb-servers=localhost
# --rethinkdb-big-table &> /vagrant/logs/warcprox.out &
- name: restart warcprox
service: name=warcprox state=restarted
become: true

View File

@ -1,26 +0,0 @@
description "warcprox"
start on runlevel [2345]
stop on runlevel [!2345]
env PYTHONPATH=/home/vagrant/warcprox-ve34/lib/python3.4/site-packages
env PATH=/home/vagrant/warcprox-ve34/bin:/usr/bin:/bin
# by default warcprox creates some files/dirs relative to cwd
chdir /home/vagrant
setuid vagrant
# console log
# --profile
exec nice warcprox \
--dir=/vagrant/warcs \
--base32 \
--gzip \
--rollover-idle-time=180 \
--cacert=/vagrant/warcprox-ca.pem \
--onion-tor-socks-proxy=localhost:9050 \
--rethinkdb-servers=localhost \
--rethinkdb-db=brozzler \
--rethinkdb-big-table >>/vagrant/logs/warcprox.log 2>&1
# --rethinkdb-servers={{groups['rethinkdb'] | join(',')}} \