mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 00:29:53 -05:00
move ansible directory to top level; generalize formerly vagrant-specific ansible configuration; let upstart manage logging with "console log"
This commit is contained in:
parent
56e651baeb
commit
3627209be1
23
ansible/hosts-vagrant
Normal file
23
ansible/hosts-vagrant
Normal file
@ -0,0 +1,23 @@
|
||||
ansible_ssh_private_key_file=.vagrant/machines/10.9.9.9/virtualbox/private_key
|
||||
warcs_dir=/vagrant/warcs
|
||||
brozzler_pip_name='-e git+file:///brozzler'
|
||||
|
||||
### possible values for a prod deployment
|
||||
# brozzler_pip_name=brozzler # get it from pypi
|
||||
# brozzler_pip_name=git+https://github.com/internetarchive/brozzler.git#egg=brozzler
|
||||
|
||||
[rethinkdb]
|
||||
10.9.9.9
|
||||
|
||||
[warcprox]
|
||||
work_dir=/vagrant
|
||||
10.9.9.9
|
||||
|
||||
[brozzler-worker]
|
||||
10.9.9.9
|
||||
|
||||
[brozzler-webconsole]
|
||||
10.9.9.9
|
||||
|
||||
[pywb]
|
||||
10.9.9.9
|
@ -1,9 +1,4 @@
|
||||
---
|
||||
- name: apply common configuration to all nodes
|
||||
hosts: all
|
||||
roles:
|
||||
- common
|
||||
|
||||
- name: deploy rethinkdb
|
||||
hosts: rethinkdb
|
||||
roles:
|
3
ansible/roles/brozzler-webconsole/meta/main.yml
Normal file
3
ansible/roles/brozzler-webconsole/meta/main.yml
Normal file
@ -0,0 +1,3 @@
|
||||
---
|
||||
dependencies:
|
||||
- role: common
|
@ -1,9 +1,15 @@
|
||||
---
|
||||
- name: mkdir {{venv_root}}/brozzler-webconsole-ve34
|
||||
file: path={{venv_root}}/brozzler-webconsole-ve34 state=directory
|
||||
owner={{user}}
|
||||
become: true
|
||||
- name: install brozzler[webconsole] in virtualenv
|
||||
pip: name='-e /brozzler[webconsole]'
|
||||
virtualenv=/home/vagrant/brozzler-webconsole-ve34
|
||||
pip: name='{{brozzler_pip_name}}[webconsole]'
|
||||
virtualenv={{venv_root}}/brozzler-webconsole-ve34
|
||||
virtualenv_python=python3.4
|
||||
extra_args='--no-input --upgrade --pre'
|
||||
become: true
|
||||
become_user: '{{user}}'
|
||||
notify:
|
||||
- restart brozzler-webconsole
|
||||
- name: install upstart config /etc/init/brozzler-webconsole.conf
|
@ -3,16 +3,16 @@ description "brozzler-webconsole"
|
||||
start on runlevel [2345]
|
||||
stop on runlevel [!2345]
|
||||
|
||||
env PYTHONPATH=/home/vagrant/brozzler-webconsole-ve34/lib/python3.4/site-packages
|
||||
env PATH=/home/vagrant/brozzler-webconsole-ve34/bin:/usr/bin:/bin
|
||||
env PYTHONPATH={{venv_root}}/brozzler-webconsole-ve34/lib/python3.4/site-packages
|
||||
env PATH={{venv_root}}/brozzler-webconsole-ve34/bin:/usr/bin:/bin
|
||||
env LC_ALL=C.UTF-8
|
||||
|
||||
env WAYBACK_BASEURL=http://{{groups['pywb'][0]}}:8880/brozzler
|
||||
env RETHINKDB_SERVERS={{groups['rethinkdb'] | join(',')}}
|
||||
env RETHINKDB_DB=brozzler
|
||||
|
||||
setuid vagrant
|
||||
setuid {{user}}
|
||||
|
||||
# console log
|
||||
console log
|
||||
|
||||
exec gunicorn --bind=0.0.0.0:8881 brozzler.webconsole:app >>/vagrant/logs/brozzler-webconsole.log 2>&1
|
||||
exec gunicorn --bind=0.0.0.0:8881 brozzler.webconsole:app
|
3
ansible/roles/brozzler-worker/meta/main.yml
Normal file
3
ansible/roles/brozzler-worker/meta/main.yml
Normal file
@ -0,0 +1,3 @@
|
||||
---
|
||||
dependencies:
|
||||
- role: common
|
@ -37,22 +37,32 @@
|
||||
become: true
|
||||
notify:
|
||||
- restart Xvnc
|
||||
- name: mkdir {{venv_root}}/websockify-ve34
|
||||
become: true
|
||||
file: path={{venv_root}}/websockify-ve34 state=directory owner={{user}}
|
||||
- name: install websockify in virtualenv
|
||||
pip: name=git+https://github.com/kanaka/websockify.git#egg=websockify
|
||||
virtualenv=/home/vagrant/websockify-ve34
|
||||
virtualenv={{venv_root}}/websockify-ve34
|
||||
virtualenv_python=python3.4
|
||||
extra_args='--no-input --upgrade --pre'
|
||||
become: true
|
||||
become_user: '{{user}}'
|
||||
- name: install vnc-websock upstart config /etc/init/vnc-websock.conf
|
||||
template: src=templates/vnc-websock.conf.j2 dest=/etc/init/vnc-websock.conf
|
||||
become: true
|
||||
notify:
|
||||
- restart vnc-websock
|
||||
- name: mkdir {{venv_root}}/brozzler-ve34
|
||||
become: true
|
||||
file: path={{venv_root}}/brozzler-ve34 state=directory owner={{user}}
|
||||
- name: install brozzler in virtualenv
|
||||
pip: # name=git+https://github.com/internetarchive/brozzler.git#egg=brozzler
|
||||
name='-e /brozzler'
|
||||
virtualenv=/home/vagrant/brozzler-ve34
|
||||
name='{{brozzler_pip_name}}'
|
||||
virtualenv={{venv_root}}/brozzler-ve34
|
||||
virtualenv_python=python3.4
|
||||
extra_args='--no-input --upgrade --pre'
|
||||
become: true
|
||||
become_user: '{{user}}'
|
||||
notify:
|
||||
- restart brozzler-worker
|
||||
- name: install brozzler-worker upstart config /etc/init/brozzler-worker.conf
|
@ -3,11 +3,11 @@ description "Xvnc"
|
||||
start on runlevel [2345]
|
||||
stop on runlevel [!2345]
|
||||
|
||||
setuid vagrant
|
||||
setuid {{user}}
|
||||
|
||||
console log
|
||||
|
||||
exec nice Xvnc4 :1 -auth /tmp/Xauthority.vagrant \
|
||||
exec nice Xvnc4 :1 -auth /tmp/Xauthority.{{user}} \
|
||||
-geometry 1600x1000 -depth 24 -rfbwait 0 -nolisten tcp -rfbport 5901 \
|
||||
-SecurityTypes None -pn -fp /usr/share/fonts/X11/misc/ -co /etc/X11/rgb \
|
||||
AcceptCutText=0 AcceptPointerEvents=0 AcceptKeyEvents=0
|
@ -4,13 +4,13 @@ start on runlevel [2345]
|
||||
stop on runlevel [!2345]
|
||||
|
||||
env DISPLAY=:1
|
||||
env PATH=/home/vagrant/brozzler-ve34/bin:/usr/bin:/bin
|
||||
env PYTHONPATH=/home/vagrant/brozzler-ve34/lib/python3.4/site-packages
|
||||
env PATH={{venv_root}}/brozzler-ve34/bin:/usr/bin:/bin
|
||||
env PYTHONPATH={{venv_root}}/brozzler-ve34/lib/python3.4/site-packages
|
||||
env LANG=C.UTF-8
|
||||
|
||||
setuid vagrant
|
||||
setuid {{user}}
|
||||
|
||||
# console log
|
||||
console log
|
||||
|
||||
# depends on vnc server
|
||||
start on started Xvnc
|
||||
@ -20,4 +20,4 @@ kill timeout 60
|
||||
|
||||
exec nice brozzler-worker \
|
||||
--rethinkdb-servers={{groups['rethinkdb'] | join(',')}} \
|
||||
--max-browsers=4 >>/vagrant/logs/brozzler-worker.log 2>&1
|
||||
--max-browsers=4
|
@ -3,12 +3,12 @@ description "vnc-websock"
|
||||
start on runlevel [2345]
|
||||
stop on runlevel [!2345]
|
||||
|
||||
setuid vagrant
|
||||
setuid {{user}}
|
||||
|
||||
console log
|
||||
|
||||
env PYTHONPATH=/home/vagrant/websockify-ve34/lib/python3.4/site-packages
|
||||
env PATH=/home/vagrant/websockify-ve34/bin:/usr/bin:/bin
|
||||
env PYTHONPATH={{venv_root}}/websockify-ve34/lib/python3.4/site-packages
|
||||
env PATH={{venv_root}}/websockify-ve34/bin:/usr/bin:/bin
|
||||
|
||||
# port 8901 is hard-coded in brozzler/webconsole/static/partials/workers.html
|
||||
exec nice websockify 0.0.0.0:8901 localhost:5901
|
7
ansible/roles/common/defaults/main.yml
Normal file
7
ansible/roles/common/defaults/main.yml
Normal file
@ -0,0 +1,7 @@
|
||||
# default values for variables; these can be overridden in the ansible inventory
|
||||
# or various other places
|
||||
---
|
||||
user: brozzler
|
||||
venv_root: /opt
|
||||
warcs_dir: /var/tmp/warcs
|
||||
brozzler_pip_name: brozzler # get it from pypi by default
|
@ -19,6 +19,7 @@
|
||||
## command: python3 setup.py install chdir=/tmp/pip-8.1.2
|
||||
## creates=/usr/local/lib/python2.7/dist-packages/pip-8.1.2-py2.7.egg/pip/__init__.py
|
||||
## become: true
|
||||
- name: mkdir /vagrant/logs
|
||||
file: path=/vagrant/logs state=directory
|
||||
- name: ensure service user {{user}} exists
|
||||
user: name={{user}} system=yes createhome=no home=/nonexistent
|
||||
shell=/usr/sbin/nologin
|
||||
become: true
|
3
ansible/roles/pywb/meta/main.yml
Normal file
3
ansible/roles/pywb/meta/main.yml
Normal file
@ -0,0 +1,3 @@
|
||||
---
|
||||
dependencies:
|
||||
- role: common
|
@ -1,16 +1,24 @@
|
||||
---
|
||||
- name: mkdir {{venv_root}}/pywb-ve34
|
||||
file: path={{venv_root}}/pywb-ve34 state=directory
|
||||
owner={{user}}
|
||||
become: true
|
||||
- name: install pywb in virtualenv
|
||||
pip: name=pywb
|
||||
virtualenv=/home/vagrant/pywb-ve34
|
||||
virtualenv={{venv_root}}/pywb-ve34
|
||||
virtualenv_python=python3.4
|
||||
extra_args='--no-input --upgrade --pre'
|
||||
become: true
|
||||
become_user: '{{user}}'
|
||||
notify:
|
||||
- restart pywb
|
||||
- name: install brozzler in pywb virtualenv
|
||||
pip: name='-e /brozzler'
|
||||
virtualenv=/home/vagrant/pywb-ve34
|
||||
pip: name='{{brozzler_pip_name}}'
|
||||
virtualenv={{venv_root}}/pywb-ve34
|
||||
virtualenv_python=python3.4
|
||||
extra_args='--no-input --upgrade --pre'
|
||||
become: true
|
||||
become_user: '{{user}}'
|
||||
notify:
|
||||
- restart pywb
|
||||
- name: pywb config file /etc/pywb.yml
|
14
ansible/roles/pywb/templates/pywb.conf.j2
Normal file
14
ansible/roles/pywb/templates/pywb.conf.j2
Normal file
@ -0,0 +1,14 @@
|
||||
description "pywb"
|
||||
|
||||
start on runlevel [2345]
|
||||
stop on runlevel [!2345]
|
||||
|
||||
env PYTHONPATH={{venv_root}}/pywb-ve34/lib/python3.4/site-packages
|
||||
env PATH={{venv_root}}/pywb-ve34/bin:/usr/bin:/bin
|
||||
env PYWB_CONFIG_FILE=/etc/pywb.yml
|
||||
|
||||
setuid {{user}}
|
||||
|
||||
console log
|
||||
|
||||
exec nice brozzler-wayback
|
@ -1,9 +1,12 @@
|
||||
archive_paths: /vagrant/warcs/
|
||||
archive_paths: {{warcs_dir}}
|
||||
collections:
|
||||
brozzler:
|
||||
index_paths: !!python/object:brozzler.pywb.RethinkCDXSource
|
||||
db: brozzler
|
||||
servers: [localhost]
|
||||
servers:
|
||||
{% for node in groups['rethinkdb'] %}
|
||||
- {{node}}
|
||||
{% endfor %}
|
||||
table: captures
|
||||
enable_auto_colls: false
|
||||
enable_cdx_api: true
|
@ -11,13 +11,17 @@
|
||||
become: true
|
||||
notify:
|
||||
- restart rethinkdb
|
||||
# XXX rethinkdb fails to start in spite of this, I think because /vagrant
|
||||
# gets mounted too late, and it tries to log there
|
||||
- name: ensure rethinkdb starts on reboot
|
||||
service: name=rethinkdb enabled=yes
|
||||
- stat: path=/var/log/rethinkdb.log
|
||||
register: p
|
||||
- name: ensure user rethinkdb owns /var/log/rethinkdb.log
|
||||
file: path=/var/log/rethinkdb.log owner=rethinkdb state=touch mode=0644
|
||||
when: not p.stat.exists
|
||||
become: true
|
||||
- name: ensure rethinkdb instance config file is installed
|
||||
template: src=templates/rethinkdb-brozzler-vagrant-1.conf.j2
|
||||
dest=/etc/rethinkdb/instances.d/rethinkdb-brozzler-vagrant-1.conf
|
||||
template: src=templates/rethinkdb-brozzler.conf.j2
|
||||
dest=/etc/rethinkdb/instances.d/rethinkdb-brozzler.conf
|
||||
become: true
|
||||
notify:
|
||||
- restart rethinkdb
|
@ -0,0 +1,6 @@
|
||||
bind=0.0.0.0
|
||||
# directory=/var/lib/rethinkdb
|
||||
log-file=/var/log/rethinkdb.log
|
||||
{% for node in groups['rethinkdb'] %}
|
||||
join={{node}}:29015
|
||||
{% endfor %}
|
2
ansible/roles/warcprox/defaults/main.yml
Normal file
2
ansible/roles/warcprox/defaults/main.yml
Normal file
@ -0,0 +1,2 @@
|
||||
---
|
||||
work_dir: /var/tmp
|
4
ansible/roles/warcprox/handlers/main.yml
Normal file
4
ansible/roles/warcprox/handlers/main.yml
Normal file
@ -0,0 +1,4 @@
|
||||
---
|
||||
- name: restart warcprox
|
||||
service: name=warcprox state=restarted
|
||||
become: true
|
3
ansible/roles/warcprox/meta/main.yml
Normal file
3
ansible/roles/warcprox/meta/main.yml
Normal file
@ -0,0 +1,3 @@
|
||||
---
|
||||
dependencies:
|
||||
- role: common
|
@ -11,11 +11,16 @@
|
||||
- libssl-dev
|
||||
- tor
|
||||
- git
|
||||
- name: mkdir {{venv_root}}/warcprox-ve34
|
||||
become: true
|
||||
file: path={{venv_root}}/warcprox-ve34 state=directory owner={{user}}
|
||||
- name: install warcprox in virtualenv
|
||||
pip: name=git+https://github.com/internetarchive/warcprox.git@2.x#egg=warcprox
|
||||
virtualenv=/home/vagrant/warcprox-ve34
|
||||
virtualenv={{venv_root}}/warcprox-ve34
|
||||
virtualenv_python=python3.4
|
||||
extra_args='--no-input --upgrade --pre'
|
||||
become: true
|
||||
become_user: '{{user}}'
|
||||
notify:
|
||||
- restart warcprox
|
||||
- name: install upstart config /etc/init/warcprox.conf
|
24
ansible/roles/warcprox/templates/warcprox.conf.j2
Normal file
24
ansible/roles/warcprox/templates/warcprox.conf.j2
Normal file
@ -0,0 +1,24 @@
|
||||
description "warcprox"
|
||||
|
||||
start on runlevel [2345]
|
||||
stop on runlevel [!2345]
|
||||
|
||||
env PYTHONPATH={{venv_root}}/warcprox-ve34/lib/python3.4/site-packages
|
||||
env PATH={{venv_root}}/warcprox-ve34/bin:/usr/bin:/bin
|
||||
|
||||
# by default warcprox creates some files/dirs relative to cwd
|
||||
chdir {{work_dir}}
|
||||
setuid {{user}}
|
||||
|
||||
console log
|
||||
|
||||
# --profile
|
||||
exec nice warcprox \
|
||||
--dir={{warcs_dir}} \
|
||||
--base32 \
|
||||
--gzip \
|
||||
--rollover-idle-time=180 \
|
||||
--onion-tor-socks-proxy=localhost:9050 \
|
||||
--rethinkdb-servers={{groups['rethinkdb']|join(',')}} \
|
||||
--rethinkdb-db=brozzler \
|
||||
--rethinkdb-big-table
|
2
setup.py
2
setup.py
@ -32,7 +32,7 @@ def find_package_data(package):
|
||||
|
||||
setuptools.setup(
|
||||
name='brozzler',
|
||||
version='1.1b7.dev96',
|
||||
version='1.1b7.dev97',
|
||||
description='Distributed web crawling with browsers',
|
||||
url='https://github.com/internetarchive/brozzler',
|
||||
author='Noah Levitt',
|
||||
|
4
vagrant/Vagrantfile
vendored
4
vagrant/Vagrantfile
vendored
@ -7,7 +7,7 @@ Vagrant.configure(2) do |config|
|
||||
config.vm.synced_folder "..", "/brozzler"
|
||||
|
||||
config.vm.provision "ansible" do |ansible|
|
||||
ansible.inventory_path = "ansible/hosts"
|
||||
ansible.playbook = "ansible/playbook.yml"
|
||||
ansible.inventory_path = "../ansible/hosts-vagrant"
|
||||
ansible.playbook = "../ansible/playbook.yml"
|
||||
end
|
||||
end
|
||||
|
@ -1,16 +0,0 @@
|
||||
ansible_ssh_private_key_file=.vagrant/machines/10.9.9.9/virtualbox/private_key
|
||||
|
||||
[rethinkdb]
|
||||
10.9.9.9
|
||||
|
||||
[warcprox]
|
||||
10.9.9.9
|
||||
|
||||
[brozzler-worker]
|
||||
10.9.9.9
|
||||
|
||||
[brozzler-webconsole]
|
||||
10.9.9.9
|
||||
|
||||
[pywb]
|
||||
10.9.9.9
|
@ -1,14 +0,0 @@
|
||||
description "pywb"
|
||||
|
||||
start on runlevel [2345]
|
||||
stop on runlevel [!2345]
|
||||
|
||||
env PYTHONPATH=/home/vagrant/pywb-ve34/lib/python3.4/site-packages
|
||||
env PATH=/home/vagrant/pywb-ve34/bin:/usr/bin:/bin
|
||||
env PYWB_CONFIG_FILE=/etc/pywb.yml
|
||||
|
||||
setuid vagrant
|
||||
|
||||
# console log
|
||||
|
||||
exec nice brozzler-wayback >>/vagrant/logs/pywb.log 2>&1
|
@ -1,5 +0,0 @@
|
||||
runuser=vagrant
|
||||
bind=0.0.0.0
|
||||
# directory=/var/lib/rethinkdb
|
||||
# log-file=/var/log/rethinkdb.log
|
||||
log-file=/vagrant/logs/rethinkdb.log # synced dir
|
@ -1,14 +0,0 @@
|
||||
---
|
||||
# - name: start warcprox
|
||||
# environment:
|
||||
# PYTHONPATH: /home/vagrant/warcprox-ve34/lib/python3.4/site-packages
|
||||
# PATH: /home/vagrant/warcprox-ve34/bin:/usr/bin:/bin
|
||||
# args:
|
||||
# executable: /bin/bash
|
||||
# shell: nice warcprox --dir=/vagrant/warcs --base32 --gzip
|
||||
# --rollover-idle-time=180 --cacert=/vagrant/warcprox-ca.pem
|
||||
# --onion-tor-socks-proxy=localhost:9050 --rethinkdb-servers=localhost
|
||||
# --rethinkdb-big-table &> /vagrant/logs/warcprox.out &
|
||||
- name: restart warcprox
|
||||
service: name=warcprox state=restarted
|
||||
become: true
|
@ -1,26 +0,0 @@
|
||||
description "warcprox"
|
||||
|
||||
start on runlevel [2345]
|
||||
stop on runlevel [!2345]
|
||||
|
||||
env PYTHONPATH=/home/vagrant/warcprox-ve34/lib/python3.4/site-packages
|
||||
env PATH=/home/vagrant/warcprox-ve34/bin:/usr/bin:/bin
|
||||
|
||||
# by default warcprox creates some files/dirs relative to cwd
|
||||
chdir /home/vagrant
|
||||
setuid vagrant
|
||||
|
||||
# console log
|
||||
|
||||
# --profile
|
||||
exec nice warcprox \
|
||||
--dir=/vagrant/warcs \
|
||||
--base32 \
|
||||
--gzip \
|
||||
--rollover-idle-time=180 \
|
||||
--cacert=/vagrant/warcprox-ca.pem \
|
||||
--onion-tor-socks-proxy=localhost:9050 \
|
||||
--rethinkdb-servers=localhost \
|
||||
--rethinkdb-db=brozzler \
|
||||
--rethinkdb-big-table >>/vagrant/logs/warcprox.log 2>&1
|
||||
# --rethinkdb-servers={{groups['rethinkdb'] | join(',')}} \
|
Loading…
x
Reference in New Issue
Block a user