From d4f8bc768f87666d68562e5547949e57a952690c Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Mon, 18 Mar 2019 16:38:23 -0700 Subject: [PATCH 01/23] trying to make this work with xenial for travis see error https://travis-ci.org/internetarchive/brozzler/jobs/508141058 --- ansible/roles/rethinkdb/tasks/main.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ansible/roles/rethinkdb/tasks/main.yml b/ansible/roles/rethinkdb/tasks/main.yml index 774520b..b24414e 100644 --- a/ansible/roles/rethinkdb/tasks/main.yml +++ b/ansible/roles/rethinkdb/tasks/main.yml @@ -3,8 +3,9 @@ apt_key: url=http://download.rethinkdb.com/apt/pubkey.gpg become: true - name: ensure rethinkdb repo is in apt sources.list - apt_repository: repo='deb http://download.rethinkdb.com/apt trusty main' - state=present + apt_repository: + repo: 'deb http://download.rethinkdb.com/apt {{ansible_lsb.codename|lower}} main' + state: present become: true - apt: update_cache=yes become: true From 19522aff85bb3ffc092106d37b034fe439752086 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Tue, 19 Mar 2019 16:37:13 -0700 Subject: [PATCH 02/23] adjusting ansible config for xenial untested because of vagrant problems --- ansible/hosts-vagrant | 4 +- .../roles/brozzler-dashboard/tasks/main.yml | 8 ++-- .../templates/brozzler-dashboard.conf.j2 | 3 +- ansible/roles/brozzler-worker/tasks/main.yml | 39 +++++++++---------- .../templates/brozzler-worker.conf.j2 | 3 +- .../templates/vnc-websock.conf.j2 | 3 +- ansible/roles/common/tasks/main.yml | 10 ++--- ansible/roles/pywb/tasks/main.yml | 12 +++--- ansible/roles/pywb/templates/pywb.conf.j2 | 4 +- ansible/roles/warcprox/tasks/main.yml | 11 +++--- .../roles/warcprox/templates/warcprox.conf.j2 | 5 +-- 11 files changed, 47 insertions(+), 55 deletions(-) diff --git a/ansible/hosts-vagrant b/ansible/hosts-vagrant index b5a6604..7b27a61 100644 --- a/ansible/hosts-vagrant +++ b/ansible/hosts-vagrant @@ -1,7 +1,9 @@ [all:vars] warcs_dir=/vagrant/warcs -brozzler_pip_name='-e /brozzler' +# brozzler_pip_name='-e /brozzler' # not working anymore? :( +brozzler_pip_name='/brozzler' user=vagrant +ansible_python_interpreter=/usr/bin/python3 ### possible values for a prod deployment # brozzler_pip_name=brozzler # get it from pypi # brozzler_pip_name=git+https://github.com/internetarchive/brozzler.git#egg=brozzler diff --git a/ansible/roles/brozzler-dashboard/tasks/main.yml b/ansible/roles/brozzler-dashboard/tasks/main.yml index 42a7551..b341d1a 100644 --- a/ansible/roles/brozzler-dashboard/tasks/main.yml +++ b/ansible/roles/brozzler-dashboard/tasks/main.yml @@ -1,12 +1,12 @@ --- -- name: mkdir {{venv_root}}/brozzler-dashboard-ve34 - file: path={{venv_root}}/brozzler-dashboard-ve34 state=directory +- name: mkdir {{venv_root}}/brozzler-dashboard-ve3 + file: path={{venv_root}}/brozzler-dashboard-ve3 state=directory owner={{user}} become: true - name: install brozzler[dashboard] in virtualenv pip: name='{{brozzler_pip_name}}[dashboard]' - virtualenv={{venv_root}}/brozzler-dashboard-ve34 - virtualenv_python=python3.4 + virtualenv={{venv_root}}/brozzler-dashboard-ve3 + virtualenv_python=python3 extra_args='--no-input --upgrade --pre --cache-dir=/tmp/pip-cache' become: true become_user: '{{user}}' diff --git a/ansible/roles/brozzler-dashboard/templates/brozzler-dashboard.conf.j2 b/ansible/roles/brozzler-dashboard/templates/brozzler-dashboard.conf.j2 index cd8e8e0..7a8f0bb 100644 --- a/ansible/roles/brozzler-dashboard/templates/brozzler-dashboard.conf.j2 +++ b/ansible/roles/brozzler-dashboard/templates/brozzler-dashboard.conf.j2 @@ -3,8 +3,7 @@ description "brozzler-dashboard" start on runlevel [2345] stop on runlevel [!2345] -env PYTHONPATH={{venv_root}}/brozzler-dashboard-ve34/lib/python3.4/site-packages -env PATH={{venv_root}}/brozzler-dashboard-ve34/bin:/usr/bin:/bin +env PATH={{venv_root}}/brozzler-dashboard-ve3/bin:/usr/bin:/bin env LC_ALL=C.UTF-8 env WAYBACK_BASEURL=http://{{groups['pywb'][0]}}:8880/brozzler diff --git a/ansible/roles/brozzler-worker/tasks/main.yml b/ansible/roles/brozzler-worker/tasks/main.yml index deb7a92..bd9512a 100644 --- a/ansible/roles/brozzler-worker/tasks/main.yml +++ b/ansible/roles/brozzler-worker/tasks/main.yml @@ -9,8 +9,14 @@ become: true apt: name={{item}} state=present with_items: - - vnc4server - chromium-browser + - vnc4server + - libjpeg-turbo8-dev + - zlib1g-dev + - gcc + - python3-dev + - python3-dbg + - adobe-flashplugin - xfonts-base - fonts-arphic-bkai00mp - fonts-arphic-bsmi00lp @@ -24,28 +30,21 @@ - fonts-sil-padauk - fonts-unfonts-extra - fonts-unfonts-core - - ttf-indic-fonts + - fonts-indic - fonts-thai-tlwg - fonts-lklug-sinhala - - git - - libjpeg-turbo8-dev - - zlib1g-dev - - gcc - - g++ - - libpython3.4-dev - - adobe-flashplugin - name: install Xvnc upstart config /etc/init/Xvnc.conf template: src=templates/Xvnc.conf.j2 dest=/etc/init/Xvnc.conf become: true notify: - restart Xvnc -- name: mkdir {{venv_root}}/websockify-ve34 +- name: mkdir {{venv_root}}/websockify-ve3 become: true - file: path={{venv_root}}/websockify-ve34 state=directory owner={{user}} + file: path={{venv_root}}/websockify-ve3 state=directory owner={{user}} - name: install websockify in virtualenv pip: name=git+https://github.com/kanaka/websockify.git#egg=websockify - virtualenv={{venv_root}}/websockify-ve34 - virtualenv_python=python3.4 + virtualenv={{venv_root}}/websockify-ve3 + virtualenv_python=python3 extra_args='--no-input --upgrade --pre --cache-dir=/tmp/pip-cache' become: true become_user: '{{user}}' @@ -54,15 +53,15 @@ become: true notify: - restart vnc-websock -- name: mkdir {{venv_root}}/brozzler-ve34 +- name: mkdir {{venv_root}}/brozzler-ve3 become: true - file: path={{venv_root}}/brozzler-ve34 state=directory owner={{user}} + file: path={{venv_root}}/brozzler-ve3 state=directory owner={{user}} - name: install brozzler in virtualenv - pip: # name=git+https://github.com/internetarchive/brozzler.git#egg=brozzler - name='{{brozzler_pip_name}}' - virtualenv={{venv_root}}/brozzler-ve34 - virtualenv_python=python3.4 - extra_args='--no-input --upgrade --pre --cache-dir=/tmp/pip-cache' + pip: + name: '{{brozzler_pip_name}}' + virtualenv: '{{venv_root}}/brozzler-ve3' + virtualenv_python: python3 + extra_args: '--no-input --upgrade --pre --cache-dir=/tmp/pip-cache' become: true become_user: '{{user}}' notify: diff --git a/ansible/roles/brozzler-worker/templates/brozzler-worker.conf.j2 b/ansible/roles/brozzler-worker/templates/brozzler-worker.conf.j2 index 3fd73d6..5b9f711 100644 --- a/ansible/roles/brozzler-worker/templates/brozzler-worker.conf.j2 +++ b/ansible/roles/brozzler-worker/templates/brozzler-worker.conf.j2 @@ -4,8 +4,7 @@ start on runlevel [2345] stop on runlevel [!2345] env DISPLAY=:1 -env PATH={{venv_root}}/brozzler-ve34/bin:/usr/bin:/bin -env PYTHONPATH={{venv_root}}/brozzler-ve34/lib/python3.4/site-packages +env PATH={{venv_root}}/brozzler-ve3/bin:/usr/bin:/bin env LANG=C.UTF-8 setuid {{user}} diff --git a/ansible/roles/brozzler-worker/templates/vnc-websock.conf.j2 b/ansible/roles/brozzler-worker/templates/vnc-websock.conf.j2 index 2468bae..a26345d 100644 --- a/ansible/roles/brozzler-worker/templates/vnc-websock.conf.j2 +++ b/ansible/roles/brozzler-worker/templates/vnc-websock.conf.j2 @@ -7,8 +7,7 @@ setuid {{user}} console log -env PYTHONPATH={{venv_root}}/websockify-ve34/lib/python3.4/site-packages -env PATH={{venv_root}}/websockify-ve34/bin:/usr/bin:/bin +env PATH={{venv_root}}/websockify-ve3/bin:/usr/bin:/bin # port 8901 is hard-coded in brozzler/dashboard/static/partials/workers.html exec nice websockify 0.0.0.0:8901 localhost:5901 diff --git a/ansible/roles/common/tasks/main.yml b/ansible/roles/common/tasks/main.yml index 5942b86..2167ba1 100644 --- a/ansible/roles/common/tasks/main.yml +++ b/ansible/roles/common/tasks/main.yml @@ -18,22 +18,22 @@ # this clause is a workaround for travis-ci, which only wants to install in /usr # see https://travis-ci.org/internetarchive/brozzler/builds/174338601 -# but it complains that /usr/lib/python3.4/site-packages doesn't exist +# but it complains that /usr/lib/python3.5/site-packages doesn't exist # see https://travis-ci.org/internetarchive/brozzler/builds/174094831 - file: path={{item}} state=directory with_items: - - /usr/lib/python3.4/site-packages - - /usr/lib/python3.4/dist-packages + - /usr/lib/python3.5/site-packages + - /usr/lib/python3.5/dist-packages become: true - name: run "python3 setup.py install" in /tmp/pip-9.0.1 command: python3 setup.py install chdir=/tmp/pip-9.0.1 - creates=/usr/local/lib/python3.4/dist-packages/pip-9.0.1-py3.4.egg/pip/__init__.py + creates=/usr/local/lib/python3.5/dist-packages/pip-9.0.1-py3.5.egg/pip/__init__.py become: true - name: run "pip install virtualenv" command: pip install virtualenv - creates=/usr/local/lib/python3.4/dist-packages/virtualenv.py + creates=/usr/local/lib/python3.5/dist-packages/virtualenv.py become: true - command: id {{user}} register: id_user diff --git a/ansible/roles/pywb/tasks/main.yml b/ansible/roles/pywb/tasks/main.yml index 16b9ea7..7ffe49c 100644 --- a/ansible/roles/pywb/tasks/main.yml +++ b/ansible/roles/pywb/tasks/main.yml @@ -1,13 +1,13 @@ --- -- name: mkdir {{venv_root}}/pywb-ve34 - file: path={{venv_root}}/pywb-ve34 state=directory +- name: mkdir {{venv_root}}/pywb-ve3 + file: path={{venv_root}}/pywb-ve3 state=directory owner={{user}} become: true - name: install pywb in virtualenv pip: name=pywb version=0.33.2 - virtualenv={{venv_root}}/pywb-ve34 - virtualenv_python=python3.4 + virtualenv={{venv_root}}/pywb-ve3 + virtualenv_python=python3 extra_args='--no-input --upgrade --pre --cache-dir=/tmp/pip-cache' become: true become_user: '{{user}}' @@ -15,8 +15,8 @@ - restart pywb - name: install brozzler in pywb virtualenv pip: name='{{brozzler_pip_name}}' - virtualenv={{venv_root}}/pywb-ve34 - virtualenv_python=python3.4 + virtualenv={{venv_root}}/pywb-ve3 + virtualenv_python=python3 extra_args='--no-input --upgrade --pre --cache-dir=/tmp/pip-cache' become: true become_user: '{{user}}' diff --git a/ansible/roles/pywb/templates/pywb.conf.j2 b/ansible/roles/pywb/templates/pywb.conf.j2 index c2cc89e..6b3450c 100644 --- a/ansible/roles/pywb/templates/pywb.conf.j2 +++ b/ansible/roles/pywb/templates/pywb.conf.j2 @@ -3,12 +3,10 @@ description "pywb" start on runlevel [2345] stop on runlevel [!2345] -env PYTHONPATH={{venv_root}}/pywb-ve34/lib/python3.4/site-packages -env PATH={{venv_root}}/pywb-ve34/bin:/usr/bin:/bin env PYWB_CONFIG_FILE=/etc/pywb.yml setuid {{user}} console log -exec nice brozzler-wayback +exec nice {{venv_root}}/pywb-ve3/bin/python {{venv_root}}/pywb-ve3/bin/brozzler-wayback diff --git a/ansible/roles/warcprox/tasks/main.yml b/ansible/roles/warcprox/tasks/main.yml index be53dc5..74e8c59 100644 --- a/ansible/roles/warcprox/tasks/main.yml +++ b/ansible/roles/warcprox/tasks/main.yml @@ -4,19 +4,18 @@ apt: name={{item}} state=present with_items: - gcc - - python3.4 - - libpython3.4-dev + - python3-dev - libffi-dev - libssl-dev - tor - git -- name: mkdir {{venv_root}}/warcprox-ve34 +- name: mkdir {{venv_root}}/warcprox-ve3 become: true - file: path={{venv_root}}/warcprox-ve34 state=directory owner={{user}} + file: path={{venv_root}}/warcprox-ve3 state=directory owner={{user}} - name: install warcprox in virtualenv pip: name=git+https://github.com/internetarchive/warcprox.git#egg=warcprox - virtualenv={{venv_root}}/warcprox-ve34 - virtualenv_python=python3.4 + virtualenv={{venv_root}}/warcprox-ve3 + virtualenv_python=python3 extra_args='--no-input --upgrade --pre --cache-dir=/tmp/pip-cache' become: true become_user: '{{user}}' diff --git a/ansible/roles/warcprox/templates/warcprox.conf.j2 b/ansible/roles/warcprox/templates/warcprox.conf.j2 index 4d9a33d..61f36ba 100644 --- a/ansible/roles/warcprox/templates/warcprox.conf.j2 +++ b/ansible/roles/warcprox/templates/warcprox.conf.j2 @@ -3,9 +3,6 @@ description "warcprox" start on runlevel [2345] stop on runlevel [!2345] -env PYTHONPATH={{venv_root}}/warcprox-ve34/lib/python3.4/site-packages -env PATH={{venv_root}}/warcprox-ve34/bin:/usr/bin:/bin - # by default warcprox creates some files/dirs relative to cwd chdir {{work_dir}} setuid {{user}} @@ -13,7 +10,7 @@ setuid {{user}} console log # --profile -exec nice warcprox \ +exec nice {{venv_root}}/warcprox-ve3/bin/python {{venv_root}}/warcprox-ve3/bin/warcprox \ --address=0.0.0.0 \ --dir={{warcs_dir}} \ --base32 \ From 18b4a26db6ad79a2970aaf726be9f9145903d1ca Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Fri, 22 Mar 2019 23:50:46 -0700 Subject: [PATCH 03/23] porting ansible config to xenial no more upstart, switch to daemontools, among other things --- .travis.yml | 6 +- .../brozzler-dashboard/handlers/main.yml | 6 +- .../roles/brozzler-dashboard/tasks/main.yml | 27 +++++-- .../templates/brozzler-dashboard-run.j2 | 13 +++ .../templates/brozzler-dashboard.conf.j2 | 17 ---- .../roles/brozzler-worker/handlers/main.yml | 21 +++-- ansible/roles/brozzler-worker/tasks/main.yml | 57 ++++++++++--- .../brozzler-worker/templates/Xvnc-run.j2 | 14 ++++ .../brozzler-worker/templates/Xvnc.conf.j2 | 14 ---- .../templates/brozzler-worker-run.j2 | 17 ++++ .../templates/brozzler-worker.conf.j2 | 24 ------ .../templates/vnc-websock-run.j2 | 10 +++ .../templates/vnc-websock.conf.j2 | 14 ---- ansible/roles/common/tasks/main.yml | 80 +++++++++++++------ ansible/roles/pywb/handlers/main.yml | 6 +- ansible/roles/pywb/tasks/main.yml | 40 +++++++--- ansible/roles/pywb/templates/pywb-run.j2 | 10 +++ ansible/roles/pywb/templates/pywb.conf.j2 | 12 --- ansible/roles/warcprox/handlers/main.yml | 5 +- ansible/roles/warcprox/tasks/main.yml | 24 ++++-- .../templates/{warcprox.conf.j2 => run.j2} | 21 ++--- vagrant/README.rst | 10 +-- vagrant/Vagrantfile | 4 +- vagrant/run-tests.sh | 4 +- vagrant/vagrant-brozzler-new-job.py | 7 +- vagrant/vagrant-brozzler-new-site.py | 8 +- 26 files changed, 289 insertions(+), 182 deletions(-) create mode 100644 ansible/roles/brozzler-dashboard/templates/brozzler-dashboard-run.j2 delete mode 100644 ansible/roles/brozzler-dashboard/templates/brozzler-dashboard.conf.j2 create mode 100644 ansible/roles/brozzler-worker/templates/Xvnc-run.j2 delete mode 100644 ansible/roles/brozzler-worker/templates/Xvnc.conf.j2 create mode 100644 ansible/roles/brozzler-worker/templates/brozzler-worker-run.j2 delete mode 100644 ansible/roles/brozzler-worker/templates/brozzler-worker.conf.j2 create mode 100644 ansible/roles/brozzler-worker/templates/vnc-websock-run.j2 delete mode 100644 ansible/roles/brozzler-worker/templates/vnc-websock.conf.j2 create mode 100644 ansible/roles/pywb/templates/pywb-run.j2 delete mode 100644 ansible/roles/pywb/templates/pywb.conf.j2 rename ansible/roles/warcprox/templates/{warcprox.conf.j2 => run.j2} (58%) diff --git a/.travis.yml b/.travis.yml index 318ad3b..301b377 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,9 +21,9 @@ script: - DISPLAY=:1 py.test --tb=native -v tests after_failure: - chromium-browser --version -- sudo cat /var/log/upstart/warcprox.log -- sudo cat /var/log/upstart/brozzler-worker.log -- sudo cat /var/log/upstart/pywb.log +- sudo cat /var/log/warcprox.log +- sudo cat /var/log/brozzler-worker.log +- sudo cat /var/log/pywb.log notifications: slack: secure: KPPXSscXnmSEQ2NXBZFKrzDEYHg067Kv1WR7RTRUH8EIlSS9MHTyErRa7HkaRPmqOllj4vvPbplNU2ALnCfhP4cqW+MvF0xv3GuEGXQ7Om2sBvVUQ3w0JJ5rLq9ferAfGdSnQFeViqfDix5LA3fMNZGouUHQdUHq7iO8E9n9jntvkKO9Jff7Dyo0K5KvOZOJfM9KsqFZLlFO5zoNB6Y9jubIT7+Ulk3EDto/Kny34VPIyJIm7y0cHHlYLEq780AweY0EIwMyMg/VPSRrVAsbLSrilO0YRgsQpjPC9Ci/rAWNWooaOk0eA+bwv1uHQnGtH0z446XUMXr3UZ2QlD4DE/uoP2okkl8EtqvlmEyjV8eO86TqYFDRgKfYpvlK6hHtb7SAHX28QeXQjbKNc5f7KpKO5PtZqaoBRL7acLlKyS8xQGiRtonTPFSBTFR2A+s6dZmKO9dDboglptiHk4dvL1ZD4S8qLJn1JjTJqvIU6tpCY3BpNErn4n1MkDjN5nqdXf7Q9Vmui8vRetwnMf1oXcsKj9FEt2utNfDqFNXcFsN+Mnr9rhXQ1++gt/7Zo844OowiARcxqZTNy5LqSD01WgGCvNMy3Odf+FTQ8PcDOF+001+g8La1R99U0o9/hT/gy+WYk2prYneWru4pQHF/a6goZgkLTwkskcaPVpDJtDs= diff --git a/ansible/roles/brozzler-dashboard/handlers/main.yml b/ansible/roles/brozzler-dashboard/handlers/main.yml index 610c470..39c6b75 100644 --- a/ansible/roles/brozzler-dashboard/handlers/main.yml +++ b/ansible/roles/brozzler-dashboard/handlers/main.yml @@ -1,4 +1,8 @@ --- - name: restart brozzler-dashboard - service: name=brozzler-dashboard state=restarted + svc: + name: brozzler-dashboard + state: restarted + service_dir: /etc/service become: true + diff --git a/ansible/roles/brozzler-dashboard/tasks/main.yml b/ansible/roles/brozzler-dashboard/tasks/main.yml index b341d1a..db09af7 100644 --- a/ansible/roles/brozzler-dashboard/tasks/main.yml +++ b/ansible/roles/brozzler-dashboard/tasks/main.yml @@ -3,18 +3,31 @@ file: path={{venv_root}}/brozzler-dashboard-ve3 state=directory owner={{user}} become: true + - name: install brozzler[dashboard] in virtualenv - pip: name='{{brozzler_pip_name}}[dashboard]' - virtualenv={{venv_root}}/brozzler-dashboard-ve3 - virtualenv_python=python3 - extra_args='--no-input --upgrade --pre --cache-dir=/tmp/pip-cache' + pip: + name: '{{brozzler_pip_name}}[dashboard]' + virtualenv: '{{venv_root}}/brozzler-dashboard-ve3' + virtualenv_python: python3 + virtualenv_command: python3 /usr/lib/python3/dist-packages/virtualenv.py + extra_args: '--no-input --upgrade --pre --cache-dir=/tmp/pip-cache' become: true become_user: '{{user}}' notify: - restart brozzler-dashboard -- name: install upstart config /etc/init/brozzler-dashboard.conf + +- name: mkdir /etc/service/brozzler-dashboard + file: + path: /etc/service/brozzler-dashboard + state: directory become: true - template: src=templates/brozzler-dashboard.conf.j2 - dest=/etc/init/brozzler-dashboard.conf + +- name: install /etc/service/brozzler-dashboard/run + template: + src: templates/brozzler-dashboard-run.j2 + dest: /etc/service/brozzler-dashboard/run + mode: 0755 notify: - restart brozzler-dashboard + become: true + diff --git a/ansible/roles/brozzler-dashboard/templates/brozzler-dashboard-run.j2 b/ansible/roles/brozzler-dashboard/templates/brozzler-dashboard-run.j2 new file mode 100644 index 0000000..45fe737 --- /dev/null +++ b/ansible/roles/brozzler-dashboard/templates/brozzler-dashboard-run.j2 @@ -0,0 +1,13 @@ +#!/bin/bash + +logfile=/var/log/brozzler-dashboard.log +touch $logfile +chown {{user}} $logfile + +exec nice setuidgid {{user}} \ + env WAYBACK_BASEURL=http://{{groups['pywb'][0]}}:8880/brozzler \ + RETHINKDB_SERVERS={{groups['rethinkdb'] | join(',')}} \ + RETHINKDB_DB=brozzler LANG=en_US.UTF-8 LC_COLLATE=C \ + gunicorn --bind=0.0.0.0:8881 brozzler.dashboard:app \ + >> $logfile 2>&1 + diff --git a/ansible/roles/brozzler-dashboard/templates/brozzler-dashboard.conf.j2 b/ansible/roles/brozzler-dashboard/templates/brozzler-dashboard.conf.j2 deleted file mode 100644 index 7a8f0bb..0000000 --- a/ansible/roles/brozzler-dashboard/templates/brozzler-dashboard.conf.j2 +++ /dev/null @@ -1,17 +0,0 @@ -description "brozzler-dashboard" - -start on runlevel [2345] -stop on runlevel [!2345] - -env PATH={{venv_root}}/brozzler-dashboard-ve3/bin:/usr/bin:/bin -env LC_ALL=C.UTF-8 - -env WAYBACK_BASEURL=http://{{groups['pywb'][0]}}:8880/brozzler -env RETHINKDB_SERVERS={{groups['rethinkdb'] | join(',')}} -env RETHINKDB_DB=brozzler - -setuid {{user}} - -console log - -exec gunicorn --bind=0.0.0.0:8881 brozzler.dashboard:app diff --git a/ansible/roles/brozzler-worker/handlers/main.yml b/ansible/roles/brozzler-worker/handlers/main.yml index 1fac304..e139240 100644 --- a/ansible/roles/brozzler-worker/handlers/main.yml +++ b/ansible/roles/brozzler-worker/handlers/main.yml @@ -1,13 +1,22 @@ --- - name: restart Xvnc - service: name=Xvnc state=restarted - become: true -- name: restart websockify - service: name=websockify state=restarted + svc: + name: Xvnc + state: restarted + service_dir: /etc/service become: true + - name: restart vnc-websock - service: name=vnc-websock state=restarted + svc: + name: vnc-websock + state: restarted + service_dir: /etc/service become: true + - name: restart brozzler-worker - service: name=brozzler-worker state=restarted + svc: + name: brozzler-worker + state: restarted + service_dir: /etc/service become: true + diff --git a/ansible/roles/brozzler-worker/tasks/main.yml b/ansible/roles/brozzler-worker/tasks/main.yml index bd9512a..ebf5d2d 100644 --- a/ansible/roles/brozzler-worker/tasks/main.yml +++ b/ansible/roles/brozzler-worker/tasks/main.yml @@ -3,8 +3,10 @@ apt_repository: repo='deb http://archive.canonical.com/ubuntu trusty partner' state=present become: true + - apt: update_cache=yes become: true + - name: ensure required packages are installed become: true apt: name={{item}} state=present @@ -33,41 +35,72 @@ - fonts-indic - fonts-thai-tlwg - fonts-lklug-sinhala -- name: install Xvnc upstart config /etc/init/Xvnc.conf - template: src=templates/Xvnc.conf.j2 dest=/etc/init/Xvnc.conf + +- name: mkdir /etc/service/warcprox + file: + path: '/etc/service/{{item}}' + state: directory + with_items: + - Xvnc + - websockify + - vnc-websock + - brozzler-worker become: true + +- name: install /etc/service/Xvnc/run + template: + src: templates/Xvnc-run.j2 + dest: /etc/service/Xvnc/run + mode: 0755 notify: - restart Xvnc + become: true + - name: mkdir {{venv_root}}/websockify-ve3 become: true file: path={{venv_root}}/websockify-ve3 state=directory owner={{user}} + - name: install websockify in virtualenv - pip: name=git+https://github.com/kanaka/websockify.git#egg=websockify - virtualenv={{venv_root}}/websockify-ve3 - virtualenv_python=python3 - extra_args='--no-input --upgrade --pre --cache-dir=/tmp/pip-cache' + pip: + name: git+https://github.com/kanaka/websockify.git#egg=websockify + virtualenv: '{{venv_root}}/websockify-ve3' + virtualenv_python: python3 + virtualenv_command: python3 /usr/lib/python3/dist-packages/virtualenv.py + extra_args: '--no-input --upgrade --pre --cache-dir=/tmp/pip-cache' become: true become_user: '{{user}}' -- name: install vnc-websock upstart config /etc/init/vnc-websock.conf - template: src=templates/vnc-websock.conf.j2 dest=/etc/init/vnc-websock.conf - become: true + +- name: install /etc/service/vnc-websock/run + template: + src: templates/vnc-websock-run.j2 + dest: /etc/service/vnc-websock/run + mode: 0755 notify: - restart vnc-websock + become: true + - name: mkdir {{venv_root}}/brozzler-ve3 become: true file: path={{venv_root}}/brozzler-ve3 state=directory owner={{user}} + - name: install brozzler in virtualenv pip: name: '{{brozzler_pip_name}}' virtualenv: '{{venv_root}}/brozzler-ve3' virtualenv_python: python3 + virtualenv_command: python3 /usr/lib/python3/dist-packages/virtualenv.py extra_args: '--no-input --upgrade --pre --cache-dir=/tmp/pip-cache' become: true become_user: '{{user}}' notify: - restart brozzler-worker -- name: install brozzler-worker upstart config /etc/init/brozzler-worker.conf - template: src=templates/brozzler-worker.conf.j2 dest=/etc/init/brozzler-worker.conf - become: true + +- name: install /etc/service/brozzler-worker/run + template: + src: templates/brozzler-worker-run.j2 + dest: /etc/service/brozzler-worker/run + mode: 0755 notify: - restart brozzler-worker + become: true + diff --git a/ansible/roles/brozzler-worker/templates/Xvnc-run.j2 b/ansible/roles/brozzler-worker/templates/Xvnc-run.j2 new file mode 100644 index 0000000..e8d573d --- /dev/null +++ b/ansible/roles/brozzler-worker/templates/Xvnc-run.j2 @@ -0,0 +1,14 @@ +#!/bin/bash + +cd /tmp + +logfile=/var/log/Xvnc.log +touch $logfile +chown {{user}} $logfile + +exec nice setuidgid {{user}} Xvnc4 :1 -auth /tmp/Xauthority.{{user}} \ + -geometry 1600x1000 -depth 24 -rfbwait 0 -nolisten tcp -rfbport 5901 \ + -SecurityTypes None -pn -fp /usr/share/fonts/X11/misc/ -co /etc/X11/rgb \ + AcceptCutText=0 AcceptPointerEvents=0 AcceptKeyEvents=0 \ + >> $logfile 2>&1 + diff --git a/ansible/roles/brozzler-worker/templates/Xvnc.conf.j2 b/ansible/roles/brozzler-worker/templates/Xvnc.conf.j2 deleted file mode 100644 index 57ece99..0000000 --- a/ansible/roles/brozzler-worker/templates/Xvnc.conf.j2 +++ /dev/null @@ -1,14 +0,0 @@ -description "Xvnc" - -start on runlevel [2345] -stop on runlevel [!2345] - -setuid {{user}} - -console log - -exec nice Xvnc4 :1 -auth /tmp/Xauthority.{{user}} \ - -geometry 1600x1000 -depth 24 -rfbwait 0 -nolisten tcp -rfbport 5901 \ - -SecurityTypes None -pn -fp /usr/share/fonts/X11/misc/ -co /etc/X11/rgb \ - AcceptCutText=0 AcceptPointerEvents=0 AcceptKeyEvents=0 - diff --git a/ansible/roles/brozzler-worker/templates/brozzler-worker-run.j2 b/ansible/roles/brozzler-worker/templates/brozzler-worker-run.j2 new file mode 100644 index 0000000..9889ef7 --- /dev/null +++ b/ansible/roles/brozzler-worker/templates/brozzler-worker-run.j2 @@ -0,0 +1,17 @@ +#!/bin/bash + +logfile=/var/log/brozzler-worker.log +touch $logfile +chown {{user}} $logfile + +source {{venv_root}}/brozzler-ve3/bin/activate + +exec nice setuidgid {{user}} \ + env LANG=en_US.UTF-8 LC_COLLATE=C \ + brozzler-worker \ + --rethinkdb-servers={{groups['rethinkdb'] | join(',')}} \ + --max-browsers=4 \ + --verbose \ + --warcprox-auto \ + >> $logfile 2>&1 + diff --git a/ansible/roles/brozzler-worker/templates/brozzler-worker.conf.j2 b/ansible/roles/brozzler-worker/templates/brozzler-worker.conf.j2 deleted file mode 100644 index 5b9f711..0000000 --- a/ansible/roles/brozzler-worker/templates/brozzler-worker.conf.j2 +++ /dev/null @@ -1,24 +0,0 @@ -description "brozzler-worker" - -start on runlevel [2345] -stop on runlevel [!2345] - -env DISPLAY=:1 -env PATH={{venv_root}}/brozzler-ve3/bin:/usr/bin:/bin -env LANG=C.UTF-8 - -setuid {{user}} - -console log - -# depends on vnc server -start on started Xvnc -stop on stopping Xvnc - -kill timeout 60 - -exec nice brozzler-worker \ - --rethinkdb-servers={{groups['rethinkdb'] | join(',')}} \ - --max-browsers=4 \ - --verbose \ - --warcprox-auto diff --git a/ansible/roles/brozzler-worker/templates/vnc-websock-run.j2 b/ansible/roles/brozzler-worker/templates/vnc-websock-run.j2 new file mode 100644 index 0000000..522c125 --- /dev/null +++ b/ansible/roles/brozzler-worker/templates/vnc-websock-run.j2 @@ -0,0 +1,10 @@ +#!/bin/bash + +logfile=/var/log/vnc-websock.log +touch $logfile +chown {{user}} $logfile + +source /opt/websockify-ve3/bin/activate + +exec nice setuidgid {{user}} websockify 0.0.0.0:8901 localhost:5901 >> $logfile 2>&1 + diff --git a/ansible/roles/brozzler-worker/templates/vnc-websock.conf.j2 b/ansible/roles/brozzler-worker/templates/vnc-websock.conf.j2 deleted file mode 100644 index a26345d..0000000 --- a/ansible/roles/brozzler-worker/templates/vnc-websock.conf.j2 +++ /dev/null @@ -1,14 +0,0 @@ -description "vnc-websock" - -start on runlevel [2345] -stop on runlevel [!2345] - -setuid {{user}} - -console log - -env PATH={{venv_root}}/websockify-ve3/bin:/usr/bin:/bin - -# port 8901 is hard-coded in brozzler/dashboard/static/partials/workers.html -exec nice websockify 0.0.0.0:8901 localhost:5901 - diff --git a/ansible/roles/common/tasks/main.yml b/ansible/roles/common/tasks/main.yml index 2167ba1..6ff90a2 100644 --- a/ansible/roles/common/tasks/main.yml +++ b/ansible/roles/common/tasks/main.yml @@ -1,44 +1,74 @@ --- -# get latest pip (had problems with version from apt-get, specifically -# "pip install pyopenssl" did not install the dependency "cryptography") -# http://stackoverflow.com/questions/34587473/what-is-get-pip-py-checksum-where-can-i-get-it-for-sure -- name: install setuptools for python 2 and 3 +- apt: + name: + - python3-setuptools + - python3-pip + - python3-virtualenv + - daemontools + - daemontools-run + state: present + update_cache: yes + cache_valid_time: 86400 # one day become: true - apt: name={{item}} state=present - with_items: - - python-setuptools - - python3-setuptools -- name: download pip-9.0.1.tar.gz - get_url: - url: https://pypi.python.org/packages/11/b6/abcb525026a4be042b486df43905d6893fb04f05aac21c32c638e939e447/pip-9.0.1.tar.gz - dest: /tmp - checksum: sha1:57ff41e99cb01b6a1c2b0999161589b726f0ec8b -- name: extract pip-9.0.1.tar.gz - unarchive: src=/tmp/pip-9.0.1.tar.gz dest=/tmp copy=no + +# # get recent virtualenv, which bundles a recent pip +# - find: +# paths: +# - /usr/local/lib/python3.4/dist-packages +# - /usr/local/lib/python3.5/dist-packages +# recurse: true +# patterns: virtualenv.py +# contains: '__version__ = "16.4.3"' +# register: virtualenv_py_16_4_3 +# +# - command: mktemp -d +# register: mktempd_out +# when: virtualenv_py_16_4_3.matched == 0 +# +# - name: download virtualenv-16.4.3 +# get_url: +# url: https://files.pythonhosted.org/packages/37/db/89d6b043b22052109da35416abc3c397655e4bd3cff031446ba02b9654fa/virtualenv-16.4.3.tar.gz +# dest: '{{mktempd_out.stdout}}' +# checksum: sha256:984d7e607b0a5d1329425dd8845bd971b957424b5ba664729fab51ab8c11bc39 +# when: virtualenv_py_16_4_3.matched == 0 +# +# - name: extract virtualenv-16.4.3.tar.gz +# unarchive: +# src: '{{mktempd_out.stdout}}/virtualenv-16.4.3.tar.gz' +# dest: '{{mktempd_out.stdout}}' +# copy: no +# when: virtualenv_py_16_4_3.matched == 0 +# +# - name: run "python3 setup.py install" in {{mktempd_out.stdout}}/virtualenv-16.4.3 +# become: true +# command: python3 setup.py install +# args: +# chdir: '{{mktempd_out.stdout}}/virtualenv-16.4.3' +# when: virtualenv_py_16_4_3.matched == 0 +# +# - file: +# path: '{{mktempd_out.stdout}}' +# state: absent +# become: true +# when: virtualenv_py_16_4_3.matched == 0 # this clause is a workaround for travis-ci, which only wants to install in /usr # see https://travis-ci.org/internetarchive/brozzler/builds/174338601 # but it complains that /usr/lib/python3.5/site-packages doesn't exist # see https://travis-ci.org/internetarchive/brozzler/builds/174094831 -- file: path={{item}} state=directory +- file: + path: '{{item}}' + state: directory with_items: - /usr/lib/python3.5/site-packages - /usr/lib/python3.5/dist-packages become: true -- name: run "python3 setup.py install" in /tmp/pip-9.0.1 - command: python3 setup.py install - chdir=/tmp/pip-9.0.1 - creates=/usr/local/lib/python3.5/dist-packages/pip-9.0.1-py3.5.egg/pip/__init__.py - become: true -- name: run "pip install virtualenv" - command: pip install virtualenv - creates=/usr/local/lib/python3.5/dist-packages/virtualenv.py - become: true - command: id {{user}} register: id_user ignore_errors: true changed_when: false + - name: ensure service user {{user}} exists user: name={{user}} system=yes createhome=no home=/nonexistent shell=/usr/sbin/nologin diff --git a/ansible/roles/pywb/handlers/main.yml b/ansible/roles/pywb/handlers/main.yml index 4424b3e..744d30a 100644 --- a/ansible/roles/pywb/handlers/main.yml +++ b/ansible/roles/pywb/handlers/main.yml @@ -1,5 +1,9 @@ --- - name: restart pywb - service: name=pywb state=restarted + svc: + name: pywb + state: restarted + service_dir: /etc/service become: true + diff --git a/ansible/roles/pywb/tasks/main.yml b/ansible/roles/pywb/tasks/main.yml index 7ffe49c..532ad4c 100644 --- a/ansible/roles/pywb/tasks/main.yml +++ b/ansible/roles/pywb/tasks/main.yml @@ -3,34 +3,50 @@ file: path={{venv_root}}/pywb-ve3 state=directory owner={{user}} become: true + - name: install pywb in virtualenv - pip: name=pywb - version=0.33.2 - virtualenv={{venv_root}}/pywb-ve3 - virtualenv_python=python3 - extra_args='--no-input --upgrade --pre --cache-dir=/tmp/pip-cache' + pip: + name: pywb + version: 0.33.2 + virtualenv: '{{venv_root}}/pywb-ve3' + virtualenv_python: python3 + virtualenv_command: python3 /usr/lib/python3/dist-packages/virtualenv.py + extra_args: '--no-input --upgrade --pre --cache-dir=/tmp/pip-cache' become: true become_user: '{{user}}' notify: - restart pywb + - name: install brozzler in pywb virtualenv - pip: name='{{brozzler_pip_name}}' - virtualenv={{venv_root}}/pywb-ve3 - virtualenv_python=python3 - extra_args='--no-input --upgrade --pre --cache-dir=/tmp/pip-cache' + pip: + name: '{{brozzler_pip_name}}' + virtualenv: '{{venv_root}}/pywb-ve3' + virtualenv_python: python3 + virtualenv_command: python3 /usr/lib/python3/dist-packages/virtualenv.py + extra_args: '--no-input --upgrade --pre --cache-dir=/tmp/pip-cache' become: true become_user: '{{user}}' notify: - restart pywb + - name: pywb config file /etc/pywb.yml template: src=templates/pywb.yml.j2 dest=/etc/pywb.yml become: true notify: - restart pywb -- name: upstart config file /etc/init/pywb.conf - template: src=templates/pywb.conf.j2 - dest=/etc/init/pywb.conf + +- name: mkdir /etc/service/pywb + file: + path: /etc/service/pywb + state: directory become: true + +- name: install /etc/service/pywb/run + template: + src: templates/pywb-run.j2 + dest: /etc/service/pywb/run + mode: 0755 notify: - restart pywb + become: true diff --git a/ansible/roles/pywb/templates/pywb-run.j2 b/ansible/roles/pywb/templates/pywb-run.j2 new file mode 100644 index 0000000..26a40f9 --- /dev/null +++ b/ansible/roles/pywb/templates/pywb-run.j2 @@ -0,0 +1,10 @@ +#!/bin/bash + +logfile=/var/log/pywb.log +touch $logfile +chown {{user}} $logfile + +exec nice setuidgid {{user}} env PYWB_CONFIG_FILE=/etc/pywb.yml \ + {{venv_root}}/pywb-ve3/bin/python {{venv_root}}/pywb-ve3/bin/brozzler-wayback \ + >> $logfile 2>&1 + diff --git a/ansible/roles/pywb/templates/pywb.conf.j2 b/ansible/roles/pywb/templates/pywb.conf.j2 deleted file mode 100644 index 6b3450c..0000000 --- a/ansible/roles/pywb/templates/pywb.conf.j2 +++ /dev/null @@ -1,12 +0,0 @@ -description "pywb" - -start on runlevel [2345] -stop on runlevel [!2345] - -env PYWB_CONFIG_FILE=/etc/pywb.yml - -setuid {{user}} - -console log - -exec nice {{venv_root}}/pywb-ve3/bin/python {{venv_root}}/pywb-ve3/bin/brozzler-wayback diff --git a/ansible/roles/warcprox/handlers/main.yml b/ansible/roles/warcprox/handlers/main.yml index 0b7edcd..8fbb3c9 100644 --- a/ansible/roles/warcprox/handlers/main.yml +++ b/ansible/roles/warcprox/handlers/main.yml @@ -1,4 +1,7 @@ --- - name: restart warcprox - service: name=warcprox state=restarted + svc: + name: warcprox + state: restarted + service_dir: /etc/service become: true diff --git a/ansible/roles/warcprox/tasks/main.yml b/ansible/roles/warcprox/tasks/main.yml index 74e8c59..8027606 100644 --- a/ansible/roles/warcprox/tasks/main.yml +++ b/ansible/roles/warcprox/tasks/main.yml @@ -13,16 +13,28 @@ become: true file: path={{venv_root}}/warcprox-ve3 state=directory owner={{user}} - name: install warcprox in virtualenv - pip: name=git+https://github.com/internetarchive/warcprox.git#egg=warcprox - virtualenv={{venv_root}}/warcprox-ve3 - virtualenv_python=python3 - extra_args='--no-input --upgrade --pre --cache-dir=/tmp/pip-cache' + pip: + name: git+https://github.com/internetarchive/warcprox.git#egg=warcprox + virtualenv: '{{venv_root}}/warcprox-ve3' + virtualenv_python: python3 + extra_args: --no-input --upgrade --pre --cache-dir=/tmp/pip-cache + virtualenv_command: python3 /usr/lib/python3/dist-packages/virtualenv.py become: true become_user: '{{user}}' notify: - restart warcprox -- name: install upstart config /etc/init/warcprox.conf + +- name: mkdir /etc/service/warcprox + file: + path: /etc/service/warcprox + state: directory become: true - template: src=templates/warcprox.conf.j2 dest=/etc/init/warcprox.conf + +- name: install /etc/service/warcprox/run + template: + src: templates/run.j2 + dest: /etc/service/warcprox/run + mode: 0755 notify: - restart warcprox + become: true diff --git a/ansible/roles/warcprox/templates/warcprox.conf.j2 b/ansible/roles/warcprox/templates/run.j2 similarity index 58% rename from ansible/roles/warcprox/templates/warcprox.conf.j2 rename to ansible/roles/warcprox/templates/run.j2 index 61f36ba..30cd173 100644 --- a/ansible/roles/warcprox/templates/warcprox.conf.j2 +++ b/ansible/roles/warcprox/templates/run.j2 @@ -1,16 +1,15 @@ -description "warcprox" +#!/bin/bash -start on runlevel [2345] -stop on runlevel [!2345] -# by default warcprox creates some files/dirs relative to cwd -chdir {{work_dir}} -setuid {{user}} +logfile=/var/log/warcprox.log +touch $logfile +chown {{user}} $logfile -console log +ulimit -n 4096 -# --profile -exec nice {{venv_root}}/warcprox-ve3/bin/python {{venv_root}}/warcprox-ve3/bin/warcprox \ +source {{venv_root}}/warcprox-ve3/bin/activate + +exec nice -n5 setuidgid {{user}} env LANG=en_US.UTF-8 LC_COLLATE=C warcprox \ --address=0.0.0.0 \ --dir={{warcs_dir}} \ --base32 \ @@ -19,4 +18,6 @@ exec nice {{venv_root}}/warcprox-ve3/bin/python {{venv_root}}/warcprox-ve3/bin/w --onion-tor-socks-proxy=localhost:9050 \ --rethinkdb-services-url=rethinkdb://{{groups['rethinkdb']|join(',')}}/brozzler/services \ --rethinkdb-stats-url=rethinkdb://{{groups['rethinkdb']|join(',')}}/brozzler/stats \ - --rethinkdb-big-table-url=rethinkdb://{{groups['rethinkdb']|join(',')}}/brozzler/captures + --rethinkdb-big-table-url=rethinkdb://{{groups['rethinkdb']|join(',')}}/brozzler/captures \ + >> $logfile 2>&1 + diff --git a/vagrant/README.rst b/vagrant/README.rst index fdb96bc..8b54e59 100644 --- a/vagrant/README.rst +++ b/vagrant/README.rst @@ -24,27 +24,27 @@ the brozzler virtualenv. :: my-laptop$ vagrant ssh - vagrant@brzl:~$ source /opt/brozzler-ve34/bin/activate - (brozzler-ve34)vagrant@brzl:~$ + vagrant@brzl:~$ source /opt/brozzler-ve3/bin/activate + (brozzler-ve3)vagrant@brzl:~$ Then you can run brozzler-new-site: :: - (brozzler-ve34)vagrant@brzl:~$ brozzler-new-site --proxy=localhost:8000 http://example.com/ + (brozzler-ve3)vagrant@brzl:~$ brozzler-new-site --proxy=localhost:8000 http://example.com/ Or brozzler-new-job (make sure to set the proxy to localhost:8000): :: - (brozzler-ve34)vagrant@brzl:~$ cat >job1.yml <job1.yml <=2.1b1.dev86"' -vagrant ssh -- "source /opt/brozzler-ve34/bin/activate && DISPLAY=:1 py.test -v /brozzler/tests $@" +vagrant ssh -- 'set -x ; source /opt/brozzler-ve3/bin/activate && pip install pytest && pip install --upgrade --pre "warcprox>=2.1b1.dev86"' +vagrant ssh -- "source /opt/brozzler-ve3/bin/activate && DISPLAY=:1 py.test -v /brozzler/tests $@" diff --git a/vagrant/vagrant-brozzler-new-job.py b/vagrant/vagrant-brozzler-new-job.py index 454e45a..c75d075 100755 --- a/vagrant/vagrant-brozzler-new-job.py +++ b/vagrant/vagrant-brozzler-new-job.py @@ -7,7 +7,7 @@ This is a standalone script with no dependencies other than python, and should work with python 2.7 or python 3.2+. The only reason it's not a bash script is so we can use the argparse library. -Copyright (C) 2016 Internet Archive +Copyright (C) 2016-2019 Internet Archive Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -41,9 +41,8 @@ def main(argv=[]): subprocess.call([ 'vagrant', 'ssh', '--', 'f=`mktemp` && cat > $f && ' - 'PYTHONPATH=/home/vagrant/brozzler-ve34/lib/python3.4/site-packages ' - '/home/vagrant/brozzler-ve34/bin/python ' - '/home/vagrant/brozzler-ve34/bin/brozzler-new-job $f'], + '/home/vagrant/brozzler-ve3/bin/python ' + '/home/vagrant/brozzler-ve3/bin/brozzler-new-job $f'], stdin=f) if __name__ == '__main__': diff --git a/vagrant/vagrant-brozzler-new-site.py b/vagrant/vagrant-brozzler-new-site.py index 99401c5..158095e 100755 --- a/vagrant/vagrant-brozzler-new-site.py +++ b/vagrant/vagrant-brozzler-new-site.py @@ -74,11 +74,9 @@ def main(argv=[]): os.chdir(os.path.dirname(__file__)) cmd = ( - 'PYTHONPATH=/home/vagrant/brozzler-ve34/lib/python3.4/site-packages ' - '/home/vagrant/brozzler-ve34/bin/python ' - '/home/vagrant/brozzler-ve34/bin/brozzler-new-site ' - '--proxy=localhost:8000 %s %s') % ( - ' '.join(options), args.seed) + '/home/vagrant/brozzler-ve3/bin/python ' + '/home/vagrant/brozzler-ve3/bin/brozzler-new-site ' + '--proxy=localhost:8000 %s %s') % (' '.join(options), args.seed) subprocess.call(['vagrant', 'ssh', '--', cmd]) if __name__ == '__main__': From 48bb03418ffaedddbc28066b6b734558a261cc33 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Sat, 23 Mar 2019 00:26:39 -0700 Subject: [PATCH 04/23] daemontools --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 301b377..f6cda55 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,7 +16,7 @@ install: - sudo apt-get update - sudo apt-get install --only-upgrade chromium-browser - chromium-browser --version -- sudo service brozzler-worker restart +- sudo svc -t /etc/service/brozzler-worker script: - DISPLAY=:1 py.test --tb=native -v tests after_failure: From 9c658cddf7693d19d733daa5e483d96c270f966b Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Sun, 24 Mar 2019 16:06:36 -0700 Subject: [PATCH 05/23] fix a couple of svc definitions --- .../brozzler-dashboard/templates/brozzler-dashboard-run.j2 | 2 ++ ansible/roles/warcprox/templates/run.j2 | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ansible/roles/brozzler-dashboard/templates/brozzler-dashboard-run.j2 b/ansible/roles/brozzler-dashboard/templates/brozzler-dashboard-run.j2 index 45fe737..da40564 100644 --- a/ansible/roles/brozzler-dashboard/templates/brozzler-dashboard-run.j2 +++ b/ansible/roles/brozzler-dashboard/templates/brozzler-dashboard-run.j2 @@ -4,6 +4,8 @@ logfile=/var/log/brozzler-dashboard.log touch $logfile chown {{user}} $logfile +source /opt/brozzler-dashboard-ve3/bin/activate + exec nice setuidgid {{user}} \ env WAYBACK_BASEURL=http://{{groups['pywb'][0]}}:8880/brozzler \ RETHINKDB_SERVERS={{groups['rethinkdb'] | join(',')}} \ diff --git a/ansible/roles/warcprox/templates/run.j2 b/ansible/roles/warcprox/templates/run.j2 index 30cd173..161c79f 100644 --- a/ansible/roles/warcprox/templates/run.j2 +++ b/ansible/roles/warcprox/templates/run.j2 @@ -1,12 +1,13 @@ #!/bin/bash - logfile=/var/log/warcprox.log touch $logfile chown {{user}} $logfile ulimit -n 4096 +cd {{work_dir}} + source {{venv_root}}/warcprox-ve3/bin/activate exec nice -n5 setuidgid {{user}} env LANG=en_US.UTF-8 LC_COLLATE=C warcprox \ From 85c6ac0ab208e91c8ed573626428a8695e759747 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Tue, 2 Apr 2019 12:05:08 -0700 Subject: [PATCH 06/23] fix next travis-ci problem --- ansible/roles/brozzler-worker/tasks/main.yml | 3 +-- tests/test_cluster.py | 4 ++-- vagrant/run-tests.sh | 10 +++++----- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/ansible/roles/brozzler-worker/tasks/main.yml b/ansible/roles/brozzler-worker/tasks/main.yml index ebf5d2d..4945052 100644 --- a/ansible/roles/brozzler-worker/tasks/main.yml +++ b/ansible/roles/brozzler-worker/tasks/main.yml @@ -36,13 +36,12 @@ - fonts-thai-tlwg - fonts-lklug-sinhala -- name: mkdir /etc/service/warcprox +- name: mkdir /etc/service/{Xvnc,vnc-websock,brozzler-worker} file: path: '/etc/service/{{item}}' state: directory with_items: - Xvnc - - websockify - vnc-websock - brozzler-worker become: true diff --git a/tests/test_cluster.py b/tests/test_cluster.py index f5007a1..13b3c66 100644 --- a/tests/test_cluster.py +++ b/tests/test_cluster.py @@ -35,10 +35,10 @@ import logging import warcprox def start_service(service): - subprocess.check_call(['sudo', 'service', service, 'start']) + subprocess.check_call(['sudo', 'svc', '-u', '/etc/service/' + service]) def stop_service(service): - subprocess.check_call(['sudo', 'service', service, 'stop']) + subprocess.check_call(['sudo', 'svc', '-d', '/etc/service/' + service]) @pytest.fixture(scope='module') def httpd(request): diff --git a/vagrant/run-tests.sh b/vagrant/run-tests.sh index 6ef8022..515b37b 100755 --- a/vagrant/run-tests.sh +++ b/vagrant/run-tests.sh @@ -10,11 +10,11 @@ cd $(dirname "${BASH_SOURCE[0]}") vagrant up echo service status: -vagrant ssh -- 'status warcprox ; - status Xvnc ; - status brozzler-worker ; - status brozzler-dashboard ; - status vnc-websock' +vagrant ssh -- 'sudo svcstat /etc/service/warcprox ; + sudo svcstat /etc/service/Xvnc ; + sudo svcstat /etc/service/brozzler-worker ; + sudo svcstat /etc/service/brozzler-dashboard ; + sudo svcstat /etc/service/vnc-websock' echo vagrant ssh -- 'set -x ; source /opt/brozzler-ve3/bin/activate && pip install pytest && pip install --upgrade --pre "warcprox>=2.1b1.dev86"' From 68ce9eac762d375b1e73b1a744b3d595e2684042 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Tue, 2 Apr 2019 13:05:36 -0700 Subject: [PATCH 07/23] debugging travis-ci is a slow process --- .travis.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index f6cda55..9f5b6b6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,8 +17,11 @@ install: - sudo apt-get install --only-upgrade chromium-browser - chromium-browser --version - sudo svc -t /etc/service/brozzler-worker +- sleep 10 +- sudo cat /var/log/brozzler-worker.log +- sudo cat /var/log/warcprox.log script: -- DISPLAY=:1 py.test --tb=native -v tests +- DISPLAY=:1 py.test --tb=native -x -v tests after_failure: - chromium-browser --version - sudo cat /var/log/warcprox.log From 9459ed40d02e2c8ce23487feec745d6f2f9a8288 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Thu, 4 Apr 2019 12:38:41 -0700 Subject: [PATCH 08/23] fix typo --- vagrant/run-tests.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vagrant/run-tests.sh b/vagrant/run-tests.sh index 515b37b..b1ff00b 100755 --- a/vagrant/run-tests.sh +++ b/vagrant/run-tests.sh @@ -10,11 +10,11 @@ cd $(dirname "${BASH_SOURCE[0]}") vagrant up echo service status: -vagrant ssh -- 'sudo svcstat /etc/service/warcprox ; - sudo svcstat /etc/service/Xvnc ; - sudo svcstat /etc/service/brozzler-worker ; - sudo svcstat /etc/service/brozzler-dashboard ; - sudo svcstat /etc/service/vnc-websock' +vagrant ssh -- 'sudo svstat /etc/service/warcprox ; + sudo svstat /etc/service/Xvnc ; + sudo svstat /etc/service/brozzler-worker ; + sudo svstat /etc/service/brozzler-dashboard ; + sudo svstat /etc/service/vnc-websock' echo vagrant ssh -- 'set -x ; source /opt/brozzler-ve3/bin/activate && pip install pytest && pip install --upgrade --pre "warcprox>=2.1b1.dev86"' From 899794f2dafb55ac451c061feb3953d0cfb6e6e4 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Thu, 4 Apr 2019 12:38:46 -0700 Subject: [PATCH 09/23] debug what's going on with chromium in travis see https://travis-ci.org/internetarchive/brozzler/jobs/514858838 (unroll "sudo cat /var/log/brozzler-worker.log") 2019-04-02 20:16:01,792 18595 CRITICAL BrozzlingThread:42073 brozzler.worker.BrozzlerWorker.brozzle_site(worker.py:412) unexpected exception Traceback (most recent call last): File "/opt/brozzler-ve3/lib/python3.6/site-packages/brozzler/worker.py", line 379, in brozzle_site enable_youtube_dl=not self._skip_youtube_dl) File "/opt/brozzler-ve3/lib/python3.6/site-packages/brozzler/worker.py", line 215, in brozzle_page browser, site, page, on_screenshot, on_request) File "/opt/brozzler-ve3/lib/python3.6/site-packages/brozzler/worker.py", line 292, in _browse_page cookie_db=site.get('cookie_db')) File "/opt/brozzler-ve3/lib/python3.6/site-packages/brozzler/browser.py", line 341, in start self.websock_url = self.chrome.start(**kwargs) File "/opt/brozzler-ve3/lib/python3.6/site-packages/brozzler/chrome.py", line 200, in start return self._websocket_url() File "/opt/brozzler-ve3/lib/python3.6/site-packages/brozzler/chrome.py", line 247, in _websocket_url raise e Exception: chrome process died with status 1 --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 9f5b6b6..1f5dcd4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,6 +16,7 @@ install: - sudo apt-get update - sudo apt-get install --only-upgrade chromium-browser - chromium-browser --version +- timeout 20 chromium-browser - sudo svc -t /etc/service/brozzler-worker - sleep 10 - sudo cat /var/log/brozzler-worker.log From 8303fd3ab357ccb4ff10aed3e1a65e067e452482 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Thu, 4 Apr 2019 12:50:50 -0700 Subject: [PATCH 10/23] guessing DISPLAY was the issue here https://travis-ci.org/internetarchive/brozzler/jobs/515882174#L610 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 1f5dcd4..952e7b2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,7 +16,7 @@ install: - sudo apt-get update - sudo apt-get install --only-upgrade chromium-browser - chromium-browser --version -- timeout 20 chromium-browser +- DISPLAY=:1 timeout 20 chromium-browser - sudo svc -t /etc/service/brozzler-worker - sleep 10 - sudo cat /var/log/brozzler-worker.log From 45ac12117a0b5fc0e9da010ada106dd5425edb5e Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Thu, 4 Apr 2019 13:09:02 -0700 Subject: [PATCH 11/23] maybe Xvnc.log will tell us something --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 952e7b2..bc5b0e2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,6 +16,7 @@ install: - sudo apt-get update - sudo apt-get install --only-upgrade chromium-browser - chromium-browser --version +- sudo cat /var/log/Xvnc.log - DISPLAY=:1 timeout 20 chromium-browser - sudo svc -t /etc/service/brozzler-worker - sleep 10 From 0d46d8ce19a1fd2e5fe658ace4c9471edd7f98f3 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Thu, 4 Apr 2019 13:15:17 -0700 Subject: [PATCH 12/23] still trying to figure out what's up with chromium --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index bc5b0e2..8ec77f1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,8 +16,9 @@ install: - sudo apt-get update - sudo apt-get install --only-upgrade chromium-browser - chromium-browser --version +- ps -fHe - sudo cat /var/log/Xvnc.log -- DISPLAY=:1 timeout 20 chromium-browser +- DISPLAY=:1 time timeout 20 chromium-browser - sudo svc -t /etc/service/brozzler-worker - sleep 10 - sudo cat /var/log/brozzler-worker.log From 6d145c87c8062c3cbb41bbfc6dc3275fd9251852 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Thu, 4 Apr 2019 13:24:12 -0700 Subject: [PATCH 13/23] chromium-browser --disable-extensions ? --- .travis.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8ec77f1..de279fb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,9 +16,10 @@ install: - sudo apt-get update - sudo apt-get install --only-upgrade chromium-browser - chromium-browser --version -- ps -fHe +- ps ww -fHe - sudo cat /var/log/Xvnc.log -- DISPLAY=:1 time timeout 20 chromium-browser +- time DISPLAY=:1 timeout 20 chromium-browser --disable-extensions +- time DISPLAY=:1 timeout 20 chromium-browser - sudo svc -t /etc/service/brozzler-worker - sleep 10 - sudo cat /var/log/brozzler-worker.log From 473e891fb4ee448fb19b0537feda38d21bbf4233 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Thu, 4 Apr 2019 13:34:45 -0700 Subject: [PATCH 14/23] not sure if --disable-extensions did something --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index de279fb..45bbeb3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,8 +18,8 @@ install: - chromium-browser --version - ps ww -fHe - sudo cat /var/log/Xvnc.log -- time DISPLAY=:1 timeout 20 chromium-browser --disable-extensions -- time DISPLAY=:1 timeout 20 chromium-browser +- time DISPLAY=:1 timeout 20 chromium-browser || true +- time DISPLAY=:1 timeout 20 chromium-browser --disable-extensions || true - sudo svc -t /etc/service/brozzler-worker - sleep 10 - sudo cat /var/log/brozzler-worker.log From 58d1d1c42989c61dc0accd927910c3c2b2e8d90c Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Thu, 4 Apr 2019 14:38:29 -0700 Subject: [PATCH 15/23] chromium-browser with no args isn't dying at start what about with all the args? --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 45bbeb3..252f711 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,7 +19,8 @@ install: - ps ww -fHe - sudo cat /var/log/Xvnc.log - time DISPLAY=:1 timeout 20 chromium-browser || true -- time DISPLAY=:1 timeout 20 chromium-browser --disable-extensions || true +- mkdir -vp /tmp/chium +- time HOME=/tmp/chium DISPLAY=:1 timeout 20 chromium-browser --remote-debugging-port=9222 --use-mock-keychain --user-data-dir=/tmp/chium/chrome-user-data --disable-background-networking --disable-renderer-backgrounding --disable-hang-monitor --disable-background-timer-throttling --mute-audio --disable-web-sockets --window-size=1100,900 --no-default-browser-check --disable-first-run-ui --no-first-run --homepage=about:blank --disable-direct-npapi-requests --disable-web-security --disable-notifications --disable-extensions --disable-save-password-bubble --ignore-certificate-errors --proxy-server=localhost:8000 about:blank || true - sudo svc -t /etc/service/brozzler-worker - sleep 10 - sudo cat /var/log/brozzler-worker.log From 55541be9e9cd4dde7e192b55e6c1e932bb63baec Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Thu, 4 Apr 2019 15:11:24 -0700 Subject: [PATCH 16/23] let's see chromium output inside brozzler-worker using --trace, because chromium seems to be working ok when we just run it --- ansible/roles/brozzler-worker/templates/brozzler-worker-run.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/brozzler-worker/templates/brozzler-worker-run.j2 b/ansible/roles/brozzler-worker/templates/brozzler-worker-run.j2 index 9889ef7..87a1c8f 100644 --- a/ansible/roles/brozzler-worker/templates/brozzler-worker-run.j2 +++ b/ansible/roles/brozzler-worker/templates/brozzler-worker-run.j2 @@ -11,7 +11,7 @@ exec nice setuidgid {{user}} \ brozzler-worker \ --rethinkdb-servers={{groups['rethinkdb'] | join(',')}} \ --max-browsers=4 \ - --verbose \ + --trace \ --warcprox-auto \ >> $logfile 2>&1 From fd0fe811e9b06481347ca46581839849de41cd8e Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Thu, 4 Apr 2019 16:09:21 -0700 Subject: [PATCH 17/23] so little output from chromium-browser :( https://travis-ci.org/internetarchive/brozzler/jobs/515942434 could it be problems running as this other user? --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 252f711..0336fdd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,6 +11,7 @@ before_install: - sudo pip install ansible==2.1.3.0 install: - ansible-playbook --extra-vars="brozzler_pip_name=file://$TRAVIS_BUILD_DIR#egg=brozzler user=travis" --inventory-file=ansible/hosts-localhost ansible/playbook.yml +- head -999 /etc/service/*/run - pip install $TRAVIS_BUILD_DIR git+https://github.com/internetarchive/warcprox.git#egg=warcprox pytest - chromium-browser --version - sudo apt-get update @@ -19,6 +20,7 @@ install: - ps ww -fHe - sudo cat /var/log/Xvnc.log - time DISPLAY=:1 timeout 20 chromium-browser || true +- time DISPLAY=:1 sudo -u brozzler timeout 20 chromium-browser || true - mkdir -vp /tmp/chium - time HOME=/tmp/chium DISPLAY=:1 timeout 20 chromium-browser --remote-debugging-port=9222 --use-mock-keychain --user-data-dir=/tmp/chium/chrome-user-data --disable-background-networking --disable-renderer-backgrounding --disable-hang-monitor --disable-background-timer-throttling --mute-audio --disable-web-sockets --window-size=1100,900 --no-default-browser-check --disable-first-run-ui --no-first-run --homepage=about:blank --disable-direct-npapi-requests --disable-web-security --disable-notifications --disable-extensions --disable-save-password-bubble --ignore-certificate-errors --proxy-server=localhost:8000 about:blank || true - sudo svc -t /etc/service/brozzler-worker From dfd9d9ecdd494af544bab2b0608d3d71ec2d145e Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Thu, 4 Apr 2019 17:22:15 -0700 Subject: [PATCH 18/23] omfg --- ansible/roles/brozzler-worker/templates/brozzler-worker-run.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/brozzler-worker/templates/brozzler-worker-run.j2 b/ansible/roles/brozzler-worker/templates/brozzler-worker-run.j2 index 87a1c8f..855411f 100644 --- a/ansible/roles/brozzler-worker/templates/brozzler-worker-run.j2 +++ b/ansible/roles/brozzler-worker/templates/brozzler-worker-run.j2 @@ -7,7 +7,7 @@ chown {{user}} $logfile source {{venv_root}}/brozzler-ve3/bin/activate exec nice setuidgid {{user}} \ - env LANG=en_US.UTF-8 LC_COLLATE=C \ + env DISPLAY=:1 LANG=en_US.UTF-8 LC_COLLATE=C \ brozzler-worker \ --rethinkdb-servers={{groups['rethinkdb'] | join(',')}} \ --max-browsers=4 \ From 433b201b5284b8ad7ff1ce2b887e864f0a28d4f7 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Tue, 9 Apr 2019 01:43:38 -0700 Subject: [PATCH 19/23] use logging.warning() to quiet py37 warnings --- brozzler/__init__.py | 2 +- brozzler/chrome.py | 8 ++++---- brozzler/cli.py | 6 +++--- brozzler/easy.py | 2 +- brozzler/frontier.py | 8 ++++---- brozzler/robots.py | 2 +- brozzler/worker.py | 8 ++++---- brozzler/ydl.py | 4 ++-- tests/test_cluster.py | 2 +- 9 files changed, 21 insertions(+), 21 deletions(-) diff --git a/brozzler/__init__.py b/brozzler/__init__.py index ccfaacd..c97835f 100644 --- a/brozzler/__init__.py +++ b/brozzler/__init__.py @@ -159,7 +159,7 @@ class ThreadExceptionGate: def queue_exception(self, e): with self.lock: if self.pending_exception: - self.logger.warn( + self.logger.warning( '%r already pending for thread %r, discarding %r', self.pending_exception, self.thread, e) else: diff --git a/brozzler/chrome.py b/brozzler/chrome.py index 5928586..c70296f 100644 --- a/brozzler/chrome.py +++ b/brozzler/chrome.py @@ -223,7 +223,7 @@ class Chrome: raise except Exception as e: if time.time() - self._last_warning > 30: - self.logger.warn( + self.logger.warning( 'problem with %s (will keep trying until timeout ' 'of %d seconds): %s', json_url, timeout_sec, e) self._last_warning = time.time() @@ -294,7 +294,7 @@ class Chrome: 'chrome pid %s exited normally', self.chrome_process.pid) else: - self.logger.warn( + self.logger.warning( 'chrome pid %s exited with nonzero status %s', self.chrome_process.pid, status) @@ -305,13 +305,13 @@ class Chrome: return time.sleep(0.5) - self.logger.warn( + self.logger.warning( 'chrome pid %s still alive %.1f seconds after sending ' 'SIGTERM, sending SIGKILL', self.chrome_process.pid, time.time() - t0) os.killpg(self.chrome_process.pid, signal.SIGKILL) status = self.chrome_process.wait() - self.logger.warn( + self.logger.warning( 'chrome pid %s reaped (status=%s) after killing with ' 'SIGKILL', self.chrome_process.pid, status) diff --git a/brozzler/cli.py b/brozzler/cli.py index 188d591..4b0bd76 100644 --- a/brozzler/cli.py +++ b/brozzler/cli.py @@ -627,7 +627,7 @@ def brozzler_purge(argv=None): sys.exit(1) if job.status == 'ACTIVE': if args.force: - logging.warn( + logging.warning( 'job %s has status ACTIVE, purging anyway because ' '--force was supplied', job_id) else: @@ -644,7 +644,7 @@ def brozzler_purge(argv=None): sys.exit(1) if site.status == 'ACTIVE': if args.force: - logging.warn( + logging.warning( 'site %s has status ACTIVE, purging anyway because ' '--force was supplied', site_id) else: @@ -712,7 +712,7 @@ def brozzler_list_captures(argv=None): if args.url_or_sha1[:5] == 'sha1:': if args.prefix: - logging.warn( + logging.warning( 'ignoring supplied --prefix option which does not apply ' 'to lookup by sha1') # assumes it's already base32 (XXX could detect if hex and convert) diff --git a/brozzler/easy.py b/brozzler/easy.py index 83cf1ba..dd98884 100644 --- a/brozzler/easy.py +++ b/brozzler/easy.py @@ -260,7 +260,7 @@ class BrozzlerEasyController: state_strs.append(str(th)) stack = traceback.format_stack(sys._current_frames()[th.ident]) state_strs.append(''.join(stack)) - logging.warn('dumping state (caught signal {})\n{}'.format( + logging.warning('dumping state (caught signal {})\n{}'.format( signum, '\n'.join(state_strs))) def main(argv=None): diff --git a/brozzler/frontier.py b/brozzler/frontier.py index 3826abf..0e3b777 100644 --- a/brozzler/frontier.py +++ b/brozzler/frontier.py @@ -138,7 +138,7 @@ class RethinkDbFrontier: sites = [] for i in range(result["replaced"]): if result["changes"][i]["old_val"]["claimed"]: - self.logger.warn( + self.logger.warning( "re-claimed site that was still marked 'claimed' " "because it was last claimed a long time ago " "at %s, and presumably some error stopped it from " @@ -225,7 +225,7 @@ class RethinkDbFrontier: if not job: return False if job.status.startswith("FINISH"): - self.logger.warn("%s is already %s", job, job.status) + self.logger.warning("%s is already %s", job, job.status) return True results = self.rr.table("sites").get_all(job_id, index="job_id").run() @@ -415,7 +415,7 @@ class RethinkDbFrontier: assert isinstance(e, brozzler.ReachedLimit) if (site.reached_limit and site.reached_limit != e.warcprox_meta["reached-limit"]): - self.logger.warn( + self.logger.warning( "reached limit %s but site had already reached limit %s", e.warcprox_meta["reached-limit"], self.reached_limit) else: @@ -434,7 +434,7 @@ class RethinkDbFrontier: index="priority_by_site").filter({"hops_from_seed":0}).run() pages = list(results) if len(pages) > 1: - self.logger.warn( + self.logger.warning( "more than one seed page for site_id %s ?", site_id) if len(pages) < 1: return None diff --git a/brozzler/robots.py b/brozzler/robots.py index 5b96423..4122093 100644 --- a/brozzler/robots.py +++ b/brozzler/robots.py @@ -106,7 +106,7 @@ def is_permitted_by_robots(site, url, proxy=None): # reppy has wrapped an exception that we want to bubble up raise brozzler.ProxyError(e) else: - logging.warn( + logging.warning( "returning true (permitted) after problem fetching " "robots.txt for %r: %r", url, e) return True diff --git a/brozzler/worker.py b/brozzler/worker.py index fba83aa..5ce5499 100644 --- a/brozzler/worker.py +++ b/brozzler/worker.py @@ -147,13 +147,13 @@ class BrozzlerWorker: try: with urllib.request.urlopen(request, timeout=600) as response: if response.getcode() != 204: - self.logger.warn( + self.logger.warning( 'got "%s %s" response on warcprox ' 'WARCPROX_WRITE_RECORD request (expected 204)', response.getcode(), response.reason) return request, response except urllib.error.HTTPError as e: - self.logger.warn( + self.logger.warning( 'got "%s %s" response on warcprox ' 'WARCPROX_WRITE_RECORD request (expected 204)', e.getcode(), e.info()) @@ -370,7 +370,7 @@ class BrozzlerWorker: if (page.needs_robots_check and not brozzler.is_permitted_by_robots( site, page.url, self._proxy_for(site))): - logging.warn("page %s is blocked by robots.txt", page.url) + logging.warning("page %s is blocked by robots.txt", page.url) page.blocked_by_robots = True self._frontier.completed_page(site, page) else: @@ -544,7 +544,7 @@ class BrozzlerWorker: def start(self): with self._start_stop_lock: if self._thread: - self.logger.warn( + self.logger.warning( 'ignoring start request because self._thread is ' 'not None') return diff --git a/brozzler/ydl.py b/brozzler/ydl.py index 57550e5..2388df9 100644 --- a/brozzler/ydl.py +++ b/brozzler/ydl.py @@ -48,7 +48,7 @@ _orig_webpage_read_content = youtube_dl.extractor.generic.GenericIE._webpage_rea def _webpage_read_content(self, *args, **kwargs): content = _orig_webpage_read_content(self, *args, **kwargs) if len(content) > 20000000: - logging.warn( + logging.warning( 'bypassing youtube-dl extraction because content is ' 'too large (%s characters)', len(content)) return '' @@ -185,7 +185,7 @@ def _build_youtube_dl(worker, destdir, site): mimetype = magic.from_file(ctx['filename'], mime=True) except ImportError as e: mimetype = 'video/%s' % info_dict['ext'] - self.logger.warn( + self.logger.warning( 'guessing mimetype %s because %r', mimetype, e) url = 'youtube-dl:%05d:%s' % ( diff --git a/tests/test_cluster.py b/tests/test_cluster.py index 13b3c66..c57abb8 100644 --- a/tests/test_cluster.py +++ b/tests/test_cluster.py @@ -684,7 +684,7 @@ def test_warcprox_outage_resiliency(httpd): try: stop_service('warcprox') except Exception as e: - logging.warn('problem stopping warcprox service: %s', e) + logging.warning('problem stopping warcprox service: %s', e) # queue the site for brozzling brozzler.new_site(frontier, site) From 8dfd92cf7f15603ea8dcdcaf8efb93c73dc5a65e Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Tue, 9 Apr 2019 01:44:14 -0700 Subject: [PATCH 20/23] fix this utility --- vagrant/vagrant-brozzler-new-site.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/vagrant/vagrant-brozzler-new-site.py b/vagrant/vagrant-brozzler-new-site.py index 158095e..b0a0d80 100755 --- a/vagrant/vagrant-brozzler-new-site.py +++ b/vagrant/vagrant-brozzler-new-site.py @@ -74,9 +74,8 @@ def main(argv=[]): os.chdir(os.path.dirname(__file__)) cmd = ( - '/home/vagrant/brozzler-ve3/bin/python ' - '/home/vagrant/brozzler-ve3/bin/brozzler-new-site ' - '--proxy=localhost:8000 %s %s') % (' '.join(options), args.seed) + '/opt/brozzler-ve3/bin/python /opt/brozzler-ve3/bin/brozzler-new-site ' + '%s %s') % (' '.join(options), args.seed) subprocess.call(['vagrant', 'ssh', '--', cmd]) if __name__ == '__main__': From f8165dc02b0607268ad24f80c48cd49e8ccadf23 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Wed, 15 May 2019 18:46:21 -0700 Subject: [PATCH 21/23] work around pytest issue until fix is out https://github.com/pytest-dev/pytest/issues/5257 --- .travis.yml | 2 +- vagrant/run-tests.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0336fdd..a2d8f2c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,7 +12,7 @@ before_install: install: - ansible-playbook --extra-vars="brozzler_pip_name=file://$TRAVIS_BUILD_DIR#egg=brozzler user=travis" --inventory-file=ansible/hosts-localhost ansible/playbook.yml - head -999 /etc/service/*/run -- pip install $TRAVIS_BUILD_DIR git+https://github.com/internetarchive/warcprox.git#egg=warcprox pytest +- pip install $TRAVIS_BUILD_DIR git+https://github.com/internetarchive/warcprox.git#egg=warcprox pytest==4.3.0 - chromium-browser --version - sudo apt-get update - sudo apt-get install --only-upgrade chromium-browser diff --git a/vagrant/run-tests.sh b/vagrant/run-tests.sh index b1ff00b..2d6c117 100755 --- a/vagrant/run-tests.sh +++ b/vagrant/run-tests.sh @@ -17,5 +17,5 @@ vagrant ssh -- 'sudo svstat /etc/service/warcprox ; sudo svstat /etc/service/vnc-websock' echo -vagrant ssh -- 'set -x ; source /opt/brozzler-ve3/bin/activate && pip install pytest && pip install --upgrade --pre "warcprox>=2.1b1.dev86"' -vagrant ssh -- "source /opt/brozzler-ve3/bin/activate && DISPLAY=:1 py.test -v /brozzler/tests $@" +vagrant ssh -- 'set -x ; source /opt/brozzler-ve3/bin/activate && pip install pytest==4.3.0 && pip install --upgrade --pre "warcprox>=2.1b1.dev86"' +vagrant ssh -- "source /opt/brozzler-ve3/bin/activate && DISPLAY=:1 py.test --tb=native -v /brozzler/tests $@" From 0a1360ab25a83f23b5c2ee4483435736e6a2f213 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Wed, 15 May 2019 18:49:18 -0700 Subject: [PATCH 22/23] don't use localhost for test http server... ... because apparently sometimes chromium bypasses the proxy for local addresses --- tests/test_cluster.py | 137 +++++++++++++++++++++++++----------------- 1 file changed, 82 insertions(+), 55 deletions(-) diff --git a/tests/test_cluster.py b/tests/test_cluster.py index c57abb8..e04624b 100644 --- a/tests/test_cluster.py +++ b/tests/test_cluster.py @@ -34,16 +34,41 @@ import http.server import logging import warcprox +# https://stackoverflow.com/questions/166506/finding-local-ip-addresses-using-pythons-stdlib +def _local_address(): + import socket + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + try: + s.connect(('10.255.255.255', 1)) # ip doesn't need to be reachable + return s.getsockname()[0] + except: + return '127.0.0.1' + finally: + s.close() + +local_address = _local_address() + def start_service(service): subprocess.check_call(['sudo', 'svc', '-u', '/etc/service/' + service]) def stop_service(service): subprocess.check_call(['sudo', 'svc', '-d', '/etc/service/' + service]) + while True: + status = subprocess.check_output( + ['sudo', 'svstat', '/etc/service/' + service]) + if b' down ' in status: + break + time.sleep(0.5) @pytest.fixture(scope='module') def httpd(request): class RequestHandler(http.server.SimpleHTTPRequestHandler): + def do_POST(self): + logging.info('\n%s\n%s', self.requestline, self.headers) + self.do_GET() + def do_GET(self): + logging.info('\n%s\n%s', self.requestline, self.headers) if self.path == '/site5/redirect/': self.send_response(303, 'See other') self.send_header('Connection', 'close') @@ -82,7 +107,7 @@ def httpd(request): # SimpleHTTPRequestHandler always uses CWD so we have to chdir os.chdir(os.path.join(os.path.dirname(__file__), 'htdocs')) - httpd = http.server.HTTPServer(('localhost', 0), RequestHandler) + httpd = http.server.HTTPServer((local_address, 0), RequestHandler) httpd_thread = threading.Thread(name='httpd', target=httpd.serve_forever) httpd_thread.start() @@ -94,6 +119,9 @@ def httpd(request): return httpd +def make_url(httpd, rel_url): + return 'http://%s:%s%s' % (local_address, httpd.server_port, rel_url) + def test_httpd(httpd): ''' Tests that our http server is working as expected, and that two fetches @@ -101,7 +129,7 @@ def test_httpd(httpd): deduplication. ''' payload1 = content2 = None - url = 'http://localhost:%s/site1/file1.txt' % httpd.server_port + url = make_url(httpd, '/site1/file1.txt') with urllib.request.urlopen(url) as response: assert response.status == 200 payload1 = response.read() @@ -140,13 +168,13 @@ def test_brozzle_site(httpd): test_id = 'test_brozzle_site-%s' % datetime.datetime.utcnow().isoformat() rr = doublethink.Rethinker('localhost', db='brozzler') site = brozzler.Site(rr, { - 'seed': 'http://localhost:%s/site1/' % httpd.server_port, + 'seed': make_url(httpd, '/site1/'), 'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}}) # the two pages we expect to be crawled - page1 = 'http://localhost:%s/site1/' % httpd.server_port - page2 = 'http://localhost:%s/site1/file1.txt' % httpd.server_port - robots = 'http://localhost:%s/robots.txt' % httpd.server_port + page1 = make_url(httpd, '/site1/') + page2 = make_url(httpd, '/site1/file1.txt') + robots = make_url(httpd, '/robots.txt') # so we can examine rethinkdb before it does anything try: @@ -171,8 +199,7 @@ def test_brozzle_site(httpd): pages = list(frontier.site_pages(site.id)) assert len(pages) == 2 assert {page.url for page in pages} == { - 'http://localhost:%s/site1/' % httpd.server_port, - 'http://localhost:%s/site1/file1.txt' % httpd.server_port} + make_url(httpd, '/site1/'), make_url(httpd, '/site1/file1.txt')} time.sleep(2) # in case warcprox hasn't finished processing urls # take a look at the captures table @@ -255,8 +282,8 @@ def test_proxy_non_warcprox(httpd): start_service('brozzler-worker') assert len(proxy.requests) <= 15 assert proxy.requests.count('GET /status') == 1 - assert ('GET http://localhost:%s/site1/' % httpd.server_port) in proxy.requests - assert ('GET http://localhost:%s/site1/file1.txt' % httpd.server_port) in proxy.requests + assert ('GET %s' % make_url(httpd, '/site1/')) in proxy.requests + assert ('GET %s' % make_url(httpd, '/site1/file1.txt')) in proxy.requests assert [req for req in proxy.requests if req.startswith('WARCPROX_WRITE_RECORD')] == [] proxy.shutdown() @@ -292,14 +319,14 @@ def _test_proxy_setting( datetime.datetime.utcnow().isoformat()) # the two pages we expect to be crawled - page1 = 'http://localhost:%s/site1/' % httpd.server_port - page2 = 'http://localhost:%s/site1/file1.txt' % httpd.server_port - robots = 'http://localhost:%s/robots.txt' % httpd.server_port + page1 = make_url(httpd, '/site1/') + page2 = make_url(httpd, '/site1/file1.txt') + robots = make_url(httpd, '/robots.txt') rr = doublethink.Rethinker('localhost', db='brozzler') service_registry = doublethink.ServiceRegistry(rr) site = brozzler.Site(rr, { - 'seed': 'http://localhost:%s/site1/' % httpd.server_port, + 'seed': make_url(httpd, '/site1/'), 'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}}) assert site.id is None frontier = brozzler.RethinkDbFrontier(rr) @@ -332,8 +359,8 @@ def _test_proxy_setting( pages = list(frontier.site_pages(site.id)) assert len(pages) == 2 assert {page.url for page in pages} == { - 'http://localhost:%s/site1/' % httpd.server_port, - 'http://localhost:%s/site1/file1.txt' % httpd.server_port} + make_url(httpd, '/site1/'), + make_url(httpd, '/site1/file1.txt')} time.sleep(2) # in case warcprox hasn't finished processing urls # take a look at the captures table @@ -360,7 +387,7 @@ def test_obey_robots(httpd): test_id = 'test_obey_robots-%s' % datetime.datetime.utcnow().isoformat() rr = doublethink.Rethinker('localhost', db='brozzler') site = brozzler.Site(rr, { - 'seed': 'http://localhost:%s/site1/' % httpd.server_port, + 'seed': make_url(httpd, '/site1/'), 'user_agent': 'im a badbot', # robots.txt blocks badbot 'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}}) @@ -390,12 +417,12 @@ def test_obey_robots(httpd): pages = list(frontier.site_pages(site.id)) assert len(pages) == 1 page = pages[0] - assert page.url == 'http://localhost:%s/site1/' % httpd.server_port + assert page.url == make_url(httpd, '/site1/') assert page.blocked_by_robots # take a look at the captures table time.sleep(2) # in case warcprox hasn't finished processing urls - robots_url = 'http://localhost:%s/robots.txt' % httpd.server_port + robots_url = make_url(httpd, '/robots.txt') captures = list(rr.table('captures').filter({'test_id':test_id}).run()) assert len(captures) == 1 assert captures[0]['url'] == robots_url @@ -412,7 +439,7 @@ def test_login(httpd): test_id = 'test_login-%s' % datetime.datetime.utcnow().isoformat() rr = doublethink.Rethinker('localhost', db='brozzler') site = brozzler.Site(rr, { - 'seed': 'http://localhost:%s/site2/' % httpd.server_port, + 'seed': make_url(httpd, '/site2/'), 'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}, 'username': 'test_username', 'password': 'test_password'}) @@ -428,7 +455,7 @@ def test_login(httpd): # take a look at the captures table time.sleep(2) # in case warcprox hasn't finished processing urls - robots_url = 'http://localhost:%s/robots.txt' % httpd.server_port + robots_url = make_url(httpd, '/robots.txt') captures = list(rr.table('captures').filter( {'test_id':test_id}).order_by('timestamp').run()) meth_url = ['%s %s' % (c['http_method'], c['url']) for c in captures] @@ -436,25 +463,25 @@ def test_login(httpd): # there are several forms in in htdocs/site2/login.html but only one # that brozzler's heuristic should match and try to submit, and it has # action='00', so we can check for that here - assert ('POST http://localhost:%s/site2/00' % httpd.server_port) in meth_url + assert ('POST %s' % make_url(httpd, '/site2/00')) in meth_url # sanity check the rest of the crawl - assert ('GET http://localhost:%s/robots.txt' % httpd.server_port) in meth_url - assert ('GET http://localhost:%s/site2/' % httpd.server_port) in meth_url - assert ('WARCPROX_WRITE_RECORD screenshot:http://localhost:%s/site2/' % httpd.server_port) in meth_url - assert ('WARCPROX_WRITE_RECORD thumbnail:http://localhost:%s/site2/' % httpd.server_port) in meth_url - assert ('GET http://localhost:%s/site2/login.html' % httpd.server_port) in meth_url - assert ('WARCPROX_WRITE_RECORD screenshot:http://localhost:%s/site2/login.html' % httpd.server_port) in meth_url - assert ('WARCPROX_WRITE_RECORD thumbnail:http://localhost:%s/site2/login.html' % httpd.server_port) in meth_url + assert ('GET %s' % make_url(httpd, '/robots.txt')) in meth_url + assert ('GET %s' % make_url(httpd, '/site2/')) in meth_url + assert ('WARCPROX_WRITE_RECORD screenshot:%s' % make_url(httpd, '/site2/')) in meth_url + assert ('WARCPROX_WRITE_RECORD thumbnail:%s' % make_url(httpd, '/site2/')) in meth_url + assert ('GET %s' % make_url(httpd, '/site2/login.html')) in meth_url + assert ('WARCPROX_WRITE_RECORD screenshot:%s' % make_url(httpd, '/site2/login.html')) in meth_url + assert ('WARCPROX_WRITE_RECORD thumbnail:%s' % make_url(httpd, '/site2/login.html')) in meth_url def test_seed_redirect(httpd): test_id = 'test_seed_redirect-%s' % datetime.datetime.utcnow().isoformat() rr = doublethink.Rethinker('localhost', db='brozzler') - seed_url = 'http://localhost:%s/site5/redirect/' % httpd.server_port + seed_url = make_url(httpd, '/site5/redirect/') site = brozzler.Site(rr, { - 'seed': 'http://localhost:%s/site5/redirect/' % httpd.server_port, + 'seed': make_url(httpd, '/site5/redirect/'), 'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}}) - assert site.scope == {'accepts': [{'ssurt': 'localhost,//%s:http:/site5/redirect/' % httpd.server_port}]} + assert site.scope == {'accepts': [{'ssurt': '%s//%s:http:/site5/redirect/' % (local_address, httpd.server_port)}]} frontier = brozzler.RethinkDbFrontier(rr) brozzler.new_site(frontier, site) @@ -473,19 +500,19 @@ def test_seed_redirect(httpd): pages.sort(key=lambda page: page.hops_from_seed) assert pages[0].hops_from_seed == 0 assert pages[0].url == seed_url - assert pages[0].redirect_url == 'http://localhost:%s/site5/destination/' % httpd.server_port + assert pages[0].redirect_url == make_url(httpd, '/site5/destination/') assert pages[1].hops_from_seed == 1 - assert pages[1].url == 'http://localhost:%s/site5/destination/page2.html' % httpd.server_port + assert pages[1].url == make_url(httpd, '/site5/destination/page2.html') # check that scope has been updated properly assert site.scope == {'accepts': [ - {'ssurt': 'localhost,//%s:http:/site5/redirect/' % httpd.server_port}, - {'ssurt': 'localhost,//%s:http:/site5/destination/' % httpd.server_port}]} + {'ssurt': '%s//%s:http:/site5/redirect/' % (local_address, httpd.server_port)}, + {'ssurt': '%s//%s:http:/site5/destination/' % (local_address, httpd.server_port)}]} def test_hashtags(httpd): test_id = 'test_hashtags-%s' % datetime.datetime.utcnow().isoformat() rr = doublethink.Rethinker('localhost', db='brozzler') - seed_url = 'http://localhost:%s/site7/' % httpd.server_port + seed_url = make_url(httpd, '/site7/') site = brozzler.Site(rr, { 'seed': seed_url, 'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}}) @@ -507,9 +534,9 @@ def test_hashtags(httpd): assert pages[0].url == seed_url assert pages[0].hops_from_seed == 0 assert pages[0].brozzle_count == 1 - assert pages[0].outlinks['accepted'] == ['http://localhost:%s/site7/foo.html' % httpd.server_port] + assert pages[0].outlinks['accepted'] == [make_url(httpd, '/site7/foo.html')] assert not pages[0].hashtags - assert pages[1].url == 'http://localhost:%s/site7/foo.html' % httpd.server_port + assert pages[1].url == make_url(httpd, '/site7/foo.html') assert pages[1].hops_from_seed == 1 assert pages[1].brozzle_count == 1 assert sorted(pages[1].hashtags) == ['#boosh','#ignored','#whee',] @@ -520,18 +547,18 @@ def test_hashtags(httpd): captures_by_url = { c['url']: c for c in captures if c['http_method'] != 'HEAD'} assert seed_url in captures_by_url - assert 'http://localhost:%s/site7/foo.html' % httpd.server_port in captures_by_url - assert 'http://localhost:%s/site7/whee.txt' % httpd.server_port in captures_by_url - assert 'http://localhost:%s/site7/boosh.txt' % httpd.server_port in captures_by_url + assert make_url(httpd, '/site7/foo.html') in captures_by_url + assert make_url(httpd, '/site7/whee.txt') in captures_by_url + assert make_url(httpd, '/site7/boosh.txt') in captures_by_url assert 'screenshot:%s' % seed_url in captures_by_url assert 'thumbnail:%s' % seed_url in captures_by_url - assert 'screenshot:http://localhost:%s/site7/foo.html' % httpd.server_port in captures_by_url - assert 'thumbnail:http://localhost:%s/site7/foo.html' % httpd.server_port in captures_by_url + assert 'screenshot:%s' % make_url(httpd, '/site7/foo.html') in captures_by_url + assert 'thumbnail:%s' % make_url(httpd, '/site7/foo.html') in captures_by_url def test_redirect_hashtags(httpd): test_id = 'test_hashtags-%s' % datetime.datetime.utcnow().isoformat() rr = doublethink.Rethinker('localhost', db='brozzler') - seed_url = 'http://localhost:%s/site9/' % httpd.server_port + seed_url = make_url(httpd, '/site9/') site = brozzler.Site(rr, { 'seed': seed_url, 'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}}) @@ -553,9 +580,9 @@ def test_redirect_hashtags(httpd): assert pages[0].url == seed_url assert pages[0].hops_from_seed == 0 assert pages[0].brozzle_count == 1 - assert pages[0].outlinks['accepted'] == ['http://localhost:%s/site9/redirect.html' % httpd.server_port] + assert pages[0].outlinks['accepted'] == [make_url(httpd, '/site9/redirect.html')] assert not pages[0].hashtags - assert pages[1].url == 'http://localhost:%s/site9/redirect.html' % httpd.server_port + assert pages[1].url == make_url(httpd, '/site9/redirect.html') assert pages[1].hops_from_seed == 1 assert pages[1].brozzle_count == 1 assert sorted(pages[1].hashtags) == ['#hash1','#hash2',] @@ -563,7 +590,7 @@ def test_redirect_hashtags(httpd): time.sleep(2) # in case warcprox hasn't finished processing urls # take a look at the captures table captures = rr.table('captures').filter({'test_id':test_id}).run() - redirect_captures = [c for c in captures if c['url'] == 'http://localhost:%s/site9/redirect.html' % httpd.server_port and c['http_method'] == 'GET'] + redirect_captures = [c for c in captures if c['url'] == make_url(httpd, '/site9/redirect.html') and c['http_method'] == 'GET'] assert len(redirect_captures) == 2 # youtube-dl + browser, no hashtags # === expected captures === @@ -589,9 +616,9 @@ def test_stop_crawl(httpd): # create a new job with three sites that could be crawled forever job_conf = {'seeds': [ - {'url': 'http://localhost:%s/infinite/foo/' % httpd.server_port}, - {'url': 'http://localhost:%s/infinite/bar/' % httpd.server_port}, - {'url': 'http://localhost:%s/infinite/baz/' % httpd.server_port}]} + {'url': make_url(httpd, '/infinite/foo/')}, + {'url': make_url(httpd, '/infinite/bar/')}, + {'url': make_url(httpd, '/infinite/baz/')}]} job = brozzler.new_job(frontier, job_conf) assert job.id @@ -675,7 +702,7 @@ def test_warcprox_outage_resiliency(httpd): # put together a site to crawl test_id = 'test_warcprox_death-%s' % datetime.datetime.utcnow().isoformat() site = brozzler.Site(rr, { - 'seed': 'http://localhost:%s/infinite/' % httpd.server_port, + 'seed': make_url(httpd, '/infinite/'), 'warcprox_meta': {'captures-table-extra-fields':{'test_id':test_id}}}) try: @@ -771,7 +798,7 @@ def test_time_limit(httpd): # create a new job with one seed that could be crawled forever job_conf = {'seeds': [{ - 'url': 'http://localhost:%s/infinite/foo/' % httpd.server_port, + 'url': make_url(httpd, '/infinite/foo/'), 'time_limit': 20}]} job = brozzler.new_job(frontier, job_conf) assert job.id @@ -801,7 +828,7 @@ def test_ydl_stitching(httpd): rr = doublethink.Rethinker('localhost', db='brozzler') frontier = brozzler.RethinkDbFrontier(rr) site = brozzler.Site(rr, { - 'seed': 'http://localhost:%s/site10/' % httpd.server_port, + 'seed': make_url(httpd, '/site10/'), 'warcprox_meta': { 'warc-prefix': 'test_ydl_stitching', 'captures-table-extra-fields': {'test_id':test_id}}}) @@ -819,7 +846,7 @@ def test_ydl_stitching(httpd): assert len(pages) == 1 page = pages[0] assert len(page.videos) == 6 - stitched_url = 'youtube-dl:00001:http://localhost:%s/site10/' % httpd.server_port + stitched_url = 'youtube-dl:00001:%s' % make_url(httpd, '/site10/') assert { 'blame': 'youtube-dl', 'content-length': 267900, From c651bcdd1896a35bde1e6fa1e858fee979de5667 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Thu, 16 May 2019 00:21:28 -0700 Subject: [PATCH 23/23] remove some travis-ci debugging stuff --- .travis.yml | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/.travis.yml b/.travis.yml index a2d8f2c..c20872e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,6 @@ before_install: - sudo pip install ansible==2.1.3.0 install: - ansible-playbook --extra-vars="brozzler_pip_name=file://$TRAVIS_BUILD_DIR#egg=brozzler user=travis" --inventory-file=ansible/hosts-localhost ansible/playbook.yml -- head -999 /etc/service/*/run - pip install $TRAVIS_BUILD_DIR git+https://github.com/internetarchive/warcprox.git#egg=warcprox pytest==4.3.0 - chromium-browser --version - sudo apt-get update @@ -19,16 +18,10 @@ install: - chromium-browser --version - ps ww -fHe - sudo cat /var/log/Xvnc.log -- time DISPLAY=:1 timeout 20 chromium-browser || true -- time DISPLAY=:1 sudo -u brozzler timeout 20 chromium-browser || true -- mkdir -vp /tmp/chium -- time HOME=/tmp/chium DISPLAY=:1 timeout 20 chromium-browser --remote-debugging-port=9222 --use-mock-keychain --user-data-dir=/tmp/chium/chrome-user-data --disable-background-networking --disable-renderer-backgrounding --disable-hang-monitor --disable-background-timer-throttling --mute-audio --disable-web-sockets --window-size=1100,900 --no-default-browser-check --disable-first-run-ui --no-first-run --homepage=about:blank --disable-direct-npapi-requests --disable-web-security --disable-notifications --disable-extensions --disable-save-password-bubble --ignore-certificate-errors --proxy-server=localhost:8000 about:blank || true -- sudo svc -t /etc/service/brozzler-worker -- sleep 10 - sudo cat /var/log/brozzler-worker.log - sudo cat /var/log/warcprox.log script: -- DISPLAY=:1 py.test --tb=native -x -v tests +- DISPLAY=:1 py.test --tb=native -v tests after_failure: - chromium-browser --version - sudo cat /var/log/warcprox.log