Merge branch 'master' into qa

This commit is contained in:
Barbara Miller 2016-06-22 17:45:44 -07:00
commit 8f726eac76
13 changed files with 360 additions and 84 deletions

View file

@ -20,19 +20,56 @@ archiving.
Installation Installation
------------ ------------
XXX These instructions don't work at the moment. Brozzler requires some
customized packages not easily installable in the outside world. I intend to
remedy the situation soon.
:: ::
# set up virtualenv if desired # set up virtualenv if desired
pip install git+https://github.com/nlevitt/brozzler.git pip install brozzler
Brozzler also requires a rethinkdb deployment. Brozzler also requires a rethinkdb deployment.
Fonts for good screenshots Usage
-------------------------- -----
Launch one or more workers:
::
brozzler-worker -e chromium
Submit jobs:
::
brozzler-new-job myjob.yaml
Job Configuration
-----------------
Jobs are defined using yaml files. Options may be specified either at the
top level or on individual seeds. A job id and at least one seed url
must be specified; everything else is optional.
::
id: myjob
time_limit: 60 # seconds
proxy: 127.0.0.1:8000 # point at warcprox for archiving
ignore_robots: false
enable_warcprox_features: false
warcprox_meta: null
metadata: {}
seeds:
- url: http://one.example.org/
- url: http://two.example.org/
time_limit: 30
- url: http://three.example.org/
time_limit: 10
ignore_robots: true
scope:
surt: http://(org,example,
Fonts (for decent screenshots)
------------------------------
On ubuntu 14.04 trusty I installed these packages: On ubuntu 14.04 trusty I installed these packages:
@ -42,12 +79,10 @@ fonts-arphic-ukai fonts-farsiweb fonts-nafees fonts-sil-abyssinica
fonts-sil-ezra fonts-sil-padauk fonts-unfonts-extra fonts-unfonts-core fonts-sil-ezra fonts-sil-padauk fonts-unfonts-extra fonts-unfonts-core
ttf-indic-fonts fonts-thai-tlwg fonts-lklug-sinhala ttf-indic-fonts fonts-thai-tlwg fonts-lklug-sinhala
Haven't looked much at the resulting screenshots yet though.
License License
------- -------
Copyright 2015 Internet Archive Copyright 2015-2016 Internet Archive
Licensed under the Apache License, Version 2.0 (the "License"); you may Licensed under the Apache License, Version 2.0 (the "License"); you may
not use this software except in compliance with the License. You may not use this software except in compliance with the License. You may

View file

@ -27,6 +27,7 @@ import re
import rethinkstuff import rethinkstuff
import warnings import warnings
import requests import requests
import json
arg_parser = argparse.ArgumentParser(prog=os.path.basename(__file__), arg_parser = argparse.ArgumentParser(prog=os.path.basename(__file__),
description="brozzler-new-site - register site to brozzle", description="brozzler-new-site - register site to brozzle",
@ -63,7 +64,7 @@ site = brozzler.Site(
time_limit=int(args.time_limit) if args.time_limit else None, time_limit=int(args.time_limit) if args.time_limit else None,
ignore_robots=args.ignore_robots, ignore_robots=args.ignore_robots,
enable_warcprox_features=args.enable_warcprox_features, enable_warcprox_features=args.enable_warcprox_features,
warcprox_meta=json.loads(args.warcprox_meta)) warcprox_meta=json.loads(args.warcprox_meta) if args.warcprox_meta else None)
r = rethinkstuff.Rethinker(args.rethinkdb_servers.split(","), args.rethinkdb_db) r = rethinkstuff.Rethinker(args.rethinkdb_servers.split(","), args.rethinkdb_db)
frontier = brozzler.RethinkDbFrontier(r) frontier = brozzler.RethinkDbFrontier(r)

View file

@ -0,0 +1,159 @@
/*
* brozzler/behaviors.d/fec_gov.js - click on links that execute JavaScript to
* download report csv files for fec.gov/data
*
* Copyright (C) 2014-2016 Internet Archive
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
 * Behavior object for fec.gov/data pages. Once started it polls the page
 * every 250 ms: it looks for csv download links in the top document and in
 * every iframe whose document it is allowed to read, clicks the ones it has
 * not handled yet (selecting each associated radio button first, when there
 * are any), and scrolls when nothing on screen is left to click.
 */
var umbraBehavior = {
    // Seconds without a click or scroll after which isFinished() returns true.
    IDLE_TIMEOUT_SEC : 10,
    // Date.now() of the first polling tick after the most recent click or
    // scroll; null until the first tick has run.
    idleSince : null,
    // Not read or written by the code in this object; kept so the object's
    // shape stays the same for anything outside this script.
    alreadyClicked : {},

    /*
     * One polling tick: click whatever can be clicked right now, otherwise
     * scroll toward remaining targets, and keep the idle timer up to date.
     */
    intervalFunc : function() {
        var clickedSomething = false;
        var somethingLeftBelow = false;
        var somethingLeftAbove = false;
        var cssDownloadLinkSelector = "a[id^='id_csv']";

        var documents = this.reachableDocuments();

        for (var j = 0; j < documents.length; j++) {
            var clickDownloadLinkTargets = documents[j].querySelectorAll(cssDownloadLinkSelector);
            for (var i = 0; i < clickDownloadLinkTargets.length; i++) {
                var downloadLink = clickDownloadLinkTargets[i];
                // Drop the first 7 characters of the link id (presumably the
                // "id_csv" prefix plus a separator) to get the name shared
                // with the radio buttons that belong to this link.
                var sourceName = downloadLink.id.substring(7);
                var clickRadioButtonTargets = documents[j].querySelectorAll("input[name='" + sourceName + "']");

                if (clickRadioButtonTargets.length === 0) {
                    // No radio buttons: the link itself is the only target.
                    if (downloadLink.umbraClicked) {
                        continue;
                    }
                    this.hoverAndClick(downloadLink); // click the link to download the csv
                    clickedSomething = true;
                    this.idleSince = null;
                    downloadLink.umbraClicked = true;
                    continue;
                }

                for (var k = 0; k < clickRadioButtonTargets.length; ++k) {
                    var radioButton = clickRadioButtonTargets[k];
                    if (radioButton.umbraClicked) {
                        continue;
                    }

                    var where = this.aboveBelowOrOnScreen(radioButton);
                    if (where === 0) {
                        console.log("clicking on " + radioButton);
                        // select the correct date with the radio button
                        this.hoverAndClick(radioButton);
                        // click the link to download the csv for the selected date
                        this.hoverAndClick(downloadLink);
                        clickedSomething = true;
                        this.idleSince = null;
                        radioButton.umbraClicked = true;
                        // One radio button per link per tick: move on to the
                        // next download link, but keep going through the
                        // remaining links and documents.
                        break;
                    } else if (where > 0) {
                        somethingLeftBelow = true;
                    } else if (where < 0) {
                        somethingLeftAbove = true;
                    }
                }
            }
        }

        if (!clickedSomething) {
            if (somethingLeftAbove) {
                // everything on this screen has been clicked but we missed
                // something above
                window.scrollBy(0, -500);
                this.idleSince = null;
            } else if (somethingLeftBelow) {
                // everything on this screen has been clicked but there is
                // more below
                window.scrollBy(0, 200);
                this.idleSince = null;
            } else if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) {
                // not at the bottom of the page yet
                window.scrollBy(0, 200);
                this.idleSince = null;
            }
        }

        // Any click or scroll above reset idleSince to null, so this restarts
        // the idle clock after activity and starts it on the first idle tick.
        if (!this.idleSince) {
            this.idleSince = Date.now();
        }
    },

    /*
     * Returns the top-level document followed by the document of every
     * iframe in it that can be read. Reading contentWindow.document throws
     * for a cross-origin frame and for a frame whose contentWindow is null;
     * such frames are skipped so that one of them cannot abort every tick.
     */
    reachableDocuments : function() {
        var documents = [document];
        var iframes = document.querySelectorAll("iframe");
        for (var i = 0; i < iframes.length; i++) {
            var frameDocument = null;
            try {
                frameDocument = iframes[i].contentWindow.document;
            } catch (e) {
                // deliberately ignored: nothing in this frame can be clicked
            }
            if (frameDocument) {
                documents.push(frameDocument);
            }
        }
        return documents;
    },

    /*
     * Dispatches a bubbling mouseover event on the target and then clicks
     * it. The mouseover comes first since some urls are requested only on
     * this event - see https://webarchive.jira.com/browse/AITFIVE-451
     */
    hoverAndClick : function(target) {
        var mouseOverEvent = document.createEvent('Events');
        mouseOverEvent.initEvent("mouseover", true, false);
        target.dispatchEvent(mouseOverEvent);
        target.click();
    },

    /*
     * Starts polling: runs intervalFunc every 250 ms and remembers the
     * interval id in this.intervalId so isFinished() can cancel it.
     */
    start : function() {
        var that = this;
        this.intervalId = setInterval(function() {
            that.intervalFunc();
        }, 250);
    },

    /*
     * Returns true, and stops polling, once more than IDLE_TIMEOUT_SEC
     * seconds have passed since idleSince; returns false otherwise.
     */
    isFinished : function() {
        if (this.idleSince != null) {
            var idleTimeMs = Date.now() - this.idleSince;
            if (idleTimeMs / 1000 > this.IDLE_TIMEOUT_SEC) {
                clearInterval(this.intervalId);
                return true;
            }
        }
        return false;
    },

    /*
     * Classifies element e by the top edge of its bounding rectangle:
     * -1 if it is above the visible area, 1 if it is below, 0 if on screen.
     * getBoundingClientRect() is relative to the viewport, so the visible
     * range is 0..window.innerHeight regardless of how far the page has been
     * scrolled.
     * NOTE(review): for an element inside an iframe the rectangle is relative
     * to that iframe's viewport; no adjustment is made for that here.
     */
    aboveBelowOrOnScreen : function(e) {
        var eTop = e.getBoundingClientRect().top;
        if (eTop < 0) {
            return -1; // above
        } else if (eTop > window.innerHeight) {
            return 1; // below
        } else {
            return 0; // on screen
        }
    },
};
// Entry point invoked from outside of this script: reports whether
// umbraBehavior considers itself finished.
var umbraBehaviorFinished = function() {
    var finished = umbraBehavior.isFinished();
    return finished;
};
// Begin polling as soon as this script is evaluated.
umbraBehavior.start();

View file

@ -98,6 +98,10 @@ behaviors:
click_css_selector: button[data-more-results-bottom-button] click_css_selector: button[data-more-results-bottom-button]
click_until_hard_timeout: True click_until_hard_timeout: True
request_idle_timeout_sec: 10 request_idle_timeout_sec: 10
- # https://webarchive.jira.com/browse/ARI-4692
url_regex: '^https?://(?:www\.)?fec.gov/data/.*$'
behavior_js: fec_gov.js
request_idle_timeout_sec: 10
- # default fallback behavior - # default fallback behavior
url_regex: '^.*$' url_regex: '^.*$'
request_idle_timeout_sec: 10 request_idle_timeout_sec: 10

View file

@ -1,21 +1,21 @@
# '''
# brozzler/browser.py - classes responsible for running web browsers brozzler/browser.py - classes responsible for running web browsers
# (chromium/chromium) and browsing web pages in them (chromium/chromium) and browsing web pages in them
#
# Copyright (C) 2014-2016 Internet Archive Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
# You may obtain a copy of the License at You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and See the License for the specific language governing permissions and
# limitations under the License. limitations under the License.
# '''
import logging import logging
import json import json
@ -58,7 +58,10 @@ class BrowserPool:
self.logger.info("browser ports: {}".format([browser.chrome_port for browser in self._available])) self.logger.info("browser ports: {}".format([browser.chrome_port for browser in self._available]))
def acquire(self): def acquire(self):
"""Returns browser from pool if available, raises NoBrowsersAvailable otherwise.""" """
Returns browser from pool if available, raises NoBrowsersAvailable
otherwise.
"""
with self._lock: with self._lock:
try: try:
browser = self._available.pop() browser = self._available.pop()
@ -277,11 +280,23 @@ class Browser:
self.logger.info("retrieving outlinks for %s", self.url) self.logger.info("retrieving outlinks for %s", self.url)
self._waiting_on_outlinks_msg_id = self.send_to_chrome( self._waiting_on_outlinks_msg_id = self.send_to_chrome(
method="Runtime.evaluate", method="Runtime.evaluate",
params={"expression":"Array.prototype.slice.call(document.querySelectorAll('a[href]')).join(' ')"}) params={"expression": self.OUTLINKS_JS})
return False return False
else: # self._waiting_on_outlinks_msg_id else: # self._waiting_on_outlinks_msg_id
return False return False
# JavaScript expression sent to the browser with Runtime.evaluate to collect
# outlinks: gathers every a[href] element of the window and, recursively, of
# its child frames, and joins them with spaces. A child frame whose document
# cannot be read (e.g. a cross-origin frame) throws on access; it is skipped
# so that it does not make the whole expression fail.
OUTLINKS_JS = """
var compileOutlinks = function(frame) {
    var outlinks = Array.prototype.slice.call(
            frame.document.querySelectorAll('a[href]'));
    for (var i = 0; i < frame.frames.length; i++) {
        try {
            outlinks = outlinks.concat(compileOutlinks(frame.frames[i]));
        } catch (e) {
            // unreadable frame: skip it and keep the links found so far
        }
    }
    return outlinks;
}
compileOutlinks(window).join(' ');
"""
def _browse_interval_func(self): def _browse_interval_func(self):
"""Called periodically while page is being browsed. Returns True when """Called periodically while page is being browsed. Returns True when
finished browsing.""" finished browsing."""
@ -393,7 +408,8 @@ class Browser:
self._waiting_on_scroll_to_top_msg_id = None self._waiting_on_scroll_to_top_msg_id = None
elif message["id"] == self._waiting_on_outlinks_msg_id: elif message["id"] == self._waiting_on_outlinks_msg_id:
self.logger.debug("got outlinks message=%s", message) self.logger.debug("got outlinks message=%s", message)
self._outlinks = frozenset(message["result"]["result"]["value"].split(" ")) self._outlinks = frozenset(
message["result"]["result"]["value"].split())
elif message["id"] == self._waiting_on_document_url_msg_id: elif message["id"] == self._waiting_on_document_url_msg_id:
if message["result"]["result"]["value"] != self.url: if message["result"]["result"]["value"] != self.url:
if self.on_url_change: if self.on_url_change:

View file

@ -39,21 +39,43 @@ class RethinkDbFrontier:
def _ensure_db(self): def _ensure_db(self):
dbs = self.r.db_list().run() dbs = self.r.db_list().run()
if not self.r.dbname in dbs: if not self.r.dbname in dbs:
self.logger.info("creating rethinkdb database %s", repr(self.r.dbname)) self.logger.info(
"creating rethinkdb database %s", repr(self.r.dbname))
self.r.db_create(self.r.dbname).run() self.r.db_create(self.r.dbname).run()
tables = self.r.table_list().run() tables = self.r.table_list().run()
if not "sites" in tables: if not "sites" in tables:
self.logger.info("creating rethinkdb table 'sites' in database %s", repr(self.r.dbname)) self.logger.info(
self.r.table_create("sites", shards=self.shards, replicas=self.replicas).run() "creating rethinkdb table 'sites' in database %s",
self.r.table("sites").index_create("sites_last_disclaimed", [self.r.row["status"], self.r.row["last_disclaimed"]]).run() repr(self.r.dbname))
self.r.table_create(
"sites", shards=self.shards, replicas=self.replicas).run()
self.r.table("sites").index_create(
"sites_last_disclaimed", [
self.r.row["status"],
self.r.row["last_disclaimed"]]).run()
self.r.table("sites").index_create("job_id").run() self.r.table("sites").index_create("job_id").run()
if not "pages" in tables: if not "pages" in tables:
self.logger.info("creating rethinkdb table 'pages' in database %s", repr(self.r.dbname)) self.logger.info(
self.r.table_create("pages", shards=self.shards, replicas=self.replicas).run() "creating rethinkdb table 'pages' in database %s",
self.r.table("pages").index_create("priority_by_site", [self.r.row["site_id"], self.r.row["brozzle_count"], self.r.row["claimed"], self.r.row["priority"]]).run() repr(self.r.dbname))
self.r.table_create(
"pages", shards=self.shards, replicas=self.replicas).run()
self.r.table("pages").index_create(
"priority_by_site", [
self.r.row["site_id"], self.r.row["brozzle_count"],
self.r.row["claimed"], self.r.row["priority"]]).run()
# this index is for displaying pages in a sensible order in the web
# console
self.r.table("pages").index_create(
"least_hops", [
r.row["site_id"], r.row["brozzle_count"],
r.row["hops_from_seed"]])
if not "jobs" in tables: if not "jobs" in tables:
self.logger.info("creating rethinkdb table 'jobs' in database %s", repr(self.r.dbname)) self.logger.info(
self.r.table_create("jobs", shards=self.shards, replicas=self.replicas).run() "creating rethinkdb table 'jobs' in database %s",
repr(self.r.dbname))
self.r.table_create(
"jobs", shards=self.shards, replicas=self.replicas).run()
def _vet_result(self, result, **kwargs): def _vet_result(self, result, **kwargs):
# self.logger.debug("vetting expected=%s result=%s", kwargs, result) # self.logger.debug("vetting expected=%s result=%s", kwargs, result)

View file

@ -19,10 +19,11 @@
import setuptools import setuptools
import glob import glob
setuptools.setup(name='brozzler', setuptools.setup(
version='1.1.dev10', name='brozzler',
version='1.1.dev20',
description='Distributed web crawling with browsers', description='Distributed web crawling with browsers',
url='https://github.com/nlevitt/brozzler', url='https://github.com/internetarchive/brozzler',
author='Noah Levitt', author='Noah Levitt',
author_email='nlevitt@archive.org', author_email='nlevitt@archive.org',
long_description=open('README.rst', encoding='UTF-8').read(), long_description=open('README.rst', encoding='UTF-8').read(),
@ -41,10 +42,10 @@ setuptools.setup(name='brozzler',
'rethinkstuff', 'rethinkstuff',
'rethinkdb>=2.3,<2.4', 'rethinkdb>=2.3,<2.4',
'psutil', 'psutil',
], ],
zip_safe=False, zip_safe=False,
classifiers=[ classifiers=[
'Development Status :: 3 - Alpha', 'Development Status :: 4 - Beta',
'Environment :: Console', 'Environment :: Console',
'License :: OSI Approved :: Apache Software License', 'License :: OSI Approved :: Apache Software License',
'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.4',

View file

@ -1 +1 @@
flask --debug --app=brozzler-webconsole.py run --host=0.0.0.0 --port=8081 gunicorn --bind=0.0.0.0:8081 brozzler-webconsole:app

View file

@ -1,21 +1,21 @@
# '''
# brozzler-webconsole/__init__.py - flask app for brozzler web console, defines brozzler-webconsole/__init__.py - flask app for brozzler web console, defines
# api endspoints etc api endspoints etc
#
# Copyright (C) 2014-2016 Internet Archive Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
# You may obtain a copy of the License at You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and See the License for the specific language governing permissions and
# limitations under the License. limitations under the License.
# '''
import flask import flask
import rethinkstuff import rethinkstuff
@ -24,16 +24,26 @@ import sys
import os import os
import importlib import importlib
import rethinkdb import rethinkdb
import logging
import yaml
# XXX flask does its own logging config # flask does its own logging config
# import logging # logging.basicConfig(
# logging.basicConfig(stream=sys.stdout, level=logging.INFO, # stream=sys.stdout, level=logging.INFO,
# format="%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s") # format=(
# "%(asctime)s %(process)d %(levelname)s %(threadName)s "
# "%(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s")
app = flask.Flask(__name__) app = flask.Flask(__name__)
# http://stackoverflow.com/questions/26578733/why-is-flask-application-not-creating-any-logs-when-hosted-by-gunicorn
gunicorn_error_logger = logging.getLogger('gunicorn.error')
app.logger.handlers.extend(gunicorn_error_logger.handlers)
app.logger.setLevel(logging.INFO)
app.logger.info('will this show in the log?')
# configure with environment variables # configure with environment variables
SETTINGS= { SETTINGS = {
'RETHINKDB_SERVERS': os.environ.get( 'RETHINKDB_SERVERS': os.environ.get(
'RETHINKDB_SERVERS', 'localhost').split(','), 'RETHINKDB_SERVERS', 'localhost').split(','),
'RETHINKDB_DB': os.environ.get('RETHINKDB_DB', 'brozzler'), 'RETHINKDB_DB': os.environ.get('RETHINKDB_DB', 'brozzler'),
@ -81,10 +91,10 @@ def pages(site_id):
app.logger.info("flask.request.args=%s", flask.request.args) app.logger.info("flask.request.args=%s", flask.request.args)
start = int(flask.request.args.get("start", 0)) start = int(flask.request.args.get("start", 0))
end = int(flask.request.args.get("end", start + 90)) end = int(flask.request.args.get("end", start + 90))
app.logger.info("yes new query")
pages_ = r.table("pages").between( pages_ = r.table("pages").between(
[site_id, 1, False, r.minval], [site_id, 1, r.minval], [site_id, r.maxval, r.maxval],
[site_id, r.maxval, False, r.maxval], index="least_hops").order_by(index="least_hops")[start:end].run()
index="priority_by_site")[start:end].run()
return flask.jsonify(pages=list(pages_)) return flask.jsonify(pages=list(pages_))
@app.route("/api/sites/<site_id>") @app.route("/api/sites/<site_id>")
@ -110,6 +120,14 @@ def job(job_id):
job_ = r.table("jobs").get(job_id).run() job_ = r.table("jobs").get(job_id).run()
return flask.jsonify(job_) return flask.jsonify(job_)
@app.route("/api/jobs/<int:job_id>/yaml")
@app.route("/api/job/<int:job_id>/yaml")
def job_yaml(job_id):
    """
    Looks up the job with the given id in the "jobs" table and returns it
    dumped as block-style yaml with mimetype application/yaml.
    """
    job_document = r.table("jobs").get(job_id).run()
    yaml_text = yaml.dump(job_document, default_flow_style=False)
    return app.response_class(yaml_text, mimetype='application/yaml')
@app.route("/api/workers") @app.route("/api/workers")
def workers(): def workers():
workers_ = service_registry.available_services("brozzler-worker") workers_ = service_registry.available_services("brozzler-worker")

View file

@ -125,11 +125,10 @@ function loadSiteStats($http, site, job) {
$http.get("/api/sites/" + site.id + "/page_count").success(pageCountSuccessCallback(site, job)); $http.get("/api/sites/" + site.id + "/page_count").success(pageCountSuccessCallback(site, job));
$http.get("/api/sites/" + site.id + "/queued_count").success(queuedCountSuccessCallback(site, job)); $http.get("/api/sites/" + site.id + "/queued_count").success(queuedCountSuccessCallback(site, job));
// parse Warcprox-Meta to find stats bucket // look at Warcprox-Meta to find stats bucket
var warcprox_meta = angular.fromJson(site.extra_headers["Warcprox-Meta"]); for (var j = 0; j < site.warcprox_meta.stats.buckets.length; j++) {
for (var j = 0; j < warcprox_meta.stats.buckets.length; j++) { if (site.warcprox_meta.stats.buckets[j].indexOf("seed") >= 0) {
if (warcprox_meta.stats.buckets[j].indexOf("seed") >= 0) { var bucket = site.warcprox_meta.stats.buckets[j];
var bucket = warcprox_meta.stats.buckets[j];
// console.log("warcprox_meta.stats.buckets[" + j + "]=" + bucket); // console.log("warcprox_meta.stats.buckets[" + j + "]=" + bucket);
$http.get("/api/stats/" + bucket).success(statsSuccessCallback(site, bucket)); $http.get("/api/stats/" + bucket).success(statsSuccessCallback(site, bucket));
} }
@ -138,7 +137,8 @@ function loadSiteStats($http, site, job) {
brozzlerControllers.controller("JobController", ["$scope", "$routeParams", "$http", brozzlerControllers.controller("JobController", ["$scope", "$routeParams", "$http",
function($scope, $routeParams, $http) { function($scope, $routeParams, $http) {
console.log('JobController'); $scope.show_yaml = false;
// console.log('JobController');
$http.get("/api/config").success(function(data) { $http.get("/api/config").success(function(data) {
$scope.config = data.config; $scope.config = data.config;
}); });
@ -159,6 +159,9 @@ brozzlerControllers.controller("JobController", ["$scope", "$routeParams", "$htt
} }
}); });
}); });
$http.get("/api/jobs/" + $routeParams.id + "/yaml").success(function(data) {
$scope.job_yaml = data;
});
}]); }]);
brozzlerControllers.controller("SiteController", ["$scope", "$routeParams", "$http", "$window", brozzlerControllers.controller("SiteController", ["$scope", "$routeParams", "$http", "$window",

View file

@ -10,7 +10,12 @@
</div> </div>
<div> <div>
<h2>Job {{job.id}} <small>{{job.started}}-{{job.finished}} {{job.status}}</small></h2> <h2 ng-click="show_yaml = !show_yaml">
<span class="fa fa-caret-right"
ng-class="{ 'fa-caret-right': !show_yaml, 'fa-caret-down': !!show_yaml }"></span>
Job {{job.id}} <small>{{job.started}}-{{job.finished}}: {{job.status}}</small>
</h2>
<pre style="display:{{show_yaml?'block':'none'}}">{{job_yaml}}</pre>
<div class="row bigstats"> <div class="row bigstats">
<div class="col-sm-6 col-md-3"> <div class="col-sm-6 col-md-3">

View file

@ -40,12 +40,22 @@
<div class="col-sm-12"> <div class="col-sm-12">
<h2>Pages</h2> <h2>Pages</h2>
<div class="col-sm-6 col-md-4" ng-repeat="page in pages"> <div class="col-sm-6 col-md-4" ng-repeat="page in pages">
<a class="thumbnail" href="{{config.WAYBACK_BASEURL}}/3/{{page.url}}"> <div class="thumbnail">
<img style="width:300px;height:190px" src="{{config.WAYBACK_BASEURL}}/3/thumbnail:{{page.url}}" alt="thumb"> <img style="border:1px solid #ddd;width:300px;height:190px" src="{{config.WAYBACK_BASEURL}}/3/thumbnail:{{page.url}}" alt="thumb">
<div class="caption"> <div class="caption">
<h5>{{page.url}}</h5> <h5>{{page.url}}</h5>
<ul class="fa-ul">
<li>
<span class="fa fa-li fa-camera"></span>
<a target="_blank" href="{{config.WAYBACK_BASEURL}}/3/screenshot:{{page.url}}">full size screenshot &gt;</a>
</li>
<li>
<span class="fa fa-li fa-university"></span>
<a target="_blank" href="{{config.WAYBACK_BASEURL}}/3/{{page.url}}">wayback &gt;</a>
</li>
</ul>
</div> </div>
</a> </div>
</div> </div>
</div> </div>
<div class="col-sm-12" ng-show="loading"> <div class="col-sm-12" ng-show="loading">

View file

@ -1,2 +1,4 @@
git+https://github.com/mitsuhiko/flask.git
rethinkstuff>=0.1.5 rethinkstuff>=0.1.5
flask>=0.11
gunicorn
PyYAML