Merge branch 'master' into qa

2025-07-14 10:39:27 -04:00 · 2016-06-22 17:45:44 -07:00 · 2016-06-22 17:45:44 -07:00 · 8f726eac76
commit 8f726eac76
parent 66d697e662 366e467501
13 changed files with 360 additions and 84 deletions
--- a/README.rst
+++ b/README.rst
@ -20,19 +20,56 @@ archiving.
 Installation
 ------------

-XXX These instructions don't work at the moment. Brozzler requires some
-customized packages not easily installable in the outside world. I intend to
-remedy the situation soon.
-
 ::

    # set up virtualenv if desired
-    pip install git+https://github.com/nlevitt/brozzler.git
+    pip install brozzler

 Brozzler also requires a rethinkdb deployment.

-Fonts for good screenshots
--------------------------
+Usage
+-----
+
+Launch one or more workers:
+
+::
+
+    brozzler-worker -e chromium
+
+Submit jobs:
+
+::
+
+    brozzler-new-job myjob.yaml
+
+Job Configuration
+-----------------
+
+Jobs are defined using YAML files. Options may be specified either at the
+top level or on individual seeds. A job id and at least one seed URL
+must be specified; everything else is optional.
+
+::
+
+    id: myjob
+    time_limit: 60 # seconds
+    proxy: 127.0.0.1:8000 # point at warcprox for archiving
+    ignore_robots: false
+    enable_warcprox_features: false
+    warcprox_meta: null
+    metadata: {}
+    seeds:
+      - url: http://one.example.org/
+      - url: http://two.example.org/
+        time_limit: 30
+      - url: http://three.example.org/
+        time_limit: 10
+        ignore_robots: true
+        scope:
+          surt: http://(org,example,
+
+Fonts (for decent screenshots)
+------------------------------

 On ubuntu 14.04 trusty I installed these packages:

@ -42,12 +79,10 @@ fonts-arphic-ukai fonts-farsiweb fonts-nafees fonts-sil-abyssinica
 fonts-sil-ezra fonts-sil-padauk fonts-unfonts-extra fonts-unfonts-core
 ttf-indic-fonts fonts-thai-tlwg fonts-lklug-sinhala

-Haven't looked much at the resulting screenshots yet though.
-
 License
 -------

-Copyright 2015 Internet Archive
+Copyright 2015-2016 Internet Archive

 Licensed under the Apache License, Version 2.0 (the "License"); you may
 not use this software except in compliance with the License. You may
--- a/bin/brozzler-new-site
+++ b/bin/brozzler-new-site
@ -27,6 +27,7 @@ import re
 import rethinkstuff
 import warnings
 import requests
+import json

 arg_parser = argparse.ArgumentParser(prog=os.path.basename(__file__),
        description="brozzler-new-site - register site to brozzle",
@ -63,7 +64,7 @@ site = brozzler.Site(
        time_limit=int(args.time_limit) if args.time_limit else None,
        ignore_robots=args.ignore_robots,
        enable_warcprox_features=args.enable_warcprox_features,
-        warcprox_meta=json.loads(args.warcprox_meta))
+        warcprox_meta=json.loads(args.warcprox_meta) if args.warcprox_meta else None)

 r = rethinkstuff.Rethinker(args.rethinkdb_servers.split(","), args.rethinkdb_db)
 frontier = brozzler.RethinkDbFrontier(r)
--- a/brozzler/behaviors.d/fec_gov.js
+++ b/brozzler/behaviors.d/fec_gov.js
@ -0,0 +1,159 @@
+/*
+ * brozzler/behaviors.d/fec_gov.js - click on links that execute JavaScript to
+ * download report csv files for fec.gov/data
+ *
+ * Copyright (C) 2014-2016 Internet Archive
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+var umbraBehavior = {
+	IDLE_TIMEOUT_SEC : 10,
+	idleSince : null,
+	alreadyClicked : {},
+
+	intervalFunc : function() {
+		var clickedSomething = false;
+		var somethingLeftBelow = false;
+		var somethingLeftAbove = false;
+		var cssDownloadLinkSelector = "a[id^='id_csv']";
+
+		var iframes = document.querySelectorAll("iframe");
+		var documents = Array(iframes.length + 1);
+		documents[0] = document;
+
+		for (var i = 0; i < iframes.length; i++) {
+			documents[i+1] = iframes[i].contentWindow.document;
+		}
+
+		for (var j = 0; j < documents.length; j++) {
+			var clickDownloadLinkTargets = documents[j].querySelectorAll(cssDownloadLinkSelector);
+			for (var i = 0; i < clickDownloadLinkTargets.length; i++) {
+				var sourceName = clickDownloadLinkTargets[i].id.substring(7);
+				var clickRadioButtonTargets = documents[j].querySelectorAll("input[name='" + sourceName + "']");
+
+				if (clickRadioButtonTargets.length == 0) {
+					if (clickDownloadLinkTargets[i].umbraClicked) {
+						continue;
+					}
+
+					var mouseOverEvent = document.createEvent('Events');
+					mouseOverEvent.initEvent("mouseover",true, false);
+					clickDownloadLinkTargets[i].dispatchEvent(mouseOverEvent);
+					clickDownloadLinkTargets[i].click(); //click the link to download the csv
+					clickedSomething = true;
+					this.idleSince = null;
+					clickDownloadLinkTargets[i].umbraClicked = true;
+				}
+				else {
+					for (var k = 0; k < clickRadioButtonTargets.length; ++k) {
+						if (clickRadioButtonTargets[k].umbraClicked) {
+							continue;
+						}
+
+						var where = this.aboveBelowOrOnScreen(clickRadioButtonTargets[k]);
+						if (where == 0) {
+							console.log("clicking on " + clickRadioButtonTargets[k]);
+							// do mouse over event on click target
+							// since some urls are requested only on
+							// this event - see
+							// https://webarchive.jira.com/browse/AITFIVE-451
+							var mouseOverEvent = document.createEvent('Events');
+							mouseOverEvent.initEvent("mouseover",true, false);
+							clickRadioButtonTargets[k].dispatchEvent(mouseOverEvent);
+							clickRadioButtonTargets[k].click(); //select the correct date with the radio button
+							mouseOverEvent = document.createEvent('Events');
+							mouseOverEvent.initEvent("mouseover",true, false);
+							clickDownloadLinkTargets[i].dispatchEvent(mouseOverEvent);
+							clickDownloadLinkTargets[i].click(); //click the link to download the csv for the selected date
+							clickedSomething = true;
+							this.idleSince = null;
+							clickRadioButtonTargets[k].umbraClicked = true;
+
+							//alert("clicking on " + clickRadioButtonTargets[k].name);
+							//alert("clicking on " + clickDownloadLinkTargets[i].id);
+
+
+							break; //break from clickTargets loop, but not from iframe loop
+						} else if (where > 0) {
+							somethingLeftBelow = true;
+						} else if (where < 0) {
+							somethingLeftAbove = true;
+						}
+					}
+				}
+
+			}
+		}
+
+		if (!clickedSomething) {
+			if (somethingLeftAbove) {
+				// console.log("scrolling UP because everything on this screen has been clicked but we missed something above");
+				window.scrollBy(0, -500);
+				this.idleSince = null;
+			} else if (somethingLeftBelow) {
+				// console.log("scrolling because everything on this screen has been clicked but there's more below document.body.clientHeight="
+				// 				+ document.body.clientHeight);
+				window.scrollBy(0, 200);
+				this.idleSince = null;
+			} else if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) {
+				// console.log("scrolling because we're not to the bottom yet document.body.clientHeight="
+				// 				+ document.body.clientHeight);
+				window.scrollBy(0, 200);
+				this.idleSince = null;
+			} else if (this.idleSince == null) {
+				this.idleSince = Date.now();
+			}
+		}
+
+		if (!this.idleSince) {
+			this.idleSince = Date.now();
+		}
+	},
+
+	start : function() {
+		var that = this;
+		this.intervalId = setInterval(function() {
+			that.intervalFunc()
+		}, 250);
+	},
+
+	isFinished : function() {
+		if (this.idleSince != null) {
+			var idleTimeMs = Date.now() - this.idleSince;
+			if (idleTimeMs / 1000 > this.IDLE_TIMEOUT_SEC) {
+				clearInterval(this.intervalId);
+				return true;
+			}
+		}
+		return false;
+	},
+
+	aboveBelowOrOnScreen : function(e) {
+		var eTop = e.getBoundingClientRect().top;
+		if (eTop < window.scrollY) {
+			return -1; // above
+		} else if (eTop > window.scrollY + window.innerHeight) {
+			return 1; // below
+		} else {
+			return 0; // on screen
+		}
+	},
+};
+
+// Called from outside of this script.
+var umbraBehaviorFinished = function() {
+	return umbraBehavior.isFinished()
+};
+
+umbraBehavior.start();
--- a/brozzler/behaviors.yaml
+++ b/brozzler/behaviors.yaml
@ -98,6 +98,10 @@ behaviors:
      click_css_selector: button[data-more-results-bottom-button]
      click_until_hard_timeout: True
   request_idle_timeout_sec: 10
+ - # https://webarchive.jira.com/browse/ARI-4692
+   url_regex: '^https?://(?:www\.)?fec.gov/data/.*$'
+   behavior_js: fec_gov.js
+   request_idle_timeout_sec: 10
 - # default fallback behavior
   url_regex: '^.*$'
   request_idle_timeout_sec: 10
--- a/brozzler/browser.py
+++ b/brozzler/browser.py
@ -1,21 +1,21 @@
-#
-# brozzler/browser.py - classes responsible for running web browsers
-# (chromium/chromium) and browsing web pages in them
-#
-# Copyright (C) 2014-2016 Internet Archive
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
+'''
+brozzler/browser.py - classes responsible for running web browsers
+(chrome/chromium) and browsing web pages in them
+
+Copyright (C) 2014-2016 Internet Archive
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''

 import logging
 import json
@ -58,7 +58,10 @@ class BrowserPool:
        self.logger.info("browser ports: {}".format([browser.chrome_port for browser in self._available]))

    def acquire(self):
-        """Returns browser from pool if available, raises NoBrowsersAvailable otherwise."""
+        """
+        Returns browser from pool if available, raises NoBrowsersAvailable
+        otherwise.
+        """
        with self._lock:
            try:
                browser = self._available.pop()
@ -277,11 +280,23 @@ class Browser:
            self.logger.info("retrieving outlinks for %s", self.url)
            self._waiting_on_outlinks_msg_id = self.send_to_chrome(
                    method="Runtime.evaluate",
-                    params={"expression":"Array.prototype.slice.call(document.querySelectorAll('a[href]')).join(' ')"})
+                    params={"expression": self.OUTLINKS_JS})
            return False
        else: # self._waiting_on_outlinks_msg_id
            return False

+    OUTLINKS_JS = """
+var compileOutlinks = function(frame) {
+    var outlinks = Array.prototype.slice.call(
+            frame.document.querySelectorAll('a[href]'));
+    for (var i = 0; i < frame.frames.length; i++) {
+        outlinks = outlinks.concat(compileOutlinks(frame.frames[i]));
+    }
+    return outlinks;
+}
+compileOutlinks(window).join(' ');
+"""
+
    def _browse_interval_func(self):
        """Called periodically while page is being browsed. Returns True when
        finished browsing."""
@ -393,7 +408,8 @@ class Browser:
            self._waiting_on_scroll_to_top_msg_id = None
        elif message["id"] == self._waiting_on_outlinks_msg_id:
            self.logger.debug("got outlinks message=%s", message)
-            self._outlinks = frozenset(message["result"]["result"]["value"].split(" "))
+            self._outlinks = frozenset(
+                    message["result"]["result"]["value"].split())
        elif message["id"] == self._waiting_on_document_url_msg_id:
            if message["result"]["result"]["value"] != self.url:
                if self.on_url_change:
--- a/brozzler/frontier.py
+++ b/brozzler/frontier.py
@ -39,21 +39,43 @@ class RethinkDbFrontier:
    def _ensure_db(self):
        dbs = self.r.db_list().run()
        if not self.r.dbname in dbs:
-            self.logger.info("creating rethinkdb database %s", repr(self.r.dbname))
+            self.logger.info(
+                    "creating rethinkdb database %s", repr(self.r.dbname))
            self.r.db_create(self.r.dbname).run()
        tables = self.r.table_list().run()
        if not "sites" in tables:
-            self.logger.info("creating rethinkdb table 'sites' in database %s", repr(self.r.dbname))
-            self.r.table_create("sites", shards=self.shards, replicas=self.replicas).run()
-            self.r.table("sites").index_create("sites_last_disclaimed", [self.r.row["status"], self.r.row["last_disclaimed"]]).run()
+            self.logger.info(
+                    "creating rethinkdb table 'sites' in database %s",
+                    repr(self.r.dbname))
+            self.r.table_create(
+                    "sites", shards=self.shards, replicas=self.replicas).run()
+            self.r.table("sites").index_create(
+                    "sites_last_disclaimed", [
+                        self.r.row["status"],
+                        self.r.row["last_disclaimed"]]).run()
            self.r.table("sites").index_create("job_id").run()
        if not "pages" in tables:
-            self.logger.info("creating rethinkdb table 'pages' in database %s", repr(self.r.dbname))
-            self.r.table_create("pages", shards=self.shards, replicas=self.replicas).run()
-            self.r.table("pages").index_create("priority_by_site", [self.r.row["site_id"], self.r.row["brozzle_count"], self.r.row["claimed"], self.r.row["priority"]]).run()
+            self.logger.info(
+                    "creating rethinkdb table 'pages' in database %s",
+                    repr(self.r.dbname))
+            self.r.table_create(
+                    "pages", shards=self.shards, replicas=self.replicas).run()
+            self.r.table("pages").index_create(
+                    "priority_by_site", [
+                        self.r.row["site_id"], self.r.row["brozzle_count"],
+                        self.r.row["claimed"], self.r.row["priority"]]).run()
+            # this index is for displaying pages in a sensible order in the web
+            # console
+            self.r.table("pages").index_create(
+                    "least_hops", [
+                        r.row["site_id"], r.row["brozzle_count"],
+                        r.row["hops_from_seed"]])
        if not "jobs" in tables:
-            self.logger.info("creating rethinkdb table 'jobs' in database %s", repr(self.r.dbname))
-            self.r.table_create("jobs", shards=self.shards, replicas=self.replicas).run()
+            self.logger.info(
+                    "creating rethinkdb table 'jobs' in database %s",
+                    repr(self.r.dbname))
+            self.r.table_create(
+                    "jobs", shards=self.shards, replicas=self.replicas).run()

    def _vet_result(self, result, **kwargs):
        # self.logger.debug("vetting expected=%s result=%s", kwargs, result)
--- a/setup.py
+++ b/setup.py
@ -19,10 +19,11 @@
 import setuptools
 import glob

-setuptools.setup(name='brozzler',
-        version='1.1.dev10',
+setuptools.setup(
+        name='brozzler',
+        version='1.1.dev20',
        description='Distributed web crawling with browsers',
-        url='https://github.com/nlevitt/brozzler',
+        url='https://github.com/internetarchive/brozzler',
        author='Noah Levitt',
        author_email='nlevitt@archive.org',
        long_description=open('README.rst', encoding='UTF-8').read(),
@ -41,10 +42,10 @@ setuptools.setup(name='brozzler',
            'rethinkstuff',
            'rethinkdb>=2.3,<2.4',
            'psutil',
-            ],
+        ],
        zip_safe=False,
        classifiers=[
-            'Development Status :: 3 - Alpha',
+            'Development Status :: 4 - Beta',
            'Environment :: Console',
            'License :: OSI Approved :: Apache Software License',
            'Programming Language :: Python :: 3.4',
--- a/webconsole/README.rst
+++ b/webconsole/README.rst
@ -1 +1 @@
-flask --debug --app=brozzler-webconsole.py run --host=0.0.0.0 --port=8081
+gunicorn --bind=0.0.0.0:8081 brozzler-webconsole:app
--- a/webconsole/brozzler-webconsole/init.py
+++ b/webconsole/brozzler-webconsole/init.py
@ -1,21 +1,21 @@
-#
-# brozzler-webconsole/__init__.py - flask app for brozzler web console, defines
-# api endspoints etc
-#
-# Copyright (C) 2014-2016 Internet Archive
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
+'''
+brozzler-webconsole/__init__.py - flask app for brozzler web console, defines
+api endpoints etc
+
+Copyright (C) 2014-2016 Internet Archive
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''

 import flask
 import rethinkstuff
@ -24,16 +24,26 @@ import sys
 import os
 import importlib
 import rethinkdb
+import logging
+import yaml

-# XXX flask does its own logging config
-# import logging
-# logging.basicConfig(stream=sys.stdout, level=logging.INFO,
-#         format="%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s")
+# flask does its own logging config
+# logging.basicConfig(
+#         stream=sys.stdout, level=logging.INFO,
+#         format=(
+#             "%(asctime)s %(process)d %(levelname)s %(threadName)s "
+#             "%(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s"))

 app = flask.Flask(__name__)

+# http://stackoverflow.com/questions/26578733/why-is-flask-application-not-creating-any-logs-when-hosted-by-gunicorn
+gunicorn_error_logger = logging.getLogger('gunicorn.error')
+app.logger.handlers.extend(gunicorn_error_logger.handlers)
+app.logger.setLevel(logging.INFO)
+app.logger.info('will this show in the log?')
+
 # configure with environment variables
-SETTINGS= {
+SETTINGS = {
    'RETHINKDB_SERVERS': os.environ.get(
        'RETHINKDB_SERVERS', 'localhost').split(','),
    'RETHINKDB_DB': os.environ.get('RETHINKDB_DB', 'brozzler'),
@ -81,10 +91,10 @@ def pages(site_id):
    app.logger.info("flask.request.args=%s", flask.request.args)
    start = int(flask.request.args.get("start", 0))
    end = int(flask.request.args.get("end", start + 90))
+    app.logger.info("yes new query")
    pages_ = r.table("pages").between(
-            [site_id, 1, False, r.minval],
-            [site_id, r.maxval, False, r.maxval],
-            index="priority_by_site")[start:end].run()
+            [site_id, 1, r.minval], [site_id, r.maxval, r.maxval],
+            index="least_hops").order_by(index="least_hops")[start:end].run()
    return flask.jsonify(pages=list(pages_))

@app.route("/api/sites/<site_id>")
@ -110,6 +120,14 @@ def job(job_id):
    job_ = r.table("jobs").get(job_id).run()
    return flask.jsonify(job_)

+@app.route("/api/jobs/<int:job_id>/yaml")
+@app.route("/api/job/<int:job_id>/yaml")
+def job_yaml(job_id):
+    job_ = r.table("jobs").get(job_id).run()
+    return app.response_class(
+            yaml.dump(job_, default_flow_style=False),
+            mimetype='application/yaml')
+
@app.route("/api/workers")
 def workers():
    workers_ = service_registry.available_services("brozzler-worker")
--- a/webconsole/brozzler-webconsole/static/js/app.js
+++ b/webconsole/brozzler-webconsole/static/js/app.js
@ -125,11 +125,10 @@ function loadSiteStats($http, site, job) {
    $http.get("/api/sites/" + site.id + "/page_count").success(pageCountSuccessCallback(site, job));
    $http.get("/api/sites/" + site.id + "/queued_count").success(queuedCountSuccessCallback(site, job));

-    // parse Warcprox-Meta to find stats bucket
-    var warcprox_meta = angular.fromJson(site.extra_headers["Warcprox-Meta"]);
-    for (var j = 0; j < warcprox_meta.stats.buckets.length; j++) {
-        if (warcprox_meta.stats.buckets[j].indexOf("seed") >= 0) {
-            var bucket = warcprox_meta.stats.buckets[j];
+    // look at Warcprox-Meta to find stats bucket
+    for (var j = 0; j < site.warcprox_meta.stats.buckets.length; j++) {
+        if (site.warcprox_meta.stats.buckets[j].indexOf("seed") >= 0) {
+            var bucket = site.warcprox_meta.stats.buckets[j];
            // console.log("warcprox_meta.stats.buckets[" + j + "]=" + bucket);
            $http.get("/api/stats/" + bucket).success(statsSuccessCallback(site, bucket));
        }
@ -138,7 +137,8 @@ function loadSiteStats($http, site, job) {

 brozzlerControllers.controller("JobController", ["$scope", "$routeParams", "$http",
    function($scope, $routeParams, $http) {
-        console.log('JobController');
+        $scope.show_yaml = false;
+        // console.log('JobController');
        $http.get("/api/config").success(function(data) {
            $scope.config = data.config;
        });
@ -159,6 +159,9 @@ brozzlerControllers.controller("JobController", ["$scope", "$routeParams", "$htt
                }
            });
        });
+        $http.get("/api/jobs/" + $routeParams.id + "/yaml").success(function(data) {
+            $scope.job_yaml = data;
+        });
    }]);

 brozzlerControllers.controller("SiteController", ["$scope", "$routeParams", "$http", "$window",
--- a/webconsole/brozzler-webconsole/static/partials/job.html
+++ b/webconsole/brozzler-webconsole/static/partials/job.html
@ -10,7 +10,12 @@
 </div>

 <div>
-    <h2>Job {{job.id}} <small>{{job.started}}-{{job.finished}} {{job.status}}</small></h2>
+    <h2 ng-click="show_yaml = !show_yaml">
+        <span class="fa fa-caret-right"
+            ng-class="{ 'fa-caret-right': !show_yaml, 'fa-caret-down': !!show_yaml }"></span>
+        Job {{job.id}} <small>{{job.started}}-{{job.finished}}: {{job.status}}</small>
+    </h2>
+    <pre style="display:{{show_yaml?'block':'none'}}">{{job_yaml}}</pre>

    <div class="row bigstats">
        <div class="col-sm-6 col-md-3">
--- a/webconsole/brozzler-webconsole/static/partials/site.html
+++ b/webconsole/brozzler-webconsole/static/partials/site.html
@ -40,12 +40,22 @@
        <div class="col-sm-12">
            <h2>Pages</h2>
            <div class="col-sm-6 col-md-4" ng-repeat="page in pages">
-                <a class="thumbnail" href="{{config.WAYBACK_BASEURL}}/3/{{page.url}}">
-                    <img style="width:300px;height:190px" src="{{config.WAYBACK_BASEURL}}/3/thumbnail:{{page.url}}" alt="thumb">
+                <div class="thumbnail">
+                    <img style="border:1px solid #ddd;width:300px;height:190px" src="{{config.WAYBACK_BASEURL}}/3/thumbnail:{{page.url}}" alt="thumb">
                    <div class="caption">
                        <h5>{{page.url}}</h5>
+                        <ul class="fa-ul">
+                            <li>
+                            <span class="fa fa-li fa-camera"></span>
+                            <a target="_blank" href="{{config.WAYBACK_BASEURL}}/3/screenshot:{{page.url}}">full size screenshot &gt;</a>
+                            </li>
+                            <li>
+                            <span class="fa fa-li fa-university"></span>
+                            <a target="_blank" href="{{config.WAYBACK_BASEURL}}/3/{{page.url}}">wayback &gt;</a>
+                            </li>
+                        </ul>
                    </div>
-                </a>
+                </div>
            </div>
        </div>
        <div class="col-sm-12" ng-show="loading">
--- a/webconsole/requirements.txt
+++ b/webconsole/requirements.txt
@ -1,2 +1,4 @@
-git+https://github.com/mitsuhiko/flask.git
 rethinkstuff>=0.1.5
+flask>=0.11
+gunicorn
+PyYAML