Merge branch 'master' into qa

2025-08-12 16:25:34 -04:00 · 2016-06-22 17:45:44 -07:00 · 2016-06-22 17:45:44 -07:00 · 8f726eac76
commit 8f726eac76
parent 66d697e662 366e467501
13 changed files with 360 additions and 84 deletions
--- a/README.rst
+++ b/README.rst
@ -20,19 +20,56 @@ archiving.
 Installation
 ------------
 XXX These instructions don't work at the moment. Brozzler requires some
 customized packages not easily installable in the outside world. I intend to
 remedy the situation soon.
 ::
    # set up virtualenv if desired
-    pip install git+https://github.com/nlevitt/brozzler.git
+    pip install brozzler
 Brozzler also requires a rethinkdb deployment.
-Fonts for good screenshots
+Usage
--------------------------
+-----
 Launch one or more workers:
 ::
    brozzler-worker -e chromium
 Submit jobs:
 ::
    brozzler-new-job myjob.yaml
 Job Configuration
 -----------------
 Jobs are defined using yaml files. Options may be specified either at the
 top level or on individual seeds. A job id and at least one seed url
 must be specified; everything else is optional.
 ::
    id: myjob
    time_limit: 60 # seconds
    proxy: 127.0.0.1:8000 # point at warcprox for archiving
    ignore_robots: false
    enable_warcprox_features: false
    warcprox_meta: null
    metadata: {}
    seeds:
      - url: http://one.example.org/
      - url: http://two.example.org/
        time_limit: 30
      - url: http://three.example.org/
        time_limit: 10
        ignore_robots: true
        scope:
          surt: http://(org,example,
 Fonts (for decent screenshots)
 ------------------------------
 On ubuntu 14.04 trusty I installed these packages:
@ -42,12 +79,10 @@ fonts-arphic-ukai fonts-farsiweb fonts-nafees fonts-sil-abyssinica
 fonts-sil-ezra fonts-sil-padauk fonts-unfonts-extra fonts-unfonts-core
 ttf-indic-fonts fonts-thai-tlwg fonts-lklug-sinhala
 Haven't looked much at the resulting screenshots yet though.
 License
 -------
-Copyright 2015 Internet Archive
+Copyright 2015-2016 Internet Archive
 Licensed under the Apache License, Version 2.0 (the "License"); you may
 not use this software except in compliance with the License. You may
--- a/bin/brozzler-new-site
+++ b/bin/brozzler-new-site
@ -27,6 +27,7 @@ import re
 import rethinkstuff
 import warnings
 import requests
 import json
 arg_parser = argparse.ArgumentParser(prog=os.path.basename(__file__),
        description="brozzler-new-site - register site to brozzle",
@ -63,7 +64,7 @@ site = brozzler.Site(
        time_limit=int(args.time_limit) if args.time_limit else None,
        ignore_robots=args.ignore_robots,
        enable_warcprox_features=args.enable_warcprox_features,
-        warcprox_meta=json.loads(args.warcprox_meta))
+        warcprox_meta=json.loads(args.warcprox_meta) if args.warcprox_meta else None)
 r = rethinkstuff.Rethinker(args.rethinkdb_servers.split(","), args.rethinkdb_db)
 frontier = brozzler.RethinkDbFrontier(r)
--- a/brozzler/behaviors.d/fec_gov.js
+++ b/brozzler/behaviors.d/fec_gov.js
@ -0,0 +1,159 @@
 /*
 * brozzler/behaviors.d/fec_gov.js - click on links that execute JavaScript to
 * download report csv files for fec.gov/data
 *
 * Copyright (C) 2014-2016 Internet Archive
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /*
  * Behavior object for fec.gov/data pages. Every 250ms it looks through the
  * top-level document and every readable iframe for csv download links
  * (anchors whose id starts with "id_csv"), clicks each one once (selecting
  * each associated radio button first, when there are any), and scrolls the
  * page to bring not-yet-clicked radio buttons into view. The behavior is
  * finished once nothing has been clicked or scrolled for IDLE_TIMEOUT_SEC
  * seconds.
  */
 var umbraBehavior = {
 	// seconds without activity after which isFinished() returns true
 	IDLE_TIMEOUT_SEC : 10,
 	// timestamp (ms since epoch) of the most recent activity, null before the first tick
 	idleSince : null,
 	// not used by this behavior; kept so the object's shape is unchanged
 	alreadyClicked : {},
 	/*
 	 * One tick of the behavior: click whatever can be clicked right now,
 	 * otherwise scroll towards remaining targets or the bottom of the page.
 	 */
 	intervalFunc : function() {
 		var clickedSomething = false;
 		var somethingLeftBelow = false;
 		var somethingLeftAbove = false;
 		var cssDownloadLinkSelector = "a[id^='id_csv']";
 		var documents = this.collectDocuments();
 		for (var j = 0; j < documents.length; j++) {
 			var clickDownloadLinkTargets = documents[j].querySelectorAll(cssDownloadLinkSelector);
 			for (var i = 0; i < clickDownloadLinkTargets.length; i++) {
 				var downloadLink = clickDownloadLinkTargets[i];
 				// the part of the link id after the first 7 characters is
 				// used as the name of the radio button group for this link
 				var sourceName = downloadLink.id.substring(7);
 				var clickRadioButtonTargets = documents[j].querySelectorAll("input[name='" + sourceName + "']");
 				if (clickRadioButtonTargets.length === 0) {
 					// no radio buttons: a single csv, click the link once
 					if (downloadLink.umbraClicked) {
 						continue;
 					}
 					this.mouseOverAndClick(downloadLink); // click the link to download the csv
 					clickedSomething = true;
 					this.idleSince = null;
 					downloadLink.umbraClicked = true;
 				} else {
 					for (var k = 0; k < clickRadioButtonTargets.length; ++k) {
 						var radioButton = clickRadioButtonTargets[k];
 						if (radioButton.umbraClicked) {
 							continue;
 						}
 						var where = this.aboveBelowOrOnScreen(radioButton);
 						if (where === 0) {
 							console.log("clicking on " + radioButton);
 							// select the correct date with the radio button,
 							// then click the link to download the csv for
 							// the selected date
 							this.mouseOverAndClick(radioButton);
 							this.mouseOverAndClick(downloadLink);
 							clickedSomething = true;
 							this.idleSince = null;
 							radioButton.umbraClicked = true;
 							// at most one radio button per link per tick;
 							// carry on with the remaining links and frames
 							break;
 						} else if (where > 0) {
 							somethingLeftBelow = true;
 						} else if (where < 0) {
 							somethingLeftAbove = true;
 						}
 					}
 				}
 			}
 		}
 		if (!clickedSomething) {
 			if (somethingLeftAbove) {
 				// everything on this screen has been clicked but we
 				// missed something above
 				window.scrollBy(0, -500);
 				this.idleSince = null;
 			} else if (somethingLeftBelow
 					|| window.scrollY + window.innerHeight < document.documentElement.scrollHeight) {
 				// more targets below, or simply not at the bottom yet
 				window.scrollBy(0, 200);
 				this.idleSince = null;
 			}
 		}
 		// idleSince is null here either on the very first tick or because
 		// this tick clicked or scrolled; in both cases (re)start the idle
 		// clock now, so that idleSince is the time of the last activity
 		if (!this.idleSince) {
 			this.idleSince = Date.now();
 		}
 	},
 	/* Starts running intervalFunc every 250ms. */
 	start : function() {
 		var that = this;
 		this.intervalId = setInterval(function() {
 			that.intervalFunc();
 		}, 250);
 	},
 	/*
 	 * Returns true, and stops the interval timer, once more than
 	 * IDLE_TIMEOUT_SEC seconds have passed since the last activity;
 	 * returns false otherwise.
 	 */
 	isFinished : function() {
 		if (this.idleSince != null) {
 			var idleTimeMs = Date.now() - this.idleSince;
 			if (idleTimeMs / 1000 > this.IDLE_TIMEOUT_SEC) {
 				clearInterval(this.intervalId);
 				return true;
 			}
 		}
 		return false;
 	},
 	/*
 	 * Returns -1, 1 or 0 depending on whether the top edge of element e is
 	 * above, below or inside the visible viewport.
 	 *
 	 * getBoundingClientRect() is already relative to the viewport, so the
 	 * top edge is compared against 0 and window.innerHeight; adding
 	 * window.scrollY would misclassify visible elements as "above" as soon
 	 * as the page has been scrolled.
 	 * NOTE(review): for elements inside an iframe the rect is relative to
 	 * the iframe's own viewport - confirm that is acceptable for fec.gov.
 	 */
 	aboveBelowOrOnScreen : function(e) {
 		var eTop = e.getBoundingClientRect().top;
 		if (eTop < 0) {
 			return -1; // above
 		} else if (eTop > window.innerHeight) {
 			return 1; // below
 		} else {
 			return 0; // on screen
 		}
 	},
 	/*
 	 * Returns an array holding the top-level document followed by the
 	 * document of every iframe that can be read. Frames whose document is
 	 * not accessible (cross-origin or detached) are skipped, so that one
 	 * such frame does not abort every tick of the behavior.
 	 */
 	collectDocuments : function() {
 		var documents = [document];
 		var iframes = document.querySelectorAll("iframe");
 		for (var i = 0; i < iframes.length; i++) {
 			try {
 				var frameDocument = iframes[i].contentWindow.document;
 				if (frameDocument) {
 					documents.push(frameDocument);
 				}
 			} catch (e) {
 				// deliberately not logged: this runs four times a second
 				// and an unreadable frame stays unreadable
 			}
 		}
 		return documents;
 	},
 	/*
 	 * Dispatches a bubbling mouseover event on target and then clicks it.
 	 * The mouseover comes first since some urls are requested only on
 	 * this event - see https://webarchive.jira.com/browse/AITFIVE-451
 	 */
 	mouseOverAndClick : function(target) {
 		var mouseOverEvent = document.createEvent('Events');
 		mouseOverEvent.initEvent("mouseover", true, false);
 		target.dispatchEvent(mouseOverEvent);
 		target.click();
 	},
 };
 /*
  * Entry point called from outside of this script: reports whatever
  * umbraBehavior.isFinished() currently returns.
  */
 function umbraBehaviorFinished() {
 	return umbraBehavior.isFinished();
 }
 // Begin the periodic click/scroll loop as soon as this script is evaluated.
 umbraBehavior.start();
--- a/brozzler/behaviors.yaml
+++ b/brozzler/behaviors.yaml
@ -98,6 +98,10 @@ behaviors:
      click_css_selector: button[data-more-results-bottom-button]
      click_until_hard_timeout: True
   request_idle_timeout_sec: 10
 - # https://webarchive.jira.com/browse/ARI-4692
   url_regex: '^https?://(?:www\.)?fec.gov/data/.*$'
   behavior_js: fec_gov.js
   request_idle_timeout_sec: 10
 - # default fallback behavior
   url_regex: '^.*$'
   request_idle_timeout_sec: 10
--- a/brozzler/browser.py
+++ b/brozzler/browser.py
@ -1,21 +1,21 @@
-#
+'''
-# brozzler/browser.py - classes responsible for running web browsers
+brozzler/browser.py - classes responsible for running web browsers
-# (chromium/chromium) and browsing web pages in them
+(chromium/chromium) and browsing web pages in them
-#
+
-# Copyright (C) 2014-2016 Internet Archive
+Copyright (C) 2014-2016 Internet Archive
-#
+
-# Licensed under the Apache License, Version 2.0 (the "License");
+Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
+you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+You may obtain a copy of the License at
-#
+
-#     http://www.apache.org/licenses/LICENSE-2.0
+    http://www.apache.org/licenses/LICENSE-2.0
-#
+
-# Unless required by applicable law or agreed to in writing, software
+Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
+distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+See the License for the specific language governing permissions and
-# limitations under the License.
+limitations under the License.
-#
+'''
 import logging
 import json
@ -58,7 +58,10 @@ class BrowserPool:
        self.logger.info("browser ports: {}".format([browser.chrome_port for browser in self._available]))
    def acquire(self):
-        """Returns browser from pool if available, raises NoBrowsersAvailable otherwise."""
+        """
        Returns browser from pool if available, raises NoBrowsersAvailable
        otherwise.
        """
        with self._lock:
            try:
                browser = self._available.pop()
@ -277,11 +280,23 @@ class Browser:
            self.logger.info("retrieving outlinks for %s", self.url)
            self._waiting_on_outlinks_msg_id = self.send_to_chrome(
                    method="Runtime.evaluate",
-                    params={"expression":"Array.prototype.slice.call(document.querySelectorAll('a[href]')).join(' ')"})
+                    params={"expression": self.OUTLINKS_JS})
            return False
        else: # self._waiting_on_outlinks_msg_id
            return False
    OUTLINKS_JS = """
 var compileOutlinks = function(frame) {
    var outlinks = Array.prototype.slice.call(
            frame.document.querySelectorAll('a[href]'));
    for (var i = 0; i < frame.frames.length; i++) {
        outlinks = outlinks.concat(compileOutlinks(frame.frames[i]));
    }
    return outlinks;
 }
 compileOutlinks(window).join(' ');
 """
    def _browse_interval_func(self):
        """Called periodically while page is being browsed. Returns True when
        finished browsing."""
@ -393,7 +408,8 @@ class Browser:
            self._waiting_on_scroll_to_top_msg_id = None
        elif message["id"] == self._waiting_on_outlinks_msg_id:
            self.logger.debug("got outlinks message=%s", message)
-            self._outlinks = frozenset(message["result"]["result"]["value"].split(" "))
+            self._outlinks = frozenset(
                    message["result"]["result"]["value"].split())
        elif message["id"] == self._waiting_on_document_url_msg_id:
            if message["result"]["result"]["value"] != self.url:
                if self.on_url_change:
--- a/brozzler/frontier.py
+++ b/brozzler/frontier.py
@ -39,21 +39,43 @@ class RethinkDbFrontier:
    def _ensure_db(self):
        dbs = self.r.db_list().run()
        if not self.r.dbname in dbs:
-            self.logger.info("creating rethinkdb database %s", repr(self.r.dbname))
+            self.logger.info(
                    "creating rethinkdb database %s", repr(self.r.dbname))
            self.r.db_create(self.r.dbname).run()
        tables = self.r.table_list().run()
        if not "sites" in tables:
-            self.logger.info("creating rethinkdb table 'sites' in database %s", repr(self.r.dbname))
+            self.logger.info(
-            self.r.table_create("sites", shards=self.shards, replicas=self.replicas).run()
+                    "creating rethinkdb table 'sites' in database %s",
-            self.r.table("sites").index_create("sites_last_disclaimed", [self.r.row["status"], self.r.row["last_disclaimed"]]).run()
+                    repr(self.r.dbname))
            self.r.table_create(
                    "sites", shards=self.shards, replicas=self.replicas).run()
            self.r.table("sites").index_create(
                    "sites_last_disclaimed", [
                        self.r.row["status"],
                        self.r.row["last_disclaimed"]]).run()
            self.r.table("sites").index_create("job_id").run()
        if not "pages" in tables:
-            self.logger.info("creating rethinkdb table 'pages' in database %s", repr(self.r.dbname))
+            self.logger.info(
-            self.r.table_create("pages", shards=self.shards, replicas=self.replicas).run()
+                    "creating rethinkdb table 'pages' in database %s",
-            self.r.table("pages").index_create("priority_by_site", [self.r.row["site_id"], self.r.row["brozzle_count"], self.r.row["claimed"], self.r.row["priority"]]).run()
+                    repr(self.r.dbname))
            self.r.table_create(
                    "pages", shards=self.shards, replicas=self.replicas).run()
            self.r.table("pages").index_create(
                    "priority_by_site", [
                        self.r.row["site_id"], self.r.row["brozzle_count"],
                        self.r.row["claimed"], self.r.row["priority"]]).run()
            # this index is for displaying pages in a sensible order in the web
            # console
            self.r.table("pages").index_create(
                    "least_hops", [
                        r.row["site_id"], r.row["brozzle_count"],
                        r.row["hops_from_seed"]])
        if not "jobs" in tables:
-            self.logger.info("creating rethinkdb table 'jobs' in database %s", repr(self.r.dbname))
+            self.logger.info(
-            self.r.table_create("jobs", shards=self.shards, replicas=self.replicas).run()
+                    "creating rethinkdb table 'jobs' in database %s",
                    repr(self.r.dbname))
            self.r.table_create(
                    "jobs", shards=self.shards, replicas=self.replicas).run()
    def _vet_result(self, result, **kwargs):
        # self.logger.debug("vetting expected=%s result=%s", kwargs, result)
--- a/setup.py
+++ b/setup.py
@ -19,10 +19,11 @@
 import setuptools
 import glob
-setuptools.setup(name='brozzler',
+setuptools.setup(
-        version='1.1.dev10',
+        name='brozzler',
        version='1.1.dev20',
        description='Distributed web crawling with browsers',
-        url='https://github.com/nlevitt/brozzler',
+        url='https://github.com/internetarchive/brozzler',
        author='Noah Levitt',
        author_email='nlevitt@archive.org',
        long_description=open('README.rst', encoding='UTF-8').read(),
@ -41,10 +42,10 @@ setuptools.setup(name='brozzler',
            'rethinkstuff',
            'rethinkdb>=2.3,<2.4',
            'psutil',
-            ],
+        ],
        zip_safe=False,
        classifiers=[
-            'Development Status :: 3 - Alpha',
+            'Development Status :: 4 - Beta',
            'Environment :: Console',
            'License :: OSI Approved :: Apache Software License',
            'Programming Language :: Python :: 3.4',
--- a/webconsole/README.rst
+++ b/webconsole/README.rst
@ -1 +1 @@
-flask --debug --app=brozzler-webconsole.py run --host=0.0.0.0 --port=8081
+gunicorn --bind=0.0.0.0:8081 brozzler-webconsole:app
--- a/webconsole/brozzler-webconsole/init.py
+++ b/webconsole/brozzler-webconsole/init.py
@ -1,21 +1,21 @@
-#
+'''
-# brozzler-webconsole/__init__.py - flask app for brozzler web console, defines
+brozzler-webconsole/__init__.py - flask app for brozzler web console, defines
-# api endspoints etc
+api endpoints etc
-#
+
-# Copyright (C) 2014-2016 Internet Archive
+Copyright (C) 2014-2016 Internet Archive
-#
+
-# Licensed under the Apache License, Version 2.0 (the "License");
+Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
+you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+You may obtain a copy of the License at
-#
+
-#     http://www.apache.org/licenses/LICENSE-2.0
+    http://www.apache.org/licenses/LICENSE-2.0
-#
+
-# Unless required by applicable law or agreed to in writing, software
+Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
+distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+See the License for the specific language governing permissions and
-# limitations under the License.
+limitations under the License.
-#
+'''
 import flask
 import rethinkstuff
@ -24,16 +24,26 @@ import sys
 import os
 import importlib
 import rethinkdb
 import logging
 import yaml
-# XXX flask does its own logging config
+# flask does its own logging config
-# import logging
+# logging.basicConfig(
-# logging.basicConfig(stream=sys.stdout, level=logging.INFO,
+#         stream=sys.stdout, level=logging.INFO,
-#         format="%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s")
+#         format=(
 #             "%(asctime)s %(process)d %(levelname)s %(threadName)s "
 #             "%(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s")
 app = flask.Flask(__name__)
 # http://stackoverflow.com/questions/26578733/why-is-flask-application-not-creating-any-logs-when-hosted-by-gunicorn
 gunicorn_error_logger = logging.getLogger('gunicorn.error')
 app.logger.handlers.extend(gunicorn_error_logger.handlers)
 app.logger.setLevel(logging.INFO)
 app.logger.info('will this show in the log?')
 # configure with environment variables
-SETTINGS= {
+SETTINGS = {
    'RETHINKDB_SERVERS': os.environ.get(
        'RETHINKDB_SERVERS', 'localhost').split(','),
    'RETHINKDB_DB': os.environ.get('RETHINKDB_DB', 'brozzler'),
@ -81,10 +91,10 @@ def pages(site_id):
    app.logger.info("flask.request.args=%s", flask.request.args)
    start = int(flask.request.args.get("start", 0))
    end = int(flask.request.args.get("end", start + 90))
    app.logger.info("yes new query")
    pages_ = r.table("pages").between(
-            [site_id, 1, False, r.minval],
+            [site_id, 1, r.minval], [site_id, r.maxval, r.maxval],
-            [site_id, r.maxval, False, r.maxval],
+            index="least_hops").order_by(index="least_hops")[start:end].run()
            index="priority_by_site")[start:end].run()
    return flask.jsonify(pages=list(pages_))
@app.route("/api/sites/<site_id>")
@ -110,6 +120,14 @@ def job(job_id):
    job_ = r.table("jobs").get(job_id).run()
    return flask.jsonify(job_)
@app.route("/api/jobs/<int:job_id>/yaml")
@app.route("/api/job/<int:job_id>/yaml")
 def job_yaml(job_id):
    job_ = r.table("jobs").get(job_id).run()
    return app.response_class(
            yaml.dump(job_, default_flow_style=False),
            mimetype='application/yaml')
@app.route("/api/workers")
 def workers():
    workers_ = service_registry.available_services("brozzler-worker")
--- a/webconsole/brozzler-webconsole/static/js/app.js
+++ b/webconsole/brozzler-webconsole/static/js/app.js
@ -125,11 +125,10 @@ function loadSiteStats($http, site, job) {
    $http.get("/api/sites/" + site.id + "/page_count").success(pageCountSuccessCallback(site, job));
    $http.get("/api/sites/" + site.id + "/queued_count").success(queuedCountSuccessCallback(site, job));
-    // parse Warcprox-Meta to find stats bucket
+    // look at Warcprox-Meta to find stats bucket
-    var warcprox_meta = angular.fromJson(site.extra_headers["Warcprox-Meta"]);
+    for (var j = 0; j < site.warcprox_meta.stats.buckets.length; j++) {
-    for (var j = 0; j < warcprox_meta.stats.buckets.length; j++) {
+        if (site.warcprox_meta.stats.buckets[j].indexOf("seed") >= 0) {
-        if (warcprox_meta.stats.buckets[j].indexOf("seed") >= 0) {
+            var bucket = site.warcprox_meta.stats.buckets[j];
            var bucket = warcprox_meta.stats.buckets[j];
            // console.log("warcprox_meta.stats.buckets[" + j + "]=" + bucket);
            $http.get("/api/stats/" + bucket).success(statsSuccessCallback(site, bucket));
        }
@ -138,7 +137,8 @@ function loadSiteStats($http, site, job) {
 brozzlerControllers.controller("JobController", ["$scope", "$routeParams", "$http",
    function($scope, $routeParams, $http) {
-        console.log('JobController');
+        $scope.show_yaml = false;
        // console.log('JobController');
        $http.get("/api/config").success(function(data) {
            $scope.config = data.config;
        });
@ -159,6 +159,9 @@ brozzlerControllers.controller("JobController", ["$scope", "$routeParams", "$htt
                }
            });
        });
        $http.get("/api/jobs/" + $routeParams.id + "/yaml").success(function(data) {
            $scope.job_yaml = data;
        });
    }]);
 brozzlerControllers.controller("SiteController", ["$scope", "$routeParams", "$http", "$window",
--- a/webconsole/brozzler-webconsole/static/partials/job.html
+++ b/webconsole/brozzler-webconsole/static/partials/job.html
@ -10,7 +10,12 @@
 </div>
 <div>
-    <h2>Job {{job.id}} <small>{{job.started}}-{{job.finished}} {{job.status}}</small></h2>
+    <h2 ng-click="show_yaml = !show_yaml">
        <span class="fa fa-caret-right"
            ng-class="{ 'fa-caret-right': !show_yaml, 'fa-caret-down': !!show_yaml }"></span>
        Job {{job.id}} <small>{{job.started}}-{{job.finished}}: {{job.status}}</small>
    </h2>
    <pre style="display:{{show_yaml?'block':'none'}}">{{job_yaml}}</pre>
    <div class="row bigstats">
        <div class="col-sm-6 col-md-3">
--- a/webconsole/brozzler-webconsole/static/partials/site.html
+++ b/webconsole/brozzler-webconsole/static/partials/site.html
@ -40,12 +40,22 @@
        <div class="col-sm-12">
            <h2>Pages</h2>
            <div class="col-sm-6 col-md-4" ng-repeat="page in pages">
-                <a class="thumbnail" href="{{config.WAYBACK_BASEURL}}/3/{{page.url}}">
+                <div class="thumbnail">
-                    <img style="width:300px;height:190px" src="{{config.WAYBACK_BASEURL}}/3/thumbnail:{{page.url}}" alt="thumb">
+                    <img style="border:1px solid #ddd;width:300px;height:190px" src="{{config.WAYBACK_BASEURL}}/3/thumbnail:{{page.url}}" alt="thumb">
                    <div class="caption">
                        <h5>{{page.url}}</h5>
                        <ul class="fa-ul">
                            <li>
                            <span class="fa fa-li fa-camera"></span>
                            <a target="_blank" href="{{config.WAYBACK_BASEURL}}/3/screenshot:{{page.url}}">full size screenshot &gt;</a>
                            </li>
                            <li>
                            <span class="fa fa-li fa-university"></span>
                            <a target="_blank" href="{{config.WAYBACK_BASEURL}}/3/{{page.url}}">wayback &gt;</a>
                            </li>
                        </ul>
                    </div>
-                </a>
+                </div>
            </div>
        </div>
        <div class="col-sm-12" ng-show="loading">
--- a/webconsole/requirements.txt
+++ b/webconsole/requirements.txt
@ -1,2 +1,4 @@
 git+https://github.com/mitsuhiko/flask.git
 rethinkstuff>=0.1.5
 flask>=0.11
 gunicorn
 PyYAML
`@ -1 +1 @@`
	`flask --debug --app=brozzler-webconsole.py run --host=0.0.0.0 --port=8081`	`gunicorn --bind=0.0.0.0:8081 brozzler-webconsole:app`