incremental progress on web console

This commit is contained in:
Noah Levitt 2015-10-08 00:33:49 +00:00
parent 7ab2eb4fda
commit d1158ab224
8 changed files with 169 additions and 63 deletions

View File

@ -12,6 +12,39 @@ app = flask.Flask(__name__)
# Module-level database handle: the route handlers in this file issue their
# queries through `r` (e.g. r.table("pages")...run()) against the
# "archiveit_brozzler" database on the servers listed here.
r = rethinkstuff.Rethinker(["wbgrp-svc020", "wbgrp-svc035", "wbgrp-svc036"],
db="archiveit_brozzler")
@app.route("/api/sites/<site_id>/queued_count")
@app.route("/api/site/<site_id>/queued_count")
def queued_count(site_id):
    """Return ``{"count": N}`` for the site's queued pages.

    N is the number of rows of the ``pages`` table whose
    ``priority_by_site`` index key lies between
    ``[site_id, 0, False, minval]`` and ``[site_id, 0, False, maxval]``.
    """
    # NOTE(review): presumably the 0 / False key components select pages that
    # have not been crawled or claimed yet -- confirm against the index
    # definition.
    lower_key = [site_id, 0, False, r.minval]
    upper_key = [site_id, 0, False, r.maxval]
    queued = r.table("pages").between(
            lower_key, upper_key, index="priority_by_site")
    return flask.jsonify(count=queued.count().run())
@app.route("/api/sites/<site_id>/queue")
@app.route("/api/site/<site_id>/queue")
def queue(site_id):
    """Return a slice of the site's queued pages as ``{"queue_": [...]}``.

    Rows come from the ``pages`` table, restricted to the
    ``[site_id, 0, False, *]`` range of the ``priority_by_site`` index.

    Query parameters:
        start: lower bound of the slice (default 0).
        end: upper bound of the slice (default ``start + 90``).
    """
    logging.info("flask.request.args=%s", flask.request.args)
    # Query-string values arrive as strings. Parse them as ints so that
    # ``start + 90`` and the slice below work on numbers instead of raising
    # TypeError; a value that does not parse falls back to the default.
    start = flask.request.args.get("start", 0, type=int)
    end = flask.request.args.get("end", start + 90, type=int)
    queue_ = r.table("pages").between(
            [site_id, 0, False, r.minval], [site_id, 0, False, r.maxval],
            index="priority_by_site")[start:end].run()
    return flask.jsonify(queue_=list(queue_))
@app.route("/api/sites/<site_id>/pages_count")
@app.route("/api/site/<site_id>/pages_count")
@app.route("/api/sites/<site_id>/page_count")
@app.route("/api/site/<site_id>/page_count")
def page_count(site_id):
    """Return ``{"count": N}`` for the site's pages outside the queue range.

    N is the number of rows of the ``pages`` table whose
    ``priority_by_site`` index key lies between
    ``[site_id, 1, False, minval]`` and ``[site_id, maxval, False, maxval]``.
    """
    # NOTE(review): presumably a second key component >= 1 means the page has
    # been crawled at least once -- confirm against the index definition.
    lower_key = [site_id, 1, False, r.minval]
    upper_key = [site_id, r.maxval, False, r.maxval]
    crawled = r.table("pages").between(
            lower_key, upper_key, index="priority_by_site")
    return flask.jsonify(count=crawled.count().run())
@app.route("/api/sites/<site_id>/pages")
@app.route("/api/site/<site_id>/pages")
def pages(site_id):
    """Pages already crawled.

    Returns ``{"pages": [...]}``: a slice of the ``pages`` table rows whose
    ``priority_by_site`` index key lies between
    ``[site_id, 1, False, minval]`` and ``[site_id, maxval, False, maxval]``.

    Query parameters:
        start: lower bound of the slice (default 0).
        end: upper bound of the slice (default ``start + 90``).
    """
    logging.info("flask.request.args=%s", flask.request.args)
    # Query-string values arrive as strings. Parse them as ints so that
    # ``start + 90`` and the slice below work on numbers instead of raising
    # TypeError; a value that does not parse falls back to the default.
    start = flask.request.args.get("start", 0, type=int)
    end = flask.request.args.get("end", start + 90, type=int)
    pages_ = r.table("pages").between(
            [site_id, 1, False, r.minval],
            [site_id, r.maxval, False, r.maxval],
            index="priority_by_site")[start:end].run()
    return flask.jsonify(pages=list(pages_))
@app.route("/api/sites/<site_id>")
@app.route("/api/site/<site_id>")
def site(site_id):

View File

Before

Width:  |  Height:  |  Size: 9.1 KiB

After

Width:  |  Height:  |  Size: 9.1 KiB

View File

@ -7,15 +7,21 @@
<title>Brozzler Console: Jobs</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.3.5/css/bootstrap.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.3.5/css/bootstrap-theme.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.4.0/css/font-awesome.css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/angular.js/1.4.6/angular.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/angular.js/1.4.6/angular-route.js"></script>
<script src="/static/js/app.js"></script>
<script src="/static/js/controllers.js"></script>
<style>
body { padding-top: 1rem; }
.thumbnail:focus, .thumbnail:hover { text-decoration: none; }
.thumbnail { word-wrap: break-word; }
/* .glyphicon { color: #563d7c; } */
</style>
</head>
<body role="document">
<div class="container" role="main">
<div ng-view></div>
<div ng-view class="container" role="main">
</div>
</body>
</html>

View File

@ -20,6 +20,9 @@ brozzlerConsoleApp.config(["$routeProvider", "$locationProvider",
templateUrl: "/static/partials/site.html",
controller: "SiteController"
}).
when("/", {
redirectTo: "/jobs"
}).
otherwise({
template: '<div> <div class="page-header"> <h1>Not Found</h1> </div> <div class="row"> <div class="col-sm-12"> How the heck did you get here? </div> </div> </div> ',
});

View File

@ -14,23 +14,47 @@ brozzlerControllers.controller("JobController", ["$scope", "$routeParams", "$htt
$scope.phoneId = $routeParams.phoneId;
$http.get("/api/jobs/" + $routeParams.id).success(function(data) {
$scope.job = data;
console.log("job=", $scope.job);
// console.log("job=", $scope.job);
});
// Builds the success handler for a "/api/stats/<bucket>" request. Each call
// returns a fresh closure bound to its own `site`, so handlers created inside
// a loop do not all end up writing to the last site. `bucket` is accepted
// but not used by the handler.
function statsSuccessCallback(site, bucket) {
    return function(stats) {
        site.stats = stats;
    };
}
// Builds the success handler for a "/api/sites/<id>/page_count" request: the
// returned closure copies the response's `count` onto `site.page_count`.
// `bucket` is accepted but not used by the handler.
function pageCountSuccessCallback(site, bucket) {
    return function(response) {
        site.page_count = response.count;
    };
}
// Builds the success handler for a "/api/sites/<id>/queued_count" request:
// the returned closure logs the site and the response to the console, then
// copies the response's `count` onto `site.queued_count`. `bucket` is
// accepted but not used by the handler.
function queuedCountSuccessCallback(site, bucket) {
    return function(response) {
        console.log("site = ", site);
        console.log("/api/sites/" + site.id + "/queued_count = ", response);
        site.queued_count = response.count;
    };
}
$http.get("/api/jobs/" + $routeParams.id + "/sites").success(function(data) {
$scope.sites = data.sites;
console.log("sites=", $scope.sites);
// console.log("sites=", $scope.sites);
for (var i = 0; i < $scope.sites.length; i++) {
var site = $scope.sites[i]; // parse Warcprox-Meta to find stats bucket
$http.get("/api/sites/" + site.id + "/page_count").success(pageCountSuccessCallback(site, bucket));
$http.get("/api/sites/" + site.id + "/queued_count").success(queuedCountSuccessCallback(site, bucket));
var warcprox_meta = angular.fromJson(site.extra_headers["Warcprox-Meta"]);
for (var j = 0; j < warcprox_meta.stats.buckets.length; j++) {
if (warcprox_meta.stats.buckets[j].indexOf("seed") >= 0) {
console.log("warcprox_meta.stats.buckets[" + j + "]=" + warcprox_meta.stats.buckets[j]);
var bucket = warcprox_meta.stats.buckets[j];
$http.get("/api/stats/" + warcprox_meta.stats.buckets[j]).success(function(data) {
console.log("/api/stats/" + bucket + "=", data);
site.stats = data;
});
// console.log("warcprox_meta.stats.buckets[" + j + "]=" + bucket);
$http.get("/api/stats/" + bucket).success(statsSuccessCallback(site, bucket));
}
}
}
@ -41,16 +65,22 @@ brozzlerControllers.controller("SiteController", ["$scope", "$routeParams", "$ht
function($scope, $routeParams, $http) {
$http.get("/api/site/" + $routeParams.id).success(function(data) {
$scope.site = data;
// console.log("site = ", $scope.site);
});
$http.get("/api/site/" + $routeParams.id + "/pages?start=0&end=99").success(function(data) {
$scope.pages = data.pages;
// console.log("pages = ", $scope.pages);
});
}]);
/*
$http.get(...)
.then(function(response){
// successHandler
// do some stuff
return $http.get('/somethingelse') // get more data
})
.then(anotherSuccessHandler)
.catch(errorHandler)
$http.get("/api/site/" + $routeParams.id).then(function(response) {
console.log("/api/site/" + $routeParams.id + " returned", response);
$scope.site = response.data;
return $http.get("/api/site/" + $routeParams.id + "/pages");
}).then(function(response) {
console.log("/api/site/" + $routeParams.id + "/pages returned", response);
$scope.site.pages = response.data.pages;
});
*/

View File

@ -1,41 +1,41 @@
<ol class="breadcrumb">
<li><a href="/jobs">Jobs</a></li>
<li class="active">{{job.id}}</li>
</ol>
<div class="page-header">
<h1>Brozzler
<a href="/"><img src="/static/brozzler.svg" style="height:1.5em;float:right"></a>
</h1>
</div>
<div>
<div class="page-header">
<h1>Job {{job.id}}</h1>
</div>
<h2>Job {{job.id}}</h2>
<div class="row">
<div class="col-sm-12">
<ul>
<li> started {{job.started}} </li>
<li> finished {{job.finished}} </li>
<li> status {{job.status}} </li>
<li> sites={{sites}} </li>
</ul>
<ul>
<li ng-repeat="site in sites">
{{site}}
</li>
</ul>
<h3>Sites</h3>
<div class="col-sm-6 col-md-4" ng-repeat="site in sites">
<a class="thumbnail" href="/sites/{{site.id}}">
<img style="width:300px;height:190px" src="http://wbgrp-svc107.us.archive.org:8091/web/3/thumbnail:{{site.seed}}" alt="thumb">
<div class="caption">
<h5>{{site.seed}}</h5>
<!--
<div><span class="glyphicon glyphicon-file"></span> <strong>{{site.page_count}}</strong> pages crawled</div>
<div><span class="glyphicon glyphicon-duplicate"></span> <strong>{{site.stats.total.urls}}</strong> urls crawled</div>
<div><span class="glyphicon glyphicon-oil"></span> <strong>{{site.stats.total.wire_bytes | byteformat}}</strong> crawled</div>
<div><span class="glyphicon glyphicon-menu-hamburger"></span> <strong>{{site.queued_count}}</strong> pages queued</div>
-->
<table class="table table-striped">
<thead>
<tr>
<th>id</th>
<th>seed</th>
<!-- <th>pages</th> -->
<th>urls</th>
<th>new data</th>
</tr>
</thead>
<tbody>
<tr ng-repeat="site in sites">
<td><a href="/sites/{{site.id}}">{{site.id}}</a></td>
<td>{{site.seed}}</td>
<td>{{site.stats.total.urls}}</td>
<td>{{site.stats.new.wire_bytes | byteformat}}</td>
</tr>
</tbody>
</table>
<ul class="fa-ul">
<li><span class="fa fa-li fa-file-text"></span> <strong>{{site.page_count}}</strong> pages crawled</li>
<li><span class="fa fa-li fa-clone"></span> <strong>{{site.stats.total.urls}}</strong> urls crawled</li>
<li><span class="fa fa-li fa-archive"></span> <strong>{{site.stats.total.wire_bytes | byteformat}}</strong> crawled</li>
<li><span class="fa fa-li fa-ellipsis-h"></span> <strong>{{site.queued_count}}</strong> pages queued</li>
</ul>
</div>
</a>
</div>
</div>
</div>
</div>

View File

@ -1,7 +1,15 @@
<ol class="breadcrumb">
<li class="active">Jobs</li>
</ol>
<div class="page-header">
<h1>Brozzler
<a href="/"><img src="/static/brozzler.svg" style="height:1.5em;float:right"></a>
</h1>
</div>
<div>
<div class="page-header">
<h1>Jobs</h1>
</div>
<h2>Jobs</h2>
<div class="row">
<div class="col-sm-12">
@ -16,13 +24,13 @@
</tr>
</thead>
<tbody>
<tr ng-repeat="job in jobs">
<td><a href="/jobs/{{job.id}}">{{job.id}}</a></td>
<td>{{job.status}}</td>
<td>{{job.started}}</td>
<td>{{job.finished}}</td>
<td>{{job.conf.seeds.length}}</td>
</tr>
<tr ng-repeat="job in jobs">
<td><a href="/jobs/{{job.id}}">{{job.id}}</a></td>
<td>{{job.status}}</td>
<td>{{job.started}}</td>
<td>{{job.finished}}</td>
<td>{{job.conf.seeds.length}}</td>
</tr>
</tbody>
</table>
</div>

View File

@ -1,11 +1,37 @@
<ol class="breadcrumb">
<li><a href="/jobs">Jobs</a></li>
<li><a href="/jobs/{{site.job_id}}">{{site.job_id}}</a></li>
<li class="active">{{site.seed}}</li>
</ol>
<div class="page-header">
<h1>Brozzler
<a href="/"><img src="/static/brozzler.svg" style="height:1.5em;float:right"></a>
</h1>
</div>
<div>
<div class="page-header">
<h1>Site {{site.seed}} (Job <a href="/jobs/{{site.job_id}}">{{site.job_id}}</a>)</h1>
</div>
<h2>Site {{site.seed}} (Job <a href="/jobs/{{site.job_id}}">{{site.job_id}}</a>)</h2>
<div class="row">
<div class="col-sm-12">
site={{site}}
</div>
</div>
<div class="row">
<div class="col-sm-12">
<h2>Pages</h2>
<div class="col-sm-6 col-md-4" ng-repeat="page in pages">
<div class="thumbnail">
<img width="300" height="190" src="http://wbgrp-svc107.us.archive.org:8091/web/3/thumbnail:{{page.url}}" alt="thumb">
<div class="caption">
<h3>Thumbnail label</h3>
{{page}}
</div>
</div>
</div>
</div>
</div>
</div>