mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-23 16:19:49 -05:00
incremental progress on web console
This commit is contained in:
parent
7ab2eb4fda
commit
d1158ab224
@ -12,6 +12,39 @@ app = flask.Flask(__name__)
|
||||
r = rethinkstuff.Rethinker(["wbgrp-svc020", "wbgrp-svc035", "wbgrp-svc036"],
|
||||
db="archiveit_brozzler")
|
||||
|
||||
@app.route("/api/sites/<site_id>/queued_count")
@app.route("/api/site/<site_id>/queued_count")
def queued_count(site_id):
    """Count the rows of the "pages" table selected for a site's queue.

    The selection is the "priority_by_site" index range from
    ``[site_id, 0, False, r.minval]`` to ``[site_id, 0, False, r.maxval]``.

    Returns a JSON response of the form ``{"count": <int>}``.
    """
    # NOTE(review): presumably the index key is
    # [site_id, brozzle_count, claimed, priority], i.e. this selects
    # unclaimed, never-brozzled pages -- confirm against the index definition.
    lower_key = [site_id, 0, False, r.minval]
    upper_key = [site_id, 0, False, r.maxval]
    queued = r.table("pages").between(
            lower_key, upper_key, index="priority_by_site")
    return flask.jsonify(count=queued.count().run())
|
||||
|
||||
@app.route("/api/sites/<site_id>/queue")
@app.route("/api/site/<site_id>/queue")
def queue(site_id):
    """Return a slice of the "pages" rows selected for a site's queue.

    Query parameters:
        start: integer offset of the first row to return (default 0).
        end: integer offset one past the last row (default ``start + 90``).

    Returns a JSON response of the form ``{"queue_": [<page>, ...]}``.
    """
    logging.info("flask.request.args=%s", flask.request.args)
    # Query-string values arrive as strings. Without coercion, supplying
    # "start" made the eagerly evaluated default `start + 90` raise TypeError,
    # and the slice bounds were strings. With type=int, a missing or
    # non-integer value falls back to the default.
    start = flask.request.args.get("start", 0, type=int)
    end = flask.request.args.get("end", start + 90, type=int)
    queue_ = r.table("pages").between(
            [site_id, 0, False, r.minval], [site_id, 0, False, r.maxval],
            index="priority_by_site")[start:end].run()
    return flask.jsonify(queue_=list(queue_))
|
||||
|
||||
@app.route("/api/sites/<site_id>/pages_count")
@app.route("/api/site/<site_id>/pages_count")
@app.route("/api/sites/<site_id>/page_count")
@app.route("/api/site/<site_id>/page_count")
def page_count(site_id):
    """Count the rows of the "pages" table selected as a site's pages.

    The selection is the "priority_by_site" index range from
    ``[site_id, 1, False, r.minval]`` to
    ``[site_id, r.maxval, False, r.maxval]``.

    Returns a JSON response of the form ``{"count": <int>}``.
    """
    # NOTE(review): presumably the second key element is the brozzle count,
    # so a lower bound of 1 means "crawled at least once" -- confirm against
    # the index definition.
    lower_key = [site_id, 1, False, r.minval]
    upper_key = [site_id, r.maxval, False, r.maxval]
    crawled = r.table("pages").between(
            lower_key, upper_key, index="priority_by_site")
    return flask.jsonify(count=crawled.count().run())
|
||||
|
||||
@app.route("/api/sites/<site_id>/pages")
@app.route("/api/site/<site_id>/pages")
def pages(site_id):
    """Pages already crawled.

    Query parameters:
        start: integer offset of the first row to return (default 0).
        end: integer offset one past the last row (default ``start + 90``).

    Returns a JSON response of the form ``{"pages": [<page>, ...]}``.
    """
    logging.info("flask.request.args=%s", flask.request.args)
    # Query-string values arrive as strings. Without coercion, a request such
    # as "?start=0&end=99" made the eagerly evaluated default `start + 90`
    # raise TypeError. With type=int, a missing or non-integer value falls
    # back to the default.
    start = flask.request.args.get("start", 0, type=int)
    end = flask.request.args.get("end", start + 90, type=int)
    pages_ = r.table("pages").between(
            [site_id, 1, False, r.minval],
            [site_id, r.maxval, False, r.maxval],
            index="priority_by_site")[start:end].run()
    return flask.jsonify(pages=list(pages_))
|
||||
|
||||
@app.route("/api/sites/<site_id>")
|
||||
@app.route("/api/site/<site_id>")
|
||||
def site(site_id):
|
||||
|
Before Width: | Height: | Size: 9.1 KiB After Width: | Height: | Size: 9.1 KiB |
@ -7,15 +7,21 @@
|
||||
<title>Brozzler Console: Jobs</title>
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.3.5/css/bootstrap.css">
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.3.5/css/bootstrap-theme.css">
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.4.0/css/font-awesome.css">
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/angular.js/1.4.6/angular.js"></script>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/angular.js/1.4.6/angular-route.js"></script>
|
||||
<script src="/static/js/app.js"></script>
|
||||
<script src="/static/js/controllers.js"></script>
|
||||
<style>
|
||||
body { padding-top: 1rem; }
|
||||
.thumbnail:focus, .thumbnail:hover { text-decoration: none; }
|
||||
.thumbnail { word-wrap: break-word; }
|
||||
/* .glyphicon { color: #563d7c; } */
|
||||
</style>
|
||||
</head>
|
||||
|
||||
<body role="document">
|
||||
<div class="container" role="main">
|
||||
<div ng-view></div>
|
||||
<div ng-view class="container" role="main">
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -20,6 +20,9 @@ brozzlerConsoleApp.config(["$routeProvider", "$locationProvider",
|
||||
templateUrl: "/static/partials/site.html",
|
||||
controller: "SiteController"
|
||||
}).
|
||||
when("/", {
|
||||
redirectTo: "/jobs"
|
||||
}).
|
||||
otherwise({
|
||||
template: '<div> <div class="page-header"> <h1>Not Found</h1> </div> <div class="row"> <div class="col-sm-12"> How the heck did you get here? </div> </div> </div> ',
|
||||
});
|
||||
|
@ -14,23 +14,47 @@ brozzlerControllers.controller("JobController", ["$scope", "$routeParams", "$htt
|
||||
$scope.phoneId = $routeParams.phoneId;
|
||||
$http.get("/api/jobs/" + $routeParams.id).success(function(data) {
|
||||
$scope.job = data;
|
||||
console.log("job=", $scope.job);
|
||||
// console.log("job=", $scope.job);
|
||||
});
|
||||
|
||||
/**
 * Builds the success handler for a "/api/stats/<bucket>" request.
 *
 * @param {Object} site - site object that receives the response.
 * @param {string} bucket - stats bucket name; not read by the handler,
 *     kept so call sites can pass it along.
 * @returns {Function} handler that stores the response body on site.stats.
 */
function statsSuccessCallback(site, bucket) {
    function storeStats(payload) {
        site.stats = payload;
    }
    return storeStats;
}
|
||||
|
||||
/**
 * Builds the success handler for a "/api/sites/<id>/page_count" request.
 *
 * @param {Object} site - site object that receives the count.
 * @param {string} bucket - not read by the handler; kept so the signature
 *     matches the other callback factories.
 * @returns {Function} handler that copies the response's `count` field to
 *     site.page_count.
 */
function pageCountSuccessCallback(site, bucket) {
    function storePageCount(payload) {
        site.page_count = payload.count;
    }
    return storePageCount;
}
|
||||
|
||||
/**
 * Builds the success handler for a "/api/sites/<id>/queued_count" request.
 *
 * Debug logging of every response was removed so this factory matches
 * statsSuccessCallback and pageCountSuccessCallback, whose logging is
 * disabled; otherwise the console is written to once per site.
 *
 * @param {Object} site - site object that receives the count.
 * @param {string} bucket - not read by the handler; kept so the signature
 *     matches the other callback factories.
 * @returns {Function} handler that copies the response's `count` field to
 *     site.queued_count.
 */
function queuedCountSuccessCallback(site, bucket) {
    return function(data) {
        site.queued_count = data.count;
    };
}
|
||||
|
||||
$http.get("/api/jobs/" + $routeParams.id + "/sites").success(function(data) {
|
||||
$scope.sites = data.sites;
|
||||
console.log("sites=", $scope.sites);
|
||||
// console.log("sites=", $scope.sites);
|
||||
for (var i = 0; i < $scope.sites.length; i++) {
|
||||
var site = $scope.sites[i]; // parse Warcprox-Meta to find stats bucket
|
||||
$http.get("/api/sites/" + site.id + "/page_count").success(pageCountSuccessCallback(site, bucket));
|
||||
$http.get("/api/sites/" + site.id + "/queued_count").success(queuedCountSuccessCallback(site, bucket));
|
||||
|
||||
var warcprox_meta = angular.fromJson(site.extra_headers["Warcprox-Meta"]);
|
||||
for (var j = 0; j < warcprox_meta.stats.buckets.length; j++) {
|
||||
if (warcprox_meta.stats.buckets[j].indexOf("seed") >= 0) {
|
||||
console.log("warcprox_meta.stats.buckets[" + j + "]=" + warcprox_meta.stats.buckets[j]);
|
||||
var bucket = warcprox_meta.stats.buckets[j];
|
||||
$http.get("/api/stats/" + warcprox_meta.stats.buckets[j]).success(function(data) {
|
||||
console.log("/api/stats/" + bucket + "=", data);
|
||||
site.stats = data;
|
||||
});
|
||||
// console.log("warcprox_meta.stats.buckets[" + j + "]=" + bucket);
|
||||
$http.get("/api/stats/" + bucket).success(statsSuccessCallback(site, bucket));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -41,16 +65,22 @@ brozzlerControllers.controller("SiteController", ["$scope", "$routeParams", "$ht
|
||||
function($scope, $routeParams, $http) {
|
||||
$http.get("/api/site/" + $routeParams.id).success(function(data) {
|
||||
$scope.site = data;
|
||||
// console.log("site = ", $scope.site);
|
||||
});
|
||||
|
||||
$http.get("/api/site/" + $routeParams.id + "/pages?start=0&end=99").success(function(data) {
|
||||
$scope.pages = data.pages;
|
||||
// console.log("pages = ", $scope.pages);
|
||||
});
|
||||
}]);
|
||||
|
||||
/*
|
||||
$http.get(...)
|
||||
.then(function(response){
|
||||
// successHandler
|
||||
// do some stuff
|
||||
return $http.get('/somethingelse') // get more data
|
||||
})
|
||||
.then(anotherSuccessHandler)
|
||||
.catch(errorHandler)
|
||||
$http.get("/api/site/" + $routeParams.id).then(function(response) {
|
||||
console.log("/api/site/" + $routeParams.id + " returned", response);
|
||||
$scope.site = response.data;
|
||||
return $http.get("/api/site/" + $routeParams.id + "/pages");
|
||||
}).then(function(response) {
|
||||
console.log("/api/site/" + $routeParams.id + "/pages returned", response);
|
||||
$scope.site.pages = response.data.pages;
|
||||
});
|
||||
*/
|
||||
|
@ -1,41 +1,41 @@
|
||||
<ol class="breadcrumb">
|
||||
<li><a href="/jobs">Jobs</a></li>
|
||||
<li class="active">{{job.id}}</li>
|
||||
</ol>
|
||||
|
||||
<div class="page-header">
|
||||
<h1>Brozzler
|
||||
<a href="/"><img src="/static/brozzler.svg" style="height:1.5em;float:right"></a>
|
||||
</h1>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<div class="page-header">
|
||||
<h1>Job {{job.id}}</h1>
|
||||
</div>
|
||||
<h2>Job {{job.id}}</h2>
|
||||
|
||||
<div class="row">
|
||||
<div class="col-sm-12">
|
||||
<ul>
|
||||
<li> started {{job.started}} </li>
|
||||
<li> finished {{job.finished}} </li>
|
||||
<li> status {{job.status}} </li>
|
||||
<li> sites={{sites}} </li>
|
||||
</ul>
|
||||
<ul>
|
||||
<li ng-repeat="site in sites">
|
||||
{{site}}
|
||||
</li>
|
||||
</ul>
|
||||
<h3>Sites</h3>
|
||||
<div class="col-sm-6 col-md-4" ng-repeat="site in sites">
|
||||
<a class="thumbnail" href="/sites/{{site.id}}">
|
||||
<img style="width:300px;height:190px" src="http://wbgrp-svc107.us.archive.org:8091/web/3/thumbnail:{{site.seed}}" alt="thumb">
|
||||
<div class="caption">
|
||||
<h5>{{site.seed}}</h5>
|
||||
<!--
|
||||
<div><span class="glyphicon glyphicon-file"></span> <strong>{{site.page_count}}</strong> pages crawled</div>
|
||||
<div><span class="glyphicon glyphicon-duplicate"></span> <strong>{{site.stats.total.urls}}</strong> urls crawled</div>
|
||||
<div><span class="glyphicon glyphicon-oil"></span> <strong>{{site.stats.total.wire_bytes | byteformat}}</strong> crawled</div>
|
||||
<div><span class="glyphicon glyphicon-menu-hamburger"></span> <strong>{{site.queued_count}}</strong> pages queued</div>
|
||||
-->
|
||||
|
||||
<table class="table table-striped">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>id</th>
|
||||
<th>seed</th>
|
||||
<!-- <th>pages</th> -->
|
||||
<th>urls</th>
|
||||
<th>new data</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr ng-repeat="site in sites">
|
||||
<td><a href="/sites/{{site.id}}">{{site.id}}</a></td>
|
||||
<td>{{site.seed}}</td>
|
||||
<td>{{site.stats.total.urls}}</td>
|
||||
<td>{{site.stats.new.wire_bytes | byteformat}}</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<ul class="fa-ul">
|
||||
<li><span class="fa fa-li fa-file-text"></span> <strong>{{site.page_count}}</strong> pages crawled</li>
|
||||
<li><span class="fa fa-li fa-clone"></span> <strong>{{site.stats.total.urls}}</strong> urls crawled</li>
|
||||
<li><span class="fa fa-li fa-archive"></span> <strong>{{site.stats.total.wire_bytes | byteformat}}</strong> crawled</li>
|
||||
<li><span class="fa fa-li fa-ellipsis-h"></span> <strong>{{site.queued_count}}</strong> pages queued</li>
|
||||
</ul>
|
||||
</div>
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
@ -1,7 +1,15 @@
|
||||
<ol class="breadcrumb">
|
||||
<li class="active">Jobs</li>
|
||||
</ol>
|
||||
|
||||
<div class="page-header">
|
||||
<h1>Brozzler
|
||||
<a href="/"><img src="/static/brozzler.svg" style="height:1.5em;float:right"></a>
|
||||
</h1>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<div class="page-header">
|
||||
<h1>Jobs</h1>
|
||||
</div>
|
||||
<h2>Jobs</h2>
|
||||
|
||||
<div class="row">
|
||||
<div class="col-sm-12">
|
||||
@ -16,13 +24,13 @@
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr ng-repeat="job in jobs">
|
||||
<td><a href="/jobs/{{job.id}}">{{job.id}}</a></td>
|
||||
<td>{{job.status}}</td>
|
||||
<td>{{job.started}}</td>
|
||||
<td>{{job.finished}}</td>
|
||||
<td>{{job.conf.seeds.length}}</td>
|
||||
</tr>
|
||||
<tr ng-repeat="job in jobs">
|
||||
<td><a href="/jobs/{{job.id}}">{{job.id}}</a></td>
|
||||
<td>{{job.status}}</td>
|
||||
<td>{{job.started}}</td>
|
||||
<td>{{job.finished}}</td>
|
||||
<td>{{job.conf.seeds.length}}</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
@ -1,11 +1,37 @@
|
||||
<ol class="breadcrumb">
|
||||
<li><a href="/jobs">Jobs</a></li>
|
||||
<li><a href="/jobs/{{site.job_id}}">{{site.job_id}}</a></li>
|
||||
<li class="active">{{site.seed}}</li>
|
||||
</ol>
|
||||
|
||||
<div class="page-header">
|
||||
<h1>Brozzler
|
||||
<a href="/"><img src="/static/brozzler.svg" style="height:1.5em;float:right"></a>
|
||||
</h1>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<div class="page-header">
|
||||
<h1>Site {{site.seed}} (Job <a href="/jobs/{{site.job_id}}">{{site.job_id}}</a>)</h1>
|
||||
</div>
|
||||
<h2>Site {{site.seed}} (Job <a href="/jobs/{{site.job_id}}">{{site.job_id}}</a>)</h2>
|
||||
|
||||
<div class="row">
|
||||
<div class="col-sm-12">
|
||||
site={{site}}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="row">
|
||||
<div class="col-sm-12">
|
||||
<h2>Pages</h2>
|
||||
<div class="col-sm-6 col-md-4" ng-repeat="page in pages">
|
||||
<div class="thumbnail">
|
||||
<img width="300" height="190" src="http://wbgrp-svc107.us.archive.org:8091/web/3/thumbnail:{{page.url}}" alt="thumb">
|
||||
<div class="caption">
|
||||
<h3>Thumbnail label</h3>
|
||||
{{page}}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
Loading…
x
Reference in New Issue
Block a user