make the site page present something sensible

This commit is contained in:
Noah Levitt 2015-10-10 00:30:03 +00:00
parent 549b149e39
commit 3df4a3e109
3 changed files with 80 additions and 71 deletions

View File

@ -40,8 +40,8 @@ def page_count(site_id):
def pages(site_id):
"""Pages already crawled."""
logging.info("flask.request.args=%s", flask.request.args)
start = flask.request.args.get("start", 0)
end = flask.request.args.get("end", start + 90)
start = int(flask.request.args.get("start", 0))
end = int(flask.request.args.get("end", start + 90))
pages_ = r.table("pages").between([site_id, 1, False, r.minval], [site_id, r.maxval, False, r.maxval], index="priority_by_site")[start:end].run()
return flask.jsonify(pages=list(pages_))

View File

@ -3,83 +3,93 @@
var brozzlerControllers = angular.module("brozzlerControllers", []);
brozzlerControllers.controller("JobsListController", ["$scope", "$http",
function($scope, $http) {
$http.get("/api/jobs").success(function(data) {
$scope.jobs = data.jobs;
});
}]);
function($scope, $http) {
$http.get("/api/jobs").success(function(data) {
$scope.jobs = data.jobs;
});
}]);
/**
 * Builds a success handler that stores a stats response on a site object.
 *
 * @param {Object} site - Object that receives the response as `site.stats`.
 * @param {*} bucket - Bucket the stats request was made for. The handler
 *     does not read it; it is accepted so call sites can pass it along.
 * @returns {Function} Handler that takes the response data and assigns it
 *     to `site.stats`.
 */
function statsSuccessCallback(site, bucket) {
    var storeStats = function(stats) {
        site.stats = stats;
    };
    return storeStats;
}
/**
 * Builds a success handler for a site's page_count response.
 *
 * The handler copies `count` from the response onto `site.page_count` and,
 * when a job object was supplied, adds the same count to `job.page_count`.
 *
 * @param {Object} site - Object that receives `page_count`.
 * @param {Object} [job] - Optional object whose `page_count` is incremented.
 * @returns {Function} Handler that takes the response data.
 */
function pageCountSuccessCallback(site, job) {
    var applyPageCount = function(response) {
        var count = response.count;
        site.page_count = count;
        if (!job) {
            // No job to roll the count up into.
            return;
        }
        job.page_count = job.page_count + count;
    };
    return applyPageCount;
}
/**
 * Builds a success handler for a site's queued_count response.
 *
 * The handler copies `count` from the response onto `site.queued_count` and,
 * when a job object was supplied, adds the same count to `job.queued_count`.
 *
 * @param {Object} site - Object that receives `queued_count`.
 * @param {Object} [job] - Optional object whose `queued_count` is incremented.
 * @returns {Function} Handler that takes the response data.
 */
function queuedCountSuccessCallback(site, job) {
    var applyQueuedCount = function(response) {
        var count = response.count;
        site.queued_count = count;
        if (!job) {
            // No job to roll the count up into.
            return;
        }
        job.queued_count = job.queued_count + count;
    };
    return applyQueuedCount;
}
/**
 * Starts the requests that fill in a site's page count, queued count and
 * per-seed stats.
 *
 * Always requests `/api/sites/<id>/page_count` and
 * `/api/sites/<id>/queued_count`. It then reads the site's "Warcprox-Meta"
 * extra header and requests `/api/stats/<bucket>` for every stats bucket
 * whose name contains "seed".
 *
 * A site without `extra_headers`, without a "Warcprox-Meta" header, or whose
 * header has no `stats.buckets` list is skipped for the stats part instead
 * of throwing, so one such site does not stop a caller that is looping over
 * many sites.
 *
 * @param {Object} $http - Service whose `get(url)` result exposes `success(cb)`.
 * @param {Object} site - Site to populate; `site.id` is used in the URLs.
 * @param {Object} [job] - Optional job whose totals the count handlers update.
 */
function loadSiteStats($http, site, job) {
    $http.get("/api/sites/" + site.id + "/page_count").success(pageCountSuccessCallback(site, job));
    $http.get("/api/sites/" + site.id + "/queued_count").success(queuedCountSuccessCallback(site, job));

    // parse Warcprox-Meta to find stats bucket
    var rawMeta = site.extra_headers && site.extra_headers["Warcprox-Meta"];
    if (!rawMeta) {
        // Nothing to parse; the counts above are all that can be loaded.
        return;
    }
    var warcprox_meta = angular.fromJson(rawMeta);
    var buckets = warcprox_meta && warcprox_meta.stats && warcprox_meta.stats.buckets;
    if (!Array.isArray(buckets)) {
        return;
    }
    for (var j = 0; j < buckets.length; j++) {
        var bucket = buckets[j];
        // Only string entries can be matched by name and appended to the URL.
        if (typeof bucket === "string" && bucket.indexOf("seed") >= 0) {
            $http.get("/api/stats/" + bucket).success(statsSuccessCallback(site, bucket));
        }
    }
}
brozzlerControllers.controller("JobController", ["$scope", "$routeParams", "$http",
function($scope, $routeParams, $http) {
$http.get("/api/jobs/" + $routeParams.id).success(function(data) {
$scope.job = data;
$scope.job.page_count = $scope.job.queued_count = 0;
console.log("job=", $scope.job);
$http.get("/api/stats/" + $scope.job.conf.warcprox_meta.stats.buckets[0]).success(function(data) {
$scope.job.stats = data;
// console.log("job stats=", $scope.job.stats);
});
function($scope, $routeParams, $http) {
$http.get("/api/jobs/" + $routeParams.id).success(function(data) {
$scope.job = data;
$scope.job.page_count = $scope.job.queued_count = 0;
console.log("job=", $scope.job);
$http.get("/api/stats/" + $scope.job.conf.warcprox_meta.stats.buckets[0]).success(function(data) {
$scope.job.stats = data;
// console.log("job stats=", $scope.job.stats);
});
function statsSuccessCallback(site, bucket) {
return function(data) {
// console.log("site = ", site);
// console.log("/api/stats/" + bucket + " = ", data);
site.stats = data;
}
}
function pageCountSuccessCallback(site, bucket) {
return function(data) {
// console.log("site = ", site);
// console.log("/api/sites/" + site.id + "/page_count = ", data);
site.page_count = data.count;
$scope.job.page_count += data.count;
}
}
function queuedCountSuccessCallback(site, bucket) {
return function(data) {
// console.log("site = ", site);
// console.log("/api/sites/" + site.id + "/queued_count = ", data);
site.queued_count = data.count;
$scope.job.queued_count += data.count;
}
}
$http.get("/api/jobs/" + $routeParams.id + "/sites").success(function(data) {
$scope.sites = data.sites;
// console.log("sites=", $scope.sites);
for (var i = 0; i < $scope.sites.length; i++) {
var site = $scope.sites[i];
$http.get("/api/sites/" + site.id + "/page_count").success(pageCountSuccessCallback(site, bucket));
$http.get("/api/sites/" + site.id + "/queued_count").success(queuedCountSuccessCallback(site, bucket));
// parse Warcprox-Meta to find stats bucket
var warcprox_meta = angular.fromJson(site.extra_headers["Warcprox-Meta"]);
for (var j = 0; j < warcprox_meta.stats.buckets.length; j++) {
if (warcprox_meta.stats.buckets[j].indexOf("seed") >= 0) {
var bucket = warcprox_meta.stats.buckets[j];
// console.log("warcprox_meta.stats.buckets[" + j + "]=" + bucket);
$http.get("/api/stats/" + bucket).success(statsSuccessCallback(site, bucket));
}
}
loadSiteStats($http, $scope.sites[i], $scope.job);
}
});
}]);
});
}]);
brozzlerControllers.controller("SiteController", ["$scope", "$routeParams", "$http",
function($scope, $routeParams, $http) {
$http.get("/api/site/" + $routeParams.id).success(function(data) {
$scope.site = data;
// console.log("site = ", $scope.site);
});
function($scope, $routeParams, $http) {
$http.get("/api/site/" + $routeParams.id).success(function(data) {
$scope.site = data;
loadSiteStats($http, $scope.site);
// console.log("site = ", $scope.site);
});
$http.get("/api/site/" + $routeParams.id + "/pages?start=0&end=99").success(function(data) {
$scope.pages = data.pages;
// console.log("pages = ", $scope.pages);
});
}]);
$http.get("/api/site/" + $routeParams.id + "/pages?start=0&end=99").success(function(data) {
$scope.pages = data.pages;
console.log("pages = ", $scope.pages);
});
}]);
/*
$http.get("/api/site/" + $routeParams.id).then(function(response) {

View File

@ -40,13 +40,12 @@
<div class="col-sm-12">
<h2>Pages</h2>
<div class="col-sm-6 col-md-4" ng-repeat="page in pages">
<div class="thumbnail">
<img width="300" height="190" src="http://wbgrp-svc107.us.archive.org:8091/web/3/thumbnail:{{page.url}}" alt="thumb">
<a class="thumbnail" href="http://wbgrp-svc107.us.archive.org:8091/web/3/{{page.url}}">
<img style="width:300px;height:190px" src="http://wbgrp-svc107.us.archive.org:8091/web/3/thumbnail:{{page.url}}" alt="thumb">
<div class="caption">
<h3>Thumbnail label</h3>
{{page}}
<h5>{{page.url}}</h5>
</div>
</div>
</a>
</div>
</div>
</div>