126 lines
4.1 KiB
Python
Raw Normal View History

2015-09-25 17:59:38 +00:00
import flask
import rethinkstuff
import json
import sys
import os
import importlib
2015-09-25 17:59:38 +00:00
# XXX flask does its own logging config
# import logging
# logging.basicConfig(stream=sys.stdout, level=logging.INFO,
# format="%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s")
2015-09-25 22:48:01 +00:00
app = flask.Flask(__name__)

# Settings are read from environment variables of the same name, falling
# back to the defaults given here. RETHINKDB_SERVERS is a comma-separated
# list and is stored already split.
SETTINGS = {
    'RETHINKDB_SERVERS': os.getenv('RETHINKDB_SERVERS', 'localhost').split(','),
    'RETHINKDB_DB': os.getenv('RETHINKDB_DB', 'brozzler'),
    'WAYBACK_BASEURL': os.getenv(
        'WAYBACK_BASEURL', 'http://wbgrp-svc107.us.archive.org:8091'),
}

# Module-wide database handle used by the request handlers in this file,
# and the service registry that is built on top of it.
r = rethinkstuff.Rethinker(
    SETTINGS['RETHINKDB_SERVERS'],
    db=SETTINGS['RETHINKDB_DB'],
)
service_registry = rethinkstuff.ServiceRegistry(r)
2015-09-25 17:59:38 +00:00
2015-10-08 00:33:49 +00:00
@app.route("/api/sites/<site_id>/queued_count")
@app.route("/api/site/<site_id>/queued_count")
def queued_count(site_id):
    """Return, as JSON ``{"count": N}``, how many pages of a site are queued.

    Counts the rows of the "pages" table whose "priority_by_site" index key
    lies between ``[site_id, 0, False, r.minval]`` and
    ``[site_id, 0, False, r.maxval]``.
    """
    # NOTE(review): the meaning of the index components is not visible here;
    # presumably the second one is a crawl counter (0 = not crawled yet) --
    # confirm against the index definition.
    lower_key = [site_id, 0, False, r.minval]
    upper_key = [site_id, 0, False, r.maxval]
    queued = r.table("pages").between(
        lower_key, upper_key, index="priority_by_site")
    return flask.jsonify(count=queued.count().run())
@app.route("/api/sites/<site_id>/queue")
@app.route("/api/site/<site_id>/queue")
def queue(site_id):
    """Return a slice of a site's queued pages as JSON ``{"queue_": [...]}``.

    Query-string parameters:
        start: index of the first row to return (default 0).
        end: index one past the last row to return (default start + 90).

    Rows are taken from the "pages" table via the "priority_by_site" index,
    between ``[site_id, 0, False, r.minval]`` and
    ``[site_id, 0, False, r.maxval]``.

    A non-integer ``start`` or ``end`` raises ValueError, exactly as in
    ``pages()``.
    """
    app.logger.info("flask.request.args=%s", flask.request.args)
    # Query-string values arrive as strings. Without the int() conversion,
    # "start + 90" raises TypeError whenever ?start= is supplied, and the
    # slice below would receive string bounds.
    start = int(flask.request.args.get("start", 0))
    end = int(flask.request.args.get("end", start + 90))
    queue_ = r.table("pages").between(
        [site_id, 0, False, r.minval], [site_id, 0, False, r.maxval],
        index="priority_by_site")[start:end].run()
    return flask.jsonify(queue_=list(queue_))
@app.route("/api/sites/<site_id>/pages_count")
@app.route("/api/site/<site_id>/pages_count")
@app.route("/api/sites/<site_id>/page_count")
@app.route("/api/site/<site_id>/page_count")
def page_count(site_id):
    """Return, as JSON ``{"count": N}``, the size of a site's page listing.

    Counts the rows of the "pages" table whose "priority_by_site" index key
    lies between ``[site_id, 1, False, r.minval]`` and
    ``[site_id, r.maxval, False, r.maxval]`` -- the same range that
    ``pages()`` lists.
    """
    lower_key = [site_id, 1, False, r.minval]
    upper_key = [site_id, r.maxval, False, r.maxval]
    in_range = r.table("pages").between(
        lower_key, upper_key, index="priority_by_site")
    return flask.jsonify(count=in_range.count().run())
@app.route("/api/sites/<site_id>/pages")
@app.route("/api/site/<site_id>/pages")
def pages(site_id):
    """Pages already crawled.

    Returns JSON ``{"pages": [...]}`` holding rows ``start`` (default 0) up
    to but excluding ``end`` (default start + 90) of the site's pages, both
    taken from the query string and parsed as integers.
    """
    args = flask.request.args
    app.logger.info("flask.request.args=%s", args)

    first = int(args.get("start", 0))
    last = int(args.get("end", first + 90))

    lower_key = [site_id, 1, False, r.minval]
    upper_key = [site_id, r.maxval, False, r.maxval]
    crawled = r.table("pages").between(
        lower_key, upper_key, index="priority_by_site")
    window = crawled[first:last].run()
    return flask.jsonify(pages=list(window))
2015-09-28 22:05:43 +00:00
@app.route("/api/sites/<site_id>")
@app.route("/api/site/<site_id>")
def site(site_id):
    """Return the "sites" document whose primary key is *site_id* as JSON.

    Responds with HTTP 404 when no such document exists.
    """
    site_ = r.table("sites").get(site_id).run()
    if site_ is None:
        # get() yields None for an unknown key; report that as "not found"
        # instead of handing None to jsonify.
        flask.abort(404)
    return flask.jsonify(site_)
@app.route("/api/stats/<bucket>")
def stats(bucket):
    """Return the "stats" document whose primary key is *bucket* as JSON.

    Responds with HTTP 404 when no such document exists.
    """
    stats_ = r.table("stats").get(bucket).run()
    if stats_ is None:
        # get() yields None for an unknown key; report that as "not found"
        # instead of handing None to jsonify.
        flask.abort(404)
    return flask.jsonify(stats_)
@app.route("/api/jobs/<int:job_id>/sites")
@app.route("/api/job/<int:job_id>/sites")
def sites(job_id):
    """Return, as JSON ``{"sites": [...]}``, the sites belonging to a job.

    Looks up the "sites" table through its "job_id" secondary index.
    """
    matching = r.table("sites").get_all(job_id, index="job_id")
    return flask.jsonify(sites=list(matching.run()))
@app.route("/api/jobs/<int:job_id>")
@app.route("/api/job/<int:job_id>")
def job(job_id):
    """Return the "jobs" document whose primary key is *job_id* as JSON.

    Responds with HTTP 404 when no such document exists.
    """
    job_ = r.table("jobs").get(job_id).run()
    if job_ is None:
        # get() yields None for an unknown key; report that as "not found"
        # instead of handing None to jsonify.
        flask.abort(404)
    return flask.jsonify(job_)
@app.route("/api/workers")
def workers():
    """Return, as JSON ``{"workers": [...]}``, the services that the
    registry reports as available under the role "brozzler-worker"."""
    available = list(
        service_registry.available_services("brozzler-worker"))
    return flask.jsonify(workers=available)
@app.route("/api/services")
def services():
    """Return, as JSON ``{"services": [...]}``, everything the service
    registry reports as available (no role filter)."""
    available = list(service_registry.available_services())
    return flask.jsonify(services=available)
@app.route("/api/jobs")
def jobs():
    """Return, as JSON ``{"jobs": [...]}``, every document of the "jobs"
    table, ordered by ``id`` from highest to lowest."""
    all_jobs = list(r.table("jobs").run())
    # The ordering is applied here in Python, after the whole table is read.
    all_jobs.sort(key=lambda job_doc: job_doc['id'], reverse=True)
    return flask.jsonify(jobs=all_jobs)
@app.route("/api/config")
def config():
    """Expose the module-level SETTINGS dict as JSON under the "config" key."""
    payload = {"config": SETTINGS}
    return flask.jsonify(payload)
@app.route("/api/<path:path>")
@app.route("/api", defaults={"path": ""})
def api404(path):
    """Answer "/api" and any "/api/..." URL routed here with HTTP 404.

    *path* is accepted only because the route captures it; it is not used.
    """
    # NOTE(review): presumably registered so that unknown API URLs get a 404
    # rather than falling through to the catch-all root() route, which
    # renders index.html -- confirm.
    not_found = 404
    flask.abort(not_found)
2015-09-25 22:48:01 +00:00
@app.route("/", defaults={"path": ""})
@app.route("/<path:path>")
def root(path):
    """Render the "index.html" template for "/" and for any other path.

    *path* is captured by the route but not used: every URL handled here
    gets the same page.
    """
    page = flask.render_template("index.html")
    return page
2015-09-25 17:59:38 +00:00
if __name__ == "__main__":
    # Development entry point: serve on every interface, port 8081.
    #
    # Debug mode enables the interactive Werkzeug debugger, which lets
    # whoever can reach the port execute Python in this process. Because
    # the server binds to 0.0.0.0, debug mode is opt-in (FLASK_DEBUG=1)
    # instead of being hard-coded on.
    debug = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(host="0.0.0.0", port=8081, debug=debug)