rethinkdb connection per request, to server chosen randomly from list

Noah Levitt 2015-08-18 23:47:28 +00:00
parent a878730e02
commit 382c826678
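For context: the change below replaces the single long-lived connection (the old self._conn) with a short-lived connection opened per operation, against a server picked at random from the configured list. A minimal standalone sketch of that pattern, assuming an illustrative server list and database name that are not part of the commit; the rethinkdb driver's connection object is used as a context manager so it is closed when the block exits, which is what the new with-blocks in the diff rely on:

    import random
    import rethinkdb
    r = rethinkdb

    SERVERS = ["db0.example.com:28015", "db1.example.com"]  # illustrative server list

    def random_server_connection():
        # pick any server from the list; accept "host:port" or bare "host"
        server = random.choice(SERVERS)
        try:
            host, port = server.split(":")
            return r.connect(host=host, port=int(port))
        except ValueError:
            return r.connect(host=server)

    # one connection per request: open, run the query, close on block exit
    with random_server_connection() as conn:
        print(r.db("mydb").table_list().run(conn))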


@@ -4,6 +4,7 @@ import logging
 import brozzler
 import rethinkdb
 r = rethinkdb
+import random
 
 class UnexpectedDbResult(Exception):
     pass
@@ -16,41 +17,33 @@ class BrozzlerRethinkDb:
         self.db = db
         self.shards = shards
         self.replicas = replicas
+        self._ensure_db()
-        self._conn = self._connect(servers[0]) # XXX round robin
-        try:
-            tables = r.db(self.db).table_list().run(self._conn)
-            for tbl in "sites", "pages":
-                if not tbl in tables:
-                    raise Exception("rethinkdb database {} exists but does not have table {}".format(repr(self.db), repr(tbl)))
-        except rethinkdb.errors.ReqlOpFailedError as e:
-            self.logger.info("rethinkdb database %s does not exist, initializing", repr(self.db))
-            self._init_db()
-    def _connect(self, server):
-        self.logger.info("connecting to rethinkdb at %s", server)
+    # https://github.com/rethinkdb/rethinkdb-example-webpy-blog/blob/master/model.py
+    # "Best practices: Managing connections: a connection per request"
+    def _random_server_connection(self):
+        server = random.choice(self.servers)
         try:
             host, port = server.split(":")
             return r.connect(host=host, port=port)
         except ValueError:
             return r.connect(host=server)
+    # def _round_robin_connection(self):
+    #     while True:
+    #         for server in self.servers:
+    #             try:
+    #                 host, port = server.split(":")
+    #                 conn = r.connect(host=host, port=port)
+    #             except ValueError:
+    #                 conn = r.connect(host=server)
-    def _init_db(self):
-        r.db_create(self.db).run(self._conn)
-        # r.db("test").table_create("jobs", shards=self.shards, replicas=self.replicas).run(self._conn)
-        r.db(self.db).table_create("sites", shards=self.shards, replicas=self.replicas).run(self._conn)
-        r.db(self.db).table_create("pages", shards=self.shards, replicas=self.replicas).run(self._conn)
-        r.db(self.db).table("pages").index_create("priority_by_site", [r.row["site_id"], r.row["claimed"], r.row["brozzle_count"], r.row["priority"]]).run(self._conn)
-        self.logger.info("created database %s with tables 'sites' and 'pages'", self.db)
+    def _ensure_db(self):
+        with self._random_server_connection() as conn:
+            try:
+                tables = r.db(self.db).table_list().run(conn)
+                for tbl in "sites", "pages":
+                    if not tbl in tables:
+                        raise Exception("rethinkdb database {} exists but does not have table {}".format(repr(self.db), repr(tbl)))
+            except rethinkdb.errors.ReqlOpFailedError as e:
+                self.logger.info("rethinkdb database %s does not exist, initializing", repr(self.db))
+                r.db_create(self.db).run(conn)
+                # r.db("test").table_create("jobs", shards=self.shards, replicas=self.replicas).run(conn)
+                r.db(self.db).table_create("sites", shards=self.shards, replicas=self.replicas).run(conn)
+                r.db(self.db).table_create("pages", shards=self.shards, replicas=self.replicas).run(conn)
+                r.db(self.db).table("pages").index_create("priority_by_site", [r.row["site_id"], r.row["claimed"], r.row["brozzle_count"], r.row["priority"]]).run(conn)
+                self.logger.info("created database %s with tables 'sites' and 'pages'", self.db)
     def _vet_result(self, result, **kwargs):
         self.logger.debug("vetting expected=%s result=%s", kwargs, result)
@@ -69,52 +62,60 @@ class BrozzlerRethinkDb:
     def new_site(self, site):
         self.logger.info("inserting into 'sites' table %s", site)
-        result = r.db(self.db).table("sites").insert(site.to_dict()).run(self._conn)
-        self._vet_result(result, inserted=1)
-        site.id = result["generated_keys"][0]
+        with self._random_server_connection() as conn:
+            result = r.db(self.db).table("sites").insert(site.to_dict()).run(conn)
+            self._vet_result(result, inserted=1)
+            site.id = result["generated_keys"][0]
 
     def update_site(self, site):
         self.logger.debug("updating 'sites' table entry %s", site)
-        result = r.db(self.db).table("sites").get(site.id).update(site.to_dict()).run(self._conn)
-        self._vet_result(result, replaced=1)
+        with self._random_server_connection() as conn:
+            result = r.db(self.db).table("sites").get(site.id).update(site.to_dict()).run(conn)
+            self._vet_result(result, replaced=1)
 
     def update_page(self, page):
         self.logger.debug("updating 'pages' table entry %s", page)
-        result = r.db(self.db).table("pages").get(page.id).update(page.to_dict()).run(self._conn)
-        self._vet_result(result, replaced=[0,1], unchanged=[0,1])
+        with self._random_server_connection() as conn:
+            result = r.db(self.db).table("pages").get(page.id).update(page.to_dict()).run(conn)
+            self._vet_result(result, replaced=[0,1], unchanged=[0,1])
 
     def new_page(self, page):
         self.logger.debug("inserting into 'pages' table %s", page)
-        result = r.db(self.db).table("pages").insert(page.to_dict()).run(self._conn)
-        self._vet_result(result, inserted=1)
+        with self._random_server_connection() as conn:
+            result = r.db(self.db).table("pages").insert(page.to_dict()).run(conn)
+            self._vet_result(result, inserted=1)
 
     def claim_site(self):
         # XXX keep track of aggregate priority and prioritize sites accordingly?
-        result = r.db(self.db).table("sites").filter({"claimed":False,"status":"ACTIVE"}).limit(1).update({"claimed":True},return_changes=True).run(self._conn)
-        self._vet_result(result, replaced=[0,1])
-        if result["replaced"] == 1:
-            return brozzler.Site(**result["changes"][0]["new_val"])
-        else:
-            raise brozzler.NothingToClaim
+        with self._random_server_connection() as conn:
+            result = r.db(self.db).table("sites").filter({"claimed":False,"status":"ACTIVE"}).limit(1).update({"claimed":True},return_changes=True).run(conn)
+            self._vet_result(result, replaced=[0,1])
+            if result["replaced"] == 1:
+                return brozzler.Site(**result["changes"][0]["new_val"])
+            else:
+                raise brozzler.NothingToClaim
 
     def claim_page(self, site):
-        result = (r.db(self.db).table("pages")
-                .between([site.id,False,0,brozzler.MIN_PRIORITY], [site.id,False,0,brozzler.MAX_PRIORITY], index="priority_by_site")
-                .order_by(index=r.desc("priority_by_site")).limit(1)
-                .update({"claimed":True},return_changes=True).run(self._conn))
-        self._vet_result(result, replaced=[0,1])
-        if result["replaced"] == 1:
-            return brozzler.Page(**result["changes"][0]["new_val"])
-        else:
-            raise brozzler.NothingToClaim
+        with self._random_server_connection() as conn:
+            result = (r.db(self.db).table("pages")
+                    .between([site.id,False,0,brozzler.MIN_PRIORITY], [site.id,False,0,brozzler.MAX_PRIORITY], index="priority_by_site")
+                    .order_by(index=r.desc("priority_by_site")).limit(1)
+                    .update({"claimed":True},return_changes=True).run(conn))
+            self._vet_result(result, replaced=[0,1])
+            if result["replaced"] == 1:
+                return brozzler.Page(**result["changes"][0]["new_val"])
+            else:
+                raise brozzler.NothingToClaim
 
     def has_outstanding_pages(self, site):
-        cursor = r.db(self.db).table("pages").between([site.id,False,0,brozzler.MIN_PRIORITY], [site.id,True,0,brozzler.MAX_PRIORITY], index="priority_by_site").limit(1).run(self._conn)
-        return len(list(cursor)) > 0
+        with self._random_server_connection() as conn:
+            cursor = r.db(self.db).table("pages").between([site.id,False,0,brozzler.MIN_PRIORITY], [site.id,True,0,brozzler.MAX_PRIORITY], index="priority_by_site").limit(1).run(conn)
+            return len(list(cursor)) > 0
 
     def get_page(self, page):
-        result = r.db(self.db).table("pages").get(page.id).run(self._conn)
-        if result:
-            return brozzler.Page(**result)
-        else:
-            return None
+        with self._random_server_connection() as conn:
+            result = r.db(self.db).table("pages").get(page.id).run(conn)
+            if result:
+                return brozzler.Page(**result)
+            else:
+                return None
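Side note on the commented-out _round_robin_connection above: as committed it is only a stub (the loop never yields or returns a connection). If that alternative were ever picked up, one way to express it is a generator that cycles through the server list; the sketch below is purely illustrative and not part of the commit, with itertools.cycle standing in for the stub's nested while/for and the server names invented for the example:

    import itertools
    import rethinkdb
    r = rethinkdb

    def round_robin_connections(servers):
        # endlessly cycle through the servers, yielding a fresh connection
        # each time the generator is advanced (one connection per request)
        for server in itertools.cycle(servers):
            try:
                host, port = server.split(":")
                yield r.connect(host=host, port=int(port))
            except ValueError:
                yield r.connect(host=server)

    # usage sketch: take the next connection for each request
    conns = round_robin_connections(["db0.example.com:28015", "db1.example.com"])
    with next(conns) as conn:
        pass  # run a query here; the connection is closed on block exit

Random selection, as adopted in the commit, avoids keeping any shared iterator state between requests or threads, which is likely why it was preferred over the round-robin variant.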