Refactor getting replication updates from database. (#7636)

The aim here is to make it easier to reason about when streams are limited and when they're not, by moving the logic into the database functions themselves. This should mean we can kill off the `db_query_to_update_function` function.
This commit is contained in:
Erik Johnston 2020-06-16 17:10:28 +01:00 committed by GitHub
parent 231252516c
commit f6f7511a4c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 251 additions and 72 deletions

View file

@ -16,6 +16,7 @@
import abc
import logging
from typing import List, Tuple
from canonicaljson import json
@ -267,26 +268,79 @@ class ReceiptsWorkerStore(SQLBaseStore):
}
return results
def get_all_updated_receipts(self, last_id, current_id, limit=None):
def get_users_sent_receipts_between(self, last_id: int, current_id: int):
"""Get all users who sent receipts between `last_id` exclusive and
`current_id` inclusive.
Returns:
Deferred[List[str]]
"""
if last_id == current_id:
return defer.succeed([])
def get_all_updated_receipts_txn(txn):
sql = (
"SELECT stream_id, room_id, receipt_type, user_id, event_id, data"
" FROM receipts_linearized"
" WHERE ? < stream_id AND stream_id <= ?"
" ORDER BY stream_id ASC"
)
args = [last_id, current_id]
if limit is not None:
sql += " LIMIT ?"
args.append(limit)
txn.execute(sql, args)
def _get_users_sent_receipts_between_txn(txn):
sql = """
SELECT DISTINCT user_id FROM receipts_linearized
WHERE ? < stream_id AND stream_id <= ?
"""
txn.execute(sql, (last_id, current_id))
return [r[0:5] + (json.loads(r[5]),) for r in txn]
return [r[0] for r in txn]
return self.db.runInteraction(
"get_users_sent_receipts_between", _get_users_sent_receipts_between_txn
)
async def get_all_updated_receipts(
    self, instance_name: str, last_id: int, current_id: int, limit: int
) -> Tuple[List[Tuple[int, list]], int, bool]:
    """Fetch rows for the receipts replication stream.

    Args:
        instance_name: The writer to fetch updates from. Unused here since
            there is only ever one writer.
        last_id: Token to fetch updates after (exclusive lower bound).
        current_id: Token to fetch updates up to (inclusive upper bound).
        limit: The requested limit on the number of rows to return. The
            function may return more or fewer rows.

    Returns:
        A 3-tuple of ``(updates, upto_token, limited)``:

        * ``updates`` is a list of 2-tuples of stream ID and row data, in
          ascending stream ID order. The row data is ``(room_id,
          receipt_type, user_id, event_id, data)`` with ``data`` already
          decoded from JSON.
        * ``upto_token`` can be passed as ``last_id`` in a subsequent call
          to fetch further updates.
        * ``limited`` is True when the limit may have cut the result short,
          i.e. more rows may exist between the requested tokens.
    """
    # An empty token range cannot contain rows, so skip the database.
    if last_id == current_id:
        return [], current_id, False

    def get_all_updated_receipts_txn(txn):
        sql = """
            SELECT stream_id, room_id, receipt_type, user_id, event_id, data
            FROM receipts_linearized
            WHERE ? < stream_id AND stream_id <= ?
            ORDER BY stream_id ASC
            LIMIT ?
        """
        txn.execute(sql, (last_id, current_id, limit))

        rows = []
        for row in txn:
            # Column 0 is the stream ID; the last column holds JSON text.
            content = json.loads(row[5])
            rows.append((row[0], row[1:5] + (content,)))

        if len(rows) != limit:
            # Fewer rows than requested: the whole range has been read.
            return rows, current_id, False

        # Hit the limit: only vouch for the stream up to the last row seen.
        return rows, rows[-1][0], True

    return await self.db.runInteraction(
        "get_all_updated_receipts", get_all_updated_receipts_txn
    )