From 98933e3db6d43dcb3c8c21d0b65e2647bc3fb303 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 6 Jan 2015 13:03:23 +0000 Subject: [PATCH] Only fetch prev_content when a client is streaming/paginating. Use transactions for event streams. --- synapse/storage/_base.py | 32 ++++++--- synapse/storage/stream.py | 142 ++++++++++++++++++-------------------- 2 files changed, 90 insertions(+), 84 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index d63655643..9702ab4f4 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -434,12 +434,15 @@ class SQLBaseStore(object): return self.runInteraction("_simple_max_id", func) - def _get_events(self, event_ids): + def _get_events(self, event_ids, check_redacted=True, + get_prev_content=False): return self.runInteraction( - "_get_events", self._get_events_txn, event_ids + "_get_events", self._get_events_txn, event_ids, + check_redacted=check_redacted, get_prev_content=get_prev_content, ) - def _get_events_txn(self, txn, event_ids): + def _get_events_txn(self, txn, event_ids, check_redacted=True, + get_prev_content=False): if not event_ids: return [] @@ -450,7 +453,13 @@ class SQLBaseStore(object): n = 50 split = [event_ids[i:i + n] for i in range(0, len(event_ids), n)] for e_ids in split: - events.extend(self._get_events_txn(txn, e_ids)) + events.extend( + self._get_events_txn( + txn, e_ids, + check_redacted=check_redacted, + get_prev_content=get_prev_content, + ) + ) return events logger.debug("_get_events_txn Fetching %d events", len(event_ids)) @@ -467,10 +476,17 @@ class SQLBaseStore(object): res = txn.fetchall() - return [self._get_event_from_row_txn(txn, *r) for r in res] + return [ + self._get_event_from_row_txn( + txn, r[0], r[1], r[2], + check_redacted=check_redacted, + get_prev_content=get_prev_content, + ) + for r in res + ] def _get_event_txn(self, txn, event_id, check_redacted=True, - get_prev_content=True): + get_prev_content=False): sql = ( "SELECT internal_metadata, json, r.event_id FROM event_json as e " "LEFT JOIN redactions as r ON e.event_id = r.redacts " @@ -493,14 +509,12 @@ class SQLBaseStore(object): ) def _get_event_from_row_txn(self, txn, internal_metadata, js, redacted, - check_redacted=True, get_prev_content=True): + check_redacted=True, get_prev_content=False): d = json.loads(js) internal_metadata = json.loads(internal_metadata) ev = FrozenEvent(d, internal_metadata_dict=internal_metadata) - return ev - if check_redacted and redacted: ev = prune_event(ev) diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 3405cb365..c51f48945 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -137,12 +137,12 @@ class StreamStore(SQLBaseStore): with_feedback=with_feedback, ) - @defer.inlineCallbacks @log_function def get_room_events_stream(self, user_id, from_key, to_key, room_id, limit=0, with_feedback=False): # TODO (erikj): Handle compressed feedback + current_room_membership_sql = ( "SELECT m.room_id FROM room_memberships as m " "INNER JOIN current_state_events as c ON m.event_id = c.event_id " @@ -157,11 +157,6 @@ class StreamStore(SQLBaseStore): "WHERE m.user_id = ? " ) - del_sql = ( - "SELECT event_id FROM redactions WHERE redacts = e.event_id " - "LIMIT 1" - ) - if limit: limit = max(limit, MAX_STREAM_SIZE) else: @@ -172,38 +167,42 @@ class StreamStore(SQLBaseStore): to_id = _parse_stream_token(to_key) if from_key == to_key: - defer.returnValue(([], to_key)) - return + return defer.succeed(([], to_key)) sql = ( - "SELECT *, (%(redacted)s) AS redacted FROM events AS e WHERE " + "SELECT e.event_id, e.stream_ordering FROM events AS e WHERE " "(e.outlier = 0 AND (room_id IN (%(current)s)) OR " "(event_id IN (%(invites)s))) " "AND e.stream_ordering > ? AND e.stream_ordering <= ? " "ORDER BY stream_ordering ASC LIMIT %(limit)d " ) % { - "redacted": del_sql, "current": current_room_membership_sql, "invites": membership_sql, "limit": limit } - rows = yield self._execute_and_decode( - sql, - user_id, user_id, from_id, to_id - ) + def f(txn): + txn.execute(sql, (user_id, user_id, from_id, to_id,)) - ret = yield self._parse_events(rows) + rows = self.cursor_to_dict(txn) - if rows: - key = "s%d" % max([r["stream_ordering"] for r in rows]) - else: - # Assume we didn't get anything because there was nothing to get. - key = to_key + ret = self._get_events_txn( + txn, + [r["event_id"] for r in rows], + get_prev_content=True + ) - defer.returnValue((ret, key)) + if rows: + key = "s%d" % max([r["stream_ordering"] for r in rows]) + else: + # Assume we didn't get anything because there was nothing to + # get. + key = to_key + + return ret, key + + return self.runInteraction("get_room_events_stream", f) - @defer.inlineCallbacks @log_function def paginate_room_events(self, room_id, from_key, to_key=None, direction='b', limit=-1, @@ -221,7 +220,9 @@ class StreamStore(SQLBaseStore): bounds = _get_token_bound(from_key, from_comp) if to_key: - bounds = "%s AND %s" % (bounds, _get_token_bound(to_key, to_comp)) + bounds = "%s AND %s" % ( + bounds, _get_token_bound(to_key, to_comp) + ) if int(limit) > 0: args.append(int(limit)) @@ -229,87 +230,78 @@ class StreamStore(SQLBaseStore): else: limit_str = "" - del_sql = ( - "SELECT event_id FROM redactions WHERE redacts = events.event_id " - "LIMIT 1" - ) - sql = ( - "SELECT *, (%(redacted)s) AS redacted FROM events" + "SELECT * FROM events" " WHERE outlier = 0 AND room_id = ? AND %(bounds)s" " ORDER BY topological_ordering %(order)s," " stream_ordering %(order)s %(limit)s" ) % { - "redacted": del_sql, "bounds": bounds, "order": order, "limit": limit_str } - rows = yield self._execute_and_decode( - sql, - *args - ) + def f(txn): + txn.execute(sql, args) - if rows: - topo = rows[-1]["topological_ordering"] - toke = rows[-1]["stream_ordering"] - if direction == 'b': - topo -= 1 - toke -= 1 - next_token = "t%s-%s" % (topo, toke) - else: - # TODO (erikj): We should work out what to do here instead. - next_token = to_key if to_key else from_key + rows = self.cursor_to_dict(txn) - events = yield self._parse_events(rows) + if rows: + topo = rows[-1]["topological_ordering"] + toke = rows[-1]["stream_ordering"] + if direction == 'b': + topo -= 1 + toke -= 1 + next_token = "t%s-%s" % (topo, toke) + else: + # TODO (erikj): We should work out what to do here instead. + next_token = to_key if to_key else from_key - defer.returnValue( - ( - events, - next_token + events = self._get_events_txn( + txn, + [r["event_id"] for r in rows], + get_prev_content=True ) - ) - @defer.inlineCallbacks + return events, next_token, + + return self.runInteraction("paginate_room_events", f) + def get_recent_events_for_room(self, room_id, limit, end_token, with_feedback=False): # TODO (erikj): Handle compressed feedback - del_sql = ( - "SELECT event_id FROM redactions WHERE redacts = events.event_id " - "LIMIT 1" - ) - sql = ( - "SELECT *, (%(redacted)s) AS redacted FROM events " + "SELECT * FROM events " "WHERE room_id = ? AND stream_ordering <= ? AND outlier = 0 " "ORDER BY topological_ordering DESC, stream_ordering DESC LIMIT ? " - ) % { - "redacted": del_sql, - } - - rows = yield self._execute_and_decode( - sql, - room_id, end_token, limit ) - rows.reverse() # As we selected with reverse ordering + def f(txn): + txn.execute(sql, (room_id, end_token, limit,)) - if rows: - topo = rows[0]["topological_ordering"] - toke = rows[0]["stream_ordering"] - start_token = "t%s-%s" % (topo, toke) + rows = self.cursor_to_dict(txn) - token = (start_token, end_token) - else: - token = (end_token, end_token) + rows.reverse() # As we selected with reverse ordering - events = yield self._parse_events(rows) + if rows: + topo = rows[0]["topological_ordering"] + toke = rows[0]["stream_ordering"] + start_token = "t%s-%s" % (topo, toke) - ret = (events, token) + token = (start_token, end_token) + else: + token = (end_token, end_token) - defer.returnValue(ret) + events = self._get_events_txn( + txn, + [r["event_id"] for r in rows], + get_prev_content=True + ) + + return events, token + + return self.runInteraction("get_recent_events_for_room", f) def get_room_events_max_id(self): return self.runInteraction(