From 58f8226c7f2aaf9ebe39703edc91ad5cf1b01112 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Mon, 21 Mar 2016 14:20:34 +0000 Subject: [PATCH 01/35] remove unused current_state variable from on_receive_pdu --- synapse/handlers/federation.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index f599e817a..c172877bd 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -123,7 +123,6 @@ class FederationHandler(BaseHandler): # FIXME (erikj): Awful hack to make the case where we are not currently # in the room work - current_state = None is_in_room = yield self.auth.check_host_in_room( event.room_id, self.server_name @@ -187,7 +186,6 @@ class FederationHandler(BaseHandler): event, state=state, backfilled=backfilled, - current_state=current_state, ) except AuthError as e: raise FederationError( From 3e7fac0d56dca5b389ef7a671c1cd6b0795724c8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 21 Mar 2016 14:03:20 +0000 Subject: [PATCH 02/35] Add published room list edit API --- synapse/api/auth.py | 54 ++++++++++++++++++++++++++--- synapse/handlers/directory.py | 16 +++++++++ synapse/rest/client/v1/directory.py | 42 ++++++++++++++++++++++ synapse/storage/room.py | 8 +++++ 4 files changed, 116 insertions(+), 4 deletions(-) diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 3038df4ab..4f9c3c9db 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -814,17 +814,16 @@ class Auth(object): return auth_ids - @log_function - def _can_send_event(self, event, auth_events): + def _get_send_level(self, etype, state_key, auth_events): key = (EventTypes.PowerLevels, "", ) send_level_event = auth_events.get(key) send_level = None if send_level_event: send_level = send_level_event.content.get("events", {}).get( - event.type + etype ) if send_level is None: - if hasattr(event, "state_key"): + if state_key is not None: send_level = send_level_event.content.get( "state_default", 50 ) @@ -838,6 +837,13 @@ class Auth(object): else: send_level = 0 + return send_level + + @log_function + def _can_send_event(self, event, auth_events): + send_level = self._get_send_level( + event.type, event.get("state_key", None), auth_events + ) user_level = self._get_user_power_level(event.user_id, auth_events) if user_level < send_level: @@ -982,3 +988,43 @@ class Auth(object): "You don't have permission to add ops level greater " "than your own" ) + + @defer.inlineCallbacks + def check_can_change_room_list(self, room_id, user): + """Check if the user is allowed to edit the room's entry in the + published room list. + + Args: + room_id (str) + user (UserID) + """ + + is_admin = yield self.is_server_admin(user) + if is_admin: + defer.returnValue(True) + + user_id = user.to_string() + yield self.check_joined_room(room_id, user_id) + + # We currently require the user is a "moderator" in the room. We do this + # by checking if they would (theoretically) be able to change the + # m.room.aliases events + power_level_event = yield self.state.get_current_state( + room_id, EventTypes.PowerLevels, "" + ) + + auth_events = {} + if power_level_event: + auth_events[(EventTypes.PowerLevels, "")] = power_level_event + + send_level = self._get_send_level( + EventTypes.Aliases, "", auth_events + ) + user_level = self._get_user_power_level(user_id, auth_events) + + if user_level < send_level: + raise AuthError( + 403, + "This server requires you to be a moderator in the room to" + " edit its room list entry" + ) diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 6bcc5a5e2..b2617c889 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -317,3 +317,19 @@ class DirectoryHandler(BaseHandler): is_admin = yield self.auth.is_server_admin(UserID.from_string(user_id)) defer.returnValue(is_admin) + + @defer.inlineCallbacks + def edit_published_room_list(self, requester, room_id, visibility): + if requester.is_guest: + raise AuthError(403, "Guests cannot edit the published room list") + + if visibility not in ["public", "private"]: + raise SynapseError(400, "Invalide visibility setting") + + room = yield self.store.get_room(room_id) + if room is None: + raise SynapseError(400, "Unknown room") + + yield self.auth.check_can_change_room_list(room_id, requester.user) + + yield self.store.set_room_is_public(room_id, visibility == "public") diff --git a/synapse/rest/client/v1/directory.py b/synapse/rest/client/v1/directory.py index 59a23d6cb..8ac09419d 100644 --- a/synapse/rest/client/v1/directory.py +++ b/synapse/rest/client/v1/directory.py @@ -30,6 +30,7 @@ logger = logging.getLogger(__name__) def register_servlets(hs, http_server): ClientDirectoryServer(hs).register(http_server) + ClientDirectoryListServer(hs).register(http_server) class ClientDirectoryServer(ClientV1RestServlet): @@ -137,3 +138,44 @@ class ClientDirectoryServer(ClientV1RestServlet): ) defer.returnValue((200, {})) + + +class ClientDirectoryListServer(ClientV1RestServlet): + PATTERNS = client_path_patterns("/directory/list/room/(?P[^/]*)$") + + def __init__(self, hs): + super(ClientDirectoryListServer, self).__init__(hs) + self.store = hs.get_datastore() + + @defer.inlineCallbacks + def on_GET(self, request, room_id): + room = yield self.store.get_room(room_id) + if room is None: + raise SynapseError(400, "Unknown room") + + defer.returnValue((200, { + "visibility": "public" if room["is_public"] else "private" + })) + + @defer.inlineCallbacks + def on_PUT(self, request, room_id): + requester = yield self.auth.get_user_by_req(request) + + content = parse_json_object_from_request(request) + visibility = content.get("visibility", "public") + + yield self.handlers.directory_handler.edit_published_room_list( + requester, room_id, visibility, + ) + + defer.returnValue((200, {})) + + @defer.inlineCallbacks + def on_DELETE(self, request, room_id): + requester = yield self.auth.get_user_by_req(request) + + yield self.handlers.directory_handler.edit_published_room_list( + requester, room_id, "private", + ) + + defer.returnValue((200, {})) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 46ab38a31..9be977f38 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -77,6 +77,14 @@ class RoomStore(SQLBaseStore): allow_none=True, ) + def set_room_is_public(self, room_id, is_public): + return self._simple_update_one( + table="rooms", + keyvalues={"room_id": room_id}, + updatevalues={"is_public": is_public}, + desc="set_room_is_public", + ) + def get_public_room_ids(self): return self._simple_select_onecol( table="rooms", From 5244c0b48ebb86273bbb79a1935cf5893aa6f310 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Mon, 21 Mar 2016 18:03:08 +0000 Subject: [PATCH 03/35] Remove unused backfilled parameter from persist_event --- synapse/federation/federation_server.py | 1 - synapse/handlers/federation.py | 38 ++++++++++--------------- synapse/storage/events.py | 22 +++----------- 3 files changed, 19 insertions(+), 42 deletions(-) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 76820b924..429ab6dde 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -531,7 +531,6 @@ class FederationServer(FederationBase): yield self.handler.on_receive_pdu( origin, pdu, - backfilled=False, state=state, auth_chain=auth_chain, ) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index c172877bd..267fedf11 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -102,7 +102,7 @@ class FederationHandler(BaseHandler): @log_function @defer.inlineCallbacks - def on_receive_pdu(self, origin, pdu, backfilled, state=None, + def on_receive_pdu(self, origin, pdu, state=None, auth_chain=None): """ Called by the ReplicationLayer when we have a new pdu. We need to do auth checks and put it through the StateHandler. @@ -185,7 +185,6 @@ class FederationHandler(BaseHandler): origin, event, state=state, - backfilled=backfilled, ) except AuthError as e: raise FederationError( @@ -214,18 +213,17 @@ class FederationHandler(BaseHandler): except StoreError: logger.exception("Failed to store room.") - if not backfilled: - extra_users = [] - if event.type == EventTypes.Member: - target_user_id = event.state_key - target_user = UserID.from_string(target_user_id) - extra_users.append(target_user) + extra_users = [] + if event.type == EventTypes.Member: + target_user_id = event.state_key + target_user = UserID.from_string(target_user_id) + extra_users.append(target_user) - with PreserveLoggingContext(): - self.notifier.on_new_room_event( - event, event_stream_id, max_stream_id, - extra_users=extra_users - ) + with PreserveLoggingContext(): + self.notifier.on_new_room_event( + event, event_stream_id, max_stream_id, + extra_users=extra_users + ) if event.type == EventTypes.Member: if event.membership == Membership.JOIN: @@ -645,7 +643,7 @@ class FederationHandler(BaseHandler): continue try: - self.on_receive_pdu(origin, p, backfilled=False) + self.on_receive_pdu(origin, p) except: logger.exception("Couldn't handle pdu") @@ -777,7 +775,6 @@ class FederationHandler(BaseHandler): event_stream_id, max_stream_id = yield self.store.persist_event( event, context=context, - backfilled=False, ) target_user = UserID.from_string(event.state_key) @@ -817,7 +814,6 @@ class FederationHandler(BaseHandler): event_stream_id, max_stream_id = yield self.store.persist_event( event, context=context, - backfilled=False, ) target_user = UserID.from_string(event.state_key) @@ -1072,8 +1068,7 @@ class FederationHandler(BaseHandler): @defer.inlineCallbacks @log_function - def _handle_new_event(self, origin, event, state=None, backfilled=False, - current_state=None, auth_events=None): + def _handle_new_event(self, origin, event, state=None, auth_events=None): outlier = event.internal_metadata.is_outlier() @@ -1083,7 +1078,7 @@ class FederationHandler(BaseHandler): auth_events=auth_events, ) - if not backfilled and not event.internal_metadata.is_outlier(): + if not event.internal_metadata.is_outlier(): action_generator = ActionGenerator(self.hs) yield action_generator.handle_push_actions_for_event( event, context, self @@ -1092,9 +1087,7 @@ class FederationHandler(BaseHandler): event_stream_id, max_stream_id = yield self.store.persist_event( event, context=context, - backfilled=backfilled, - is_new_state=(not outlier and not backfilled), - current_state=current_state, + is_new_state=not outlier, ) defer.returnValue((context, event_stream_id, max_stream_id)) @@ -1192,7 +1185,6 @@ class FederationHandler(BaseHandler): event_stream_id, max_stream_id = yield self.store.persist_event( event, new_event_context, - backfilled=False, is_new_state=True, current_state=state, ) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 285c586cf..e444b64ce 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -101,30 +101,16 @@ class EventsStore(SQLBaseStore): @defer.inlineCallbacks @log_function - def persist_event(self, event, context, backfilled=False, + def persist_event(self, event, context, is_new_state=True, current_state=None): - stream_ordering = None - if backfilled: - self.min_stream_token -= 1 - stream_ordering = self.min_stream_token - - if stream_ordering is None: - stream_ordering_manager = self._stream_id_gen.get_next() - else: - @contextmanager - def stream_ordering_manager(): - yield stream_ordering - stream_ordering_manager = stream_ordering_manager() - try: - with stream_ordering_manager as stream_ordering: + with self._stream_id_gen.get_next() as stream_ordering: event.internal_metadata.stream_ordering = stream_ordering yield self.runInteraction( "persist_event", self._persist_event_txn, event=event, context=context, - backfilled=backfilled, is_new_state=is_new_state, current_state=current_state, ) @@ -166,7 +152,7 @@ class EventsStore(SQLBaseStore): defer.returnValue(events[0] if events else None) @log_function - def _persist_event_txn(self, txn, event, context, backfilled, + def _persist_event_txn(self, txn, event, context, is_new_state=True, current_state=None): # We purposefully do this first since if we include a `current_state` # key, we *want* to update the `current_state_events` table @@ -198,7 +184,7 @@ class EventsStore(SQLBaseStore): return self._persist_events_txn( txn, [(event, context)], - backfilled=backfilled, + backfilled=False, is_new_state=is_new_state, ) From d3654694d09ccbc672c8a2373f09b4a3a24442b8 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 22 Mar 2016 00:52:31 +0000 Subject: [PATCH 04/35] an invalide is something else... --- synapse/handlers/directory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index b2617c889..f6143521f 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -324,7 +324,7 @@ class DirectoryHandler(BaseHandler): raise AuthError(403, "Guests cannot edit the published room list") if visibility not in ["public", "private"]: - raise SynapseError(400, "Invalide visibility setting") + raise SynapseError(400, "Invalid visibility setting") room = yield self.store.get_room(room_id) if room is None: From b5912776207b328ad1c2722ef01f836b54e5383e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 22 Mar 2016 10:32:50 +0000 Subject: [PATCH 05/35] Make stateGroupCache honour CACHE_SIZE_FACTOR --- synapse/storage/_base.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 7dc67ecd5..583b77a83 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -26,6 +26,10 @@ from twisted.internet import defer import sys import time import threading +import os + + +CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.1)) logger = logging.getLogger(__name__) @@ -163,7 +167,9 @@ class SQLBaseStore(object): self._get_event_cache = Cache("*getEvent*", keylen=3, lru=True, max_entries=hs.config.event_cache_size) - self._state_group_cache = DictionaryCache("*stateGroupCache*", 2000) + self._state_group_cache = DictionaryCache( + "*stateGroupCache*", 2000 * CACHE_SIZE_FACTOR + ) self._event_fetch_lock = threading.Condition() self._event_fetch_list = [] From 97785bfc0fe42619183e73432b897d2740fa74f8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 22 Mar 2016 10:41:44 +0000 Subject: [PATCH 06/35] Doc string --- synapse/handlers/directory.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index f6143521f..8eeb22581 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -320,6 +320,12 @@ class DirectoryHandler(BaseHandler): @defer.inlineCallbacks def edit_published_room_list(self, requester, room_id, visibility): + """Edit the entry of the room in the published room list. + + requester + room_id (str) + visibility (str): "public" or "private" + """ if requester.is_guest: raise AuthError(403, "Guests cannot edit the published room list") From 2c86187a1bd61901ecf4adca3dcced9f68cf26de Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 22 Mar 2016 11:59:31 +0000 Subject: [PATCH 07/35] Don't cache events in _state_group_cache Instead, simply cache the event ids, relying on the event cache to cache the actual events. The problem was that while the state groups cache was limited in the number of groups it could hold, each individual group could consist of thousands of events. --- synapse/storage/state.py | 108 +++++++++++++++++---------------------- 1 file changed, 48 insertions(+), 60 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 8ed8a21b0..f06c734c4 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -172,7 +172,7 @@ class StateStore(SQLBaseStore): defer.returnValue(events) def _get_state_groups_from_groups(self, groups, types): - """Returns dictionary state_group -> state event ids + """Returns dictionary state_group -> (dict of (type, state_key) -> event id) """ def f(txn, groups): if types is not None: @@ -183,7 +183,8 @@ class StateStore(SQLBaseStore): where_clause = "" sql = ( - "SELECT state_group, event_id FROM state_groups_state WHERE" + "SELECT state_group, event_id, type, state_key" + " FROM state_groups_state WHERE" " state_group IN (%s) %s" % ( ",".join("?" for _ in groups), where_clause, @@ -199,7 +200,8 @@ class StateStore(SQLBaseStore): results = {} for row in rows: - results.setdefault(row["state_group"], []).append(row["event_id"]) + key = (row["type"], row["state_key"]) + results.setdefault(row["state_group"], {})[key] = row["event_id"] return results chunks = [groups[i:i + 100] for i in xrange(0, len(groups), 100)] @@ -296,7 +298,7 @@ class StateStore(SQLBaseStore): where a `state_key` of `None` matches all state_keys for the `type`. """ - is_all, state_dict = self._state_group_cache.get(group) + is_all, state_dict_ids = self._state_group_cache.get(group) type_to_key = {} missing_types = set() @@ -308,7 +310,7 @@ class StateStore(SQLBaseStore): if type_to_key.get(typ, object()) is not None: type_to_key.setdefault(typ, set()).add(state_key) - if (typ, state_key) not in state_dict: + if (typ, state_key) not in state_dict_ids: missing_types.add((typ, state_key)) sentinel = object() @@ -326,7 +328,7 @@ class StateStore(SQLBaseStore): got_all = not (missing_types or types is None) return { - k: v for k, v in state_dict.items() + k: v for k, v in state_dict_ids.items() if include(k[0], k[1]) }, missing_types, got_all @@ -340,8 +342,9 @@ class StateStore(SQLBaseStore): Args: group: The state group to lookup """ - is_all, state_dict = self._state_group_cache.get(group) - return state_dict, is_all + is_all, state_dict_ids = self._state_group_cache.get(group) + + return state_dict_ids, is_all @defer.inlineCallbacks def _get_state_for_groups(self, groups, types=None): @@ -354,84 +357,69 @@ class StateStore(SQLBaseStore): missing_groups = [] if types is not None: for group in set(groups): - state_dict, missing_types, got_all = self._get_some_state_from_cache( + state_dict_ids, missing_types, got_all = self._get_some_state_from_cache( group, types ) - results[group] = state_dict + results[group] = state_dict_ids if not got_all: missing_groups.append(group) else: for group in set(groups): - state_dict, got_all = self._get_all_state_from_cache( + state_dict_ids, got_all = self._get_all_state_from_cache( group ) - results[group] = state_dict + + results[group] = state_dict_ids if not got_all: missing_groups.append(group) - if not missing_groups: - defer.returnValue({ - group: { - type_tuple: event - for type_tuple, event in state.items() - if event - } - for group, state in results.items() - }) + if missing_groups: + # Okay, so we have some missing_types, lets fetch them. + cache_seq_num = self._state_group_cache.sequence - # Okay, so we have some missing_types, lets fetch them. - cache_seq_num = self._state_group_cache.sequence + group_to_state_dict = yield self._get_state_groups_from_groups( + missing_groups, types + ) - group_state_dict = yield self._get_state_groups_from_groups( - missing_groups, types - ) + # Now we want to update the cache with all the things we fetched + # from the database. + for group, group_state_dict in group_to_state_dict.items(): + if types: + # We delibrately put key -> None mappings into the cache to + # cache absence of the key, on the assumption that if we've + # explicitly asked for some types then we will probably ask + # for them again. + state_dict = {key: None for key in types} + state_dict.update(results[group]) + results[group] = state_dict + else: + state_dict = results[group] + + state_dict.update(group_state_dict) + + self._state_group_cache.update( + cache_seq_num, + key=group, + value=state_dict, + full=(types is None), + ) state_events = yield self._get_events( - [e_id for l in group_state_dict.values() for e_id in l], + [ev_id for sd in results.values() for ev_id in sd.values()], get_prev_content=False ) state_events = {e.event_id: e for e in state_events} - # Now we want to update the cache with all the things we fetched - # from the database. - for group, state_ids in group_state_dict.items(): - if types: - # We delibrately put key -> None mappings into the cache to - # cache absence of the key, on the assumption that if we've - # explicitly asked for some types then we will probably ask - # for them again. - state_dict = {key: None for key in types} - state_dict.update(results[group]) - results[group] = state_dict - else: - state_dict = results[group] - - for event_id in state_ids: - try: - state_event = state_events[event_id] - state_dict[(state_event.type, state_event.state_key)] = state_event - except KeyError: - # Hmm. So we do don't have that state event? Interesting. - logger.warn( - "Can't find state event %r for state group %r", - event_id, group, - ) - - self._state_group_cache.update( - cache_seq_num, - key=group, - value=state_dict, - full=(types is None), - ) - # Remove all the entries with None values. The None values were just # used for bookkeeping in the cache. for group, state_dict in results.items(): results[group] = { - key: event for key, event in state_dict.items() if event + key: state_events[event_id] + for key, event_id in state_dict.items() + if event_id and event_id in state_events } defer.returnValue(results) From 5defb25ac622d979c13d9e9d311e69c2ef7c15a5 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Tue, 22 Mar 2016 13:52:45 +0000 Subject: [PATCH 08/35] Use get_users_in_room to count the number of room members rather than using read_receipts --- synapse/push/bulk_push_rule_evaluator.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 87d5061fb..76d7eb7ce 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -107,7 +107,9 @@ class BulkPushRuleEvaluator: users_dict.items(), [event], {event.event_id: current_state} ) - evaluator = PushRuleEvaluatorForEvent(event, len(self.users_in_room)) + room_members = yield self.store.get_users_in_room(self.room_id) + + evaluator = PushRuleEvaluatorForEvent(event, len(room_members)) condition_cache = {} From 76d18a577661257abe0d047129383dbd09bbe9b1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 22 Mar 2016 14:08:13 +0000 Subject: [PATCH 09/35] Bump get_aliases_for_room cache --- synapse/storage/directory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/directory.py b/synapse/storage/directory.py index 012a0b414..ef231a04d 100644 --- a/synapse/storage/directory.py +++ b/synapse/storage/directory.py @@ -155,7 +155,7 @@ class DirectoryStore(SQLBaseStore): return room_id - @cached() + @cached(max_entries=5000) def get_aliases_for_room(self, room_id): return self._simple_select_onecol( "room_aliases", From 6cf0ba14663c263ea5f423a4b24b3915b8349e36 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 22 Mar 2016 14:18:21 +0000 Subject: [PATCH 10/35] Bump get_unread_event_push_actions_by_room_for_user cache --- synapse/storage/event_push_actions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 5820539a9..dc5830450 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -49,7 +49,7 @@ class EventPushActionsStore(SQLBaseStore): ) self._simple_insert_many_txn(txn, "event_push_actions", values) - @cachedInlineCallbacks(num_args=3, lru=True, tree=True) + @cachedInlineCallbacks(num_args=3, lru=True, tree=True, max_entries=5000) def get_unread_event_push_actions_by_room_for_user( self, room_id, user_id, last_read_event_id ): From d787e41b2046572d42c13b6aa9b4636f98f7f9e9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 22 Mar 2016 14:44:48 +0000 Subject: [PATCH 11/35] Measure StateHandler._resolve_events --- synapse/state.py | 74 +++++++++++++++++++++++++----------------------- 1 file changed, 38 insertions(+), 36 deletions(-) diff --git a/synapse/state.py b/synapse/state.py index b9a138752..e09632972 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -18,6 +18,7 @@ from twisted.internet import defer from synapse.util.logutils import log_function from synapse.util.caches.expiringcache import ExpiringCache +from synapse.util.metrics import Measure from synapse.api.constants import EventTypes from synapse.api.errors import AuthError from synapse.api.auth import AuthEventTypes @@ -263,48 +264,49 @@ class StateHandler(object): from (type, state_key) to event. prev_states is a list of event_ids. :rtype: (dict[(str, str), synapse.events.FrozenEvent], list[str]) """ - state = {} - for st in state_sets: - for e in st: - state.setdefault( - (e.type, e.state_key), - {} - )[e.event_id] = e + with Measure(self.clock, "state._resolve_events"): + state = {} + for st in state_sets: + for e in st: + state.setdefault( + (e.type, e.state_key), + {} + )[e.event_id] = e - unconflicted_state = { - k: v.values()[0] for k, v in state.items() - if len(v.values()) == 1 - } + unconflicted_state = { + k: v.values()[0] for k, v in state.items() + if len(v.values()) == 1 + } - conflicted_state = { - k: v.values() - for k, v in state.items() - if len(v.values()) > 1 - } + conflicted_state = { + k: v.values() + for k, v in state.items() + if len(v.values()) > 1 + } - if event_type: - prev_states_events = conflicted_state.get( - (event_type, state_key), [] - ) - prev_states = [s.event_id for s in prev_states_events] - else: - prev_states = [] + if event_type: + prev_states_events = conflicted_state.get( + (event_type, state_key), [] + ) + prev_states = [s.event_id for s in prev_states_events] + else: + prev_states = [] - auth_events = { - k: e for k, e in unconflicted_state.items() - if k[0] in AuthEventTypes - } + auth_events = { + k: e for k, e in unconflicted_state.items() + if k[0] in AuthEventTypes + } - try: - resolved_state = self._resolve_state_events( - conflicted_state, auth_events - ) - except: - logger.exception("Failed to resolve state") - raise + try: + resolved_state = self._resolve_state_events( + conflicted_state, auth_events + ) + except: + logger.exception("Failed to resolve state") + raise - new_state = unconflicted_state - new_state.update(resolved_state) + new_state = unconflicted_state + new_state.update(resolved_state) return new_state, prev_states From 99f929f36b396b7152b3840c11e8debc5505f673 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 22 Mar 2016 15:31:13 +0000 Subject: [PATCH 12/35] Make StateHandler._state_cache only store event_ids. --- synapse/state.py | 24 +++++++++++++++++------- synapse/storage/events.py | 25 +++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/synapse/state.py b/synapse/state.py index e09632972..9d90a437d 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -28,6 +28,7 @@ from collections import namedtuple import logging import hashlib +import os logger = logging.getLogger(__name__) @@ -35,8 +36,11 @@ logger = logging.getLogger(__name__) KeyStateTuple = namedtuple("KeyStateTuple", ("context", "type", "state_key")) -SIZE_OF_CACHE = 1000 -EVICTION_TIMEOUT_SECONDS = 20 +CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.1)) + + +SIZE_OF_CACHE = int(5000 * CACHE_SIZE_FACTOR) +EVICTION_TIMEOUT_SECONDS = 60 * 60 class _StateCacheEntry(object): @@ -92,7 +96,9 @@ class StateHandler(object): if cache: cache.ts = self.clock.time_msec() - state = cache.state + + event_dict = yield self.store.get_events(cache.state.values()) + state = {(e.type, e.state_key): e for e in event_dict.values()} else: res = yield self.resolve_state_groups(room_id, event_ids) state = res[1] @@ -191,14 +197,18 @@ class StateHandler(object): cache = self._state_cache.get(frozenset(event_ids), None) if cache and cache.state_group: cache.ts = self.clock.time_msec() - prev_state = cache.state.get((event_type, state_key), None) + + event_dict = yield self.store.get_events(cache.state.values()) + state = {(e.type, e.state_key): e for e in event_dict.values()} + + prev_state = state.get((event_type, state_key), None) if prev_state: prev_state = prev_state.event_id prev_states = [prev_state] else: prev_states = [] defer.returnValue( - (cache.state_group, cache.state, prev_states) + (cache.state_group, state, prev_states) ) state_groups = yield self.store.get_state_groups( @@ -226,7 +236,7 @@ class StateHandler(object): if self._state_cache is not None: cache = _StateCacheEntry( - state=state, + state={key: event.event_id for key, event in state.items()}, state_group=name, ts=self.clock.time_msec() ) @@ -241,7 +251,7 @@ class StateHandler(object): if self._state_cache is not None: cache = _StateCacheEntry( - state=new_state, + state={key: event.event_id for key, event in new_state.items()}, state_group=None, ts=self.clock.time_msec() ) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index e444b64ce..584e659d4 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -151,6 +151,31 @@ class EventsStore(SQLBaseStore): defer.returnValue(events[0] if events else None) + @defer.inlineCallbacks + def get_events(self, event_ids, check_redacted=True, + get_prev_content=False, allow_rejected=False): + """Get events from the database + + Args: + event_ids (list): The event_ids of the events to fetch + check_redacted (bool): If True, check if event has been redacted + and redact it. + get_prev_content (bool): If True and event is a state event, + include the previous states content in the unsigned field. + allow_rejected (bool): If True return rejected events. + + Returns: + Deferred : Dict from event_id to event. + """ + events = yield self._get_events( + event_ids, + check_redacted=check_redacted, + get_prev_content=get_prev_content, + allow_rejected=allow_rejected, + ) + + defer.returnValue({e.event_id: e for e in events}) + @log_function def _persist_event_txn(self, txn, event, context, is_new_state=True, current_state=None): From c4a8cbd15a471d2a658de96abcc3254fc95de1bf Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 22 Mar 2016 16:06:21 +0000 Subject: [PATCH 13/35] Make LruCache use a dedicated _Node class --- synapse/util/caches/lrucache.py | 73 ++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 32 deletions(-) diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index f7423f2fa..f9df445a8 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -29,6 +29,16 @@ def enumerate_leaves(node, depth): yield m +class _Node(object): + __slots__ = ["prev_node", "next_node", "key", "value"] + + def __init__(self, prev_node, next_node, key, value): + self.prev_node = prev_node + self.next_node = next_node + self.key = key + self.value = value + + class LruCache(object): """ Least-recently-used cache. @@ -38,10 +48,9 @@ class LruCache(object): def __init__(self, max_size, keylen=1, cache_type=dict): cache = cache_type() self.cache = cache # Used for introspection. - list_root = [] - list_root[:] = [list_root, list_root, None, None] - - PREV, NEXT, KEY, VALUE = 0, 1, 2, 3 + list_root = _Node(None, None, None, None) + list_root.next_node = list_root + list_root.prev_node = list_root lock = threading.Lock() @@ -55,36 +64,36 @@ class LruCache(object): def add_node(key, value): prev_node = list_root - next_node = prev_node[NEXT] - node = [prev_node, next_node, key, value] - prev_node[NEXT] = node - next_node[PREV] = node + next_node = prev_node.next_node + node = _Node(prev_node, next_node, key, value) + prev_node.next_node = node + next_node.prev_node = node cache[key] = node def move_node_to_front(node): - prev_node = node[PREV] - next_node = node[NEXT] - prev_node[NEXT] = next_node - next_node[PREV] = prev_node + prev_node = node.prev_node + next_node = node.next_node + prev_node.next_node = next_node + next_node.prev_node = prev_node prev_node = list_root - next_node = prev_node[NEXT] - node[PREV] = prev_node - node[NEXT] = next_node - prev_node[NEXT] = node - next_node[PREV] = node + next_node = prev_node.next_node + node.prev_node = prev_node + node.next_node = next_node + prev_node.next_node = node + next_node.prev_node = node def delete_node(node): - prev_node = node[PREV] - next_node = node[NEXT] - prev_node[NEXT] = next_node - next_node[PREV] = prev_node + prev_node = node.prev_node + next_node = node.next_node + prev_node.next_node = next_node + next_node.prev_node = prev_node @synchronized def cache_get(key, default=None): node = cache.get(key, None) if node is not None: move_node_to_front(node) - return node[VALUE] + return node.value else: return default @@ -93,25 +102,25 @@ class LruCache(object): node = cache.get(key, None) if node is not None: move_node_to_front(node) - node[VALUE] = value + node.value = value else: add_node(key, value) if len(cache) > max_size: - todelete = list_root[PREV] + todelete = list_root.prev_node delete_node(todelete) - cache.pop(todelete[KEY], None) + cache.pop(todelete.key, None) @synchronized def cache_set_default(key, value): node = cache.get(key, None) if node is not None: - return node[VALUE] + return node.value else: add_node(key, value) if len(cache) > max_size: - todelete = list_root[PREV] + todelete = list_root.prev_node delete_node(todelete) - cache.pop(todelete[KEY], None) + cache.pop(todelete.key, None) return value @synchronized @@ -119,8 +128,8 @@ class LruCache(object): node = cache.get(key, None) if node: delete_node(node) - cache.pop(node[KEY], None) - return node[VALUE] + cache.pop(node.key, None) + return node.value else: return default @@ -137,8 +146,8 @@ class LruCache(object): @synchronized def cache_clear(): - list_root[NEXT] = list_root - list_root[PREV] = list_root + list_root.next_node = list_root + list_root.prev_node = list_root cache.clear() @synchronized From d531ebcb57de61bad0ac2e4231280d41d8db4404 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 22 Mar 2016 18:02:36 +0000 Subject: [PATCH 14/35] Key StateHandler._state_cache off of state groups --- synapse/state.py | 77 ++++++++++++++++++------------------------------ 1 file changed, 29 insertions(+), 48 deletions(-) diff --git a/synapse/state.py b/synapse/state.py index 9d90a437d..14c043001 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -90,18 +90,8 @@ class StateHandler(object): """ event_ids = yield self.store.get_latest_event_ids_in_room(room_id) - cache = None - if self._state_cache is not None: - cache = self._state_cache.get(frozenset(event_ids), None) - - if cache: - cache.ts = self.clock.time_msec() - - event_dict = yield self.store.get_events(cache.state.values()) - state = {(e.type, e.state_key): e for e in event_dict.values()} - else: - res = yield self.resolve_state_groups(room_id, event_ids) - state = res[1] + res = yield self.resolve_state_groups(room_id, event_ids) + state = res[1] if event_type: defer.returnValue(state.get((event_type, state_key))) @@ -193,8 +183,33 @@ class StateHandler(object): """ logger.debug("resolve_state_groups event_ids %s", event_ids) + state_groups = yield self.store.get_state_groups( + room_id, event_ids + ) + + logger.debug( + "resolve_state_groups state_groups %s", + state_groups.keys() + ) + + group_names = frozenset(state_groups.keys()) + if len(group_names) == 1: + name, state_list = state_groups.items().pop() + state = { + (e.type, e.state_key): e + for e in state_list + } + prev_state = state.get((event_type, state_key), None) + if prev_state: + prev_state = prev_state.event_id + prev_states = [prev_state] + else: + prev_states = [] + + defer.returnValue((name, state, prev_states)) + if self._state_cache is not None: - cache = self._state_cache.get(frozenset(event_ids), None) + cache = self._state_cache.get(group_names, None) if cache and cache.state_group: cache.ts = self.clock.time_msec() @@ -211,40 +226,6 @@ class StateHandler(object): (cache.state_group, state, prev_states) ) - state_groups = yield self.store.get_state_groups( - room_id, event_ids - ) - - logger.debug( - "resolve_state_groups state_groups %s", - state_groups.keys() - ) - - group_names = set(state_groups.keys()) - if len(group_names) == 1: - name, state_list = state_groups.items().pop() - state = { - (e.type, e.state_key): e - for e in state_list - } - prev_state = state.get((event_type, state_key), None) - if prev_state: - prev_state = prev_state.event_id - prev_states = [prev_state] - else: - prev_states = [] - - if self._state_cache is not None: - cache = _StateCacheEntry( - state={key: event.event_id for key, event in state.items()}, - state_group=name, - ts=self.clock.time_msec() - ) - - self._state_cache[frozenset(event_ids)] = cache - - defer.returnValue((name, state, prev_states)) - new_state, prev_states = self._resolve_events( state_groups.values(), event_type, state_key ) @@ -256,7 +237,7 @@ class StateHandler(object): ts=self.clock.time_msec() ) - self._state_cache[frozenset(event_ids)] = cache + self._state_cache[group_names] = cache defer.returnValue((None, new_state, prev_states)) From 9e2e994395327956f846113566fd18c01f12441a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Mar 2016 09:28:07 +0000 Subject: [PATCH 15/35] Reduce cache size --- synapse/state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/state.py b/synapse/state.py index 14c043001..41d32e664 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -39,7 +39,7 @@ KeyStateTuple = namedtuple("KeyStateTuple", ("context", "type", "state_key")) CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.1)) -SIZE_OF_CACHE = int(5000 * CACHE_SIZE_FACTOR) +SIZE_OF_CACHE = int(1000 * CACHE_SIZE_FACTOR) EVICTION_TIMEOUT_SECONDS = 60 * 60 From b6507869cdd5da18117dbbd0fbf78f4bdd4391f7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Mar 2016 10:32:10 +0000 Subject: [PATCH 16/35] Make get_invites return RoomsForUser --- synapse/push/__init__.py | 2 +- synapse/storage/roommember.py | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/synapse/push/__init__.py b/synapse/push/__init__.py index 65ef1b68a..296c4447e 100644 --- a/synapse/push/__init__.py +++ b/synapse/push/__init__.py @@ -317,7 +317,7 @@ class Pusher(object): @defer.inlineCallbacks def _get_badge_count(self): invites, joins = yield defer.gatherResults([ - self.store.get_invites_for_user(self.user_id), + self.store.get_invited_rooms_for_user(self.user_id), self.store.get_rooms_for_user(self.user_id), ], consumeErrors=True) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 0cd89260f..430b49c12 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -115,19 +115,17 @@ class RoomMemberStore(SQLBaseStore): ).addCallback(self._get_events) @cached() - def get_invites_for_user(self, user_id): - """ Get all the invite events for a user + def get_invited_rooms_for_user(self, user_id): + """ Get all the rooms the user is invited to Args: user_id (str): The user ID. Returns: - A deferred list of event objects. + A deferred list of RoomsForUser. """ return self.get_rooms_for_user_where_membership_is( user_id, [Membership.INVITE] - ).addCallback(lambda invites: self._get_events([ - invite.event_id for invite in invites - ])) + ) def get_leave_and_ban_events_for_user(self, user_id): """ Get all the leave events for a user From 34473a9c7f3e42db5154d2558e737fcab2546a81 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Mar 2016 10:42:19 +0000 Subject: [PATCH 17/35] Don't require alias in public room list. Rooms now no longer require an alias to be published. Also, changes the way we pull out state of each room to not require fetching all state events. --- synapse/handlers/room.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 051468989..a07c0ee43 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -946,53 +946,52 @@ class RoomListHandler(BaseHandler): @defer.inlineCallbacks def handle_room(room_id): aliases = yield self.store.get_aliases_for_room(room_id) - if not aliases: - defer.returnValue(None) - state = yield self.state_handler.get_current_state(room_id) + def get_state(etype, state_key): + return self.state_handler.get_current_state(room_id, etype, state_key) - result = {"aliases": aliases, "room_id": room_id} + result = {"room_id": room_id} + if aliases: + result["aliases"] = aliases - name_event = state.get((EventTypes.Name, ""), None) + name_event = yield get_state(EventTypes.Name, "") if name_event: name = name_event.content.get("name", None) if name: result["name"] = name - topic_event = state.get((EventTypes.Topic, ""), None) + topic_event = yield get_state(EventTypes.Topic, "") if topic_event: topic = topic_event.content.get("topic", None) if topic: result["topic"] = topic - canonical_event = state.get((EventTypes.CanonicalAlias, ""), None) + canonical_event = yield get_state(EventTypes.CanonicalAlias, "") if canonical_event: canonical_alias = canonical_event.content.get("alias", None) if canonical_alias: result["canonical_alias"] = canonical_alias - visibility_event = state.get((EventTypes.RoomHistoryVisibility, ""), None) + visibility_event = yield get_state(EventTypes.RoomHistoryVisibility, "") visibility = None if visibility_event: visibility = visibility_event.content.get("history_visibility", None) result["world_readable"] = visibility == "world_readable" - guest_event = state.get((EventTypes.GuestAccess, ""), None) + guest_event = yield get_state(EventTypes.GuestAccess, "") guest = None if guest_event: guest = guest_event.content.get("guest_access", None) result["guest_can_join"] = guest == "can_join" - avatar_event = state.get(("m.room.avatar", ""), None) + avatar_event = yield get_state("m.room.avatar", "") if avatar_event: avatar_url = avatar_event.content.get("url", None) if avatar_url: result["avatar_url"] = avatar_url - result["num_joined_members"] = sum( - 1 for (event_type, _), ev in state.items() - if event_type == EventTypes.Member and ev.membership == Membership.JOIN - ) + joined_users = yield self.store.get_users_in_room(room_id) + result["num_joined_members"] = len(joined_users) defer.returnValue(result) From 8b0dfc9fc4e98ea9c029c5f7b49efaf011ce1979 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Mar 2016 11:37:58 +0000 Subject: [PATCH 18/35] Don't cache events in get_current_state_for_key --- synapse/storage/events.py | 4 ++-- synapse/storage/state.py | 16 +++++++++------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index e444b64ce..7bcddf1f3 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -157,7 +157,7 @@ class EventsStore(SQLBaseStore): # We purposefully do this first since if we include a `current_state` # key, we *want* to update the `current_state_events` table if current_state: - txn.call_after(self.get_current_state_for_key.invalidate_all) + txn.call_after(self._get_current_state_for_key.invalidate_all) txn.call_after(self.get_rooms_for_user.invalidate_all) txn.call_after(self.get_users_in_room.invalidate, (event.room_id,)) txn.call_after(self.get_joined_hosts_for_room.invalidate, (event.room_id,)) @@ -441,7 +441,7 @@ class EventsStore(SQLBaseStore): for event, _ in state_events_and_contexts: if not context.rejected: txn.call_after( - self.get_current_state_for_key.invalidate, + self._get_current_state_for_key.invalidate, (event.room_id, event.type, event.state_key,) ) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index f06c734c4..eab2c5a8c 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -14,9 +14,7 @@ # limitations under the License. from ._base import SQLBaseStore -from synapse.util.caches.descriptors import ( - cached, cachedInlineCallbacks, cachedList -) +from synapse.util.caches.descriptors import cached, cachedList from twisted.internet import defer @@ -155,8 +153,14 @@ class StateStore(SQLBaseStore): events = yield self._get_events(event_ids, get_prev_content=False) defer.returnValue(events) - @cachedInlineCallbacks(num_args=3) + @defer.inlineCallbacks def get_current_state_for_key(self, room_id, event_type, state_key): + event_ids = yield self._get_current_state_for_key(room_id, event_type, state_key) + events = yield self._get_events(event_ids, get_prev_content=False) + defer.returnValue(events) + + @cached(num_args=3) + def _get_current_state_for_key(self, room_id, event_type, state_key): def f(txn): sql = ( "SELECT event_id FROM current_state_events" @@ -167,9 +171,7 @@ class StateStore(SQLBaseStore): txn.execute(sql, args) results = txn.fetchall() return [r[0] for r in results] - event_ids = yield self.runInteraction("get_current_state_for_key", f) - events = yield self._get_events(event_ids, get_prev_content=False) - defer.returnValue(events) + return self.runInteraction("get_current_state_for_key", f) def _get_state_groups_from_groups(self, groups, types): """Returns dictionary state_group -> (dict of (type, state_key) -> event id) From d87a846ebceb81f78660c4900126eff6e3998b8a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Mar 2016 11:42:50 +0000 Subject: [PATCH 19/35] Don't cache events in get_recent_events_for_room --- synapse/storage/stream.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 7f4a82752..cf84938be 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -36,7 +36,7 @@ what sort order was used: from twisted.internet import defer from ._base import SQLBaseStore -from synapse.util.caches.descriptors import cachedInlineCallbacks +from synapse.util.caches.descriptors import cached from synapse.api.constants import EventTypes from synapse.types import RoomStreamToken from synapse.util.logcontext import preserve_fn @@ -465,9 +465,25 @@ class StreamStore(SQLBaseStore): defer.returnValue((events, token)) - @cachedInlineCallbacks(num_args=4) + @defer.inlineCallbacks def get_recent_events_for_room(self, room_id, limit, end_token, from_token=None): + rows, token = yield self.get_recent_event_ids_for_room( + room_id, limit, end_token, from_token + ) + logger.debug("stream before") + events = yield self._get_events( + [r["event_id"] for r in rows], + get_prev_content=True + ) + logger.debug("stream after") + + self._set_before_and_after(events, rows) + + defer.returnValue((events, token)) + + @cached(num_args=4) + def get_recent_event_ids_for_room(self, room_id, limit, end_token, from_token=None): end_token = RoomStreamToken.parse_stream_token(end_token) if from_token is None: @@ -517,21 +533,10 @@ class StreamStore(SQLBaseStore): return rows, token - rows, token = yield self.runInteraction( + return self.runInteraction( "get_recent_events_for_room", get_recent_events_for_room_txn ) - logger.debug("stream before") - events = yield self._get_events( - [r["event_id"] for r in rows], - get_prev_content=True - ) - logger.debug("stream after") - - self._set_before_and_after(events, rows) - - defer.returnValue((events, token)) - @defer.inlineCallbacks def get_room_events_max_id(self, direction='f'): token = yield self._stream_id_gen.get_max_token() From 0677fc1c4e48fb0b91a2f91b348d16e5ce676125 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Mar 2016 13:24:27 +0000 Subject: [PATCH 20/35] Comment --- synapse/handlers/room.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index a07c0ee43..25225ea1c 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -947,6 +947,9 @@ class RoomListHandler(BaseHandler): def handle_room(room_id): aliases = yield self.store.get_aliases_for_room(room_id) + # We pull each bit of state out indvidually to avoid pulling the + # full state into memory. Due to how the caching works this should + # be fairly quick, even if not originally in the cache. def get_state(etype, state_key): return self.state_handler.get_current_state(room_id, etype, state_key) From b2802a1351cf5dbfb68d0e0f96ad9fb16df98fe8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Mar 2016 13:45:57 +0000 Subject: [PATCH 21/35] Ensure published rooms have public join rules --- synapse/handlers/room.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 25225ea1c..7062414ad 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -953,6 +953,13 @@ class RoomListHandler(BaseHandler): def get_state(etype, state_key): return self.state_handler.get_current_state(room_id, etype, state_key) + # Double check that this is actually a public room. + join_rules_event = yield get_state(EventTypes.JoinRules, "") + if join_rules_event: + join_rule = join_rules_event.content.get("join_rule", None) + if join_rule and join_rule != JoinRules.PUBLIC: + defer.returnValue(None) + result = {"room_id": room_id} if aliases: result["aliases"] = aliases From 84afeb41f32acdab22036b8c1efbb402eef31cd7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Mar 2016 13:49:10 +0000 Subject: [PATCH 22/35] Ensure all old public rooms have aliases --- synapse/handlers/room.py | 9 ++++---- .../storage/schema/delta/30/public_rooms.sql | 21 +++++++++++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 synapse/storage/schema/delta/30/public_rooms.sql diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 7062414ad..d5c56ce0d 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -119,7 +119,8 @@ class RoomCreationHandler(BaseHandler): invite_3pid_list = config.get("invite_3pid", []) - is_public = config.get("visibility", None) == "public" + visibility = config.get("visibility", None) + is_public = visibility == "public" # autogen room IDs and try to create it. We may clash, so just # try a few times till one goes through, giving up eventually. @@ -155,9 +156,9 @@ class RoomCreationHandler(BaseHandler): preset_config = config.get( "preset", - RoomCreationPreset.PUBLIC_CHAT - if is_public - else RoomCreationPreset.PRIVATE_CHAT + RoomCreationPreset.PRIVATE_CHAT + if visibility == "private" + else RoomCreationPreset.PUBLIC_CHAT ) raw_initial_state = config.get("initial_state", []) diff --git a/synapse/storage/schema/delta/30/public_rooms.sql b/synapse/storage/schema/delta/30/public_rooms.sql new file mode 100644 index 000000000..a48604faa --- /dev/null +++ b/synapse/storage/schema/delta/30/public_rooms.sql @@ -0,0 +1,21 @@ +/* Copyright 2016 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/* This release removes the restriction that published rooms must have an alias, + * so we go back and ensure the only 'public' rooms are ones with an alias.*/ +UPDATE rooms SET is_public = 0 WHERE is_public = 1 AND room_id not in ( + SELECT room_id FROM room_aliases +); From 0c1a27b7877c08643ced4bdabe9843d69b0bcea1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Mar 2016 14:10:49 +0000 Subject: [PATCH 23/35] SQLite and postgres doesn't share a true literal --- synapse/storage/schema/delta/30/public_rooms.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/schema/delta/30/public_rooms.sql b/synapse/storage/schema/delta/30/public_rooms.sql index a48604faa..3400898ed 100644 --- a/synapse/storage/schema/delta/30/public_rooms.sql +++ b/synapse/storage/schema/delta/30/public_rooms.sql @@ -16,6 +16,6 @@ /* This release removes the restriction that published rooms must have an alias, * so we go back and ensure the only 'public' rooms are ones with an alias.*/ -UPDATE rooms SET is_public = 0 WHERE is_public = 1 AND room_id not in ( +UPDATE rooms SET is_public = (1 = 0) WHERE is_public = (1 = 1) AND room_id not in ( SELECT room_id FROM room_aliases ); From b2757655455fb2bf485c66affeb5a294eb9459c2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Mar 2016 14:15:32 +0000 Subject: [PATCH 24/35] Comment about weird SQL --- synapse/storage/schema/delta/30/public_rooms.sql | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/storage/schema/delta/30/public_rooms.sql b/synapse/storage/schema/delta/30/public_rooms.sql index 3400898ed..f09db4faa 100644 --- a/synapse/storage/schema/delta/30/public_rooms.sql +++ b/synapse/storage/schema/delta/30/public_rooms.sql @@ -15,7 +15,9 @@ /* This release removes the restriction that published rooms must have an alias, - * so we go back and ensure the only 'public' rooms are ones with an alias.*/ + * so we go back and ensure the only 'public' rooms are ones with an alias. + * We use (1 = 0) and (1 = 1) so that it works in both postgres and sqlite + */ UPDATE rooms SET is_public = (1 = 0) WHERE is_public = (1 = 1) AND room_id not in ( SELECT room_id FROM room_aliases ); From 75daede92f041500347a5f446229be5ca50c2b8e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 22 Mar 2016 18:22:52 +0000 Subject: [PATCH 25/35] String intern --- synapse/storage/state.py | 12 +++++++++--- synapse/util/caches/__init__.py | 8 ++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index eab2c5a8c..1982b1c60 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -15,6 +15,7 @@ from ._base import SQLBaseStore from synapse.util.caches.descriptors import cached, cachedList +from synapse.util.caches import intern_string from twisted.internet import defer @@ -155,7 +156,9 @@ class StateStore(SQLBaseStore): @defer.inlineCallbacks def get_current_state_for_key(self, room_id, event_type, state_key): - event_ids = yield self._get_current_state_for_key(room_id, event_type, state_key) + event_ids = yield self._get_current_state_for_key( + room_id, intern_string(event_type), intern_string(state_key) + ) events = yield self._get_events(event_ids, get_prev_content=False) defer.returnValue(events) @@ -202,7 +205,7 @@ class StateStore(SQLBaseStore): results = {} for row in rows: - key = (row["type"], row["state_key"]) + key = (intern_string(row["type"]), intern_string(row["state_key"])) results.setdefault(row["state_group"], {})[key] = row["event_id"] return results @@ -393,7 +396,10 @@ class StateStore(SQLBaseStore): # cache absence of the key, on the assumption that if we've # explicitly asked for some types then we will probably ask # for them again. - state_dict = {key: None for key in types} + state_dict = { + (intern_string(etype), intern_string(state_key)): None + for (etype, state_key) in types + } state_dict.update(results[group]) results[group] = state_dict else: diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index 1a1490419..9d450fade 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -14,6 +14,7 @@ # limitations under the License. import synapse.metrics +from lrucache import LruCache DEBUG_CACHES = False @@ -25,3 +26,10 @@ cache_counter = metrics.register_cache( lambda: {(name,): len(caches_by_name[name]) for name in caches_by_name.keys()}, labels=["name"], ) + +_string_cache = LruCache(5000) +caches_by_name["string_cache"] = _string_cache + + +def intern_string(string): + return _string_cache.setdefault(string, string) From fe9794706ab817fcedc17f99693eb0823b339d93 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Mar 2016 14:58:08 +0000 Subject: [PATCH 26/35] Intern type and state_key on events --- synapse/events/__init__.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index abed6b5e6..2ceac19ad 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -14,6 +14,7 @@ # limitations under the License. from synapse.util.frozenutils import freeze +from synapse.util.caches import intern_string # Whether we should use frozen_dict in FrozenEvent. Using frozen_dicts prevents @@ -140,6 +141,12 @@ class FrozenEvent(EventBase): unsigned = dict(event_dict.pop("unsigned", {})) + # We intern these strings because they turn up a lot (especially when + # caching). + event_dict["type"] = intern_string(event_dict["type"]) + if "state_key" in event_dict: + event_dict["state_key"] = intern_string(event_dict["state_key"]) + if USE_FROZEN_DICTS: frozen_dict = freeze(event_dict) else: From f96526ffc23fdd99ab47abda67fb579a1ad764f9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Mar 2016 15:01:05 +0000 Subject: [PATCH 27/35] Intern sender, event_id and room_id in events --- synapse/events/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 2ceac19ad..63004eaf0 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -146,6 +146,11 @@ class FrozenEvent(EventBase): event_dict["type"] = intern_string(event_dict["type"]) if "state_key" in event_dict: event_dict["state_key"] = intern_string(event_dict["state_key"]) + if "sender" in event_dict: + event_dict["sender"] = intern_string(event_dict["sender"]) + + event_dict["event_id"] = intern(event_dict["event_id"].encode('ascii')) + event_dict["room_id"] = intern(event_dict["room_id"].encode('ascii')) if USE_FROZEN_DICTS: frozen_dict = freeze(event_dict) From acdfef7b1443a8260c43e31e9944b74dfdf286dc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Mar 2016 16:13:05 +0000 Subject: [PATCH 28/35] Intern all the things --- synapse/events/__init__.py | 11 +---- synapse/federation/federation_client.py | 1 + synapse/federation/transport/server.py | 28 ++++++------ synapse/http/server.py | 10 +++-- synapse/storage/_base.py | 3 +- synapse/storage/receipts.py | 21 +++++---- synapse/storage/state.py | 10 ++--- synapse/util/caches/__init__.py | 58 ++++++++++++++++++++++++- 8 files changed, 97 insertions(+), 45 deletions(-) diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 63004eaf0..23f8b612a 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -14,7 +14,7 @@ # limitations under the License. from synapse.util.frozenutils import freeze -from synapse.util.caches import intern_string +from synapse.util.caches import intern_dict # Whether we should use frozen_dict in FrozenEvent. Using frozen_dicts prevents @@ -143,14 +143,7 @@ class FrozenEvent(EventBase): # We intern these strings because they turn up a lot (especially when # caching). - event_dict["type"] = intern_string(event_dict["type"]) - if "state_key" in event_dict: - event_dict["state_key"] = intern_string(event_dict["state_key"]) - if "sender" in event_dict: - event_dict["sender"] = intern_string(event_dict["sender"]) - - event_dict["event_id"] = intern(event_dict["event_id"].encode('ascii')) - event_dict["room_id"] = intern(event_dict["room_id"].encode('ascii')) + event_dict = intern_dict(event_dict) if USE_FROZEN_DICTS: frozen_dict = freeze(event_dict) diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 83c1f4658..37ee469fa 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -418,6 +418,7 @@ class FederationClient(FederationBase): "Failed to make_%s via %s: %s", membership, destination, e.message ) + raise raise RuntimeError("Failed to send to any server.") diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 208bff8d4..d65a7893d 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -175,7 +175,7 @@ class BaseFederationServlet(object): class FederationSendServlet(BaseFederationServlet): - PATH = "/send/([^/]*)/" + PATH = "/send/(?P[^/]*)/" def __init__(self, handler, server_name, **kwargs): super(FederationSendServlet, self).__init__( @@ -250,7 +250,7 @@ class FederationPullServlet(BaseFederationServlet): class FederationEventServlet(BaseFederationServlet): - PATH = "/event/([^/]*)/" + PATH = "/event/(?P[^/]*)/" # This is when someone asks for a data item for a given server data_id pair. def on_GET(self, origin, content, query, event_id): @@ -258,7 +258,7 @@ class FederationEventServlet(BaseFederationServlet): class FederationStateServlet(BaseFederationServlet): - PATH = "/state/([^/]*)/" + PATH = "/state/(?P[^/]*)/" # This is when someone asks for all data for a given context. def on_GET(self, origin, content, query, context): @@ -270,7 +270,7 @@ class FederationStateServlet(BaseFederationServlet): class FederationBackfillServlet(BaseFederationServlet): - PATH = "/backfill/([^/]*)/" + PATH = "/backfill/(?P[^/]*)/" def on_GET(self, origin, content, query, context): versions = query["v"] @@ -285,7 +285,7 @@ class FederationBackfillServlet(BaseFederationServlet): class FederationQueryServlet(BaseFederationServlet): - PATH = "/query/([^/]*)" + PATH = "/query/(?P[^/]*)" # This is when we receive a server-server Query def on_GET(self, origin, content, query, query_type): @@ -296,7 +296,7 @@ class FederationQueryServlet(BaseFederationServlet): class FederationMakeJoinServlet(BaseFederationServlet): - PATH = "/make_join/([^/]*)/([^/]*)" + PATH = "/make_join/(?P[^/]*)/(?P[^/]*)" @defer.inlineCallbacks def on_GET(self, origin, content, query, context, user_id): @@ -305,7 +305,7 @@ class FederationMakeJoinServlet(BaseFederationServlet): class FederationMakeLeaveServlet(BaseFederationServlet): - PATH = "/make_leave/([^/]*)/([^/]*)" + PATH = "/make_leave/(?P[^/]*)/(?P[^/]*)" @defer.inlineCallbacks def on_GET(self, origin, content, query, context, user_id): @@ -314,7 +314,7 @@ class FederationMakeLeaveServlet(BaseFederationServlet): class FederationSendLeaveServlet(BaseFederationServlet): - PATH = "/send_leave/([^/]*)/([^/]*)" + PATH = "/send_leave/(?P[^/]*)/(?P[^/]*)" @defer.inlineCallbacks def on_PUT(self, origin, content, query, room_id, txid): @@ -323,14 +323,14 @@ class FederationSendLeaveServlet(BaseFederationServlet): class FederationEventAuthServlet(BaseFederationServlet): - PATH = "/event_auth/([^/]*)/([^/]*)" + PATH = "/event_auth(?P[^/]*)/(?P[^/]*)" def on_GET(self, origin, content, query, context, event_id): return self.handler.on_event_auth(origin, context, event_id) class FederationSendJoinServlet(BaseFederationServlet): - PATH = "/send_join/([^/]*)/([^/]*)" + PATH = "/send_join/(?P[^/]*)/(?P[^/]*)" @defer.inlineCallbacks def on_PUT(self, origin, content, query, context, event_id): @@ -341,7 +341,7 @@ class FederationSendJoinServlet(BaseFederationServlet): class FederationInviteServlet(BaseFederationServlet): - PATH = "/invite/([^/]*)/([^/]*)" + PATH = "/invite/(?P[^/]*)/(?P[^/]*)" @defer.inlineCallbacks def on_PUT(self, origin, content, query, context, event_id): @@ -352,7 +352,7 @@ class FederationInviteServlet(BaseFederationServlet): class FederationThirdPartyInviteExchangeServlet(BaseFederationServlet): - PATH = "/exchange_third_party_invite/([^/]*)" + PATH = "/exchange_third_party_invite/(?P[^/]*)" @defer.inlineCallbacks def on_PUT(self, origin, content, query, room_id): @@ -381,7 +381,7 @@ class FederationClientKeysClaimServlet(BaseFederationServlet): class FederationQueryAuthServlet(BaseFederationServlet): - PATH = "/query_auth/([^/]*)/([^/]*)" + PATH = "/query_auth/(?P[^/]*)/(?P[^/]*)" @defer.inlineCallbacks def on_POST(self, origin, content, query, context, event_id): @@ -394,7 +394,7 @@ class FederationQueryAuthServlet(BaseFederationServlet): class FederationGetMissingEventsServlet(BaseFederationServlet): # TODO(paul): Why does this path alone end with "/?" optional? - PATH = "/get_missing_events/([^/]*)/?" + PATH = "/get_missing_events/(?P[^/]*)/?" @defer.inlineCallbacks def on_POST(self, origin, content, query, room_id): diff --git a/synapse/http/server.py b/synapse/http/server.py index b17b190ee..b82196fd5 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -18,6 +18,7 @@ from synapse.api.errors import ( cs_exception, SynapseError, CodeMessageException, UnrecognizedRequestError, Codes ) from synapse.util.logcontext import LoggingContext, PreserveLoggingContext +from synapse.util.caches import intern_dict import synapse.metrics import synapse.events @@ -229,11 +230,12 @@ class JsonResource(HttpServer, resource.Resource): else: servlet_classname = "%r" % callback - args = [ - urllib.unquote(u).decode("UTF-8") if u else u for u in m.groups() - ] + kwargs = intern_dict({ + name: urllib.unquote(value).decode("UTF-8") if value else value + for name, value in m.groupdict().items() + }) - callback_return = yield callback(request, *args) + callback_return = yield callback(request, **kwargs) if callback_return is not None: code, response = callback_return self._send_response(request, code, response) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 583b77a83..b75b79df3 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -18,6 +18,7 @@ from synapse.api.errors import StoreError from synapse.util.logcontext import LoggingContext, PreserveLoggingContext from synapse.util.caches.dictionary_cache import DictionaryCache from synapse.util.caches.descriptors import Cache +from synapse.util.caches import intern_dict import synapse.metrics @@ -350,7 +351,7 @@ class SQLBaseStore(object): """ col_headers = list(column[0] for column in cursor.description) results = list( - dict(zip(col_headers, row)) for row in cursor.fetchall() + intern_dict(dict(zip(col_headers, row))) for row in cursor.fetchall() ) return results diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index dbc074d6b..6b9d848ea 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -62,18 +62,17 @@ class ReceiptsStore(SQLBaseStore): @cachedInlineCallbacks(num_args=2) def get_receipts_for_user(self, user_id, receipt_type): - def f(txn): - sql = ( - "SELECT room_id,event_id " - "FROM receipts_linearized " - "WHERE user_id = ? AND receipt_type = ? " - ) - txn.execute(sql, (user_id, receipt_type)) - return txn.fetchall() + rows = yield self._simple_select_list( + table="receipts_linearized", + keyvalues={ + "user_id": user_id, + "receipt_type": receipt_type, + }, + retcols=("room_id", "event_id"), + desc="get_receipts_for_user", + ) - defer.returnValue(dict( - (yield self.runInteraction("get_receipts_for_user", f)) - )) + defer.returnValue({row["room_id"]: row["event_id"] for row in rows}) @defer.inlineCallbacks def get_linearized_receipts_for_rooms(self, room_ids, to_key, from_key=None): diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 1982b1c60..03eecbbbb 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -156,9 +156,7 @@ class StateStore(SQLBaseStore): @defer.inlineCallbacks def get_current_state_for_key(self, room_id, event_type, state_key): - event_ids = yield self._get_current_state_for_key( - room_id, intern_string(event_type), intern_string(state_key) - ) + event_ids = yield self._get_current_state_for_key(room_id, event_type, state_key) events = yield self._get_events(event_ids, get_prev_content=False) defer.returnValue(events) @@ -205,7 +203,7 @@ class StateStore(SQLBaseStore): results = {} for row in rows: - key = (intern_string(row["type"]), intern_string(row["state_key"])) + key = (row["type"], row["state_key"]) results.setdefault(row["state_group"], {})[key] = row["event_id"] return results @@ -286,7 +284,9 @@ class StateStore(SQLBaseStore): desc="_get_state_group_for_events", ) - defer.returnValue({row["event_id"]: row["state_group"] for row in rows}) + defer.returnValue({ + intern(row["event_id"].encode('ascii')): row["state_group"] for row in rows + }) def _get_some_state_from_cache(self, group, types): """Checks if group is in cache. See `_get_state_for_groups` diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index 9d450fade..838cec45f 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -15,6 +15,9 @@ import synapse.metrics from lrucache import LruCache +import os + +CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.1)) DEBUG_CACHES = False @@ -27,9 +30,62 @@ cache_counter = metrics.register_cache( labels=["name"], ) -_string_cache = LruCache(5000) +_string_cache = LruCache(int(5000 * CACHE_SIZE_FACTOR)) caches_by_name["string_cache"] = _string_cache +KNOWN_KEYS = { + key: key for key in + ( + "auth_events", + "content", + "depth", + "event_id", + "hashes", + "origin", + "origin_server_ts", + "prev_events", + "room_id", + "sender", + "signatures", + "state_key", + "type", + "unsigned", + "user_id", + ) +} + + def intern_string(string): + """Takes a (potentially) unicode string and interns using custom cache + """ return _string_cache.setdefault(string, string) + + +def intern_dict(dictionary): + """Takes a dictionary and interns well known keys and their values + """ + return _intern_known_values({ + _intern_key(key): value for key, value in dictionary.items() + }) + + +def _intern_known_values(dictionary): + intern_str_keys = ("event_id", "room_id") + intern_unicode_keys = ("sender", "user_id", "type", "state_key") + + for key in intern_str_keys: + val = dictionary.get(key, None) + if val is not None: + dictionary[key] = intern(val.encode('ascii')) + + for key in intern_unicode_keys: + val = dictionary.get(key, None) + if val is not None: + dictionary[key] = intern_string(val) + + return dictionary + + +def _intern_key(key): + return KNOWN_KEYS.get(key, key) From 2f0180b09e2a2afeed418a5840ae6b4fffcb4be4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Mar 2016 16:29:46 +0000 Subject: [PATCH 29/35] Don't bother interning keys that are already interned --- synapse/storage/state.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 03eecbbbb..02cefdff2 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -284,9 +284,7 @@ class StateStore(SQLBaseStore): desc="_get_state_group_for_events", ) - defer.returnValue({ - intern(row["event_id"].encode('ascii')): row["state_group"] for row in rows - }) + defer.returnValue({row["event_id"]: row["state_group"] for row in rows}) def _get_some_state_from_cache(self, group, types): """Checks if group is in cache. See `_get_state_for_groups` From 8122ad7bab74b1a52188e350ca605033a9eca28e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Mar 2016 16:34:59 +0000 Subject: [PATCH 30/35] Simplify intern_dict --- synapse/util/caches/__init__.py | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index 838cec45f..d53569ca4 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -65,27 +65,20 @@ def intern_string(string): def intern_dict(dictionary): """Takes a dictionary and interns well known keys and their values """ - return _intern_known_values({ - _intern_key(key): value for key, value in dictionary.items() - }) + return { + KNOWN_KEYS.get(key, key): _intern_known_values(key, value) + for key, value in dictionary.items() + } -def _intern_known_values(dictionary): +def _intern_known_values(key, value): intern_str_keys = ("event_id", "room_id") intern_unicode_keys = ("sender", "user_id", "type", "state_key") - for key in intern_str_keys: - val = dictionary.get(key, None) - if val is not None: - dictionary[key] = intern(val.encode('ascii')) + if key in intern_str_keys: + return intern(value.encode('ascii')) - for key in intern_unicode_keys: - val = dictionary.get(key, None) - if val is not None: - dictionary[key] = intern_string(val) + if key in intern_unicode_keys: + return intern_string(value) - return dictionary - - -def _intern_key(key): - return KNOWN_KEYS.get(key, key) + return value From 3b554bda267402fd43a9e462eccf4060077f37dc Mon Sep 17 00:00:00 2001 From: David Baker Date: Thu, 24 Mar 2016 13:19:39 +0000 Subject: [PATCH 31/35] Never notify for member events. This fixes https://github.com/vector-im/vector-web/issues/828 --- synapse/push/baserules.py | 41 ++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/synapse/push/baserules.py b/synapse/push/baserules.py index 86a2998bc..792af70eb 100644 --- a/synapse/push/baserules.py +++ b/synapse/push/baserules.py @@ -160,7 +160,27 @@ BASE_APPEND_OVRRIDE_RULES = [ 'actions': [ 'dont_notify', ] - } + }, + # Will we sometimes want to know about people joining and leaving? + # Perhaps: if so, this could be expanded upon. Seems the most usual case + # is that we don't though. We add this override rule so that even if + # the room rule is set to notify, we don't get notifications about + # join/leave/avatar/displayname events. + # See also: https://matrix.org/jira/browse/SYN-607 + { + 'rule_id': 'global/override/.m.rule.member_event', + 'conditions': [ + { + 'kind': 'event_match', + 'key': 'type', + 'pattern': 'm.room.member', + '_id': '_member', + } + ], + 'actions': [ + 'dont_notify' + ] + }, ] @@ -261,25 +281,6 @@ BASE_APPEND_UNDERRIDE_RULES = [ } ] }, - # This is too simple: https://matrix.org/jira/browse/SYN-607 - # Removing for now - # { - # 'rule_id': 'global/underride/.m.rule.member_event', - # 'conditions': [ - # { - # 'kind': 'event_match', - # 'key': 'type', - # 'pattern': 'm.room.member', - # '_id': '_member', - # } - # ], - # 'actions': [ - # 'notify', { - # 'set_tweak': 'highlight', - # 'value': False - # } - # ] - # }, { 'rule_id': 'global/underride/.m.rule.message', 'conditions': [ From 191c7bef6bbb80f66f66e95387940c3bb6b5a0cf Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 24 Mar 2016 17:47:31 +0000 Subject: [PATCH 32/35] Deduplicate identical /sync requests --- synapse/handlers/sync.py | 16 +++++++++- synapse/rest/client/v2_alpha/sync.py | 3 ++ synapse/util/caches/response_cache.py | 46 +++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 synapse/util/caches/response_cache.py diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 1f6fde8e8..48ab5707e 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -20,6 +20,7 @@ from synapse.api.constants import Membership, EventTypes from synapse.util import unwrapFirstError from synapse.util.logcontext import LoggingContext, preserve_fn from synapse.util.metrics import Measure +from synapse.util.caches.response_cache import ResponseCache from synapse.push.clientformat import format_push_rules_for_user from twisted.internet import defer @@ -35,6 +36,7 @@ SyncConfig = collections.namedtuple("SyncConfig", [ "user", "filter_collection", "is_guest", + "request_key", ]) @@ -136,8 +138,8 @@ class SyncHandler(BaseHandler): super(SyncHandler, self).__init__(hs) self.event_sources = hs.get_event_sources() self.clock = hs.get_clock() + self.response_cache = ResponseCache() - @defer.inlineCallbacks def wait_for_sync_for_user(self, sync_config, since_token=None, timeout=0, full_state=False): """Get the sync for a client if we have new data for it now. Otherwise @@ -146,7 +148,19 @@ class SyncHandler(BaseHandler): Returns: A Deferred SyncResult. """ + result = self.response_cache.get(sync_config.request_key) + if not result: + result = self.response_cache.set( + sync_config.request_key, + self._wait_for_sync_for_user( + sync_config, since_token, timeout, full_state + ) + ) + return result + @defer.inlineCallbacks + def _wait_for_sync_for_user(self, sync_config, since_token, timeout, + full_state): context = LoggingContext.current_context() if context: if since_token is None: diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py index de4a020ad..c5785d707 100644 --- a/synapse/rest/client/v2_alpha/sync.py +++ b/synapse/rest/client/v2_alpha/sync.py @@ -115,6 +115,8 @@ class SyncRestServlet(RestServlet): ) ) + request_key = (user, timeout, since, filter_id, full_state) + if filter_id: if filter_id.startswith('{'): try: @@ -134,6 +136,7 @@ class SyncRestServlet(RestServlet): user=user, filter_collection=filter, is_guest=requester.is_guest, + request_key=request_key, ) if since is not None: diff --git a/synapse/util/caches/response_cache.py b/synapse/util/caches/response_cache.py new file mode 100644 index 000000000..1c2e34426 --- /dev/null +++ b/synapse/util/caches/response_cache.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.util.async import ObservableDeferred + + +class ResponseCache(object): + """ + This caches a deferred response. Until the deferred completes it will be + returned from the cache. This means that if the client retries the request + while the response is still being computed, that original response will be + used rather than trying to compute a new response. + """ + + def __init__(self): + self.pending_result_cache = {} # Request that haven't finished yet. + + def get(self, key): + result = self.pending_result_cache.get(key) + if result is not None: + return result.observe() + else: + return None + + def set(self, key, deferred): + result = ObservableDeferred(deferred) + self.pending_result_cache[key] = result + + def remove(r): + self.pending_result_cache.pop(key, None) + return r + + result.addBoth(remove) + return result.observe() From 54a546091abdf70c740d1e59b025e79c44df7455 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 24 Mar 2016 18:02:10 +0000 Subject: [PATCH 33/35] Add a response cache for getting the public room list --- synapse/handlers/room.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index d5c56ce0d..133183a25 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -25,6 +25,7 @@ from synapse.api.constants import ( from synapse.api.errors import AuthError, StoreError, SynapseError, Codes from synapse.util import stringutils, unwrapFirstError from synapse.util.logcontext import preserve_context_over_fn +from synapse.util.caches.response_cache import ResponseCache from signedjson.sign import verify_signed_json from signedjson.key import decode_verify_key_bytes @@ -939,9 +940,18 @@ class RoomMemberHandler(BaseHandler): class RoomListHandler(BaseHandler): + def __init__(self, hs): + super(RoomListHandler, self).__init__(hs) + self.response_cache = ResponseCache() + + def get_public_room_list(self): + result = self.response_cache.get(()) + if not result: + result = self.response_cache.set((), self._get_public_room_list()) + return result @defer.inlineCallbacks - def get_public_room_list(self): + def _get_public_room_list(self): room_ids = yield self.store.get_public_room_ids() @defer.inlineCallbacks From 77cba688edb9216f5c578c931e96142722641b70 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 24 Mar 2016 18:02:37 +0000 Subject: [PATCH 34/35] Fix typo --- synapse/util/caches/response_cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/util/caches/response_cache.py b/synapse/util/caches/response_cache.py index 1c2e34426..be310ba32 100644 --- a/synapse/util/caches/response_cache.py +++ b/synapse/util/caches/response_cache.py @@ -25,7 +25,7 @@ class ResponseCache(object): """ def __init__(self): - self.pending_result_cache = {} # Request that haven't finished yet. + self.pending_result_cache = {} # Requests that haven't finished yet. def get(self, key): result = self.pending_result_cache.get(key) From ec0cf996c94cb11f2a9b51369b886fb275b26ee5 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 25 Mar 2016 23:38:19 +0000 Subject: [PATCH 35/35] typo --- synapse/replication/resource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/replication/resource.py b/synapse/replication/resource.py index 8c1ae0fbc..37a1d3960 100644 --- a/synapse/replication/resource.py +++ b/synapse/replication/resource.py @@ -76,7 +76,7 @@ class ReplicationResource(Resource): The response is a JSON object with keys for each stream with updates. Under each key is a JSON object with: - * "postion": The current position of the stream. + * "position": The current position of the stream. * "field_names": The names of the fields in each row. * "rows": The updates as an array of arrays.