From 51fb884c52c9dfc59bbaf188a8ac5dc01028f108 Mon Sep 17 00:00:00 2001 From: Bruno Bigras Date: Wed, 19 Apr 2017 17:32:00 -0400 Subject: [PATCH 001/139] Fix the system requirements list in README.rst --- README.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/README.rst b/README.rst index b9c854ad4..c92546124 100644 --- a/README.rst +++ b/README.rst @@ -84,6 +84,7 @@ Synapse Installation Synapse is the reference python/twisted Matrix homeserver implementation. System requirements: + - POSIX-compliant system (tested on Linux & OS X) - Python 2.7 - At least 1GB of free RAM if you want to join large public rooms like #matrix:matrix.org From 328378f9cb809e1428221dfaadd68bb9278b2123 Mon Sep 17 00:00:00 2001 From: Slipeer Date: Thu, 11 May 2017 11:42:08 +0300 Subject: [PATCH 002/139] Fix users claimed non-exclusively by an app service don't get notifications #2211 --- synapse/storage/appservice.py | 7 +++++-- synapse/storage/push_rule.py | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index 514570561..0e9e8d345 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -39,12 +39,15 @@ class ApplicationServiceStore(SQLBaseStore): def get_app_services(self): return self.services_cache - def get_if_app_services_interested_in_user(self, user_id): + def get_if_app_services_interested_in_user(self, user_id, exclusive=False): """Check if the user is one associated with an app service """ for service in self.services_cache: if service.is_interested_in_user(user_id): - return True + if exclusive: + return service.is_exclusive_user(user_id) + else: + return True return False def get_app_service_by_user_id(self, user_id): diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index 0a819d32c..65bad3fad 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -163,7 +163,7 @@ class PushRuleStore(SQLBaseStore): local_users_in_room = set( u for u in users_in_room if self.hs.is_mine_id(u) - and not self.get_if_app_services_interested_in_user(u) + and not self.get_if_app_services_interested_in_user(u, exclusive=True) ) # users in the room who have pushers need to get push rules run because From bfbc907cec96ce9a64730930f63ed400c1aa3b5b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 2 May 2017 10:40:31 +0100 Subject: [PATCH 003/139] Prefill state caches --- synapse/storage/_base.py | 8 ++++---- synapse/storage/events.py | 10 ++++++++-- synapse/storage/state.py | 8 ++++++++ 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index c659004e8..58b73af7d 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -60,12 +60,12 @@ class LoggingTransaction(object): object.__setattr__(self, "database_engine", database_engine) object.__setattr__(self, "after_callbacks", after_callbacks) - def call_after(self, callback, *args): + def call_after(self, callback, *args, **kwargs): """Call the given callback on the main twisted thread after the transaction has finished. Used to invalidate the caches on the correct thread. 
""" - self.after_callbacks.append((callback, args)) + self.after_callbacks.append((callback, args, kwargs)) def __getattr__(self, name): return getattr(self.txn, name) @@ -319,8 +319,8 @@ class SQLBaseStore(object): inner_func, *args, **kwargs ) finally: - for after_callback, after_args in after_callbacks: - after_callback(*after_args) + for after_callback, after_args, after_kwargs in after_callbacks: + after_callback(*after_args, **after_kwargs) defer.returnValue(result) @defer.inlineCallbacks diff --git a/synapse/storage/events.py b/synapse/storage/events.py index dbd63078c..0dffafd90 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -374,6 +374,7 @@ class EventsStore(SQLBaseStore): new_forward_extremeties=new_forward_extremeties, ) persist_event_counter.inc_by(len(chunk)) + for event, context in chunk: if context.app_service: origin_type = "local" @@ -387,6 +388,11 @@ class EventsStore(SQLBaseStore): event_counter.inc(event.type, origin_type, origin_entity) + for room_id, (_, _, new_state) in current_state_for_room.iteritems(): + self.get_current_state_ids.prefill( + (room_id, ), new_state + ) + @defer.inlineCallbacks def _calculate_new_extremeties(self, room_id, event_contexts, latest_event_ids): """Calculates the new forward extremeties for a room given events to @@ -545,7 +551,7 @@ class EventsStore(SQLBaseStore): if ev_id in events_to_insert } - defer.returnValue((to_delete, to_insert)) + defer.returnValue((to_delete, to_insert, current_state)) @defer.inlineCallbacks def get_event(self, event_id, check_redacted=True, @@ -698,7 +704,7 @@ class EventsStore(SQLBaseStore): def _update_current_state_txn(self, txn, state_delta_by_room): for room_id, current_state_tuple in state_delta_by_room.iteritems(): - to_delete, to_insert = current_state_tuple + to_delete, to_insert, _ = current_state_tuple txn.executemany( "DELETE FROM current_state_events WHERE event_id = ?", [(ev_id,) for ev_id in to_delete.itervalues()], diff --git a/synapse/storage/state.py b/synapse/storage/state.py index a16afa8df..1e1ce87e0 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -227,6 +227,14 @@ class StateStore(SQLBaseStore): ], ) + txn.call_after( + self._state_group_cache.update, + self._state_group_cache.sequence, + key=context.state_group, + value=context.current_state_ids, + full=True, + ) + self._simple_insert_many_txn( txn, table="event_to_state_groups", From e0d2f6d5b02dd208bc55434b5c2d386827486e9f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 2 May 2017 11:36:11 +0100 Subject: [PATCH 004/139] Add more granular event send metrics --- synapse/storage/events.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 0dffafd90..36574f78b 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -374,6 +374,18 @@ class EventsStore(SQLBaseStore): new_forward_extremeties=new_forward_extremeties, ) persist_event_counter.inc_by(len(chunk)) + for event, context in chunk: + if context.app_service: + origin_type = "local" + origin_entity = context.app_service.id + elif self.hs.is_mine_id(event.sender): + origin_type = "local" + origin_entity = "*client*" + else: + origin_type = "remote" + origin_entity = get_domain_from_id(event.sender) + + event_counter.inc(event.type, origin_type, origin_entity) for event, context in chunk: if context.app_service: From 871605f4e20cce3f093b2eae0f3d2ad7fb43a640 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 3 May 2017 09:56:05 +0100 
Subject: [PATCH 005/139] Comments --- synapse/storage/events.py | 6 +++--- synapse/storage/state.py | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 36574f78b..5db7ec162 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -453,10 +453,10 @@ class EventsStore(SQLBaseStore): Assumes that we are only persisting events for one room at a time. Returns: - 2-tuple (to_delete, to_insert) where both are state dicts, i.e. - (type, state_key) -> event_id. `to_delete` are the entries to + 3-tuple (to_delete, to_insert, new_state) where both are state dicts, + i.e. (type, state_key) -> event_id. `to_delete` are the entries to first be deleted from current_state_events, `to_insert` are entries - to insert. + to insert. `new_state` is the full set of state. May return None if there are no changes to be applied. """ # Now we need to work out the different state sets for diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 1e1ce87e0..5d6f7dfa2 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -227,6 +227,9 @@ class StateStore(SQLBaseStore): ], ) + # Prefill the state group cache with this group. + # It's fine to use the sequence like this as the state group map + # is immutable. txn.call_after( self._state_group_cache.update, self._state_group_cache.sequence, From e4435b014e50a10ad89c201d6f91b6be35a9b02f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 3 May 2017 10:00:29 +0100 Subject: [PATCH 006/139] Update comment --- synapse/storage/state.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 5d6f7dfa2..03981f5d2 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -229,7 +229,8 @@ class StateStore(SQLBaseStore): # Prefill the state group cache with this group. # It's fine to use the sequence like this as the state group map - # is immutable. + # is immutable. 
(If the map wasn't immutable then this prefill could + # race with another update) txn.call_after( self._state_group_cache.update, self._state_group_cache.sequence, From 608b5a6317ce3797ff279f6d1a8a39f475b55736 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 May 2017 12:55:29 +0100 Subject: [PATCH 007/139] Take a copy before prefilling, as it may be a frozendict --- synapse/storage/state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 03981f5d2..85acf2ad1 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -235,7 +235,7 @@ class StateStore(SQLBaseStore): self._state_group_cache.update, self._state_group_cache.sequence, key=context.state_group, - value=context.current_state_ids, + value=dict(context.current_state_ids), full=True, ) From 331570ea6f97d570cf2774cd0700eb588e9fb1d7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 May 2017 15:33:07 +0100 Subject: [PATCH 008/139] Remove spurious merge artifacts --- synapse/storage/events.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 5db7ec162..12dd74daa 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -387,19 +387,6 @@ class EventsStore(SQLBaseStore): event_counter.inc(event.type, origin_type, origin_entity) - for event, context in chunk: - if context.app_service: - origin_type = "local" - origin_entity = context.app_service.id - elif self.hs.is_mine_id(event.sender): - origin_type = "local" - origin_entity = "*client*" - else: - origin_type = "remote" - origin_entity = get_domain_from_id(event.sender) - - event_counter.inc(event.type, origin_type, origin_entity) - for room_id, (_, _, new_state) in current_state_for_room.iteritems(): self.get_current_state_ids.prefill( (room_id, ), new_state From f2a5b6dbfd0f21919a36f779026e041900b998a9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 May 2017 14:07:08 +0100 Subject: [PATCH 009/139] Speed up get_domain_from_id --- synapse/types.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/types.py b/synapse/types.py index c87ed813b..445bdcb4d 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -56,10 +56,10 @@ def create_requester(user_id, access_token_id=None, is_guest=False, def get_domain_from_id(string): - try: - return string.split(":", 1)[1] - except IndexError: + idx = string.find(":") + if idx == -1: raise SynapseError(400, "Invalid ID: %r" % (string,)) + return string[idx + 1:] class DomainSpecificString( From ec5c4499f4ab24445c6df7310007353b466020ce Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 May 2017 14:46:16 +0100 Subject: [PATCH 010/139] Make presence use cached users/hosts in room --- synapse/federation/transaction_queue.py | 2 +- synapse/handlers/presence.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 695f1a737..a15198e05 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -285,7 +285,7 @@ class TransactionQueue(object): Args: states (list(UserPresenceState)) """ - hosts_and_states = yield get_interested_remotes(self.store, states) + hosts_and_states = yield get_interested_remotes(self.store, states, self.state) for destinations, states in hosts_and_states: for destination in destinations: diff --git a/synapse/handlers/presence.py 
b/synapse/handlers/presence.py index f3707afcd..c7c0b0a1e 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -780,12 +780,12 @@ class PresenceHandler(object): # don't need to send to local clients here, as that is done as part # of the event stream/sync. # TODO: Only send to servers not already in the room. - user_ids = yield self.store.get_users_in_room(room_id) if self.is_mine(user): state = yield self.current_state_for_user(user.to_string()) self._push_to_remotes([state]) else: + user_ids = yield self.store.get_users_in_room(room_id) user_ids = filter(self.is_mine_id, user_ids) states = yield self.current_state_for_users(user_ids) @@ -1322,7 +1322,7 @@ def get_interested_parties(store, states): @defer.inlineCallbacks -def get_interested_remotes(store, states): +def get_interested_remotes(store, states, state_handler): """Given a list of presence states figure out which remote servers should be sent which. @@ -1345,7 +1345,7 @@ def get_interested_remotes(store, states): room_ids_to_states, users_to_states = yield get_interested_parties(store, states) for room_id, states in room_ids_to_states.iteritems(): - hosts = yield store.get_hosts_in_room(room_id) + hosts = yield state_handler.get_current_hosts_in_room(room_id) hosts_and_states.append((hosts, states)) for user_id, states in users_to_states.iteritems(): From 13f540ef1b94e6173bdd4f2d84d90e0948cf5bf2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 May 2017 14:07:24 +0100 Subject: [PATCH 011/139] Speed up get_joined_hosts --- synapse/handlers/room_member.py | 3 ++- synapse/storage/roommember.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index ab87632d9..1ca88517a 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -739,10 +739,11 @@ class RoomMemberHandler(BaseHandler): if len(current_state_ids) == 1 and create_event_id: defer.returnValue(self.hs.is_mine_id(create_event_id)) - for (etype, state_key), event_id in current_state_ids.items(): + for etype, state_key in current_state_ids: if etype != EventTypes.Member or not self.hs.is_mine_id(state_key): continue + event_id = current_state_ids[(etype, state_key)] event = yield self.store.get_event(event_id, allow_none=True) if not event: continue diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 2fa20bd87..404f3583e 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -534,7 +534,7 @@ class RoomMemberStore(SQLBaseStore): assert state_group is not None joined_hosts = set() - for (etype, state_key), event_id in current_state_ids.items(): + for etype, state_key in current_state_ids: if etype == EventTypes.Member: try: host = get_domain_from_id(state_key) @@ -545,6 +545,7 @@ class RoomMemberStore(SQLBaseStore): if host in joined_hosts: continue + event_id = current_state_ids[(etype, state_key)] event = yield self.get_event(event_id, allow_none=True) if event and event.content["membership"] == Membership.JOIN: joined_hosts.add(intern_string(host)) From ad53fc3cf49492fca55cd775f6ff6b2cd353f588 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 May 2017 13:40:01 +0100 Subject: [PATCH 012/139] Short circuit when we have delta ids --- synapse/storage/roommember.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 404f3583e..a5df1d46b 100644 --- 
a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -387,7 +387,9 @@ class RoomMemberStore(SQLBaseStore): state_group = object() return self._get_joined_users_from_context( - event.room_id, state_group, context.current_state_ids, event=event, + event.room_id, state_group, context.current_state_ids, + event=event, + context=context, ) def get_joined_users_from_state(self, room_id, state_group, state_ids): @@ -405,18 +407,33 @@ class RoomMemberStore(SQLBaseStore): @cachedInlineCallbacks(num_args=2, cache_context=True, iterable=True, max_entries=100000) def _get_joined_users_from_context(self, room_id, state_group, current_state_ids, - cache_context, event=None): + cache_context, event=None, context=None): # We don't use `state_group`, it's there so that we can cache based # on it. However, it's important that it's never None, since two current_states # with a state_group of None are likely to be different. # See bulk_get_push_rules_for_room for how we work around this. assert state_group is not None + users_in_room = {} member_event_ids = [ e_id for key, e_id in current_state_ids.iteritems() if key[0] == EventTypes.Member ] + if context is not None: + if context.prev_group and context.delta_ids: + prev_res = self._get_joined_users_from_context.cache.get( + (room_id, context.prev_group), None + ) + if prev_res and isinstance(prev_res, dict): + users_in_room = dict(prev_res) + member_event_ids = [ + e_id + for key, e_id in context.delta_ids.iteritems() + if key[0] == EventTypes.Member + ] + for etype, state_key in context.delta_ids: + users_in_room.pop(state_key, None) # We check if we have any of the member event ids in the event cache # before we ask the DB @@ -431,7 +448,6 @@ class RoomMemberStore(SQLBaseStore): ) missing_member_event_ids = [] - users_in_room = {} for event_id in member_event_ids: ev_entry = event_map.get(event_id) if ev_entry: From 85e8092ccab2b7a479b71225fc24e69a2f58f980 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 May 2017 10:03:09 +0100 Subject: [PATCH 013/139] Comment --- synapse/storage/roommember.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index a5df1d46b..0829ae5be 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -420,7 +420,12 @@ class RoomMemberStore(SQLBaseStore): for key, e_id in current_state_ids.iteritems() if key[0] == EventTypes.Member ] + if context is not None: + # If we have a context with a delta from a previous state group, + # check if we also have the result from the previous group in cache. 
+ # If we do then we can reuse that result and simply update it with + # any membership changes in `delta_ids` if context.prev_group and context.delta_ids: prev_res = self._get_joined_users_from_context.cache.get( (room_id, context.prev_group), None From 3a431a126d50761976a45ec495c63a34a6452f4f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 May 2017 11:26:57 +0100 Subject: [PATCH 014/139] Bump changelog and version --- CHANGES.rst | 24 ++++++++++++++++++++++++ synapse/__init__.py | 2 +- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index babeaa0de..b6068ba99 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,27 @@ +Changes in synapse v0.21.0-rc3 (2017-05-17) +=========================================== + +Features: + +* Add per user ratelimiting overrides (PR #2208) +* Add config option to limit maximum number of events requested by ``/sync`` + and ``/messages`` (PR #2221) Thanks to @psaavedra! + + +Changes: + +* Various small performance fixes (PR #2201, #2202, #2224, #2226, #2227, #2228) +* Update username availability checker API (PR #2209, #2213) +* When purging, don't de-delta state groups we're about to delete (PR #2214) +* Documantation to chek synapse version (PR #2215) Thanks to @hamber-dick! +* Add an index to event_search to speed up purge history API (PR #2218) + + +Bug fixes: + +* Fix API to allow clients to upload one-time-keys with new sigs (PR #2206) + + Changes in synapse v0.21.0-rc2 (2017-05-08) =========================================== diff --git a/synapse/__init__.py b/synapse/__init__.py index d4ad23fa3..c39dde913 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.21.0-rc2" +__version__ = "0.21.0-rc3" From c6f4ff147509768b88860806501dac84cfc62d12 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 May 2017 11:29:14 +0100 Subject: [PATCH 015/139] Spelling --- CHANGES.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index b6068ba99..65880dac5 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -3,7 +3,7 @@ Changes in synapse v0.21.0-rc3 (2017-05-17) Features: -* Add per user ratelimiting overrides (PR #2208) +* Add per user rate-limiting overrides (PR #2208) * Add config option to limit maximum number of events requested by ``/sync`` and ``/messages`` (PR #2221) Thanks to @psaavedra! @@ -13,7 +13,7 @@ Changes: * Various small performance fixes (PR #2201, #2202, #2224, #2226, #2227, #2228) * Update username availability checker API (PR #2209, #2213) * When purging, don't de-delta state groups we're about to delete (PR #2214) -* Documantation to chek synapse version (PR #2215) Thanks to @hamber-dick! +* Documentation to check synapse version (PR #2215) Thanks to @hamber-dick! 
* Add an index to event_search to speed up purge history API (PR #2218) From 7c53a2780161478599f8394f6bb471f96fa87347 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 May 2017 13:13:45 +0100 Subject: [PATCH 016/139] Update changelog --- CHANGES.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 65880dac5..476d6fb6b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -10,7 +10,8 @@ Features: Changes: -* Various small performance fixes (PR #2201, #2202, #2224, #2226, #2227, #2228) +* Various small performance fixes (PR #2201, #2202, #2224, #2226, #2227, #2228, + #2229) * Update username availability checker API (PR #2209, #2213) * When purging, don't de-delta state groups we're about to delete (PR #2214) * Documentation to check synapse version (PR #2215) Thanks to @hamber-dick! From bbfe4e996c9b9729f19d5b104dc6abfe120531b4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 May 2017 14:31:23 +0100 Subject: [PATCH 017/139] Make get_state_groups_from_groups faster. Most of the time was spent copying a dict to filter out sentinel values that indicated that keys did not exist in the dict. The sentinel values were added to ensure that we cached the non-existence of keys. By updating DictionaryCache to keep track of which keys were known to not exist itself we can remove a dictionary copy. --- synapse/storage/state.py | 40 +++++------------ synapse/util/caches/dictionary_cache.py | 57 ++++++++++++++++++++----- tests/util/test_dict_cache.py | 2 +- 3 files changed, 58 insertions(+), 41 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 85acf2ad1..a7c3d401d 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -563,20 +563,22 @@ class StateStore(SQLBaseStore): where a `state_key` of `None` matches all state_keys for the `type`. """ - is_all, state_dict_ids = self._state_group_cache.get(group) + is_all, known_absent, state_dict_ids = self._state_group_cache.get(group) type_to_key = {} missing_types = set() + for typ, state_key in types: + key = (typ, state_key) if state_key is None: type_to_key[typ] = None - missing_types.add((typ, state_key)) + missing_types.add(key) else: if type_to_key.get(typ, object()) is not None: type_to_key.setdefault(typ, set()).add(state_key) - if (typ, state_key) not in state_dict_ids: - missing_types.add((typ, state_key)) + if key not in state_dict_ids and key not in known_absent: + missing_types.add(key) sentinel = object() @@ -590,7 +592,7 @@ class StateStore(SQLBaseStore): return True return False - got_all = not (missing_types or types is None) + got_all = is_all or not missing_types return { k: v for k, v in state_dict_ids.iteritems() @@ -607,7 +609,7 @@ class StateStore(SQLBaseStore): Args: group: The state group to lookup """ - is_all, state_dict_ids = self._state_group_cache.get(group) + is_all, _, state_dict_ids = self._state_group_cache.get(group) return state_dict_ids, is_all @@ -624,7 +626,7 @@ class StateStore(SQLBaseStore): missing_groups = [] if types is not None: for group in set(groups): - state_dict_ids, missing_types, got_all = self._get_some_state_from_cache( + state_dict_ids, _, got_all = self._get_some_state_from_cache( group, types ) results[group] = state_dict_ids @@ -653,19 +655,7 @@ class StateStore(SQLBaseStore): # Now we want to update the cache with all the things we fetched # from the database. 
for group, group_state_dict in group_to_state_dict.iteritems(): - if types: - # We delibrately put key -> None mappings into the cache to - # cache absence of the key, on the assumption that if we've - # explicitly asked for some types then we will probably ask - # for them again. - state_dict = { - (intern_string(etype), intern_string(state_key)): None - for (etype, state_key) in types - } - state_dict.update(results[group]) - results[group] = state_dict - else: - state_dict = results[group] + state_dict = results[group] state_dict.update( ((intern_string(k[0]), intern_string(k[1])), to_ascii(v)) @@ -677,17 +667,9 @@ class StateStore(SQLBaseStore): key=group, value=state_dict, full=(types is None), + known_absent=types, ) - # Remove all the entries with None values. The None values were just - # used for bookkeeping in the cache. - for group, state_dict in results.iteritems(): - results[group] = { - key: event_id - for key, event_id in state_dict.iteritems() - if event_id - } - defer.returnValue(results) def get_next_state_group(self): diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index cb6933c61..d4105822b 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -23,7 +23,17 @@ import logging logger = logging.getLogger(__name__) -class DictionaryEntry(namedtuple("DictionaryEntry", ("full", "value"))): +class DictionaryEntry(namedtuple("DictionaryEntry", ("full", "known_absent", "value"))): + """Returned when getting an entry from the cache + + Attributes: + full (bool): Whether the cache has the full or dict or just some keys. + If not full then not all requested keys will necessarily be present + in `value` + known_absent (set): Keys that were looked up in the dict and were not + there. + value (dict): The full or partial dict value + """ def __len__(self): return len(self.value) @@ -58,21 +68,31 @@ class DictionaryCache(object): ) def get(self, key, dict_keys=None): + """Fetch an entry out of the cache + + Args: + key + dict_key(list): If given a set of keys then return only those keys + that exist in the cache. + + Returns: + DictionaryEntry + """ entry = self.cache.get(key, self.sentinel) if entry is not self.sentinel: self.metrics.inc_hits() if dict_keys is None: - return DictionaryEntry(entry.full, dict(entry.value)) + return DictionaryEntry(entry.full, entry.known_absent, dict(entry.value)) else: - return DictionaryEntry(entry.full, { + return DictionaryEntry(entry.full, entry.known_absent, { k: entry.value[k] for k in dict_keys if k in entry.value }) self.metrics.inc_misses() - return DictionaryEntry(False, {}) + return DictionaryEntry(False, set(), {}) def invalidate(self, key): self.check_thread() @@ -87,19 +107,34 @@ class DictionaryCache(object): self.sequence += 1 self.cache.clear() - def update(self, sequence, key, value, full=False): + def update(self, sequence, key, value, full=False, known_absent=None): + """Updates the entry in the cache + + Args: + sequence + key + value (dict): The value to update the cache with. + full (bool): Whether the given value is the full dict, or just a + partial subset there of. If not full then any existing entries + for the key will be updated. + known_absent (set): Set of keys that we know don't exist in the full + dict. 
+ """ self.check_thread() if self.sequence == sequence: # Only update the cache if the caches sequence number matches the # number that the cache had before the SELECT was started (SYN-369) + if known_absent is None: + known_absent = set() if full: - self._insert(key, value) + self._insert(key, value, known_absent) else: - self._update_or_insert(key, value) + self._update_or_insert(key, value, known_absent) - def _update_or_insert(self, key, value): - entry = self.cache.setdefault(key, DictionaryEntry(False, {})) + def _update_or_insert(self, key, value, known_absent): + entry = self.cache.setdefault(key, DictionaryEntry(False, set(), {})) entry.value.update(value) + entry.known_absent.update(known_absent) - def _insert(self, key, value): - self.cache[key] = DictionaryEntry(True, value) + def _insert(self, key, value, known_absent): + self.cache[key] = DictionaryEntry(True, known_absent, value) diff --git a/tests/util/test_dict_cache.py b/tests/util/test_dict_cache.py index 272b71034..bc92f85fa 100644 --- a/tests/util/test_dict_cache.py +++ b/tests/util/test_dict_cache.py @@ -28,7 +28,7 @@ class DictCacheTestCase(unittest.TestCase): key = "test_simple_cache_hit_full" v = self.cache.get(key) - self.assertEqual((False, {}), v) + self.assertEqual((False, set(), {}), v) seq = self.cache.sequence test_value = {"test": "test_simple_cache_hit_full"} From 66d8ffabbdfa1fadd107851c8a0362a74149b37b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 2 May 2017 10:46:01 +0100 Subject: [PATCH 018/139] Faster push rule calculation via push specific cache We add a push rule specific cache that ensures that we can reuse calculated push rules appropriately when a user join/leaves. --- synapse/handlers/message.py | 5 +- synapse/push/action_generator.py | 12 +- synapse/push/bulk_push_rule_evaluator.py | 279 ++++++++++++++++++++--- 3 files changed, 249 insertions(+), 47 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 196925eda..ba8776f28 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -54,6 +54,8 @@ class MessageHandler(BaseHandler): # This is to stop us from diverging history *too* much. self.limiter = Limiter(max_count=5) + self.action_generator = ActionGenerator(self.hs) + @defer.inlineCallbacks def purge_history(self, room_id, event_id): event = yield self.store.get_event(event_id) @@ -590,8 +592,7 @@ class MessageHandler(BaseHandler): "Changing the room create event is forbidden", ) - action_generator = ActionGenerator(self.hs) - yield action_generator.handle_push_actions_for_event( + yield self.action_generator.handle_push_actions_for_event( event, context ) diff --git a/synapse/push/action_generator.py b/synapse/push/action_generator.py index 3f75d3f92..0658497d9 100644 --- a/synapse/push/action_generator.py +++ b/synapse/push/action_generator.py @@ -15,7 +15,7 @@ from twisted.internet import defer -from .bulk_push_rule_evaluator import evaluator_for_event +from .bulk_push_rule_evaluator import BulkPushRuleEvaluator from synapse.util.metrics import Measure @@ -29,6 +29,7 @@ class ActionGenerator: self.hs = hs self.clock = hs.get_clock() self.store = hs.get_datastore() + self.bulk_evaluator = BulkPushRuleEvaluator(hs) # really we want to get all user ids and all profile tags too, # since we want the actions for each profile tag for every user and # also actions for a client with no profile tag for each user. 
@@ -38,16 +39,11 @@ class ActionGenerator: @defer.inlineCallbacks def handle_push_actions_for_event(self, event, context): - with Measure(self.clock, "evaluator_for_event"): - bulk_evaluator = yield evaluator_for_event( - event, self.hs, self.store, context - ) - with Measure(self.clock, "action_for_event_by_user"): - actions_by_user = yield bulk_evaluator.action_for_event_by_user( + actions_by_user = yield self.bulk_evaluator.action_for_event_by_user( event, context ) context.push_actions = [ - (uid, actions) for uid, actions in actions_by_user.items() + (uid, actions) for uid, actions in actions_by_user.iteritems() ] diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index f943ff640..43c2e27dc 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -19,60 +19,78 @@ from twisted.internet import defer from .push_rule_evaluator import PushRuleEvaluatorForEvent -from synapse.api.constants import EventTypes from synapse.visibility import filter_events_for_clients_context +from synapse.api.constants import EventTypes, Membership +from synapse.util.caches.descriptors import cached +from synapse.util.async import Linearizer logger = logging.getLogger(__name__) -@defer.inlineCallbacks -def evaluator_for_event(event, hs, store, context): - rules_by_user = yield store.bulk_get_push_rules_for_room( - event, context - ) - - # if this event is an invite event, we may need to run rules for the user - # who's been invited, otherwise they won't get told they've been invited - if event.type == 'm.room.member' and event.content['membership'] == 'invite': - invited_user = event.state_key - if invited_user and hs.is_mine_id(invited_user): - has_pusher = yield store.user_has_pusher(invited_user) - if has_pusher: - rules_by_user = dict(rules_by_user) - rules_by_user[invited_user] = yield store.get_push_rules_for_user( - invited_user - ) - - defer.returnValue(BulkPushRuleEvaluator( - event.room_id, rules_by_user, store - )) +rules_by_room = {} class BulkPushRuleEvaluator: + """Calculates the outcome of push rules for an event for all users in the + room at once. """ - Runs push rules for all users in a room. - This is faster than running PushRuleEvaluator for each user because it - fetches all the rules for all the users in one (batched) db query - rather than doing multiple queries per-user. It currently uses - the same logic to run the actual rules, but could be optimised further - (see https://matrix.org/jira/browse/SYN-562) - """ - def __init__(self, room_id, rules_by_user, store): - self.room_id = room_id - self.rules_by_user = rules_by_user - self.store = store + + def __init__(self, hs): + self.hs = hs + self.store = hs.get_datastore() + + @defer.inlineCallbacks + def _get_rules_for_event(self, event, context): + """This gets the rules for all users in the room at the time of the event, + as well as the push rules for the invitee if the event is an invite. 
+ + Returns: + dict of user_id -> push_rules + """ + room_id = event.room_id + rules_for_room = self._get_rules_for_room(room_id) + + rules_by_user = yield rules_for_room.get_rules(context) + + # if this event is an invite event, we may need to run rules for the user + # who's been invited, otherwise they won't get told they've been invited + if event.type == 'm.room.member' and event.content['membership'] == 'invite': + invited = event.state_key + if invited and self.hs.is_mine_id(invited): + has_pusher = yield self.store.user_has_pusher(invited) + if has_pusher: + rules_by_user = dict(rules_by_user) + rules_by_user[invited] = yield self.store.get_push_rules_for_user( + invited + ) + + defer.returnValue(rules_by_user) + + @cached(max_entries=10000) + def _get_rules_for_room(self, room_id): + """Get the current RulesForRoom object for the given room id + + Returns: + RulesForRoom + """ + return RulesForRoom(self.hs, room_id, self._get_rules_for_room.cache) @defer.inlineCallbacks def action_for_event_by_user(self, event, context): + """Given an event and context, evaluate the push rules and return + the results + + Returns: + dict of user_id -> action + """ + rules_by_user = yield self._get_rules_for_event(event, context) actions_by_user = {} # None of these users can be peeking since this list of users comes # from the set of users in the room, so we know for sure they're all # actually in the room. - user_tuples = [ - (u, False) for u in self.rules_by_user.keys() - ] + user_tuples = [(u, False) for u in rules_by_user] filtered_by_user = yield filter_events_for_clients_context( self.store, user_tuples, [event], {event.event_id: context} @@ -86,7 +104,7 @@ class BulkPushRuleEvaluator: condition_cache = {} - for uid, rules in self.rules_by_user.items(): + for uid, rules in rules_by_user.iteritems(): display_name = None profile_info = room_members.get(uid) if profile_info: @@ -138,3 +156,190 @@ def _condition_checker(evaluator, conditions, uid, display_name, cache): return False return True + + +class RulesForRoom(object): + """Caches push rules for users in a room. + + This efficiently handles users joining/leaving the room by not invalidating + the entire cache for the room. + """ + + def __init__(self, hs, room_id, rules_for_room_cache): + """ + Args: + hs (HomeServer) + room_id (str) + rules_for_room_cache(Cache): The cache object that caches these + RoomsForUser objects. + """ + self.room_id = room_id + self.is_mine_id = hs.is_mine_id + self.store = hs.get_datastore() + + self.linearizer = Linearizer(name="rules_for_room") + + self.member_map = {} # event_id -> (user_id, state) + self.rules_by_user = {} # user_id -> rules + + # The last state group we updated the caches for. If the state_group of + # a new event comes along, we know that we can just return the cached + # result. + # On invalidation of the rules themselves (if the user changes them), + # we invalidate everything and set state_group to `object()` + self.state_group = object() + + # A sequence number to keep track of when we're allowed to update the + # cache. We bump the sequence number when we invalidate the cache. If + # the sequence number changes while we're calculating stuff we should + # not update the cache with it. + self.sequence = 0 + + # We need to be clever on the invalidating caches callbacks, as + # otherwise the invalidation callback holds a reference to the object, + # potentially causing it to leak. 
+ # To get around this we pass a function that on invalidations looks ups + # the RoomsForUser entry in the cache, rather than keeping a reference + # to self around in the callback. + def invalidate_all_cb(): + rules = rules_for_room_cache.get(room_id, update_metrics=False) + if rules: + rules.invalidate_all() + + self.invalidate_all_cb = invalidate_all_cb + + @defer.inlineCallbacks + def get_rules(self, context): + """Given an event context return the rules for all users who are + currently in the room. + """ + state_group = context.state_group + + with (yield self.linearizer.queue(())): + if state_group and self.state_group == state_group: + defer.returnValue(self.rules_by_user) + + ret_rules_by_user = {} + missing_member_event_ids = {} + if state_group and self.state_group == context.prev_group: + # If we have a simple delta then we can reuse most of the previous + # results. + ret_rules_by_user = self.rules_by_user + current_state_ids = context.delta_ids + else: + current_state_ids = context.current_state_ids + + # Loop through to see which member events we've seen and have rules + # for and which we need to fetch + for key, event_id in current_state_ids.iteritems(): + if key[0] != EventTypes.Member: + continue + + res = self.member_map.get(event_id, None) + if res: + user_id, state = res + if state == Membership.JOIN: + rules = self.rules_by_user.get(user_id, None) + if rules: + ret_rules_by_user[user_id] = rules + continue + + user_id = key[1] + if not self.is_mine_id(user_id): + continue + + if self.store.get_if_app_services_interested_in_user( + user_id, exclusive=True + ): + continue + + # If a user has left a room we remove their push rule. If they + # joined then we readd it later in _update_rules_with_member_event_ids + ret_rules_by_user.pop(user_id, None) + missing_member_event_ids[user_id] = event_id + + if missing_member_event_ids: + # If we have some memebr events we haven't seen, look them up + # and fetch push rules for them if appropriate. + yield self._update_rules_with_member_event_ids( + ret_rules_by_user, missing_member_event_ids, state_group + ) + + defer.returnValue(ret_rules_by_user) + + @defer.inlineCallbacks + def _update_rules_with_member_event_ids(self, ret_rules_by_user, member_event_ids, + state_group): + """Update the partially filled rules_by_user dict by fetching rules for + any newly joined users in the `member_event_ids` list. + + Args: + ret_rules_by_user (dict): Partiallly filled dict of push rules. Gets + updated with any new rules. + member_event_ids (list): List of event ids for membership events that + have happened since the last time we filled rules_by_user + state_group: The state group we are currently computing push rules + for. Used when updating the cache. 
+ """ + sequence = self.sequence + + rows = yield self.store._simple_select_many_batch( + table="room_memberships", + column="event_id", + iterable=member_event_ids.values(), + retcols=('user_id', 'membership', 'event_id'), + keyvalues={}, + batch_size=500, + desc="_get_rules_for_member_event_ids", + ) + + members = { + row["event_id"]: (row["user_id"], row["membership"]) + for row in rows + } + + interested_in_user_ids = set(user_id for user_id, _ in members.itervalues()) + + if_users_with_pushers = yield self.store.get_if_users_have_pushers( + interested_in_user_ids, + on_invalidate=self.invalidate_all_cb, + ) + + user_ids = set( + uid for uid, have_pusher in if_users_with_pushers.iteritems() if have_pusher + ) + + users_with_receipts = yield self.store.get_users_with_read_receipts_in_room( + self.room_id, on_invalidate=self.invalidate_all_cb, + ) + + # any users with pushers must be ours: they have pushers + for uid in users_with_receipts: + if uid in interested_in_user_ids: + user_ids.add(uid) + + rules_by_user = yield self.store.bulk_get_push_rules( + user_ids, on_invalidate=self.invalidate_all_cb, + ) + + ret_rules_by_user.update( + item for item in rules_by_user.iteritems() if item[0] is not None + ) + + self.update_cache(sequence, members, ret_rules_by_user, state_group) + + def invalidate_all(self): + # Note: Don't hand this function directly to an invalidation callback + # as it keeps a reference to self and will stop this instance from being + # GC'd if it gets dropped from the rules_to_user cache. Instead use + # `self.invalidate_all_cb` + self.sequence += 1 + self.state_group = object() + self.member_map = {} + self.rules_by_user = {} + + def update_cache(self, sequence, members, rules_by_user, state_group): + if sequence == self.sequence: + self.member_map.update(members) + self.rules_by_user = rules_by_user + self.state_group = state_group From f98efea9b170093c9abb5ceebfb1858da78d2978 Mon Sep 17 00:00:00 2001 From: Aaron Raimist Date: Wed, 17 May 2017 21:41:48 -0500 Subject: [PATCH 019/139] Correct a typo in UPGRADE.rst --- UPGRADE.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/UPGRADE.rst b/UPGRADE.rst index 6164df883..62b22e910 100644 --- a/UPGRADE.rst +++ b/UPGRADE.rst @@ -33,7 +33,7 @@ To check whether your update was sucessfull, run: .. code:: bash - # replace your.server.domain with ther domain of your synaspe homeserver + # replace your.server.domain with ther domain of your synapse homeserver curl https:///_matrix/federation/v1/version So for the Matrix.org HS server the URL would be: https://matrix.org/_matrix/federation/v1/version. 
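The RulesForRoom cache introduced in PATCH 018 guards its writes with a sequence number: invalidation bumps the sequence, and a recalculation that started before the invalidation refuses to write its stale result back. A minimal standalone sketch of that pattern, using a toy class rather than Synapse's actual RulesForRoom and leaving out the Twisted deferred machinery:

class SequencedCache(object):
    """Toy illustration of the sequence-number pattern used by RulesForRoom."""

    def __init__(self):
        self.sequence = 0
        self.value = {}

    def invalidate_all(self):
        # Bump the sequence so any recalculation still in flight will
        # notice and drop its (now stale) result.
        self.sequence += 1
        self.value = {}

    def update(self, sequence, new_value):
        # Only apply the update if nothing invalidated the cache while
        # the slow recalculation was running.
        if sequence == self.sequence:
            self.value.update(new_value)


cache = SequencedCache()
seq = cache.sequence                                    # snapshot before the slow work
fresh = {"@user:example.com": ["pretend push rules"]}   # stand-in for a DB lookup
cache.invalidate_all()                                  # e.g. the user edited their rules
cache.update(seq, fresh)                                # stale write is discarded
assert cache.value == {}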
From 056ba9b7953ae3709176d424b3b2fec4625bc480 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 May 2017 11:45:56 +0100 Subject: [PATCH 020/139] Add comment --- synapse/push/bulk_push_rule_evaluator.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 43c2e27dc..5b1f9a1c2 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -74,6 +74,9 @@ class BulkPushRuleEvaluator: Returns: RulesForRoom """ + # It's important that RulesForRoom gets added to self._get_rules_for_room.cache + # before any lookup methods get called on it as otherwise there may be + # a race if invalidate_all gets called (which assumes its in the cache) return RulesForRoom(self.hs, room_id, self._get_rules_for_room.cache) @defer.inlineCallbacks From a5425b2e5b28fb7167b2c885aef7213664b20b9a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 May 2017 13:53:48 +0100 Subject: [PATCH 021/139] Bump changelog and version --- CHANGES.rst | 6 ++++++ synapse/__init__.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 476d6fb6b..82247fa52 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,9 @@ +Changes in synapse v0.21.0 (2017-05-18) +======================================= + +No changes since v0.21.0-rc3 + + Changes in synapse v0.21.0-rc3 (2017-05-17) =========================================== diff --git a/synapse/__init__.py b/synapse/__init__.py index c39dde913..2a40bab3f 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.21.0-rc3" +__version__ = "0.21.0" From c57789d138ea3c7f764b2504af8fdcc440656e73 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 May 2017 16:17:23 +0100 Subject: [PATCH 022/139] Remove size of push get_rules cache --- synapse/push/bulk_push_rule_evaluator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 5b1f9a1c2..7f04d5668 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -67,7 +67,7 @@ class BulkPushRuleEvaluator: defer.returnValue(rules_by_user) - @cached(max_entries=10000) + @cached() def _get_rules_for_room(self, room_id): """Get the current RulesForRoom object for the given room id From 760625acbaef9ae7032ba5e59e91979f454febef Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 May 2017 16:34:41 +0100 Subject: [PATCH 023/139] Make get_if_app_services_interested_in_user faster --- synapse/appservice/__init__.py | 10 ++++++++ synapse/push/bulk_push_rule_evaluator.py | 4 +-- synapse/storage/appservice.py | 31 +++++++++++++++++------- synapse/storage/push_rule.py | 2 +- 4 files changed, 34 insertions(+), 13 deletions(-) diff --git a/synapse/appservice/__init__.py b/synapse/appservice/__init__.py index 7346206bb..b98900731 100644 --- a/synapse/appservice/__init__.py +++ b/synapse/appservice/__init__.py @@ -241,6 +241,16 @@ class ApplicationService(object): def is_exclusive_room(self, room_id): return self._is_exclusive(ApplicationService.NS_ROOMS, room_id) + def get_exlusive_user_regexes(self): + """Get the list of regexes used to determine if a user is exclusively + registered by the AS + """ + return [ + regex_obj["regex"] + for regex_obj in self.namespaces[ApplicationService.NS_USERS] + if regex_obj["exclusive"] 
+ ] + def is_rate_limited(self): return self.rate_limited diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 7f04d5668..386d7bed8 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -251,9 +251,7 @@ class RulesForRoom(object): if not self.is_mine_id(user_id): continue - if self.store.get_if_app_services_interested_in_user( - user_id, exclusive=True - ): + if self.store.get_if_app_services_interested_in_user(user_id): continue # If a user has left a room we remove their push rule. If they diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index 0e9e8d345..532df736a 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging +import re import simplejson as json from twisted.internet import defer @@ -36,19 +37,31 @@ class ApplicationServiceStore(SQLBaseStore): hs.config.app_service_config_files ) + # We precompie a regex constructed from all the regexes that the AS's + # have registered for exclusive users. + exclusive_user_regexes = [ + regex.pattern + for service in self.services_cache + for regex in service.get_exlusive_user_regexes() + ] + if exclusive_user_regexes: + exclusive_user_regex = "|".join("(" + r + ")" for r in exclusive_user_regexes) + self.exclusive_user_regex = re.compile(exclusive_user_regex) + else: + # We handle this case specially otherwise the constructed regex + # will always match + self.exclusive_user_regex = None + def get_app_services(self): return self.services_cache - def get_if_app_services_interested_in_user(self, user_id, exclusive=False): - """Check if the user is one associated with an app service + def get_if_app_services_interested_in_user(self, user_id): + """Check if the user is one associated with an app service (exclusively) """ - for service in self.services_cache: - if service.is_interested_in_user(user_id): - if exclusive: - return service.is_exclusive_user(user_id) - else: - return True - return False + if self.exclusive_user_regex: + return bool(self.exclusive_user_regex.match(user_id)) + else: + return False def get_app_service_by_user_id(self, user_id): """Retrieve an application service from their user ID. 
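PATCH 023 replaces the per-service loop in get_if_app_services_interested_in_user with a single regex precompiled from every exclusive user namespace. A rough standalone sketch of the same idea, with made-up namespace patterns standing in for the real application-service configuration:

import re

# Hypothetical exclusive-user namespaces; in Synapse these come from the
# registered application services' config files.
exclusive_user_regexes = [
    r"@irc_.*:example\.org",
    r"@gitter_.*:example\.org",
]

if exclusive_user_regexes:
    # One alternation, compiled once, instead of checking every service's
    # namespaces for each user we look at.
    exclusive_user_regex = re.compile(
        "|".join("(" + r + ")" for r in exclusive_user_regexes)
    )
else:
    # Nothing registered: an empty alternation would match everything,
    # so keep None and treat it as "never matches".
    exclusive_user_regex = None


def is_exclusive_appservice_user(user_id):
    return bool(exclusive_user_regex and exclusive_user_regex.match(user_id))


assert is_exclusive_appservice_user("@irc_alice:example.org")
assert not is_exclusive_appservice_user("@bob:example.org")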
diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index 65bad3fad..0a819d32c 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -163,7 +163,7 @@ class PushRuleStore(SQLBaseStore): local_users_in_room = set( u for u in users_in_room if self.hs.is_mine_id(u) - and not self.get_if_app_services_interested_in_user(u, exclusive=True) + and not self.get_if_app_services_interested_in_user(u) ) # users in the room who have pushers need to get push rules run because From 107ac7ac9690e638f62924eba6c29d192632c75a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 May 2017 17:17:53 +0100 Subject: [PATCH 024/139] Increase size of push rule caches --- synapse/storage/push_rule.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index 0a819d32c..8758b1c0c 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -49,7 +49,7 @@ def _load_rules(rawrules, enabled_map): class PushRuleStore(SQLBaseStore): - @cachedInlineCallbacks() + @cachedInlineCallbacks(max_entries=5000) def get_push_rules_for_user(self, user_id): rows = yield self._simple_select_list( table="push_rules", @@ -73,7 +73,7 @@ class PushRuleStore(SQLBaseStore): defer.returnValue(rules) - @cachedInlineCallbacks() + @cachedInlineCallbacks(max_entries=5000) def get_push_rules_enabled_for_user(self, user_id): results = yield self._simple_select_list( table="push_rules_enable", From 72ed8196b3b83f19d09b9ad7078a1cd3f07dc0c5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 May 2017 17:48:36 +0100 Subject: [PATCH 025/139] Don't push users who have left --- synapse/push/bulk_push_rule_evaluator.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 386d7bed8..015802691 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -299,7 +299,10 @@ class RulesForRoom(object): for row in rows } - interested_in_user_ids = set(user_id for user_id, _ in members.itervalues()) + interested_in_user_ids = set( + user_id for user_id, membership in members.itervalues() + if membership == Membership.JOIN + ) if_users_with_pushers = yield self.store.get_if_users_have_pushers( interested_in_user_ids, From cafe659f726d4b775d473bfd3ec1a7c8f17b5994 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 May 2017 18:17:40 +0100 Subject: [PATCH 026/139] Store ActionGenerator in HomeServer --- synapse/handlers/federation.py | 5 ++--- synapse/handlers/message.py | 3 +-- synapse/push/action_generator.py | 2 +- synapse/push/bulk_push_rule_evaluator.py | 2 +- synapse/server.py | 5 +++++ 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 52d97dfbf..63e633548 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -43,7 +43,6 @@ from synapse.events.utils import prune_event from synapse.util.retryutils import NotRetryingDestination -from synapse.push.action_generator import ActionGenerator from synapse.util.distributor import user_joined_room from twisted.internet import defer @@ -75,6 +74,7 @@ class FederationHandler(BaseHandler): self.state_handler = hs.get_state_handler() self.server_name = hs.hostname self.keyring = hs.get_keyring() + self.action_generator = hs.get_action_generator() self.replication_layer.set_handler(self) @@ -1389,8 +1389,7 @@ 
class FederationHandler(BaseHandler): ) if not event.internal_metadata.is_outlier(): - action_generator = ActionGenerator(self.hs) - yield action_generator.handle_push_actions_for_event( + yield self.action_generator.handle_push_actions_for_event( event, context ) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index ba8776f28..a04f634c5 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -20,7 +20,6 @@ from synapse.api.errors import AuthError, Codes, SynapseError from synapse.crypto.event_signing import add_hashes_and_signatures from synapse.events.utils import serialize_event from synapse.events.validator import EventValidator -from synapse.push.action_generator import ActionGenerator from synapse.types import ( UserID, RoomAlias, RoomStreamToken, ) @@ -54,7 +53,7 @@ class MessageHandler(BaseHandler): # This is to stop us from diverging history *too* much. self.limiter = Limiter(max_count=5) - self.action_generator = ActionGenerator(self.hs) + self.action_generator = hs.get_action_generator() @defer.inlineCallbacks def purge_history(self, room_id, event_id): diff --git a/synapse/push/action_generator.py b/synapse/push/action_generator.py index 0658497d9..fe09d50d5 100644 --- a/synapse/push/action_generator.py +++ b/synapse/push/action_generator.py @@ -24,7 +24,7 @@ import logging logger = logging.getLogger(__name__) -class ActionGenerator: +class ActionGenerator(object): def __init__(self, hs): self.hs = hs self.clock = hs.get_clock() diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 015802691..eebabe78a 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -31,7 +31,7 @@ logger = logging.getLogger(__name__) rules_by_room = {} -class BulkPushRuleEvaluator: +class BulkPushRuleEvaluator(object): """Calculates the outcome of push rules for an event for all users in the room at once. 
""" diff --git a/synapse/server.py b/synapse/server.py index 12754c89a..e400e278c 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -52,6 +52,7 @@ from synapse.handlers.read_marker import ReadMarkerHandler from synapse.http.client import SimpleHttpClient, InsecureInterceptableContextFactory from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.notifier import Notifier +from synapse.push.action_generator import ActionGenerator from synapse.push.pusherpool import PusherPool from synapse.rest.media.v1.media_repository import MediaRepository from synapse.state import StateHandler @@ -135,6 +136,7 @@ class HomeServer(object): 'macaroon_generator', 'tcp_replication', 'read_marker_handler', + 'action_generator', ] def __init__(self, hostname, **kwargs): @@ -299,6 +301,9 @@ class HomeServer(object): def build_tcp_replication(self): raise NotImplementedError() + def build_action_generator(self): + return ActionGenerator(self) + def remove_pusher(self, app_id, push_key, user_id): return self.get_pusherpool().remove_pusher(app_id, push_key, user_id) From 1c1c0257f45c9a992065d2ac62f795f48f84c711 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 19 May 2017 11:44:11 +0100 Subject: [PATCH 027/139] Move invalidation cb to its own structure --- synapse/push/bulk_push_rule_evaluator.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index eebabe78a..760d567ca 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -24,6 +24,8 @@ from synapse.api.constants import EventTypes, Membership from synapse.util.caches.descriptors import cached from synapse.util.async import Linearizer +from collections import namedtuple + logger = logging.getLogger(__name__) @@ -204,12 +206,7 @@ class RulesForRoom(object): # To get around this we pass a function that on invalidations looks ups # the RoomsForUser entry in the cache, rather than keeping a reference # to self around in the callback. - def invalidate_all_cb(): - rules = rules_for_room_cache.get(room_id, update_metrics=False) - if rules: - rules.invalidate_all() - - self.invalidate_all_cb = invalidate_all_cb + self.invalidate_all_cb = _Invalidation(rules_for_room_cache, room_id) @defer.inlineCallbacks def get_rules(self, context): @@ -347,3 +344,15 @@ class RulesForRoom(object): self.member_map.update(members) self.rules_by_user = rules_by_user self.state_group = state_group + + +class _Invalidation(namedtuple("_Invalidation", ("cache", "room_id"))): + # We rely on _CacheContext implementing __eq__ and __hash__ sensibly, + # which namedtuple does for us (i.e. two _CacheContext are the same if + # their caches and keys match). This is important in particular to + # dedupe when we add callbacks to lru cache nodes, otherwise the number + # of callbacks would grow. 
+ def __call__(self): + rules = self.cache.get(self.room_id, None, update_metrics=False) + if rules: + rules.invalidate_all() From 58ebb96cce1992a79cc363d4157643c5db569396 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 19 May 2017 14:38:50 +0100 Subject: [PATCH 028/139] Fix invalidation of get_users_with_read_receipts_in_room --- synapse/storage/receipts.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index efb90c3c9..f42b8014c 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -45,7 +45,9 @@ class ReceiptsStore(SQLBaseStore): return # Returns an ObservableDeferred - res = self.get_users_with_read_receipts_in_room.cache.get((room_id,), None) + res = self.get_users_with_read_receipts_in_room.cache.get( + room_id, None, update_metrics=False, + ) if res: if isinstance(res, defer.Deferred) and res.called: From b4f59c7e27cf9d736e2806e1bab9ae60ae1f7c06 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 19 May 2017 15:47:55 +0100 Subject: [PATCH 029/139] Add count of one time keys to sync stream --- synapse/handlers/sync.py | 11 +++++++++++ synapse/rest/client/v2_alpha/sync.py | 1 + 2 files changed, 12 insertions(+) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index c0205da1a..91c6c6be3 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -117,6 +117,8 @@ class SyncResult(collections.namedtuple("SyncResult", [ "archived", # ArchivedSyncResult for each archived room. "to_device", # List of direct messages for the device. "device_lists", # List of user_ids whose devices have chanegd + "device_one_time_keys_count", # Dict of algorithm to count for one time keys + # for this device ])): __slots__ = [] @@ -550,6 +552,14 @@ class SyncHandler(object): sync_result_builder ) + device_id = sync_config.device_id + one_time_key_counts = {} + if device_id: + user_id = sync_config.user.to_string() + one_time_key_counts = yield self.store.count_e2e_one_time_keys( + user_id, device_id + ) + defer.returnValue(SyncResult( presence=sync_result_builder.presence, account_data=sync_result_builder.account_data, @@ -558,6 +568,7 @@ class SyncHandler(object): archived=sync_result_builder.archived, to_device=sync_result_builder.to_device, device_lists=device_lists, + device_one_time_keys_count=one_time_key_counts, next_batch=sync_result_builder.now_token, )) diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py index 771e127ab..83e209d18 100644 --- a/synapse/rest/client/v2_alpha/sync.py +++ b/synapse/rest/client/v2_alpha/sync.py @@ -192,6 +192,7 @@ class SyncRestServlet(RestServlet): "invite": invited, "leave": archived, }, + "device_one_time_keys_count": sync_result.device_one_time_keys_count, "next_batch": sync_result.next_batch.to_string(), } From 270e1c904a53c69512eec2d3818718d64efa7649 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 19 May 2017 16:51:05 +0100 Subject: [PATCH 030/139] Speed up calculating push rules --- synapse/push/bulk_push_rule_evaluator.py | 27 +++++++++++++++++------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 760d567ca..3da684c6b 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -200,6 +200,10 @@ class RulesForRoom(object): # not update the cache with it. 
self.sequence = 0 + # A cache of user_ids that we *know* aren't interesting, e.g. user_ids + # owned by AS's, or remote users, etc. + self.uninteresting_user_set = set() + # We need to be clever on the invalidating caches callbacks, as # otherwise the invalidation callback holds a reference to the object, # potentially causing it to leak. @@ -231,10 +235,24 @@ class RulesForRoom(object): # Loop through to see which member events we've seen and have rules # for and which we need to fetch - for key, event_id in current_state_ids.iteritems(): + for key in current_state_ids: if key[0] != EventTypes.Member: continue + user_id = key[1] + if user_id in self.uninteresting_user_set: + continue + + if not self.is_mine_id(user_id): + self.uninteresting_user_set.add(user_id) + continue + + if self.store.get_if_app_services_interested_in_user(user_id): + self.uninteresting_user_set.add(user_id) + continue + + event_id = current_state_ids[key] + res = self.member_map.get(event_id, None) if res: user_id, state = res @@ -244,13 +262,6 @@ class RulesForRoom(object): ret_rules_by_user[user_id] = rules continue - user_id = key[1] - if not self.is_mine_id(user_id): - continue - - if self.store.get_if_app_services_interested_in_user(user_id): - continue - # If a user has left a room we remove their push rule. If they # joined then we readd it later in _update_rules_with_member_event_ids ret_rules_by_user.pop(user_id, None) From 25f03cf8e9600f68b72fb5843e1e2b789d064c2a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 22 May 2017 14:58:22 +0100 Subject: [PATCH 031/139] Use tuple unpacking --- synapse/push/bulk_push_rule_evaluator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 3da684c6b..f01a609e3 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -236,10 +236,10 @@ class RulesForRoom(object): # Loop through to see which member events we've seen and have rules # for and which we need to fetch for key in current_state_ids: - if key[0] != EventTypes.Member: + typ, user_id = key + if typ != EventTypes.Member: continue - user_id = key[1] if user_id in self.uninteresting_user_set: continue From 24c8f38784fc51945b54fc34f470c91192415c81 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 22 May 2017 14:59:27 +0100 Subject: [PATCH 032/139] Comment --- synapse/push/bulk_push_rule_evaluator.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index f01a609e3..8443e4b05 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -201,7 +201,10 @@ class RulesForRoom(object): self.sequence = 0 # A cache of user_ids that we *know* aren't interesting, e.g. user_ids - # owned by AS's, or remote users, etc. + # owned by AS's, or remote users, etc. (I.e. users we will never need to + # calculate push for) + # These never need to be invalidated as we will never set up push for + # them. 
self.uninteresting_user_set = set() # We need to be clever on the invalidating caches callbacks, as From 2d17b09a6de8ac0951d30307aff767f103f5cd8d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 22 May 2017 15:01:36 +0100 Subject: [PATCH 033/139] Add debug logging --- synapse/push/bulk_push_rule_evaluator.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 8443e4b05..6bf203993 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -224,6 +224,7 @@ class RulesForRoom(object): with (yield self.linearizer.queue(())): if state_group and self.state_group == state_group: + logger.debug("Using cached rules for %r", self.room_id) defer.returnValue(self.rules_by_user) ret_rules_by_user = {} @@ -236,6 +237,10 @@ class RulesForRoom(object): else: current_state_ids = context.current_state_ids + logger.debug( + "Looking for member changes in %r %r", state_group, current_state_ids + ) + # Loop through to see which member events we've seen and have rules # for and which we need to fetch for key in current_state_ids: @@ -273,10 +278,16 @@ class RulesForRoom(object): if missing_member_event_ids: # If we have some memebr events we haven't seen, look them up # and fetch push rules for them if appropriate. + logger.debug("Found new member events %r", missing_member_event_ids) yield self._update_rules_with_member_event_ids( ret_rules_by_user, missing_member_event_ids, state_group ) + if logger.isEnabledFor(logging.DEBUG): + logger.debug( + "Returning push rules for %r %r", + self.room_id, ret_rules_by_user.keys(), + ) defer.returnValue(ret_rules_by_user) @defer.inlineCallbacks @@ -310,11 +321,17 @@ class RulesForRoom(object): for row in rows } + if logger.isEnabledFor(logging.DEBUG): + logger.debug("Found members %r: %r", self.room_id, members.values()) + interested_in_user_ids = set( user_id for user_id, membership in members.itervalues() if membership == Membership.JOIN ) + if logger.isEnabledFor(logging.DEBUG): + logger.debug("Joined: %r", interested_in_user_ids) + if_users_with_pushers = yield self.store.get_if_users_have_pushers( interested_in_user_ids, on_invalidate=self.invalidate_all_cb, @@ -324,10 +341,16 @@ class RulesForRoom(object): uid for uid, have_pusher in if_users_with_pushers.iteritems() if have_pusher ) + if logger.isEnabledFor(logging.DEBUG): + logger.debug("With pushers: %r", user_ids) + users_with_receipts = yield self.store.get_users_with_read_receipts_in_room( self.room_id, on_invalidate=self.invalidate_all_cb, ) + if logger.isEnabledFor(logging.DEBUG): + logger.debug("With receipts: %r", users_with_receipts) + # any users with pushers must be ours: they have pushers for uid in users_with_receipts: if uid in interested_in_user_ids: @@ -348,6 +371,7 @@ class RulesForRoom(object): # as it keeps a reference to self and will stop this instance from being # GC'd if it gets dropped from the rules_to_user cache. 
Instead use # `self.invalidate_all_cb` + logger.debug("Invalidating RulesForRoom for %r", self.room_id) self.sequence += 1 self.state_group = object() self.member_map = {} From 7fb80b5eaeb00f27ac46a043e53341bd8a1d1cfc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 22 May 2017 15:02:12 +0100 Subject: [PATCH 034/139] Check if current event is a membership event --- synapse/push/bulk_push_rule_evaluator.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 6bf203993..2ee07f2f7 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -53,7 +53,7 @@ class BulkPushRuleEvaluator(object): room_id = event.room_id rules_for_room = self._get_rules_for_room(room_id) - rules_by_user = yield rules_for_room.get_rules(context) + rules_by_user = yield rules_for_room.get_rules(event, context) # if this event is an invite event, we may need to run rules for the user # who's been invited, otherwise they won't get told they've been invited @@ -216,7 +216,7 @@ class RulesForRoom(object): self.invalidate_all_cb = _Invalidation(rules_for_room_cache, room_id) @defer.inlineCallbacks - def get_rules(self, context): + def get_rules(self, event, context): """Given an event context return the rules for all users who are currently in the room. """ @@ -280,7 +280,7 @@ class RulesForRoom(object): # and fetch push rules for them if appropriate. logger.debug("Found new member events %r", missing_member_event_ids) yield self._update_rules_with_member_event_ids( - ret_rules_by_user, missing_member_event_ids, state_group + ret_rules_by_user, missing_member_event_ids, state_group, event ) if logger.isEnabledFor(logging.DEBUG): @@ -292,7 +292,7 @@ class RulesForRoom(object): @defer.inlineCallbacks def _update_rules_with_member_event_ids(self, ret_rules_by_user, member_event_ids, - state_group): + state_group, event): """Update the partially filled rules_by_user dict by fetching rules for any newly joined users in the `member_event_ids` list. @@ -321,6 +321,11 @@ class RulesForRoom(object): for row in rows } + if event.type == EventTypes.Member: + for event_id in member_event_ids.itervalues(): + if event_id == event.event_id: + members[event_id] = (event.state_key, event.membership) + if logger.isEnabledFor(logging.DEBUG): logger.debug("Found members %r: %r", self.room_id, members.values()) From e3417a06e23c532e6502bdcdcaedac826e231d69 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 22 May 2017 15:04:42 +0100 Subject: [PATCH 035/139] Update list cache to handle one arg case We update the normal cache descriptors to handle caches with a single argument specially so that the key wasn't a 1-tuple. We need to update the cache list to be aware of this. 
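
As a rough illustration of the keying difference described above — a cache fed by a single-argument method stores entries under the bare argument, while multi-argument caches key on tuples — here is a minimal, self-contained sketch. The SimpleCache and batch_get names are hypothetical stand-ins, not the real cache descriptors:

class SimpleCache(object):
    def __init__(self):
        self._entries = {}

    def get(self, key, default=None):
        return self._entries.get(key, default)

    def set(self, key, value):
        self._entries[key] = value


def batch_get(cache, num_args, keyargs, list_pos, list_args):
    # Fetch several entries at once, mirroring the one-arg / tuple-key split:
    # single-argument caches are keyed on the bare argument, everything else
    # on a tuple built from the full argument list.
    results = {}
    for arg in list_args:
        if num_args == 1:
            key = arg                        # e.g. "room1"
        else:
            key = list(keyargs)
            key[list_pos] = arg
            key = tuple(key)                 # e.g. ("room1", "@user:hs")
        results[arg] = cache.get(key)
    return results


cache = SimpleCache()
cache.set("room1", "cached-value")           # how a one-arg cache stores it
assert cache.get(("room1",)) is None         # a 1-tuple key would always miss
assert batch_get(cache, 1, ["room1"], 0, ["room1"]) == {"room1": "cached-value"}

The diff that follows applies the same split inside CacheListDescriptor by exposing num_args on the wrapped method and branching on it when building cache keys.
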
--- synapse/util/caches/descriptors.py | 48 ++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index 48dcbafee..77a0d8e35 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -404,6 +404,7 @@ class CacheDescriptor(_CacheDescriptorBase): wrapped.invalidate_all = cache.invalidate_all wrapped.cache = cache + wrapped.num_args = self.num_args obj.__dict__[self.orig.__name__] = wrapped @@ -451,8 +452,9 @@ class CacheListDescriptor(_CacheDescriptorBase): ) def __get__(self, obj, objtype=None): - - cache = getattr(obj, self.cached_method_name).cache + cached_method = getattr(obj, self.cached_method_name) + cache = cached_method.cache + num_args = cached_method.num_args @functools.wraps(self.orig) def wrapped(*args, **kwargs): @@ -470,11 +472,14 @@ class CacheListDescriptor(_CacheDescriptorBase): cached_defers = {} missing = [] for arg in list_args: - key = list(keyargs) - key[self.list_pos] = arg - try: - res = cache.get(tuple(key), callback=invalidate_callback) + if num_args == 1: + res = cache.get(arg, callback=invalidate_callback) + else: + key = list(keyargs) + key[self.list_pos] = arg + res = cache.get(tuple(key), callback=invalidate_callback) + if not isinstance(res, ObservableDeferred): results[arg] = res elif not res.has_succeeded(): @@ -505,17 +510,28 @@ class CacheListDescriptor(_CacheDescriptorBase): observer = ObservableDeferred(observer) - key = list(keyargs) - key[self.list_pos] = arg - cache.set( - tuple(key), observer, - callback=invalidate_callback - ) + if num_args == 1: + cache.set( + arg, observer, + callback=invalidate_callback + ) - def invalidate(f, key): - cache.invalidate(key) - return f - observer.addErrback(invalidate, tuple(key)) + def invalidate(f, key): + cache.invalidate(key) + return f + observer.addErrback(invalidate, arg) + else: + key = list(keyargs) + key[self.list_pos] = arg + cache.set( + tuple(key), observer, + callback=invalidate_callback + ) + + def invalidate(f, key): + cache.invalidate(key) + return f + observer.addErrback(invalidate, tuple(key)) res = observer.observe() res.addCallback(lambda r, arg: (arg, r), arg) From bd7bb5df717810eec0ae56d558a8413003d2ecaa Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 22 May 2017 15:12:19 +0100 Subject: [PATCH 036/139] Pull out if statement from for loop --- synapse/util/caches/descriptors.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index 77a0d8e35..cbdff8659 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -471,14 +471,22 @@ class CacheListDescriptor(_CacheDescriptorBase): results = {} cached_defers = {} missing = [] + + # If the cache takes a single arg then that is used as the key, + # otherwise a tuple is used. 
+ if num_args == 1: + def cache_get(arg): + return cache.get(arg, callback=invalidate_callback) + else: + key = list(keyargs) + + def cache_get(arg): + key[self.list_pos] = arg + return cache.get(tuple(key), callback=invalidate_callback) + for arg in list_args: try: - if num_args == 1: - res = cache.get(arg, callback=invalidate_callback) - else: - key = list(keyargs) - key[self.list_pos] = arg - res = cache.get(tuple(key), callback=invalidate_callback) + res = cache_get(arg) if not isinstance(res, ObservableDeferred): results[arg] = res From 74bf4ee7bf4467453854fb554d711f3bec5bfd2a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 22 May 2017 16:19:22 +0100 Subject: [PATCH 037/139] Stream count_e2e_one_time_keys cache invalidation --- synapse/storage/end_to_end_keys.py | 31 +++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index e00f31da2..ad170c951 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -185,8 +185,8 @@ class EndToEndKeyStore(SQLBaseStore): for algorithm, key_id, json_bytes in new_keys ], ) - txn.call_after( - self.count_e2e_one_time_keys.invalidate, (user_id, device_id,) + self._invalidate_cache_and_stream( + txn, self.count_e2e_one_time_keys, (user_id, device_id,) ) yield self.runInteraction( "add_e2e_one_time_keys_insert", _add_e2e_one_time_keys @@ -245,16 +245,21 @@ class EndToEndKeyStore(SQLBaseStore): "claim_e2e_one_time_keys", _claim_e2e_one_time_keys ) - @defer.inlineCallbacks def delete_e2e_keys_by_device(self, user_id, device_id): - yield self._simple_delete( - table="e2e_device_keys_json", - keyvalues={"user_id": user_id, "device_id": device_id}, - desc="delete_e2e_device_keys_by_device" + def delete_e2e_keys_by_device_txn(txn): + self._simple_delete_txn( + txn, + table="e2e_device_keys_json", + keyvalues={"user_id": user_id, "device_id": device_id}, + ) + self._simple_delete_txn( + txn, + table="e2e_one_time_keys_json", + keyvalues={"user_id": user_id, "device_id": device_id}, + ) + self._invalidate_cache_and_stream( + txn, self.count_e2e_one_time_keys, (user_id, device_id,) + ) + return self.runInteraction( + "delete_e2e_keys_by_device", delete_e2e_keys_by_device_txn ) - yield self._simple_delete( - table="e2e_one_time_keys_json", - keyvalues={"user_id": user_id, "device_id": device_id}, - desc="delete_e2e_one_time_keys_by_device" - ) - self.count_e2e_one_time_keys.invalidate((user_id, device_id,)) From d668caa79c4f99b6d2b93c5b96e640e88f71a5c0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 22 May 2017 16:21:06 +0100 Subject: [PATCH 038/139] Remove spurious log level guards --- synapse/push/bulk_push_rule_evaluator.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 2ee07f2f7..354a2e64c 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -334,8 +334,7 @@ class RulesForRoom(object): if membership == Membership.JOIN ) - if logger.isEnabledFor(logging.DEBUG): - logger.debug("Joined: %r", interested_in_user_ids) + logger.debug("Joined: %r", interested_in_user_ids) if_users_with_pushers = yield self.store.get_if_users_have_pushers( interested_in_user_ids, @@ -346,15 +345,13 @@ class RulesForRoom(object): uid for uid, have_pusher in if_users_with_pushers.iteritems() if have_pusher ) - if logger.isEnabledFor(logging.DEBUG): - 
logger.debug("With pushers: %r", user_ids) + logger.debug("With pushers: %r", user_ids) users_with_receipts = yield self.store.get_users_with_read_receipts_in_room( self.room_id, on_invalidate=self.invalidate_all_cb, ) - if logger.isEnabledFor(logging.DEBUG): - logger.debug("With receipts: %r", users_with_receipts) + logger.debug("With receipts: %r", users_with_receipts) # any users with pushers must be ours: they have pushers for uid in users_with_receipts: From 6489455bed4c46ee8ffa09a933e8a3289f2ae62a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 22 May 2017 16:22:04 +0100 Subject: [PATCH 039/139] Comment --- synapse/push/bulk_push_rule_evaluator.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 354a2e64c..9a96e6fe8 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -321,6 +321,8 @@ class RulesForRoom(object): for row in rows } + # If the event is a join event then it will be in current state evnts + # map but not in the DB, so we have to explicitly insert it. if event.type == EventTypes.Member: for event_id in member_event_ids.itervalues(): if event_id == event.event_id: From f85a4152796d7ec39787b00fb4f177d682fe41fb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 22 May 2017 16:31:24 +0100 Subject: [PATCH 040/139] Add missing storage function to slave store --- synapse/replication/slave/storage/devices.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/replication/slave/storage/devices.py b/synapse/replication/slave/storage/devices.py index 4d4a43547..7687867ae 100644 --- a/synapse/replication/slave/storage/devices.py +++ b/synapse/replication/slave/storage/devices.py @@ -16,6 +16,7 @@ from ._base import BaseSlavedStore from ._slaved_id_tracker import SlavedIdTracker from synapse.storage import DataStore +from synapse.storage.end_to_end_keys import EndToEndKeyStore from synapse.util.caches.stream_change_cache import StreamChangeCache @@ -45,6 +46,7 @@ class SlavedDeviceStore(BaseSlavedStore): _mark_as_sent_devices_by_remote_txn = ( DataStore._mark_as_sent_devices_by_remote_txn.__func__ ) + count_e2e_one_time_keys = EndToEndKeyStore.__dict__["count_e2e_one_time_keys"] def stream_positions(self): result = super(SlavedDeviceStore, self).stream_positions() From 11c2a3655f14550193adb0360e5df62cc7274b9a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 22 May 2017 17:48:53 +0100 Subject: [PATCH 041/139] Only load jinja2 templates once Instead of every time a new email pusher is created, as loading jinja2 templates is slow. 
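
As a minimal sketch of the pattern this patch moves to — parse the jinja2 templates once and reuse them for every pusher — assuming placeholder names (MailerFactory, make_mailer) rather than the exact Synapse classes:

import jinja2


class MailerFactory(object):
    def __init__(self, template_dir, html_name, text_name):
        # Parse the templates once, at startup, instead of per pusher.
        loader = jinja2.FileSystemLoader(template_dir)
        env = jinja2.Environment(loader=loader)
        self.notif_template_html = env.get_template(html_name)
        self.notif_template_text = env.get_template(text_name)
        self._mailers = {}  # app_name -> mailer, one per brand

    def get_mailer(self, app_name, make_mailer):
        # Reuse an existing mailer for this app_name, or build one from the
        # already-parsed templates.
        mailer = self._mailers.get(app_name)
        if mailer is None:
            mailer = make_mailer(
                self.notif_template_html, self.notif_template_text,
            )
            self._mailers[app_name] = mailer
        return mailer

The diff that follows implements this with a PusherFactory that calls load_jinja2_templates once in its constructor and keeps one Mailer per app_name.
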
--- synapse/push/emailpusher.py | 15 ++----- synapse/push/mailer.py | 87 +++++++++++++++++++++++-------------- synapse/push/pusher.py | 56 +++++++++++++++++++----- synapse/push/pusherpool.py | 7 +-- tests/utils.py | 1 + 5 files changed, 106 insertions(+), 60 deletions(-) diff --git a/synapse/push/emailpusher.py b/synapse/push/emailpusher.py index c7afd1111..a69dda7b0 100644 --- a/synapse/push/emailpusher.py +++ b/synapse/push/emailpusher.py @@ -21,7 +21,6 @@ import logging from synapse.util.metrics import Measure from synapse.util.logcontext import LoggingContext -from mailer import Mailer logger = logging.getLogger(__name__) @@ -56,8 +55,10 @@ class EmailPusher(object): This shares quite a bit of code with httpusher: it would be good to factor out the common parts """ - def __init__(self, hs, pusherdict): + def __init__(self, hs, pusherdict, mailer): self.hs = hs + self.mailer = mailer + self.store = self.hs.get_datastore() self.clock = self.hs.get_clock() self.pusher_id = pusherdict['id'] @@ -73,16 +74,6 @@ class EmailPusher(object): self.processing = False - if self.hs.config.email_enable_notifs: - if 'data' in pusherdict and 'brand' in pusherdict['data']: - app_name = pusherdict['data']['brand'] - else: - app_name = self.hs.config.email_app_name - - self.mailer = Mailer(self.hs, app_name) - else: - self.mailer = None - @defer.inlineCallbacks def on_started(self): if self.mailer is not None: diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index f83aa7625..b5cd9b426 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -78,23 +78,17 @@ ALLOWED_ATTRS = { class Mailer(object): - def __init__(self, hs, app_name): + def __init__(self, hs, app_name, notif_template_html, notif_template_text): self.hs = hs + self.notif_template_html = notif_template_html + self.notif_template_text = notif_template_text + self.store = self.hs.get_datastore() self.macaroon_gen = self.hs.get_macaroon_generator() self.state_handler = self.hs.get_state_handler() - loader = jinja2.FileSystemLoader(self.hs.config.email_template_dir) self.app_name = app_name + logger.info("Created Mailer for app_name %s" % app_name) - env = jinja2.Environment(loader=loader) - env.filters["format_ts"] = format_ts_filter - env.filters["mxc_to_http"] = self.mxc_to_http_filter - self.notif_template_html = env.get_template( - self.hs.config.email_notif_template_html - ) - self.notif_template_text = env.get_template( - self.hs.config.email_notif_template_text - ) @defer.inlineCallbacks def send_notification_mail(self, app_id, user_id, email_address, @@ -481,28 +475,6 @@ class Mailer(object): urllib.urlencode(params), ) - def mxc_to_http_filter(self, value, width, height, resize_method="crop"): - if value[0:6] != "mxc://": - return "" - - serverAndMediaId = value[6:] - fragment = None - if '#' in serverAndMediaId: - (serverAndMediaId, fragment) = serverAndMediaId.split('#', 1) - fragment = "#" + fragment - - params = { - "width": width, - "height": height, - "method": resize_method, - } - return "%s_matrix/media/v1/thumbnail/%s?%s%s" % ( - self.hs.config.public_baseurl, - serverAndMediaId, - urllib.urlencode(params), - fragment or "", - ) - def safe_markup(raw_html): return jinja2.Markup(bleach.linkify(bleach.clean( @@ -543,3 +515,52 @@ def string_ordinal_total(s): def format_ts_filter(value, format): return time.strftime(format, time.localtime(value / 1000)) + + +def load_jinja2_templates(config): + """Load the jinja2 email templates from disk + + Returns: + (notif_template_html, notif_template_text) + """ + 
logger.info("loading jinja2") + + loader = jinja2.FileSystemLoader(config.email_template_dir) + env = jinja2.Environment(loader=loader) + env.filters["format_ts"] = format_ts_filter + env.filters["mxc_to_http"] = _create_mxc_to_http_filter(config) + + notif_template_html = env.get_template( + config.email_notif_template_html + ) + notif_template_text = env.get_template( + config.email_notif_template_text + ) + + return notif_template_html, notif_template_text + + +def _create_mxc_to_http_filter(config): + def mxc_to_http_filter(value, width, height, resize_method="crop"): + if value[0:6] != "mxc://": + return "" + + serverAndMediaId = value[6:] + fragment = None + if '#' in serverAndMediaId: + (serverAndMediaId, fragment) = serverAndMediaId.split('#', 1) + fragment = "#" + fragment + + params = { + "width": width, + "height": height, + "method": resize_method, + } + return "%s_matrix/media/v1/thumbnail/%s?%s%s" % ( + config.public_baseurl, + serverAndMediaId, + urllib.urlencode(params), + fragment or "", + ) + + return mxc_to_http_filter diff --git a/synapse/push/pusher.py b/synapse/push/pusher.py index de9c33b93..9385c80ce 100644 --- a/synapse/push/pusher.py +++ b/synapse/push/pusher.py @@ -26,22 +26,54 @@ logger = logging.getLogger(__name__) # process works fine) try: from synapse.push.emailpusher import EmailPusher + from synapse.push.mailer import Mailer, load_jinja2_templates except: pass -def create_pusher(hs, pusherdict): - logger.info("trying to create_pusher for %r", pusherdict) +class PusherFactory(object): + def __init__(self, hs): + self.hs = hs - PUSHER_TYPES = { - "http": HttpPusher, - } + self.pusher_types = { + "http": HttpPusher, + } - logger.info("email enable notifs: %r", hs.config.email_enable_notifs) - if hs.config.email_enable_notifs: - PUSHER_TYPES["email"] = EmailPusher - logger.info("defined email pusher type") + logger.info("email enable notifs: %r", hs.config.email_enable_notifs) + if hs.config.email_enable_notifs: + self.mailers = {} # app_name -> Mailer - if pusherdict['kind'] in PUSHER_TYPES: - logger.info("found pusher") - return PUSHER_TYPES[pusherdict['kind']](hs, pusherdict) + templates = load_jinja2_templates(hs.config) + self.notif_template_html, self.notif_template_text = templates + + self.pusher_types["email"] = self._create_email_pusher + + logger.info("defined email pusher type") + + def create_pusher(self, pusherdict): + logger.info("trying to create_pusher for %r", pusherdict) + + if pusherdict['kind'] in self.pusher_types: + logger.info("found pusher") + return self.pusher_types[pusherdict['kind']](self.hs, pusherdict) + + def _create_email_pusher(self, pusherdict): + app_name = self._brand_from_pusherdict + mailer = self.mailers.get(app_name) + if not mailer: + mailer = Mailer( + hs=self.hs, + app_name=app_name, + notif_template_html=self.notif_template_html, + notif_template_text=self.notif_template_text, + ) + self.mailers[app_name] = mailer + return EmailPusher(self.hs, pusherdict, mailer) + + def _app_name_from_pusherdict(self, pusherdict): + if 'data' in pusherdict and 'brand' in pusherdict['data']: + app_name = pusherdict['data']['brand'] + else: + app_name = self.hs.config.email_app_name + + return app_name diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py index 3837be523..43cb6e9c0 100644 --- a/synapse/push/pusherpool.py +++ b/synapse/push/pusherpool.py @@ -16,7 +16,7 @@ from twisted.internet import defer -import pusher +from .pusher import PusherFactory from synapse.util.logcontext import preserve_fn, 
preserve_context_over_deferred from synapse.util.async import run_on_reactor @@ -28,6 +28,7 @@ logger = logging.getLogger(__name__) class PusherPool: def __init__(self, _hs): self.hs = _hs + self.pusher_factory = PusherFactory(_hs) self.start_pushers = _hs.config.start_pushers self.store = self.hs.get_datastore() self.clock = self.hs.get_clock() @@ -48,7 +49,7 @@ class PusherPool: # will then get pulled out of the database, # recreated, added and started: this means we have only one # code path adding pushers. - pusher.create_pusher(self.hs, { + self.pusher_factory.create_pusher({ "id": None, "user_name": user_id, "kind": kind, @@ -186,7 +187,7 @@ class PusherPool: logger.info("Starting %d pushers", len(pushers)) for pusherdict in pushers: try: - p = pusher.create_pusher(self.hs, pusherdict) + p = self.pusher_factory.create_pusher(pusherdict) except: logger.exception("Couldn't start a pusher: caught Exception") continue diff --git a/tests/utils.py b/tests/utils.py index d3d6c8021..4f7e32b3a 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -55,6 +55,7 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): config.password_providers = [] config.worker_replication_url = "" config.worker_app = None + config.email_enable_notifs = False config.use_frozen_dicts = True config.database_config = {"name": "sqlite3"} From 836d5c44b6457a93c0d163e721ded153382d2a79 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 22 May 2017 21:14:20 +0100 Subject: [PATCH 042/139] actually trim oversize og:description meta --- synapse/rest/media/v1/preview_url_resource.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 99760d622..c680fddab 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -434,6 +434,8 @@ def _calc_og(tree, media_uri): for el in _iterate_over_text(tree.find("body"), *TAGS_TO_REMOVE) ) og['og:description'] = summarize_paragraphs(text_nodes) + else: + og['og:description'] = summarize_paragraphs([og['og:description']]) # TODO: delete the url downloads to stop diskfilling, # as we only ever cared about its OG From e6618ece2d7fa857e9649584483e20d2e0a82a81 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 23 May 2017 09:36:52 +0100 Subject: [PATCH 043/139] Missed an invalidation --- synapse/storage/end_to_end_keys.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index ad170c951..f3e5331fd 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -240,6 +240,9 @@ class EndToEndKeyStore(SQLBaseStore): txn.call_after( self.count_e2e_one_time_keys.invalidate, (user_id, device_id,) ) + self._invalidate_cache_and_stream( + txn, self.count_e2e_one_time_keys, (user_id, device_id,) + ) return result return self.runInteraction( "claim_e2e_one_time_keys", _claim_e2e_one_time_keys From 8cf9f0a3e7073f4e8bdddf81d4599f9a9ca7e978 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 23 May 2017 09:46:59 +0100 Subject: [PATCH 044/139] Remove redundant invalidation --- synapse/storage/end_to_end_keys.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index f3e5331fd..2cebb203c 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -237,9 +237,6 @@ class EndToEndKeyStore(SQLBaseStore): ) for 
user_id, device_id, algorithm, key_id in delete: txn.execute(sql, (user_id, device_id, algorithm, key_id)) - txn.call_after( - self.count_e2e_one_time_keys.invalidate, (user_id, device_id,) - ) self._invalidate_cache_and_stream( txn, self.count_e2e_one_time_keys, (user_id, device_id,) ) From c049472b8ad75d1d9a627803cd698cfe8c5570b8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 24 May 2017 14:22:41 +0100 Subject: [PATCH 045/139] Only store event_auth for state events --- synapse/handlers/federation.py | 20 ++++++++++++----- synapse/storage/event_federation.py | 35 ++++++++++++++++++++++++----- synapse/storage/events.py | 1 + 3 files changed, 44 insertions(+), 12 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 63e633548..a333acc4a 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -832,7 +832,11 @@ class FederationHandler(BaseHandler): @defer.inlineCallbacks def on_event_auth(self, event_id): - auth = yield self.store.get_auth_chain([event_id]) + event = yield self.store.get_event(event_id) + auth = yield self.store.get_auth_chain( + [auth_id for auth_id, _ in event.auth_events], + include_given=True + ) for event in auth: event.signatures.update( @@ -1047,9 +1051,7 @@ class FederationHandler(BaseHandler): yield user_joined_room(self.distributor, user, event.room_id) state_ids = context.prev_state_ids.values() - auth_chain = yield self.store.get_auth_chain(set( - [event.event_id] + state_ids - )) + auth_chain = yield self.store.get_auth_chain(state_ids) state = yield self.store.get_events(context.prev_state_ids.values()) @@ -1598,7 +1600,11 @@ class FederationHandler(BaseHandler): pass # Now get the current auth_chain for the event. - local_auth_chain = yield self.store.get_auth_chain([event_id]) + event = yield self.store.get_event(event_id) + local_auth_chain = yield self.store.get_auth_chain( + [auth_id for auth_id, _ in event.auth_events], + include_given=True + ) # TODO: Check if we would now reject event_id. If so we need to tell # everyone. @@ -1791,7 +1797,9 @@ class FederationHandler(BaseHandler): auth_ids = yield self.auth.compute_auth_events( event, context.prev_state_ids ) - local_auth_chain = yield self.store.get_auth_chain(auth_ids) + local_auth_chain = yield self.store.get_auth_chain( + auth_ids, include_given=True + ) try: # 2. Get remote difference. diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 519059c30..72126c682 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -44,18 +44,41 @@ class EventFederationStore(SQLBaseStore): self._delete_old_forward_extrem_cache, 60 * 60 * 1000 ) - def get_auth_chain(self, event_ids): - return self.get_auth_chain_ids(event_ids).addCallback(self._get_events) + def get_auth_chain(self, event_ids, include_given=False): + """Get auth events for given event_ids. The events *must* be state events. - def get_auth_chain_ids(self, event_ids): + Args: + event_ids (list): state events + include_given (bool): include the given events in result + + Returns: + list of events + """ + return self.get_auth_chain_ids( + event_ids, include_given=include_given, + ).addCallback(self._get_events) + + def get_auth_chain_ids(self, event_ids, include_given=False): + """Get auth events for given event_ids. The events *must* be state events. 
+ + Args: + event_ids (list): state events + include_given (bool): include the given events in result + + Returns: + list of event_ids + """ return self.runInteraction( "get_auth_chain_ids", self._get_auth_chain_ids_txn, - event_ids + event_ids, include_given ) - def _get_auth_chain_ids_txn(self, txn, event_ids): - results = set() + def _get_auth_chain_ids_txn(self, txn, event_ids, include_given): + if include_given: + results = set(event_ids) + else: + results = set() base_sql = ( "SELECT auth_id FROM event_auth WHERE event_id IN (%s)" diff --git a/synapse/storage/events.py b/synapse/storage/events.py index c4aeb4880..3d4f53ea5 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1120,6 +1120,7 @@ class EventsStore(SQLBaseStore): } for event, _ in events_and_contexts for auth_id, _ in event.auth_events + if event.is_state() ], ) From 6e614e9e10d35006707cc6eceafe80c13eb13948 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 24 May 2017 14:58:13 +0100 Subject: [PATCH 046/139] Add background task to clear out old event_auth --- synapse/storage/event_federation.py | 56 +++++++++++++++++++ synapse/storage/prepare_database.py | 2 +- .../schema/delta/42/event_auth_state_only.sql | 17 ++++++ 3 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 synapse/storage/schema/delta/42/event_auth_state_only.sql diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 72126c682..e8133de2f 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -37,9 +37,16 @@ class EventFederationStore(SQLBaseStore): and backfilling from another server respectively. """ + EVENT_AUTH_STATE_ONLY = "event_auth_state_only" + def __init__(self, hs): super(EventFederationStore, self).__init__(hs) + self.register_background_update_handler( + self.EVENT_AUTH_STATE_ONLY, + self._background_delete_non_state_event_auth, + ) + hs.get_clock().looping_call( self._delete_old_forward_extrem_cache, 60 * 60 * 1000 ) @@ -527,3 +534,52 @@ class EventFederationStore(SQLBaseStore): txn.execute(query, (room_id,)) txn.call_after(self.get_latest_event_ids_in_room.invalidate, (room_id,)) + + @defer.inlineCallbacks + def _background_delete_non_state_event_auth(self, progress, batch_size): + def delete_event_auth(txn): + target_min_stream_id = progress.get("target_min_stream_id_inclusive") + max_stream_id = progress.get("max_stream_id_exclusive") + + if not target_min_stream_id or not max_stream_id: + txn.execute("SELECT COALESCE(MIN(stream_ordering), 0) FROM events") + rows = txn.fetchall() + target_min_stream_id = rows[0][0] + + txn.execute("SELECT COALESCE(MAX(stream_ordering), 0) FROM events") + rows = txn.fetchall() + max_stream_id = rows[0][0] + + min_stream_id = max_stream_id - batch_size + + sql = """ + DELETE FROM event_auth + WHERE event_id IN ( + SELECT event_id FROM events + LEFT JOIN state_events USING (room_id, event_id) + WHERE ? <= stream_ordering AND stream_ordering < ? 
+ AND state_key IS null + ) + """ + + txn.execute(sql, (min_stream_id, max_stream_id,)) + + new_progress = { + "target_min_stream_id_inclusive": target_min_stream_id, + "max_stream_id_exclusive": min_stream_id, + } + + self._background_update_progress_txn( + txn, self.EVENT_AUTH_STATE_ONLY, new_progress + ) + + return min_stream_id >= target_min_stream_id + + result = yield self.runInteraction( + self.EVENT_AUTH_STATE_ONLY, delete_event_auth + ) + + if not result: + yield self._end_background_update(self.EVENT_AUTH_STATE_ONLY) + + defer.returnValue(batch_size) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 6e623843d..eaba699e2 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 41 +SCHEMA_VERSION = 42 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/schema/delta/42/event_auth_state_only.sql b/synapse/storage/schema/delta/42/event_auth_state_only.sql new file mode 100644 index 000000000..b8821ac75 --- /dev/null +++ b/synapse/storage/schema/delta/42/event_auth_state_only.sql @@ -0,0 +1,17 @@ +/* Copyright 2017 Vector Creations Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT INTO background_updates (update_name, progress_json) VALUES + ('event_auth_state_only', '{}'); From dbc0dfd2d5f4b09b1151070d55e2826736324f38 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 25 May 2017 14:28:34 +0100 Subject: [PATCH 047/139] Remove unused options --- synapse/state.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/synapse/state.py b/synapse/state.py index 02fee47f3..536e2dc65 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -239,16 +239,9 @@ class StateHandler(object): defer.returnValue(context) logger.debug("calling resolve_state_groups from compute_event_context") - if event.is_state(): - entry = yield self.resolve_state_groups( - event.room_id, [e for e, _ in event.prev_events], - event_type=event.type, - state_key=event.state_key, - ) - else: - entry = yield self.resolve_state_groups( - event.room_id, [e for e, _ in event.prev_events], - ) + entry = yield self.resolve_state_groups( + event.room_id, [e for e, _ in event.prev_events], + ) curr_state = entry.state @@ -284,7 +277,7 @@ class StateHandler(object): @defer.inlineCallbacks @log_function - def resolve_state_groups(self, room_id, event_ids, event_type=None, state_key=""): + def resolve_state_groups(self, room_id, event_ids): """ Given a list of event_ids this method fetches the state at each event, resolves conflicts between them and returns them. 
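
The next two patches rework how resolve_state_groups fills in prev_group and delta_ids on its _StateCacheEntry. As a rough mental model of that delta scheme — a toy, dict-based sketch with made-up group IDs and event IDs, not the real state storage tables:

def resolve_full_state(groups, group_id):
    # Each entry is (prev_group, delta_ids, full_state); only "root" groups
    # carry full_state, everything else stores a delta over its ancestor.
    prev_group, delta_ids, full_state = groups[group_id]
    if full_state is not None:
        return dict(full_state)
    state = resolve_full_state(groups, prev_group)
    state.update(delta_ids)              # the delta wins over inherited entries
    return state

# Group 1 stores full state; group 2 stores only what changed relative to group 1.
groups = {
    1: (None, None, {("m.room.member", "@alice:hs"): "$join_alice"}),
    2: (1, {("m.room.member", "@bob:hs"): "$join_bob"}, None),
}
assert resolve_full_state(groups, 2) == {
    ("m.room.member", "@alice:hs"): "$join_alice",
    ("m.room.member", "@bob:hs"): "$join_bob",
}

A group that names itself as its own prev_group with an empty delta adds nothing, which is the "weird prev_group" the next patch stops returning; the patch after it reads the real previous group and its delta back via get_state_group_delta instead.
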
From 2b03751c3cf0e64833afd36a1d393a5ce1d67e8d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 25 May 2017 14:47:39 +0100 Subject: [PATCH 048/139] Don't return weird prev_group --- synapse/state.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/synapse/state.py b/synapse/state.py index 536e2dc65..3f93f9e27 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -195,12 +195,12 @@ class StateHandler(object): Returns: synapse.events.snapshot.EventContext: """ - context = EventContext() if event.internal_metadata.is_outlier(): # If this is an outlier, then we know it shouldn't have any current # state. Certainly store.get_current_state won't return any, and # persisting the event won't store the state group. + context = EventContext() if old_state: context.prev_state_ids = { (s.type, s.state_key): s.event_id for s in old_state @@ -219,6 +219,7 @@ class StateHandler(object): defer.returnValue(context) if old_state: + context = EventContext() context.prev_state_ids = { (s.type, s.state_key): s.event_id for s in old_state } @@ -245,6 +246,7 @@ class StateHandler(object): curr_state = entry.state + context = EventContext() context.prev_state_ids = curr_state if event.is_state(): context.state_group = self.store.get_next_state_group() @@ -257,11 +259,14 @@ class StateHandler(object): context.current_state_ids = dict(context.prev_state_ids) context.current_state_ids[key] = event.event_id - context.prev_group = entry.prev_group - context.delta_ids = entry.delta_ids - if context.delta_ids is not None: - context.delta_ids = dict(context.delta_ids) - context.delta_ids[key] = event.event_id + if entry.state_group: + context.prev_group = entry.state_group + context.delta_ids = { + key: event.event_id + } + elif entry.prev_group: + context.prev_group = entry.prev_group + context.delta_ids = entry.delta_ids else: if entry.state_group is None: entry.state_group = self.store.get_next_state_group() @@ -305,8 +310,8 @@ class StateHandler(object): defer.returnValue(_StateCacheEntry( state=state_list, state_group=name, - prev_group=name, - delta_ids={}, + prev_group=None, + delta_ids=None, )) with (yield self.resolve_linearizer.queue(group_names)): From dfbda5e0250adfa762f5491c7efb2666866db034 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 25 May 2017 17:08:41 +0100 Subject: [PATCH 049/139] Faster cache for get_joined_hosts --- synapse/federation/transaction_queue.py | 2 + synapse/replication/slave/storage/events.py | 2 + synapse/state.py | 16 ++-- synapse/storage/roommember.py | 93 ++++++++++++++++----- synapse/storage/state.py | 33 ++++++++ 5 files changed, 117 insertions(+), 29 deletions(-) diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index a15198e05..4c25ef110 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -187,6 +187,8 @@ class TransactionQueue(object): prev_id for prev_id, _ in event.prev_events ], ) + destinations = set(destinations) + logger.info("destinations: %r", destinations) if send_on_behalf_of is not None: # If we are sending the event on behalf of another server diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index fcaf58b93..6cd3a843d 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -108,6 +108,8 @@ class SlavedEventStore(BaseSlavedStore): get_current_state_ids = ( StateStore.__dict__["get_current_state_ids"] ) + 
get_state_group_delta = DataStore.get_state_group_delta.__func__ + _get_joined_hosts_cache = RoomMemberStore.__dict__["_get_joined_hosts_cache"] has_room_changed_since = DataStore.has_room_changed_since.__func__ get_unread_push_actions_for_user_in_range_for_http = ( diff --git a/synapse/state.py b/synapse/state.py index 3f93f9e27..95dbe02e5 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -170,9 +170,7 @@ class StateHandler(object): latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) logger.debug("calling resolve_state_groups from get_current_user_in_room") entry = yield self.resolve_state_groups(room_id, latest_event_ids) - joined_users = yield self.store.get_joined_users_from_state( - room_id, entry.state_id, entry.state - ) + joined_users = yield self.store.get_joined_users_from_state(room_id, entry) defer.returnValue(joined_users) @defer.inlineCallbacks @@ -181,9 +179,9 @@ class StateHandler(object): latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) logger.debug("calling resolve_state_groups from get_current_hosts_in_room") entry = yield self.resolve_state_groups(room_id, latest_event_ids) - joined_hosts = yield self.store.get_joined_hosts( - room_id, entry.state_id, entry.state - ) + logger.info("State: %r", entry.state_group) + joined_hosts = yield self.store.get_joined_hosts(room_id, entry) + logger.info("returning: %r", joined_hosts) defer.returnValue(joined_hosts) @defer.inlineCallbacks @@ -307,11 +305,13 @@ class StateHandler(object): if len(group_names) == 1: name, state_list = state_groups_ids.items().pop() + prev_group, delta_ids = yield self.store.get_state_group_delta(name) + defer.returnValue(_StateCacheEntry( state=state_list, state_group=name, - prev_group=None, - delta_ids=None, + prev_group=prev_group, + delta_ids=delta_ids, )) with (yield self.resolve_linearizer.queue(group_names)): diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 0829ae5be..8c4a5f9f2 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -18,6 +18,7 @@ from twisted.internet import defer from collections import namedtuple from ._base import SQLBaseStore +from synapse.util.async import Linearizer from synapse.util.caches import intern_string from synapse.util.caches.descriptors import cached, cachedInlineCallbacks from synapse.util.stringutils import to_ascii @@ -392,7 +393,8 @@ class RoomMemberStore(SQLBaseStore): context=context, ) - def get_joined_users_from_state(self, room_id, state_group, state_ids): + def get_joined_users_from_state(self, room_id, state_entry): + state_group = state_entry.state_group if not state_group: # If state_group is None it means it has yet to be assigned a # state group, i.e. we need to make sure that calls with a state_group @@ -401,7 +403,7 @@ class RoomMemberStore(SQLBaseStore): state_group = object() return self._get_joined_users_from_context( - room_id, state_group, state_ids, + room_id, state_group, state_entry.state, context=state_entry, ) @cachedInlineCallbacks(num_args=2, cache_context=True, iterable=True, @@ -534,7 +536,8 @@ class RoomMemberStore(SQLBaseStore): defer.returnValue(False) - def get_joined_hosts(self, room_id, state_group, state_ids): + def get_joined_hosts(self, room_id, state_entry): + state_group = state_entry.state_group if not state_group: # If state_group is None it means it has yet to be assigned a # state group, i.e. 
we need to make sure that calls with a state_group @@ -543,33 +546,21 @@ class RoomMemberStore(SQLBaseStore): state_group = object() return self._get_joined_hosts( - room_id, state_group, state_ids + room_id, state_group, state_entry.state, state_entry=state_entry, ) @cachedInlineCallbacks(num_args=2, max_entries=10000, iterable=True) - def _get_joined_hosts(self, room_id, state_group, current_state_ids): + # @defer.inlineCallbacks + def _get_joined_hosts(self, room_id, state_group, current_state_ids, state_entry): # We don't use `state_group`, its there so that we can cache based # on it. However, its important that its never None, since two current_state's # with a state_group of None are likely to be different. # See bulk_get_push_rules_for_room for how we work around this. assert state_group is not None - joined_hosts = set() - for etype, state_key in current_state_ids: - if etype == EventTypes.Member: - try: - host = get_domain_from_id(state_key) - except: - logger.warn("state_key not user_id: %s", state_key) - continue - - if host in joined_hosts: - continue - - event_id = current_state_ids[(etype, state_key)] - event = yield self.get_event(event_id, allow_none=True) - if event and event.content["membership"] == Membership.JOIN: - joined_hosts.add(intern_string(host)) + cache = self._get_joined_hosts_cache(room_id) + joined_hosts = yield cache.get_destinations(state_entry) + logger.info("returning: %r", joined_hosts) defer.returnValue(joined_hosts) @@ -647,3 +638,63 @@ class RoomMemberStore(SQLBaseStore): yield self._end_background_update(_MEMBERSHIP_PROFILE_UPDATE_NAME) defer.returnValue(result) + + @cached(max_entries=10000, iterable=True) + def _get_joined_hosts_cache(self, room_id): + return _JoinedHostsCache(self, room_id) + + +class _JoinedHostsCache(object): + def __init__(self, store, room_id): + self.store = store + self.room_id = room_id + + self.hosts_to_joined_users = {} + + self.state_group = object() + + self.linearizer = Linearizer("_JoinedHostsCache") + + self._len = 0 + + @defer.inlineCallbacks + def get_destinations(self, state_entry): + if state_entry.state_group == self.state_group: + defer.returnValue(frozenset(self.hosts_to_joined_users)) + + with (yield self.linearizer.queue(())): + if state_entry.state_group == self.state_group: + pass + elif state_entry.prev_group == self.state_group: + for (typ, state_key), event_id in state_entry.delta_ids.iteritems(): + if typ != EventTypes.Member: + continue + + host = intern_string(get_domain_from_id(state_key)) + user_id = state_key + known_joins = self.hosts_to_joined_users.setdefault(host, set()) + + event = yield self.store.get_event(event_id) + if event.membership == Membership.JOIN: + known_joins.add(user_id) + else: + known_joins.discard(user_id) + + if not known_joins: + self.hosts_to_joined_users.pop(host, None) + else: + joined_users = yield self.store.get_joined_users_from_state( + self.room_id, state_entry, + ) + + self.hosts_to_joined_users = {} + for user_id in joined_users: + host = intern_string(get_domain_from_id(user_id)) + self.hosts_to_joined_users.setdefault(host, set()).add(user_id) + + self.state_group = state_entry.state_group + self._len = sum(len(v) for v in self.hosts_to_joined_users.itervalues()) + defer.returnValue(frozenset(self.hosts_to_joined_users)) + + def __len__(self): + return self._len diff --git a/synapse/storage/state.py b/synapse/storage/state.py index a7c3d401d..01474ff5f 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -98,6 +98,39 @@ class 
StateStore(SQLBaseStore): _get_current_state_ids_txn, ) + def get_state_group_delta(self, state_group): + def _get_state_group_delta_txn(txn): + prev_group = self._simple_select_one_onecol_txn( + txn, + table="state_group_edges", + keyvalues={ + "state_group": state_group, + }, + retcol="prev_state_group", + allow_none=True, + ) + + if not prev_group: + return None, None + + delta_ids = self._simple_select_list_txn( + txn, + table="state_groups_state", + keyvalues={ + "state_group": state_group, + }, + retcols=("type", "state_key", "event_id",) + ) + + return prev_group, { + (row["type"], row["state_key"]): row["event_id"] + for row in delta_ids + } + return self.runInteraction( + "get_state_group_delta", + _get_state_group_delta_txn, + ) + @defer.inlineCallbacks def get_state_groups_ids(self, room_id, event_ids): if not event_ids: From 23da6383609f9b16e9ea6efd19096516621408be Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 26 May 2017 10:02:04 +0100 Subject: [PATCH 050/139] Fix typing tests --- synapse/handlers/typing.py | 12 +++++------- tests/test_state.py | 2 ++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index 3b7818af5..82dedbbc9 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -89,7 +89,7 @@ class TypingHandler(object): until = self._member_typing_until.get(member, None) if not until or until <= now: logger.info("Timing out typing for: %s", member.user_id) - preserve_fn(self._stopped_typing)(member) + self._stopped_typing(member) continue # Check if we need to resend a keep alive over federation for this @@ -147,7 +147,7 @@ class TypingHandler(object): # No point sending another notification defer.returnValue(None) - yield self._push_update( + self._push_update( member=member, typing=True, ) @@ -171,7 +171,7 @@ class TypingHandler(object): member = RoomMember(room_id=room_id, user_id=target_user_id) - yield self._stopped_typing(member) + self._stopped_typing(member) @defer.inlineCallbacks def user_left_room(self, user, room_id): @@ -180,7 +180,6 @@ class TypingHandler(object): member = RoomMember(room_id=room_id, user_id=user_id) yield self._stopped_typing(member) - @defer.inlineCallbacks def _stopped_typing(self, member): if member.user_id not in self._room_typing.get(member.room_id, set()): # No point @@ -189,16 +188,15 @@ class TypingHandler(object): self._member_typing_until.pop(member, None) self._member_last_federation_poke.pop(member, None) - yield self._push_update( + self._push_update( member=member, typing=False, ) - @defer.inlineCallbacks def _push_update(self, member, typing): if self.hs.is_mine_id(member.user_id): # Only send updates for changes to our own users. 
- yield self._push_remote(member, typing) + preserve_fn(self._push_remote)(member, typing) self._push_update_local( member=member, diff --git a/tests/test_state.py b/tests/test_state.py index 6454f994e..feb84f3d4 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -143,6 +143,7 @@ class StateTestCase(unittest.TestCase): "add_event_hashes", "get_events", "get_next_state_group", + "get_state_group_delta", ] ) hs = Mock(spec_set=[ @@ -154,6 +155,7 @@ class StateTestCase(unittest.TestCase): hs.get_auth.return_value = Auth(hs) self.store.get_next_state_group.side_effect = Mock + self.store.get_state_group_delta.return_value = (None, None) self.state = StateHandler(hs) self.event_id = 0 From 619e8ecd0c123602c20eb3a4ce45bdefb79d5900 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 26 May 2017 10:46:03 +0100 Subject: [PATCH 051/139] Handle None state group correctly --- synapse/state.py | 6 +++--- synapse/storage/roommember.py | 5 ++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/synapse/state.py b/synapse/state.py index 95dbe02e5..5fbe0a097 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -364,11 +364,11 @@ class StateHandler(object): prev_group = None delta_ids = None - for old_group, old_ids in state_groups_ids.items(): - if not set(new_state.iterkeys()) - set(old_ids.iterkeys()): + for old_group, old_ids in state_groups_ids.iteritems(): + if not set(new_state) - set(old_ids): n_delta_ids = { k: v - for k, v in new_state.items() + for k, v in new_state.iteritems() if old_ids.get(k) != v } if not delta_ids or len(n_delta_ids) < len(delta_ids): diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 8c4a5f9f2..0e9e71f60 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -692,7 +692,10 @@ class _JoinedHostsCache(object): host = intern_string(get_domain_from_id(user_id)) self.hosts_to_joined_users.setdefault(host, set()).add(user_id) - self.state_group = state_entry.state_group + if state_entry.state_group: + self.state_group = state_entry.state_group + else: + self.state_group = object() self._len = sum(len(v) for v in self.hosts_to_joined_users.itervalues()) defer.returnValue(frozenset(self.hosts_to_joined_users)) From a584a81b3e59e9a4763d81d1a8b893fbd1a45ce0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 30 May 2017 14:41:42 +0100 Subject: [PATCH 052/139] Add current_state_delta_stream table --- synapse/storage/events.py | 31 ++++++++++++++----- .../schema/delta/42/current_state_delta.sql | 25 +++++++++++++++ 2 files changed, 48 insertions(+), 8 deletions(-) create mode 100644 synapse/storage/schema/delta/42/current_state_delta.sql diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 3d4f53ea5..c37a2a6f1 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -648,9 +648,10 @@ class EventsStore(SQLBaseStore): list of the event ids which are the forward extremities. 
""" - self._update_current_state_txn(txn, current_state_for_room) - max_stream_order = events_and_contexts[-1][0].internal_metadata.stream_ordering + + self._update_current_state_txn(txn, current_state_for_room, max_stream_order) + self._update_forward_extremities_txn( txn, new_forward_extremities=new_forward_extremeties, @@ -713,7 +714,7 @@ class EventsStore(SQLBaseStore): backfilled=backfilled, ) - def _update_current_state_txn(self, txn, state_delta_by_room): + def _update_current_state_txn(self, txn, state_delta_by_room, max_stream_order): for room_id, current_state_tuple in state_delta_by_room.iteritems(): to_delete, to_insert, _ = current_state_tuple txn.executemany( @@ -735,6 +736,24 @@ class EventsStore(SQLBaseStore): ], ) + state_deltas = {key: None for key in to_delete} + state_deltas.update(to_insert) + + self._simple_insert_many_txn( + txn, + table="current_state_delta_stream", + values=[ + { + "stream_id": max_stream_order, + "room_id": room_id, + "type": key[0], + "state_key": key[1], + "event_id": ev_id, + } + for key, ev_id in state_deltas.iteritems() + ] + ) + # Invalidate the various caches # Figure out the changes of membership to invalidate the @@ -743,11 +762,7 @@ class EventsStore(SQLBaseStore): # and which we have added, then we invlidate the caches for all # those users. members_changed = set( - state_key for ev_type, state_key in to_delete.iterkeys() - if ev_type == EventTypes.Member - ) - members_changed.update( - state_key for ev_type, state_key in to_insert.iterkeys() + state_key for ev_type, state_key in state_deltas if ev_type == EventTypes.Member ) diff --git a/synapse/storage/schema/delta/42/current_state_delta.sql b/synapse/storage/schema/delta/42/current_state_delta.sql new file mode 100644 index 000000000..1a55aa912 --- /dev/null +++ b/synapse/storage/schema/delta/42/current_state_delta.sql @@ -0,0 +1,25 @@ +/* Copyright 2017 Vector Creations Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +CREATE TABLE current_state_delta_stream ( + stream_id BIGINT NOT NULL, + room_id TEXT NOT NULL, + type TEXT NOT NULL, + state_key TEXT NOT NULL, + event_id TEXT -- Is null if the key was removed +); + +CREATE INDEX current_state_delta_stream_idx ON current_state_delta_stream(stream_id); From 04095f75810176d7ba2b5ef70b40dd1a3281850d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 30 May 2017 14:53:01 +0100 Subject: [PATCH 053/139] Add clobbered event_id --- synapse/storage/events.py | 1 + synapse/storage/schema/delta/42/current_state_delta.sql | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index c37a2a6f1..dfb57f9d1 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -749,6 +749,7 @@ class EventsStore(SQLBaseStore): "type": key[0], "state_key": key[1], "event_id": ev_id, + "prev_event_id": to_delete.get(key, None), } for key, ev_id in state_deltas.iteritems() ] diff --git a/synapse/storage/schema/delta/42/current_state_delta.sql b/synapse/storage/schema/delta/42/current_state_delta.sql index 1a55aa912..bf124f3de 100644 --- a/synapse/storage/schema/delta/42/current_state_delta.sql +++ b/synapse/storage/schema/delta/42/current_state_delta.sql @@ -19,7 +19,8 @@ CREATE TABLE current_state_delta_stream ( room_id TEXT NOT NULL, type TEXT NOT NULL, state_key TEXT NOT NULL, - event_id TEXT -- Is null if the key was removed + event_id TEXT, -- Is null if the key was removed + prev_event_id TEXT ); CREATE INDEX current_state_delta_stream_idx ON current_state_delta_stream(stream_id); From dd48f7204c511d5ba4438dfe01679bcb7367216d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 30 May 2017 15:01:22 +0100 Subject: [PATCH 054/139] Add comment --- synapse/storage/schema/delta/42/current_state_delta.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/schema/delta/42/current_state_delta.sql b/synapse/storage/schema/delta/42/current_state_delta.sql index bf124f3de..d28851aff 100644 --- a/synapse/storage/schema/delta/42/current_state_delta.sql +++ b/synapse/storage/schema/delta/42/current_state_delta.sql @@ -20,7 +20,7 @@ CREATE TABLE current_state_delta_stream ( type TEXT NOT NULL, state_key TEXT NOT NULL, event_id TEXT, -- Is null if the key was removed - prev_event_id TEXT + prev_event_id TEXT -- Is null if the key was added ); CREATE INDEX current_state_delta_stream_idx ON current_state_delta_stream(stream_id); From eeb2f9e546060ca9f2ef7260220b51d85d9b0d92 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 11:51:01 +0100 Subject: [PATCH 055/139] Add user_directory to database --- synapse/handlers/user_directory.py | 218 ++++++++++++++++++++ synapse/notifier.py | 6 +- synapse/server.py | 5 + synapse/storage/__init__.py | 2 + synapse/storage/schema/delta/42/user_dir.py | 69 +++++++ synapse/storage/user_directory.py | 145 +++++++++++++ 6 files changed, 444 insertions(+), 1 deletion(-) create mode 100644 synapse/handlers/user_directory.py create mode 100644 synapse/storage/schema/delta/42/user_dir.py create mode 100644 synapse/storage/user_directory.py diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py new file mode 100644 index 000000000..43e917c1a --- /dev/null +++ b/synapse/handlers/user_directory.py @@ -0,0 +1,218 @@ +# -*- coding: utf-8 -*- +# Copyright 2017 Vector Creations Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance 
with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from twisted.internet import defer + +from synapse.api.constants import EventTypes, JoinRules, Membership +from synapse.storage.roommember import ProfileInfo +from synapse.util.metrics import Measure + + +logger = logging.getLogger(__name__) + + +class UserDirectoyHandler(object): + def __init__(self, hs): + self.store = hs.get_datastore() + self.state = hs.get_state_handler() + self.server_name = hs.hostname + self.clock = hs.get_clock() + + self.initially_handled_users = set() + + self.pos = None + + self._is_processing = False + + @defer.inlineCallbacks + def notify_new_event(self): + if self._is_processing: + return + + self._is_processing = True + try: + yield self._unsafe_process() + finally: + self._is_processing = False + + @defer.inlineCallbacks + def _unsafe_process(self): + if self.pos is None: + self.pos = yield self.store.get_user_directory_stream_pos() + + if self.pos is None: + yield self._do_initial_spam() + self.pos = yield self.store.get_user_directory_stream_pos() + + while True: + with Measure(self.clock, "user_dir_delta"): + deltas = yield self.store.get_current_state_deltas(self.pos) + if not deltas: + return + + yield self._handle_deltas(deltas) + + max_stream_id = deltas[-1]["stream_id"] + yield self.store.update_user_directory_stream_pos(max_stream_id) + + @defer.inlineCallbacks + def _handle_room(self, room_id): + # TODO: Check we're still joined to room + + is_public = yield self.store.is_room_world_readable_or_publicly_joinable(room_id) + if not is_public: + return + + users_with_profile = yield self.state.get_current_user_in_room(room_id) + unhandled_users = set(users_with_profile) - self.initially_handled_users + + yield self.store.add_profiles_to_user_dir( + room_id, { + user_id: users_with_profile[user_id] for user_id in unhandled_users + } + ) + + self.initially_handled_users |= unhandled_users + + @defer.inlineCallbacks + def _do_initial_spam(self): + yield self.store.delete_all_from_user_dir() + + room_ids = yield self.store.get_all_rooms() + + for room_id in room_ids: + yield self._handle_room(room_id) + + self.initially_handled_users = None + + yield self.store.update_user_directory_stream_pos(-1) + + @defer.inlineCallbacks + def _handle_new_user(self, room_id, user_id, profile): + row = yield self.store.get_user_in_directory(user_id) + if row: + return + + yield self.store.add_profiles_to_user_dir(room_id, {user_id: profile}) + + def _handle_remove_user(self, room_id, user_id): + row = yield self.store.get_user_in_directory(user_id) + if not row or row["room_id"] != room_id: + return + + # TODO: Make this faster? 
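# The loop added below asks a single question: does this user still share some
# other room that is publicly visible? A simplified synchronous sketch of that
# decision, with hypothetical stand-ins for the store calls used here:
def should_stay_in_directory(user_id, get_rooms_for_user, is_room_public):
    """Return True if the user is still joined to at least one public room."""
    return any(is_room_public(r) for r in get_rooms_for_user(user_id))
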
+ rooms = yield self.store.get_rooms_for_user(user_id) + for room_id in rooms: + is_public = yield self.store.is_room_world_readable_or_publicly_joinable( + room_id + ) + + if is_public: + return + + yield self.store.remove_from_user_dir(user_id) + + @defer.inlineCallbacks + def _handle_deltas(self, deltas): + for delta in deltas: + typ = delta["type"] + state_key = delta["state_key"] + room_id = delta["room_id"] + event_id = delta["event_id"] + prev_event_id = delta["prev_event_id"] + + if typ == EventTypes.RoomHistoryVisibility: + change = yield self._get_key_change( + prev_event_id, event_id, + key_name="history_visibility", + public_value="world_readable", + ) + if change is None: + continue + + users_with_profile = yield self.state.get_current_user_in_room(room_id) + for user_id, profile in users_with_profile.iteritems(): + if change: + yield self._handle_new_user(room_id, user_id, profile) + else: + yield self._handle_remove_user(room_id, user_id) + elif typ == EventTypes.JoinRules: + change = yield self._get_key_change( + prev_event_id, event_id, + key_name="join_rules", + public_value=JoinRules.PUBLIC, + ) + if change is None: + continue + + users_with_profile = yield self.state.get_current_user_in_room(room_id) + for user_id, profile in users_with_profile.iteritems(): + if change: + yield self._handle_new_user(room_id, user_id, profile) + else: + yield self._handle_remove_user(room_id, user_id) + elif typ == EventTypes.Member: + change = yield self._get_key_change( + prev_event_id, event_id, + key_name="membership", + public_value=Membership.JOIN, + ) + + if change is None: + continue + + if change: + event = yield self.store.get_event(event_id) + profile = ProfileInfo( + avatar_url=event.content.get("avatar_url"), + display_name=event.content.get("displayname"), + ) + + yield self._handle_new_user(room_id, state_key, profile) + else: + yield self._handle_remove_user(room_id, state_key) + + @defer.inlineCallbacks + def _get_key_change(self, prev_event_id, event_id, key_name, public_value): + prev_event = None + event = None + if prev_event_id: + prev_event = yield self.store.get_event(prev_event_id, allow_none=True) + + if event_id: + event = yield self.store.get_event(event_id, allow_none=True) + + if not event and not prev_event: + defer.returnValue(None) + + prev_hist_vis = None + hist_vis = None + + if prev_event: + prev_hist_vis = prev_event.content.get(key_name, None) + + if event: + hist_vis = event.content.get(key_name, None) + + logger.info("prev: %r, new: %r", prev_hist_vis, hist_vis) + + if hist_vis == public_value and prev_hist_vis != public_value: + defer.returnValue(True) + elif hist_vis != public_value and prev_hist_vis == public_value: + defer.returnValue(False) + else: + defer.returnValue(None) diff --git a/synapse/notifier.py b/synapse/notifier.py index 48566187a..6b1709d70 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -167,6 +167,7 @@ class Notifier(object): self.clock = hs.get_clock() self.appservice_handler = hs.get_application_service_handler() + self.user_directory_handler = hs.get_user_directory_handler() if hs.should_send_federation(): self.federation_sender = hs.get_federation_sender() @@ -251,7 +252,10 @@ class Notifier(object): """Notify any user streams that are interested in this room event""" # poke any interested application service. 
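# The user_directory poke added in this hunk is cheap to fire on every event
# because notify_new_event coalesces overlapping pokes; roughly the guard
# pattern sketched here with plain inlineCallbacks:
from twisted.internet import defer

class CoalescingWorker(object):
    """Runs `process` at most once at a time; pokes made mid-run are no-ops."""

    def __init__(self, process):
        self._process = process
        self._is_processing = False

    @defer.inlineCallbacks
    def poke(self):
        if self._is_processing:
            return
        self._is_processing = True
        try:
            yield self._process()
        finally:
            self._is_processing = False
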
preserve_fn(self.appservice_handler.notify_interested_services)( - room_stream_id) + room_stream_id + ) + + preserve_fn(self.user_directory_handler.notify_new_event)() if self.federation_sender: preserve_fn(self.federation_sender.notify_new_events)( diff --git a/synapse/server.py b/synapse/server.py index e400e278c..a38e5179e 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -49,6 +49,7 @@ from synapse.handlers.events import EventHandler, EventStreamHandler from synapse.handlers.initial_sync import InitialSyncHandler from synapse.handlers.receipts import ReceiptsHandler from synapse.handlers.read_marker import ReadMarkerHandler +from synapse.handlers.user_directory import UserDirectoyHandler from synapse.http.client import SimpleHttpClient, InsecureInterceptableContextFactory from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.notifier import Notifier @@ -137,6 +138,7 @@ class HomeServer(object): 'tcp_replication', 'read_marker_handler', 'action_generator', + 'user_directory_handler', ] def __init__(self, hostname, **kwargs): @@ -304,6 +306,9 @@ class HomeServer(object): def build_action_generator(self): return ActionGenerator(self) + def build_user_directory_handler(self): + return UserDirectoyHandler(self) + def remove_pusher(self, app_id, push_key, user_id): return self.get_pusherpool().remove_pusher(app_id, push_key, user_id) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index d604e7668..11655bf60 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -49,6 +49,7 @@ from .tags import TagsStore from .account_data import AccountDataStore from .openid import OpenIdStore from .client_ips import ClientIpStore +from .user_directory import UserDirectoryStore from .util.id_generators import IdGenerator, StreamIdGenerator, ChainedIdGenerator from .engines import PostgresEngine @@ -86,6 +87,7 @@ class DataStore(RoomMemberStore, RoomStore, ClientIpStore, DeviceStore, DeviceInboxStore, + UserDirectoryStore, ): def __init__(self, db_conn, hs): diff --git a/synapse/storage/schema/delta/42/user_dir.py b/synapse/storage/schema/delta/42/user_dir.py new file mode 100644 index 000000000..38538960a --- /dev/null +++ b/synapse/storage/schema/delta/42/user_dir.py @@ -0,0 +1,69 @@ +# Copyright 2017 Vector Creations Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from synapse.storage.prepare_database import get_statements +from synapse.storage.engines import PostgresEngine, Sqlite3Engine + +logger = logging.getLogger(__name__) + + +BOTH_TABLES = """ +CREATE TABLE user_directory_stream_pos ( + Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row. 
+ stream_id BIGINT, + CHECK (Lock='X') +); + +INSERT INTO user_directory_stream_pos (stream_id) VALUES (null); +""" + + +POSTGRES_TABLE = """ +CREATE TABLE user_directory ( + user_id TEXT NOT NULL, + room_id TEXT NOT NULL, + display_name TEXT, + avatar_url TEXT, + vector tsvector +); + +CREATE INDEX user_directory_fts_idx ON user_directory USING gin(vector); +CREATE INDEX user_directory_user_idx ON user_directory(user_id); +""" + + +SQLITE_TABLE = """ +CREATE VIRTUAL TABLE user_directory + USING fts4 ( user_id, room_id, display_name, avatar_url, value ); +""" + + +def run_create(cur, database_engine, *args, **kwargs): + for statement in get_statements(BOTH_TABLES.splitlines()): + cur.execute(statement) + + if isinstance(database_engine, PostgresEngine): + for statement in get_statements(POSTGRES_TABLE.splitlines()): + cur.execute(statement) + elif isinstance(database_engine, Sqlite3Engine): + for statement in get_statements(SQLITE_TABLE.splitlines()): + cur.execute(statement) + else: + raise Exception("Unrecognized database engine") + + +def run_upgrade(*args, **kwargs): + pass diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py new file mode 100644 index 000000000..6c7c8c4be --- /dev/null +++ b/synapse/storage/user_directory.py @@ -0,0 +1,145 @@ +# -*- coding: utf-8 -*- +# Copyright 2017 Vector Creations Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer + +from ._base import SQLBaseStore +from synapse.util.caches.descriptors import cached, cachedInlineCallbacks +from synapse.api.constants import EventTypes, JoinRules +from synapse.storage.engines import PostgresEngine, Sqlite3Engine + + +class UserDirectoryStore(SQLBaseStore): + + @cachedInlineCallbacks(cache_context=True) + def is_room_world_readable_or_publicly_joinable(self, room_id, cache_context): + current_state_ids = yield self.get_current_state_ids( + room_id, on_invalidate=cache_context.invalidate + ) + + join_rules_id = current_state_ids.get((EventTypes.JoinRules, "")) + if join_rules_id: + join_rule_ev = yield self.get_event(join_rules_id, allow_none=True) + if join_rule_ev: + if join_rule_ev.content.get("join_rules") == JoinRules.PUBLIC: + defer.returnValue(True) + + hist_vis_id = current_state_ids.get((EventTypes.RoomHistoryVisibility, "")) + if hist_vis_id: + hist_vis_ev = yield self.get_event(hist_vis_id, allow_none=True) + if hist_vis_ev: + if hist_vis_ev.content.get("history_visibility") == "world_readable": + defer.returnValue(True) + + defer.returnValue(False) + + def add_profiles_to_user_dir(self, room_id, users_with_profile): + if isinstance(self.database_engine, PostgresEngine): + sql = """ + INSERT INTO user_directory + (user_id, room_id, display_name, avatar_url, vector) + VALUES (?,?,?,?,to_tsvector('english', ?)) + """ + elif isinstance(self.database_engine, Sqlite3Engine): + sql = """ + INSERT INTO user_directory + (user_id, room_id, display_name, avatar_url, value) + VALUES (?,?,?,?,?) + """ + else: + # This should be unreachable. 
+ raise Exception("Unrecognized database engine") + + def _add_profiles_to_user_dir_txn(txn): + txn.executemany(sql, ( + ( + user_id, room_id, p.display_name, p.avatar_url, + "%s %s" % (user_id, p.display_name,) if p.display_name else user_id + ) + for user_id, p in users_with_profile.iteritems() + )) + for user_id in users_with_profile: + txn.call_after( + self.get_user_in_directory.invalidate, (user_id,) + ) + + return self.runInteraction( + "add_profiles_to_user_dir", _add_profiles_to_user_dir_txn + ) + + @defer.inlineCallbacks + def remove_from_user_dir(self, user_id): + yield self._simple_delete( + table="user_directory", + keyvalues={"user_id": user_id}, + desc="remove_from_user_dir", + ) + self.get_user_in_directory.invalidate((user_id,)) + + def get_all_rooms(self): + return self._simple_select_onecol( + table="current_state_events", + keyvalues={}, + retcol="DISTINCT room_id", + desc="get_all_rooms", + ) + + def delete_all_from_user_dir(self): + def _delete_all_from_user_dir_txn(txn): + txn.execute("DELETE FROM user_directory") + txn.call_after(self.get_user_in_directory.invalidate_all) + return self.runInteraction( + "delete_all_from_user_dir", _delete_all_from_user_dir_txn + ) + + @cached() + def get_user_in_directory(self, user_id): + return self._simple_select_one( + table="user_directory", + keyvalues={"user_id": user_id}, + retcols=("room_id", "display_name", "avatar_url",), + allow_none=True, + desc="get_user_in_directory", + ) + + def get_user_directory_stream_pos(self): + return self._simple_select_one_onecol( + table="user_directory_stream_pos", + keyvalues={}, + retcol="stream_id", + desc="get_user_directory_stream_pos", + ) + + def update_user_directory_stream_pos(self, stream_id): + return self._simple_update_one( + table="user_directory_stream_pos", + keyvalues={}, + updatevalues={"stream_id": stream_id}, + desc="update_user_directory_stream_pos", + ) + + def get_current_state_deltas(self, prev_stream_id): + # TODO: Add stream change cache + # TODO: Add limit + sql = """ + SELECT stream_id, room_id, type, state_key, event_id, prev_event_id + FROM current_state_delta_stream + WHERE stream_id > ? 
+ ORDER BY stream_id ASC + """ + + return self._execute( + "get_current_state_deltas", self.cursor_to_dict, sql, prev_stream_id + ) From 42137efde7aeb350e203fe19b5a661c2b27f208f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 11:55:13 +0100 Subject: [PATCH 056/139] Don't go round in circles --- synapse/handlers/user_directory.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 43e917c1a..4b1b7df74 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -66,8 +66,8 @@ class UserDirectoyHandler(object): yield self._handle_deltas(deltas) - max_stream_id = deltas[-1]["stream_id"] - yield self.store.update_user_directory_stream_pos(max_stream_id) + self.pos = deltas[-1]["stream_id"] + yield self.store.update_user_directory_stream_pos(self.pos) @defer.inlineCallbacks def _handle_room(self, room_id): @@ -208,8 +208,6 @@ class UserDirectoyHandler(object): if event: hist_vis = event.content.get(key_name, None) - logger.info("prev: %r, new: %r", prev_hist_vis, hist_vis) - if hist_vis == public_value and prev_hist_vis != public_value: defer.returnValue(True) elif hist_vis != public_value and prev_hist_vis == public_value: From 3e123b84977f84f4c60ebafadda9b381baa9d00f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 11:56:27 +0100 Subject: [PATCH 057/139] Start later --- synapse/handlers/user_directory.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 4b1b7df74..87c467f09 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -90,6 +90,9 @@ class UserDirectoyHandler(object): @defer.inlineCallbacks def _do_initial_spam(self): + # TODO: pull from current delta stream_id + new_pos = self.store.get_room_max_stream_ordering() + yield self.store.delete_all_from_user_dir() room_ids = yield self.store.get_all_rooms() @@ -99,7 +102,7 @@ class UserDirectoyHandler(object): self.initially_handled_users = None - yield self.store.update_user_directory_stream_pos(-1) + yield self.store.update_user_directory_stream_pos(new_pos) @defer.inlineCallbacks def _handle_new_user(self, room_id, user_id, profile): From 168524543f70f7c3dc113b21fc704af65d832bf8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 11:59:36 +0100 Subject: [PATCH 058/139] Add call later --- synapse/handlers/user_directory.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 87c467f09..e9488ce55 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -38,6 +38,8 @@ class UserDirectoyHandler(object): self._is_processing = False + self.clock.call_later(0, self.notify_new_event) + @defer.inlineCallbacks def notify_new_event(self): if self._is_processing: From b5db4ed5f68ee81557393e94436d768b955b1aa0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 13:30:12 +0100 Subject: [PATCH 059/139] Update room column when room becomes unpublic --- synapse/handlers/user_directory.py | 23 +++++++++++++++++++++-- synapse/storage/user_directory.py | 10 ++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index e9488ce55..0cf403f59 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ 
-121,12 +121,13 @@ class UserDirectoyHandler(object): # TODO: Make this faster? rooms = yield self.store.get_rooms_for_user(user_id) - for room_id in rooms: + for j_room_id in rooms: is_public = yield self.store.is_room_world_readable_or_publicly_joinable( - room_id + j_room_id ) if is_public: + yield self.store.update_user_in_user_dir(user_id, j_room_id) return yield self.store.remove_from_user_dir(user_id) @@ -149,6 +150,15 @@ class UserDirectoyHandler(object): if change is None: continue + is_public = yield self.store.is_room_world_readable_or_publicly_joinable( + room_id + ) + + if change and is_public: + continue + elif not change and not is_public: + continue + users_with_profile = yield self.state.get_current_user_in_room(room_id) for user_id, profile in users_with_profile.iteritems(): if change: @@ -164,6 +174,15 @@ class UserDirectoyHandler(object): if change is None: continue + is_public = yield self.store.is_room_world_readable_or_publicly_joinable( + room_id + ) + + if change and is_public: + continue + elif not change and not is_public: + continue + users_with_profile = yield self.state.get_current_user_in_room(room_id) for user_id, profile in users_with_profile.iteritems(): if change: diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 6c7c8c4be..d72b93b58 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -79,6 +79,16 @@ class UserDirectoryStore(SQLBaseStore): "add_profiles_to_user_dir", _add_profiles_to_user_dir_txn ) + @defer.inlineCallbacks + def update_user_in_user_dir(self, user_id, room_id): + yield self._simple_update_one( + table="user_directory", + keyvalues={"user_id": user_id}, + updatevalues={"room_id": room_id}, + desc="update_user_in_user_dir", + ) + self.get_user_in_directory.invalidate((user_id,)) + @defer.inlineCallbacks def remove_from_user_dir(self, user_id): yield self._simple_delete( From 3b5f22ca40303392e45c8407952ecf3ee15785f6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 14:00:01 +0100 Subject: [PATCH 060/139] Add search --- synapse/handlers/user_directory.py | 3 +++ synapse/storage/user_directory.py | 35 ++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 0cf403f59..4a9565df9 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -40,6 +40,9 @@ class UserDirectoyHandler(object): self.clock.call_later(0, self.notify_new_event) + def search_users(self, search_term, limit): + return self.store.search_user_dir(search_term, limit) + @defer.inlineCallbacks def notify_new_event(self): if self._is_processing: diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index d72b93b58..650c49982 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -153,3 +153,38 @@ class UserDirectoryStore(SQLBaseStore): return self._execute( "get_current_state_deltas", self.cursor_to_dict, sql, prev_stream_id ) + + @defer.inlineCallbacks + def search_user_dir(self, search_term, limit): + if isinstance(self.database_engine, PostgresEngine): + sql = """ + SELECT user_id, display_name, avatar_url + FROM user_directory + WHERE vector @@ to_tsquery('english', ?) + ORDER BY ts_rank_cd(vector, to_tsquery('english', ?)) DESC + LIMIT ? 
+ """ + args = (search_term, search_term, limit + 1,) + elif isinstance(self.database_engine, Sqlite3Engine): + sql = """ + SELECT user_id, display_name, avatar_url + FROM user_directory + WHERE value MATCH ? + ORDER BY rank(matchinfo(user_directory)) DESC + LIMIT ? + """ + args = (search_term, limit + 1) + else: + # This should be unreachable. + raise Exception("Unrecognized database engine") + + results = yield self._execute( + "search_user_dir", self.cursor_to_dict, sql, *args + ) + + limited = len(results) > limit + + defer.returnValue({ + "limited": limited, + "results": results, + }) From 45a5df59147d9c5c4f2cdacaf92179e5935cd68a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 14:11:55 +0100 Subject: [PATCH 061/139] Add REST API --- synapse/rest/__init__.py | 2 + .../rest/client/v2_alpha/user_directory.py | 59 +++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 synapse/rest/client/v2_alpha/user_directory.py diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py index aa8d874f9..3d809d181 100644 --- a/synapse/rest/__init__.py +++ b/synapse/rest/__init__.py @@ -51,6 +51,7 @@ from synapse.rest.client.v2_alpha import ( devices, thirdparty, sendtodevice, + user_directory, ) from synapse.http.server import JsonResource @@ -100,3 +101,4 @@ class ClientRestResource(JsonResource): devices.register_servlets(hs, client_resource) thirdparty.register_servlets(hs, client_resource) sendtodevice.register_servlets(hs, client_resource) + user_directory.register_servlets(hs, client_resource) diff --git a/synapse/rest/client/v2_alpha/user_directory.py b/synapse/rest/client/v2_alpha/user_directory.py new file mode 100644 index 000000000..f1bae0b34 --- /dev/null +++ b/synapse/rest/client/v2_alpha/user_directory.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +# Copyright 2017 Vector Creations Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
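# A usage sketch for the servlet defined below. The URL prefix and port are
# assumptions (client_v2_patterns mounted under the unstable client API on the
# default non-TLS listener), and the access token is a placeholder:
import json
import urllib2

req = urllib2.Request(
    "http://localhost:8008/_matrix/client/unstable/user_directory/search"
    "?access_token=ACCESS_TOKEN_PLACEHOLDER",
    data=json.dumps({"search_term": "alice", "limit": 10}),
    headers={"Content-Type": "application/json"},
)
response = json.load(urllib2.urlopen(req))
# Expected shape, per the handler: {"limited": <bool>, "results": [...]}
print(response)
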
+ +import logging + +from twisted.internet import defer + +from synapse.api.errors import SynapseError +from synapse.http.servlet import RestServlet, parse_json_object_from_request +from ._base import client_v2_patterns + +logger = logging.getLogger(__name__) + + +class UserDirectorySearchRestServlet(RestServlet): + PATTERNS = client_v2_patterns("/user_directory/search$") + + def __init__(self, hs): + """ + Args: + hs (synapse.server.HomeServer): server + """ + super(UserDirectorySearchRestServlet, self).__init__() + self.hs = hs + self.auth = hs.get_auth() + self.user_directory_handler = hs.get_user_directory_handler() + + @defer.inlineCallbacks + def on_GET(self, request): + yield self.auth.get_user_by_req(request, allow_guest=False) + body = parse_json_object_from_request(request) + + limit = body.get("limit", 10) + limit = min(limit, 50) + + try: + search_term = body["search_term"] + except: + raise SynapseError(400, "`search_term` is required field") + + results = yield self.user_directory_handler.search_users(search_term, limit) + + defer.returnValue((200, results)) + + +def register_servlets(hs, http_server): + UserDirectorySearchRestServlet(hs).register(http_server) From 535c99f157a76b7d2a27393ce62268d0cef4abef Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 14:15:45 +0100 Subject: [PATCH 062/139] Use POST --- synapse/rest/client/v2_alpha/user_directory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/client/v2_alpha/user_directory.py b/synapse/rest/client/v2_alpha/user_directory.py index f1bae0b34..fe9120719 100644 --- a/synapse/rest/client/v2_alpha/user_directory.py +++ b/synapse/rest/client/v2_alpha/user_directory.py @@ -38,7 +38,7 @@ class UserDirectorySearchRestServlet(RestServlet): self.user_directory_handler = hs.get_user_directory_handler() @defer.inlineCallbacks - def on_GET(self, request): + def on_POST(self, request): yield self.auth.get_user_by_req(request, allow_guest=False) body = parse_json_object_from_request(request) From 293ef296559fa5bb721592bfa9605f7282df0f6e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 14:29:32 +0100 Subject: [PATCH 063/139] Weight differently --- synapse/storage/user_directory.py | 34 ++++++++++++++++++++++--------- synapse/types.py | 7 +++++++ 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 650c49982..ebcc8b963 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -19,6 +19,7 @@ from ._base import SQLBaseStore from synapse.util.caches.descriptors import cached, cachedInlineCallbacks from synapse.api.constants import EventTypes, JoinRules from synapse.storage.engines import PostgresEngine, Sqlite3Engine +from synapse.types import get_domain_from_id, get_localpart_from_id class UserDirectoryStore(SQLBaseStore): @@ -50,26 +51,39 @@ class UserDirectoryStore(SQLBaseStore): sql = """ INSERT INTO user_directory (user_id, room_id, display_name, avatar_url, vector) - VALUES (?,?,?,?,to_tsvector('english', ?)) + VALUES (?,?,?,?, + setweight(to_tsvector('english', ?), 'A') + || to_tsvector('english', ?) 
+ || to_tsvector('english', COALESCE(?, '')) + ) """ + args = ( + ( + user_id, room_id, p.display_name, p.avatar_url, + get_localpart_from_id(user_id), get_domain_from_id(user_id), + p.display_name, + ) + for user_id, p in users_with_profile.iteritems() + ) elif isinstance(self.database_engine, Sqlite3Engine): sql = """ INSERT INTO user_directory (user_id, room_id, display_name, avatar_url, value) VALUES (?,?,?,?,?) """ - else: - # This should be unreachable. - raise Exception("Unrecognized database engine") - - def _add_profiles_to_user_dir_txn(txn): - txn.executemany(sql, ( + args = ( ( user_id, room_id, p.display_name, p.avatar_url, "%s %s" % (user_id, p.display_name,) if p.display_name else user_id ) for user_id, p in users_with_profile.iteritems() - )) + ) + else: + # This should be unreachable. + raise Exception("Unrecognized database engine") + + def _add_profiles_to_user_dir_txn(txn): + txn.executemany(sql, args) for user_id in users_with_profile: txn.call_after( self.get_user_in_directory.invalidate, (user_id,) @@ -160,8 +174,8 @@ class UserDirectoryStore(SQLBaseStore): sql = """ SELECT user_id, display_name, avatar_url FROM user_directory - WHERE vector @@ to_tsquery('english', ?) - ORDER BY ts_rank_cd(vector, to_tsquery('english', ?)) DESC + WHERE vector @@ plainto_tsquery('english', ?) + ORDER BY ts_rank_cd(vector, plainto_tsquery('english', ?)) DESC LIMIT ? """ args = (search_term, search_term, limit + 1,) diff --git a/synapse/types.py b/synapse/types.py index 445bdcb4d..111948540 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -62,6 +62,13 @@ def get_domain_from_id(string): return string[idx + 1:] +def get_localpart_from_id(string): + idx = string.find(":") + if idx == -1: + raise SynapseError(400, "Invalid ID: %r" % (string,)) + return string[1:idx] + + class DomainSpecificString( namedtuple("DomainSpecificString", ("localpart", "domain")) ): From 63fda37e20015f0fe56aed86f907035d42fdc2ca Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 15:00:29 +0100 Subject: [PATCH 064/139] Add comments --- synapse/handlers/user_directory.py | 169 +++++++++++++----- .../rest/client/v2_alpha/user_directory.py | 16 ++ synapse/storage/schema/delta/42/user_dir.py | 2 +- synapse/storage/user_directory.py | 39 +++- 4 files changed, 177 insertions(+), 49 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 4a9565df9..88b79e332 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -26,25 +26,54 @@ logger = logging.getLogger(__name__) class UserDirectoyHandler(object): + """Handles querying of and keeping updated the user_directory. + + N.B.: ASSUMES IT IS THE ONLY THING THAT MODIFIES THE USER DIRECTORY + """ + def __init__(self, hs): self.store = hs.get_datastore() self.state = hs.get_state_handler() self.server_name = hs.hostname self.clock = hs.get_clock() + # When start up for the first time we need to populate the user_directory. 
+ # This is a set of user_id's we've inserted already self.initially_handled_users = set() + # The current position in the current_state_delta stream self.pos = None + # Guard to ensure we only process deltas one at a time self._is_processing = False + # We kick this off so that we don't have to wait for a change before + # we start populating the user directory self.clock.call_later(0, self.notify_new_event) def search_users(self, search_term, limit): + """Searches for users in directory + + Returns: + dict of the form:: + + { + "limited": , # whether there were more results or not + "results": [ # Ordered by best match first + { + "user_id": , + "display_name": , + "avatar_url": + } + ] + } + """ return self.store.search_user_dir(search_term, limit) @defer.inlineCallbacks def notify_new_event(self): + """Called when there may be more deltas to process + """ if self._is_processing: return @@ -56,13 +85,16 @@ class UserDirectoyHandler(object): @defer.inlineCallbacks def _unsafe_process(self): + # If self.pos is None then means we haven't fetched it from DB if self.pos is None: self.pos = yield self.store.get_user_directory_stream_pos() + # If still None then we need to do the initial fill of directory if self.pos is None: yield self._do_initial_spam() self.pos = yield self.store.get_user_directory_stream_pos() + # Loop round handling deltas until we're up to date while True: with Measure(self.clock, "user_dir_delta"): deltas = yield self.store.get_current_state_deltas(self.pos) @@ -75,7 +107,31 @@ class UserDirectoyHandler(object): yield self.store.update_user_directory_stream_pos(self.pos) @defer.inlineCallbacks - def _handle_room(self, room_id): + def _do_initial_spam(self): + """Populates the user_directory from the current state of the DB, used + when synapse first starts with user_directory support + """ + + # TODO: pull from current delta stream_id + new_pos = self.store.get_room_max_stream_ordering() + + # Delete any existing entries just in case there are any + yield self.store.delete_all_from_user_dir() + + # We process by going through each existing room at a time. + room_ids = yield self.store.get_all_rooms() + + for room_id in room_ids: + yield self._handle_intial_room(room_id) + + self.initially_handled_users = None + + yield self.store.update_user_directory_stream_pos(new_pos) + + @defer.inlineCallbacks + def _handle_intial_room(self, room_id): + """Called when we initially fill out user_directory one room at a time + """ # TODO: Check we're still joined to room is_public = yield self.store.is_room_world_readable_or_publicly_joinable(room_id) @@ -93,50 +149,10 @@ class UserDirectoyHandler(object): self.initially_handled_users |= unhandled_users - @defer.inlineCallbacks - def _do_initial_spam(self): - # TODO: pull from current delta stream_id - new_pos = self.store.get_room_max_stream_ordering() - - yield self.store.delete_all_from_user_dir() - - room_ids = yield self.store.get_all_rooms() - - for room_id in room_ids: - yield self._handle_room(room_id) - - self.initially_handled_users = None - - yield self.store.update_user_directory_stream_pos(new_pos) - - @defer.inlineCallbacks - def _handle_new_user(self, room_id, user_id, profile): - row = yield self.store.get_user_in_directory(user_id) - if row: - return - - yield self.store.add_profiles_to_user_dir(room_id, {user_id: profile}) - - def _handle_remove_user(self, room_id, user_id): - row = yield self.store.get_user_in_directory(user_id) - if not row or row["room_id"] != room_id: - return - - # TODO: Make this faster? 
- rooms = yield self.store.get_rooms_for_user(user_id) - for j_room_id in rooms: - is_public = yield self.store.is_room_world_readable_or_publicly_joinable( - j_room_id - ) - - if is_public: - yield self.store.update_user_in_user_dir(user_id, j_room_id) - return - - yield self.store.remove_from_user_dir(user_id) - @defer.inlineCallbacks def _handle_deltas(self, deltas): + """Called with the state deltas to process + """ for delta in deltas: typ = delta["type"] state_key = delta["state_key"] @@ -144,22 +160,33 @@ class UserDirectoyHandler(object): event_id = delta["event_id"] prev_event_id = delta["prev_event_id"] + # For join rule and visibility changes we need to check if the room + # may have become public or not and add/remove the users in said room if typ == EventTypes.RoomHistoryVisibility: change = yield self._get_key_change( prev_event_id, event_id, key_name="history_visibility", public_value="world_readable", ) + + # If change is None, no change. True => become world readable, + # False => was world readable if change is None: continue + # There's been a change to or from being world readable. + is_public = yield self.store.is_room_world_readable_or_publicly_joinable( room_id ) - if change and is_public: + if change and not is_public: + # If we became world readable but room isn't currently public then + # we ignore the change continue - elif not change and not is_public: + elif not change and is_public: + # If we stopped being world readable but are still public, + # ignore the change continue users_with_profile = yield self.state.get_current_user_in_room(room_id) @@ -213,8 +240,60 @@ class UserDirectoyHandler(object): else: yield self._handle_remove_user(room_id, state_key) + @defer.inlineCallbacks + def _handle_new_user(self, room_id, user_id, profile): + """Called when we might need to add user to directory + + Args: + room_id (str): room_id that user joined or started being public that + user_id (str) + """ + row = yield self.store.get_user_in_directory(user_id) + if row: + return + + yield self.store.add_profiles_to_user_dir(room_id, {user_id: profile}) + + def _handle_remove_user(self, room_id, user_id): + """Called when we might need to remove user to directory + + Args: + room_id (str): room_id that user left or stopped being public that + user_id (str) + """ + row = yield self.store.get_user_in_directory(user_id) + if not row or row["room_id"] != room_id: + # Either the user wasn't in directory or we're still in a room that + # is public (i.e. the room_id in the database) + return + + # TODO: Make this faster? + rooms = yield self.store.get_rooms_for_user(user_id) + for j_room_id in rooms: + is_public = yield self.store.is_room_world_readable_or_publicly_joinable( + j_room_id + ) + + if is_public: + yield self.store.update_user_in_user_dir(user_id, j_room_id) + return + + yield self.store.remove_from_user_dir(user_id) + @defer.inlineCallbacks def _get_key_change(self, prev_event_id, event_id, key_name, public_value): + """Given two events check if the `key_name` field in content changed + from not matching `public_value` to doing so. + + For example, check if `history_visibility` (`key_name`) changed from + `shared` to `world_readable` (`public_value`). + + Returns: + None if the field in the events either both match `public_value` o + neither do, i.e. there has been no change. 
+ True if it didnt match `public_value` but now does + Falsse if it did match `public_value` but now doesn't + """ prev_event = None event = None if prev_event_id: diff --git a/synapse/rest/client/v2_alpha/user_directory.py b/synapse/rest/client/v2_alpha/user_directory.py index fe9120719..17d3dffc8 100644 --- a/synapse/rest/client/v2_alpha/user_directory.py +++ b/synapse/rest/client/v2_alpha/user_directory.py @@ -39,6 +39,22 @@ class UserDirectorySearchRestServlet(RestServlet): @defer.inlineCallbacks def on_POST(self, request): + """Searches for users in directory + + Returns: + dict of the form:: + + { + "limited": , # whether there were more results or not + "results": [ # Ordered by best match first + { + "user_id": , + "display_name": , + "avatar_url": + } + ] + } + """ yield self.auth.get_user_by_req(request, allow_guest=False) body = parse_json_object_from_request(request) diff --git a/synapse/storage/schema/delta/42/user_dir.py b/synapse/storage/schema/delta/42/user_dir.py index 38538960a..57b89ba55 100644 --- a/synapse/storage/schema/delta/42/user_dir.py +++ b/synapse/storage/schema/delta/42/user_dir.py @@ -34,7 +34,7 @@ INSERT INTO user_directory_stream_pos (stream_id) VALUES (null); POSTGRES_TABLE = """ CREATE TABLE user_directory ( user_id TEXT NOT NULL, - room_id TEXT NOT NULL, + room_id TEXT NOT NULL, -- A room_id that we know is public display_name TEXT, avatar_url TEXT, vector tsvector diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index ebcc8b963..83812bf09 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -26,6 +26,8 @@ class UserDirectoryStore(SQLBaseStore): @cachedInlineCallbacks(cache_context=True) def is_room_world_readable_or_publicly_joinable(self, room_id, cache_context): + """Check if the room is either world_readable or publically joinable + """ current_state_ids = yield self.get_current_state_ids( room_id, on_invalidate=cache_context.invalidate ) @@ -47,14 +49,24 @@ class UserDirectoryStore(SQLBaseStore): defer.returnValue(False) def add_profiles_to_user_dir(self, room_id, users_with_profile): + """Add profiles to the user directory + + Args: + room_id (str): A room_id that all users are in that is world_readable + or publically joinable + users_with_profile (dict): Users to add to directory in the form of + mapping of user_id -> ProfileInfo + """ if isinstance(self.database_engine, PostgresEngine): + # We weight the loclpart most highly, then display name and finally + # server name sql = """ INSERT INTO user_directory (user_id, room_id, display_name, avatar_url, vector) VALUES (?,?,?,?, setweight(to_tsvector('english', ?), 'A') - || to_tsvector('english', ?) 
- || to_tsvector('english', COALESCE(?, '')) + || setweight(to_tsvector('english', ?), 'D') + || setweight(to_tsvector('english', COALESCE(?, '')), 'B') ) """ args = ( @@ -113,6 +125,8 @@ class UserDirectoryStore(SQLBaseStore): self.get_user_in_directory.invalidate((user_id,)) def get_all_rooms(self): + """Get all room_ids we've ever known about + """ return self._simple_select_onecol( table="current_state_events", keyvalues={}, @@ -121,6 +135,8 @@ class UserDirectoryStore(SQLBaseStore): ) def delete_all_from_user_dir(self): + """Delete the entire user directory + """ def _delete_all_from_user_dir_txn(txn): txn.execute("DELETE FROM user_directory") txn.call_after(self.get_user_in_directory.invalidate_all) @@ -170,12 +186,29 @@ class UserDirectoryStore(SQLBaseStore): @defer.inlineCallbacks def search_user_dir(self, search_term, limit): + """Searches for users in directory + + Returns: + dict of the form:: + + { + "limited": , # whether there were more results or not + "results": [ # Ordered by best match first + { + "user_id": , + "display_name": , + "avatar_url": + } + ] + } + """ + if isinstance(self.database_engine, PostgresEngine): sql = """ SELECT user_id, display_name, avatar_url FROM user_directory WHERE vector @@ plainto_tsquery('english', ?) - ORDER BY ts_rank_cd(vector, plainto_tsquery('english', ?)) DESC + ORDER BY ts_rank_cd(vector, plainto_tsquery('english', ?)) DESC LIMIT ? """ args = (search_term, search_term, limit + 1,) From 350622a107c356da630eba09b63ed4b6de94b198 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 15:11:36 +0100 Subject: [PATCH 065/139] Handle the server leaving a public room --- synapse/handlers/user_directory.py | 23 ++++++++++++++++++--- synapse/state.py | 11 ++++++++++ synapse/storage/schema/delta/42/user_dir.py | 4 ++++ synapse/storage/user_directory.py | 11 ++++++++++ 4 files changed, 46 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 88b79e332..4e491a43e 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -132,7 +132,9 @@ class UserDirectoyHandler(object): def _handle_intial_room(self, room_id): """Called when we initially fill out user_directory one room at a time """ - # TODO: Check we're still joined to room + is_in_room = yield self.store.get_is_host_in_room(room_id, self.server_name) + if not is_in_room: + return is_public = yield self.store.is_room_world_readable_or_publicly_joinable(room_id) if not is_public: @@ -229,7 +231,22 @@ class UserDirectoyHandler(object): if change is None: continue - if change: + if not change: + # Need to check if the server left the room entirely, if so + # we might need to remove all the users in that room + is_in_room = yield self.store.get_is_host_in_room( + room_id, self.server_name, + ) + if not is_in_room: + # Fetch all the users that we marked as being in user + # directory due to being in the room and then check if + # need to remove those users or not + user_ids = yield self.store.get_users_in_dir_due_to_room(room_id) + for user_id in user_ids: + yield self._handle_remove_user(room_id, user_id) + return + + if change: # The user joined event = yield self.store.get_event(event_id) profile = ProfileInfo( avatar_url=event.content.get("avatar_url"), @@ -237,7 +254,7 @@ class UserDirectoyHandler(object): ) yield self._handle_new_user(room_id, state_key, profile) - else: + else: # The user left yield self._handle_remove_user(room_id, state_key) @defer.inlineCallbacks diff --git 
a/synapse/state.py b/synapse/state.py index 02fee47f3..dffa79e4c 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -186,6 +186,17 @@ class StateHandler(object): ) defer.returnValue(joined_hosts) + @defer.inlineCallbacks + def get_is_host_in_room(self, room_id, host, latest_event_ids=None): + if not latest_event_ids: + latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) + logger.debug("calling resolve_state_groups from get_is_host_in_room") + entry = yield self.resolve_state_groups(room_id, latest_event_ids) + is_host_joined = yield self.store.is_host_joined( + room_id, host, entry.state_id, entry.state + ) + defer.returnValue(is_host_joined) + @defer.inlineCallbacks def compute_event_context(self, event, old_state=None): """Build an EventContext structure for the event. diff --git a/synapse/storage/schema/delta/42/user_dir.py b/synapse/storage/schema/delta/42/user_dir.py index 57b89ba55..95a7a79fd 100644 --- a/synapse/storage/schema/delta/42/user_dir.py +++ b/synapse/storage/schema/delta/42/user_dir.py @@ -41,6 +41,7 @@ CREATE TABLE user_directory ( ); CREATE INDEX user_directory_fts_idx ON user_directory USING gin(vector); +CREATE INDEX user_directory_room_idx ON user_directory(room_id); CREATE INDEX user_directory_user_idx ON user_directory(user_id); """ @@ -48,6 +49,9 @@ CREATE INDEX user_directory_user_idx ON user_directory(user_id); SQLITE_TABLE = """ CREATE VIRTUAL TABLE user_directory USING fts4 ( user_id, room_id, display_name, avatar_url, value ); + +CREATE INDEX user_directory_room_idx ON user_directory(room_id); +CREATE INDEX user_directory_user_idx ON user_directory(user_id); """ diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 83812bf09..0df979cb0 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -124,6 +124,17 @@ class UserDirectoryStore(SQLBaseStore): ) self.get_user_in_directory.invalidate((user_id,)) + def get_users_in_dir_due_to_room(self, room_id): + """Get all user_ids that are in the room directory becuase they're + in the given room_id + """ + return self._simple_select_onecol( + table="user_directory", + keyvalues={"room_id": room_id}, + retcol="user_id", + desc="get_users_in_dir_due_to_room", + ) + def get_all_rooms(self): """Get all room_ids we've ever known about """ From dc51af3d031030fdf553f8478f7930596f2694f7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 15:13:49 +0100 Subject: [PATCH 066/139] Pull max id from correct table --- synapse/handlers/user_directory.py | 6 ++---- synapse/storage/user_directory.py | 8 ++++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 4e491a43e..8331f6422 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -111,9 +111,7 @@ class UserDirectoyHandler(object): """Populates the user_directory from the current state of the DB, used when synapse first starts with user_directory support """ - - # TODO: pull from current delta stream_id - new_pos = self.store.get_room_max_stream_ordering() + new_pos = yield self.store.get_max_stream_id_in_current_state_deltas() # Delete any existing entries just in case there are any yield self.store.delete_all_from_user_dir() @@ -284,7 +282,7 @@ class UserDirectoyHandler(object): # is public (i.e. the room_id in the database) return - # TODO: Make this faster? + # XXX: Make this faster? 
rooms = yield self.store.get_rooms_for_user(user_id) for j_room_id in rooms: is_public = yield self.store.is_room_world_readable_or_publicly_joinable( diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 0df979cb0..011c711ec 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -195,6 +195,14 @@ class UserDirectoryStore(SQLBaseStore): "get_current_state_deltas", self.cursor_to_dict, sql, prev_stream_id ) + def get_max_stream_id_in_current_state_deltas(self): + return self._simple_select_one_onecol( + table="current_state_delta_stream", + keyvalues={}, + retcol="COALESCE(MAX(stream_id), -1)", + desc="get_max_stream_id_in_current_state_deltas", + ) + @defer.inlineCallbacks def search_user_dir(self, search_term, limit): """Searches for users in directory From 5d79d728f5f38463171f0d063713905f8cb9faec Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 15:23:49 +0100 Subject: [PATCH 067/139] Split out directory and search tables --- synapse/storage/schema/delta/42/user_dir.py | 25 +++++---- synapse/storage/user_directory.py | 60 ++++++++++++++------- 2 files changed, 56 insertions(+), 29 deletions(-) diff --git a/synapse/storage/schema/delta/42/user_dir.py b/synapse/storage/schema/delta/42/user_dir.py index 95a7a79fd..7e3266292 100644 --- a/synapse/storage/schema/delta/42/user_dir.py +++ b/synapse/storage/schema/delta/42/user_dir.py @@ -28,30 +28,33 @@ CREATE TABLE user_directory_stream_pos ( ); INSERT INTO user_directory_stream_pos (stream_id) VALUES (null); -""" - -POSTGRES_TABLE = """ CREATE TABLE user_directory ( user_id TEXT NOT NULL, room_id TEXT NOT NULL, -- A room_id that we know is public display_name TEXT, - avatar_url TEXT, - vector tsvector + avatar_url TEXT ); -CREATE INDEX user_directory_fts_idx ON user_directory USING gin(vector); CREATE INDEX user_directory_room_idx ON user_directory(room_id); CREATE INDEX user_directory_user_idx ON user_directory(user_id); """ -SQLITE_TABLE = """ -CREATE VIRTUAL TABLE user_directory - USING fts4 ( user_id, room_id, display_name, avatar_url, value ); +POSTGRES_TABLE = """ +CREATE TABLE user_directory_search ( + user_id TEXT NOT NULL, + vector tsvector +); -CREATE INDEX user_directory_room_idx ON user_directory(room_id); -CREATE INDEX user_directory_user_idx ON user_directory(user_id); +CREATE INDEX user_directory_search_fts_idx ON user_directory_search USING gin(vector); +CREATE INDEX user_directory_search_user_idx ON user_directory_search(user_id); +""" + + +SQLITE_TABLE = """ +CREATE VIRTUAL TABLE user_directory_search + USING fts4 ( user_id, value ); """ diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 011c711ec..b1957cb87 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -61,9 +61,8 @@ class UserDirectoryStore(SQLBaseStore): # We weight the loclpart most highly, then display name and finally # server name sql = """ - INSERT INTO user_directory - (user_id, room_id, display_name, avatar_url, vector) - VALUES (?,?,?,?, + INSERT INTO user_directory_search(user_id, vector) + VALUES (?, setweight(to_tsvector('english', ?), 'A') || setweight(to_tsvector('english', ?), 'D') || setweight(to_tsvector('english', COALESCE(?, '')), 'B') @@ -71,21 +70,19 @@ class UserDirectoryStore(SQLBaseStore): """ args = ( ( - user_id, room_id, p.display_name, p.avatar_url, - get_localpart_from_id(user_id), get_domain_from_id(user_id), - p.display_name, + user_id, get_localpart_from_id(user_id), 
get_domain_from_id(user_id), + profile.display_name, ) - for user_id, p in users_with_profile.iteritems() + for user_id, profile in users_with_profile.iteritems() ) elif isinstance(self.database_engine, Sqlite3Engine): sql = """ - INSERT INTO user_directory - (user_id, room_id, display_name, avatar_url, value) - VALUES (?,?,?,?,?) + INSERT INTO user_directory_search(user_id, value) + VALUES (?,?) """ args = ( ( - user_id, room_id, p.display_name, p.avatar_url, + user_id, "%s %s" % (user_id, p.display_name,) if p.display_name else user_id ) for user_id, p in users_with_profile.iteritems() @@ -96,6 +93,19 @@ class UserDirectoryStore(SQLBaseStore): def _add_profiles_to_user_dir_txn(txn): txn.executemany(sql, args) + self._simple_insert_many_txn( + txn, + table="user_directory", + values=[ + { + "user_id": user_id, + "room_id": room_id, + "display_name": profile.display_name, + "avatar_url": profile.avatar_url, + } + for user_id, profile in users_with_profile.iteritems() + ] + ) for user_id in users_with_profile: txn.call_after( self.get_user_in_directory.invalidate, (user_id,) @@ -117,12 +127,23 @@ class UserDirectoryStore(SQLBaseStore): @defer.inlineCallbacks def remove_from_user_dir(self, user_id): - yield self._simple_delete( - table="user_directory", - keyvalues={"user_id": user_id}, - desc="remove_from_user_dir", + def _remove_from_user_dir_txn(txn): + self._simple_delete_txn( + txn, + table="user_directory", + keyvalues={"user_id": user_id}, + ) + self._simple_delete_txn( + txn, + table="user_directory_search", + keyvalues={"user_id": user_id}, + ) + txn.call_after( + self.get_user_in_directory.invalidate, (user_id,) + ) + return self.runInteraction( + "remove_from_user_dir", _remove_from_user_dir_txn, ) - self.get_user_in_directory.invalidate((user_id,)) def get_users_in_dir_due_to_room(self, room_id): """Get all user_ids that are in the room directory becuase they're @@ -150,6 +171,7 @@ class UserDirectoryStore(SQLBaseStore): """ def _delete_all_from_user_dir_txn(txn): txn.execute("DELETE FROM user_directory") + txn.execute("DELETE FROM user_directory_search") txn.call_after(self.get_user_in_directory.invalidate_all) return self.runInteraction( "delete_all_from_user_dir", _delete_all_from_user_dir_txn @@ -225,7 +247,8 @@ class UserDirectoryStore(SQLBaseStore): if isinstance(self.database_engine, PostgresEngine): sql = """ SELECT user_id, display_name, avatar_url - FROM user_directory + FROM user_directory_search + INNER JOIN user_directory USING (user_id) WHERE vector @@ plainto_tsquery('english', ?) ORDER BY ts_rank_cd(vector, plainto_tsquery('english', ?)) DESC LIMIT ? @@ -234,7 +257,8 @@ class UserDirectoryStore(SQLBaseStore): elif isinstance(self.database_engine, Sqlite3Engine): sql = """ SELECT user_id, display_name, avatar_url - FROM user_directory + FROM user_directory_search + INNER JOIN user_directory USING (user_id) WHERE value MATCH ? ORDER BY rank(matchinfo(user_directory)) DESC LIMIT ? 
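The patch above splits the single user_directory table into a plain profile table plus a separate full-text search table, joined on user_id at query time. The standalone sketch below is not part of the patch series: the user IDs and room ID are invented, and it assumes a Python sqlite3 build with FTS4 available (as Synapse's SQLite code path does). It shows the same split-table layout and join-based search in miniature:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.executescript("""
        -- Plain table holding profile data, one row per user.
        CREATE TABLE user_directory (
            user_id TEXT NOT NULL,
            room_id TEXT NOT NULL,
            display_name TEXT,
            avatar_url TEXT
        );
        -- Separate FTS4 virtual table holding only the searchable text.
        CREATE VIRTUAL TABLE user_directory_search USING fts4 (user_id, value);
    """)

    conn.execute(
        "INSERT INTO user_directory VALUES (?, ?, ?, ?)",
        ("@alice:example.com", "!abc:example.com", "Alice", None),
    )
    conn.execute(
        "INSERT INTO user_directory_search VALUES (?, ?)",
        ("@alice:example.com", "@alice:example.com Alice"),
    )

    # The search hits the FTS table and joins back to user_directory for the
    # profile columns, mirroring the SELECT added in the patch above.
    rows = conn.execute(
        """
        SELECT user_id, display_name, avatar_url
        FROM user_directory_search
        INNER JOIN user_directory USING (user_id)
        WHERE value MATCH ?
        """,
        ("ali*",),
    ).fetchall()
    print(rows)  # [('@alice:example.com', 'Alice', None)]

Keeping the profile columns out of the FTS virtual table means profile updates and deletions touch a normal indexed table, while only the searchable text has to go through the full-text index.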
From 304880d18545b59a51c5d4b928e563c6d1514fdc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 15:46:36 +0100 Subject: [PATCH 068/139] Add stream change cache --- synapse/storage/__init__.py | 12 ++++++++++++ synapse/storage/events.py | 4 ++++ synapse/storage/user_directory.py | 4 +++- synapse/util/caches/stream_change_cache.py | 15 +++++++++++++++ 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 11655bf60..3c88ba986 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -223,6 +223,18 @@ class DataStore(RoomMemberStore, RoomStore, "DeviceListFederationStreamChangeCache", device_list_max, ) + curr_state_delta_prefill, min_curr_state_delta_id = self._get_cache_dict( + db_conn, "current_state_delta_stream", + entity_column="room_id", + stream_column="stream_id", + max_value=events_max, # As we share the stream id with events token + limit=1000, + ) + self._curr_state_delta_stream_cache = StreamChangeCache( + "_curr_state_delta_stream_cache", min_curr_state_delta_id, + prefilled_cache=curr_state_delta_prefill, + ) + cur = LoggingTransaction( db_conn.cursor(), name="_find_stream_orderings_for_times_txn", diff --git a/synapse/storage/events.py b/synapse/storage/events.py index dfb57f9d1..77861488d 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -755,6 +755,10 @@ class EventsStore(SQLBaseStore): ] ) + self._curr_state_delta_stream_cache.enttity_has_changed( + room_id, max_stream_order, + ) + # Invalidate the various caches # Figure out the changes of membership to invalidate the diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index b1957cb87..15b8ea046 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -204,7 +204,9 @@ class UserDirectoryStore(SQLBaseStore): ) def get_current_state_deltas(self, prev_stream_id): - # TODO: Add stream change cache + if not self._curr_state_delta_stream_cache.has_any_entity_changed(prev_stream_id): + return [] + # TODO: Add limit sql = """ SELECT stream_id, room_id, type, state_key, event_id, prev_event_id diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py index 70fe00ce0..c498aee46 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py @@ -89,6 +89,21 @@ class StreamChangeCache(object): return result + def has_any_entity_changed(self, stream_pos): + """Returns if any entity has changed + """ + assert type(stream_pos) is int + + if stream_pos >= self._earliest_known_stream_pos: + self.metrics.inc_hits() + if stream_pos >= max(self._cache): + return False + else: + return True + else: + self.metrics.inc_misses() + return True + def get_all_entities_changed(self, stream_pos): """Returns all entites that have had new things since the given position. If the position is too old it will return None. 
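The patch above keys the new get_current_state_deltas() short-circuit on StreamChangeCache.has_any_entity_changed(). The toy class below is a simplified illustration with invented names, not the real synapse.util.caches.stream_change_cache implementation (it drops the metrics and eviction). It captures the core logic: the cache records the newest stream position at which each entity changed, plus the earliest position it has complete knowledge of, and any query older than that watermark must conservatively be treated as "something changed":

    class ToyStreamChangeCache(object):
        def __init__(self, current_stream_pos):
            # Earliest stream position the cache has complete knowledge of;
            # anything before this must be assumed to have changed.
            self._earliest_known_stream_pos = current_stream_pos
            # entity -> latest stream position at which it changed
            self._entity_to_pos = {}

        def entity_has_changed(self, entity, stream_pos):
            if stream_pos > self._earliest_known_stream_pos:
                self._entity_to_pos[entity] = stream_pos

        def has_any_entity_changed(self, stream_pos):
            if stream_pos < self._earliest_known_stream_pos:
                # Too old for the cache to know: report a change.
                return True
            if not self._entity_to_pos:
                return False
            # Something changed iff some entity moved after stream_pos.
            return stream_pos < max(self._entity_to_pos.values())


    cache = ToyStreamChangeCache(current_stream_pos=10)
    cache.entity_has_changed("!room:example.com", 12)
    print(cache.has_any_entity_changed(11))  # True: a change happened at 12
    print(cache.has_any_entity_changed(12))  # False: nothing newer than 12

In the user directory handler this lets get_current_state_deltas() return an empty list without touching the database whenever nothing in current_state_delta_stream has advanced past the caller's position.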
From 63c58c2a3fced42c254da1c1ae5e55a977b7141c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 16:17:58 +0100 Subject: [PATCH 069/139] Limit number of things we fetch out of the db --- synapse/storage/user_directory.py | 39 ++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 15b8ea046..9137fc24e 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -207,16 +207,37 @@ class UserDirectoryStore(SQLBaseStore): if not self._curr_state_delta_stream_cache.has_any_entity_changed(prev_stream_id): return [] - # TODO: Add limit - sql = """ - SELECT stream_id, room_id, type, state_key, event_id, prev_event_id - FROM current_state_delta_stream - WHERE stream_id > ? - ORDER BY stream_id ASC - """ + def get_current_state_deltas_txn(txn): + # First we calculate the max stream id that will give us less than + # N results + sql = """ + SELECT stream_id, count(*) + FROM current_state_delta_stream + WHERE stream_id > ? + GROUP BY stream_id + ORDER BY stream_id ASC + LIMIT 100 + """ + txn.execute(sql, (prev_stream_id,)) - return self._execute( - "get_current_state_deltas", self.cursor_to_dict, sql, prev_stream_id + total = 0 + for max_stream_id, count in txn: + total += count + if total > 50: + break + + # Now actually get the deltas + sql = """ + SELECT stream_id, room_id, type, state_key, event_id, prev_event_id + FROM current_state_delta_stream + WHERE ? < stream_id AND stream_id <= ? + ORDER BY stream_id ASC + """ + txn.execute(sql, (prev_stream_id, max_stream_id,)) + return self.cursor_to_dict(txn) + + return self.runInteraction( + "get_current_state_deltas", get_current_state_deltas_txn ) def get_max_stream_id_in_current_state_deltas(self): From 4abcff0177768c43eb64ed7784ca8ebf30f3435c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 16:22:36 +0100 Subject: [PATCH 070/139] Fix typo --- synapse/storage/events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 77861488d..528f19eb8 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -755,7 +755,7 @@ class EventsStore(SQLBaseStore): ] ) - self._curr_state_delta_stream_cache.enttity_has_changed( + self._curr_state_delta_stream_cache.entity_has_changed( room_id, max_stream_order, ) From f0910617111fe81b79777a2c597b1e4240d61a9a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 16:34:40 +0100 Subject: [PATCH 071/139] Fix tests --- synapse/handlers/user_directory.py | 4 ++-- synapse/storage/user_directory.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 8331f6422..75f259ee4 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -130,7 +130,7 @@ class UserDirectoyHandler(object): def _handle_intial_room(self, room_id): """Called when we initially fill out user_directory one room at a time """ - is_in_room = yield self.store.get_is_host_in_room(room_id, self.server_name) + is_in_room = yield self.state.get_is_host_in_room(room_id, self.server_name) if not is_in_room: return @@ -232,7 +232,7 @@ class UserDirectoyHandler(object): if not change: # Need to check if the server left the room entirely, if so # we might need to remove all the users in that room - is_in_room = yield self.store.get_is_host_in_room( + is_in_room = yield 
self.state.get_is_host_in_room( room_id, self.server_name, ) if not is_in_room: diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 9137fc24e..348064436 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -221,6 +221,7 @@ class UserDirectoryStore(SQLBaseStore): txn.execute(sql, (prev_stream_id,)) total = 0 + max_stream_id = prev_stream_id for max_stream_id, count in txn: total += count if total > 50: From f9791498ae2ee267aef6965bd04998d7d1bb8d43 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 16:50:57 +0100 Subject: [PATCH 072/139] Typos --- synapse/handlers/user_directory.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 75f259ee4..7f8da1a87 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -304,10 +304,10 @@ class UserDirectoyHandler(object): `shared` to `world_readable` (`public_value`). Returns: - None if the field in the events either both match `public_value` o + None if the field in the events either both match `public_value` neither do, i.e. there has been no change. True if it didnt match `public_value` but now does - Falsse if it did match `public_value` but now doesn't + False if it did match `public_value` but now doesn't """ prev_event = None event = None @@ -320,18 +320,18 @@ class UserDirectoyHandler(object): if not event and not prev_event: defer.returnValue(None) - prev_hist_vis = None - hist_vis = None + prev_value = None + value = None if prev_event: - prev_hist_vis = prev_event.content.get(key_name, None) + prev_value = prev_event.content.get(key_name, None) if event: - hist_vis = event.content.get(key_name, None) + value = event.content.get(key_name, None) - if hist_vis == public_value and prev_hist_vis != public_value: + if value == public_value and prev_value != public_value: defer.returnValue(True) - elif hist_vis != public_value and prev_hist_vis == public_value: + elif value != public_value and prev_value == public_value: defer.returnValue(False) else: defer.returnValue(None) From b2d8d0710912ab25e327f03bfea69b3c8333b2c8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 17:00:24 +0100 Subject: [PATCH 073/139] Lifts things into separate function --- synapse/handlers/user_directory.py | 110 ++++++++++++++--------------- 1 file changed, 54 insertions(+), 56 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 7f8da1a87..48d3c4818 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -162,63 +162,10 @@ class UserDirectoyHandler(object): # For join rule and visibility changes we need to check if the room # may have become public or not and add/remove the users in said room - if typ == EventTypes.RoomHistoryVisibility: - change = yield self._get_key_change( - prev_event_id, event_id, - key_name="history_visibility", - public_value="world_readable", + if typ in (EventTypes.RoomHistoryVisibility, EventTypes.JoinRules): + yield self._handle_room_publicity_change( + room_id, prev_event_id, event_id, typ, ) - - # If change is None, no change. True => become world readable, - # False => was world readable - if change is None: - continue - - # There's been a change to or from being world readable. 
- - is_public = yield self.store.is_room_world_readable_or_publicly_joinable( - room_id - ) - - if change and not is_public: - # If we became world readable but room isn't currently public then - # we ignore the change - continue - elif not change and is_public: - # If we stopped being world readable but are still public, - # ignore the change - continue - - users_with_profile = yield self.state.get_current_user_in_room(room_id) - for user_id, profile in users_with_profile.iteritems(): - if change: - yield self._handle_new_user(room_id, user_id, profile) - else: - yield self._handle_remove_user(room_id, user_id) - elif typ == EventTypes.JoinRules: - change = yield self._get_key_change( - prev_event_id, event_id, - key_name="join_rules", - public_value=JoinRules.PUBLIC, - ) - if change is None: - continue - - is_public = yield self.store.is_room_world_readable_or_publicly_joinable( - room_id - ) - - if change and is_public: - continue - elif not change and not is_public: - continue - - users_with_profile = yield self.state.get_current_user_in_room(room_id) - for user_id, profile in users_with_profile.iteritems(): - if change: - yield self._handle_new_user(room_id, user_id, profile) - else: - yield self._handle_remove_user(room_id, user_id) elif typ == EventTypes.Member: change = yield self._get_key_change( prev_event_id, event_id, @@ -255,6 +202,57 @@ class UserDirectoyHandler(object): else: # The user left yield self._handle_remove_user(room_id, state_key) + def _handle_room_publicity_change(self, room_id, prev_event_id, event_id, typ): + """Handle a room having potentially changed from/to world_readable/publically + joinable. + + Args: + room_id (str) + prev_event_id (str|None): The previous event before the state change + event_id (str|None): The new event after the state change + typ (str): Type of the event + """ + if typ == EventTypes.RoomHistoryVisibility: + change = yield self._get_key_change( + prev_event_id, event_id, + key_name="history_visibility", + public_value="world_readable", + ) + elif typ == EventTypes.JoinRules: + change = yield self._get_key_change( + prev_event_id, event_id, + key_name="join_rules", + public_value=JoinRules.PUBLIC, + ) + else: + raise Exception("Invalid event type") + # If change is None, no change. True => become world_readable/public, + # False => was world_readable/public + if change is None: + return + + # There's been a change to or from being world readable. 
+ + is_public = yield self.store.is_room_world_readable_or_publicly_joinable( + room_id + ) + + if change and not is_public: + # If we became world readable but room isn't currently public then + # we ignore the change + return + elif not change and is_public: + # If we stopped being world readable but are still public, + # ignore the change + return + + users_with_profile = yield self.state.get_current_user_in_room(room_id) + for user_id, profile in users_with_profile.iteritems(): + if change: + yield self._handle_new_user(room_id, user_id, profile) + else: + yield self._handle_remove_user(room_id, user_id) + @defer.inlineCallbacks def _handle_new_user(self, room_id, user_id, profile): """Called when we might need to add user to directory From f1378aef9199390ae0130cc6bda5c7f4fa7a2e33 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 17:03:08 +0100 Subject: [PATCH 074/139] Convert to int --- synapse/storage/user_directory.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 348064436..71b050264 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -204,6 +204,7 @@ class UserDirectoryStore(SQLBaseStore): ) def get_current_state_deltas(self, prev_stream_id): + prev_stream_id = int(prev_stream_id) if not self._curr_state_delta_stream_cache.has_any_entity_changed(prev_stream_id): return [] From cc7609aa9fe525cc3096c87df2a7f6d090d500ca Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 17:11:18 +0100 Subject: [PATCH 075/139] Comment briefly on how we keep user_directory up to date --- synapse/handlers/user_directory.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 48d3c4818..7130cc6ee 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -29,6 +29,16 @@ class UserDirectoyHandler(object): """Handles querying of and keeping updated the user_directory. N.B.: ASSUMES IT IS THE ONLY THING THAT MODIFIES THE USER DIRECTORY + + The user directory is filled with users who this server can see are joined to a + world_readable or publically joinable room. We keep a database table up to date + by streaming changes of the current state and recalculating whether users should + be in the directory or not when necessary. + + For each user in the directory we also store a room_id which is public and that the + user is joined to. This allows us to ignore history_visibility and join_rules changes + for that user in all other public rooms, as we know they'll still be in at least + one public room. 
""" def __init__(self, hs): From 5dd1b2c525cb786614d1503757bf52a7086f3bf9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 17:29:12 +0100 Subject: [PATCH 076/139] Use unique indices --- synapse/storage/schema/delta/42/user_dir.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/schema/delta/42/user_dir.py b/synapse/storage/schema/delta/42/user_dir.py index 7e3266292..c34aa5e7d 100644 --- a/synapse/storage/schema/delta/42/user_dir.py +++ b/synapse/storage/schema/delta/42/user_dir.py @@ -37,7 +37,7 @@ CREATE TABLE user_directory ( ); CREATE INDEX user_directory_room_idx ON user_directory(room_id); -CREATE INDEX user_directory_user_idx ON user_directory(user_id); +CREATE UNIQUE INDEX user_directory_user_idx ON user_directory(user_id); """ @@ -48,7 +48,7 @@ CREATE TABLE user_directory_search ( ); CREATE INDEX user_directory_search_fts_idx ON user_directory_search USING gin(vector); -CREATE INDEX user_directory_search_user_idx ON user_directory_search(user_id); +CREATE UNIQUE INDEX user_directory_search_user_idx ON user_directory_search(user_id); """ From f5cc22bdc63e58857f435227b70d145d07aabb77 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 17:30:26 +0100 Subject: [PATCH 077/139] Comment on why arbitrary comments --- synapse/storage/user_directory.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 71b050264..2e9175f50 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -210,7 +210,9 @@ class UserDirectoryStore(SQLBaseStore): def get_current_state_deltas_txn(txn): # First we calculate the max stream id that will give us less than - # N results + # N results. + # We arbitarily limit to 100 stream_id entries to ensure we don't + # select toooo many. sql = """ SELECT stream_id, count(*) FROM current_state_delta_stream @@ -225,7 +227,9 @@ class UserDirectoryStore(SQLBaseStore): max_stream_id = prev_stream_id for max_stream_id, count in txn: total += count - if total > 50: + if total > 100: + # We arbitarily limit to 100 entries to ensure we don't + # select toooo many. break # Now actually get the deltas From a757dd4863d0a467becf5b73ca15eafeb3c2823c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 18:07:12 +0100 Subject: [PATCH 078/139] Use prefix matching --- synapse/storage/user_directory.py | 34 ++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 2e9175f50..ca2be9daf 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -21,6 +21,8 @@ from synapse.api.constants import EventTypes, JoinRules from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.types import get_domain_from_id, get_localpart_from_id +import re + class UserDirectoryStore(SQLBaseStore): @@ -272,17 +274,17 @@ class UserDirectoryStore(SQLBaseStore): ] } """ - + search_query = _parse_query(self.database_engine, search_term) if isinstance(self.database_engine, PostgresEngine): sql = """ SELECT user_id, display_name, avatar_url FROM user_directory_search INNER JOIN user_directory USING (user_id) - WHERE vector @@ plainto_tsquery('english', ?) - ORDER BY ts_rank_cd(vector, plainto_tsquery('english', ?)) DESC + WHERE vector @@ to_tsquery('english', ?) + ORDER BY ts_rank_cd(vector, to_tsquery('english', ?)) DESC LIMIT ? 
""" - args = (search_term, search_term, limit + 1,) + args = (search_query, search_query, limit + 1,) elif isinstance(self.database_engine, Sqlite3Engine): sql = """ SELECT user_id, display_name, avatar_url @@ -292,7 +294,7 @@ class UserDirectoryStore(SQLBaseStore): ORDER BY rank(matchinfo(user_directory)) DESC LIMIT ? """ - args = (search_term, limit + 1) + args = (search_query, limit + 1) else: # This should be unreachable. raise Exception("Unrecognized database engine") @@ -307,3 +309,25 @@ class UserDirectoryStore(SQLBaseStore): "limited": limited, "results": results, }) + + +def _parse_query(database_engine, search_term): + """Takes a plain unicode string from the user and converts it into a form + that can be passed to database. + We use this so that we can add prefix matching, which isn't something + that is supported by default. + + We specifically add both a prefix and non prefix matching term so that + exact matches get ranked higher. + """ + + # Pull out the individual words, discarding any non-word characters. + results = re.findall(r"([\w\-]+)", search_term, re.UNICODE) + + if isinstance(database_engine, PostgresEngine): + return " & ".join("%s:* & %s" % (result, result,) for result in results) + elif isinstance(database_engine, Sqlite3Engine): + return " & ".join("%s* & %s" % (result, result,) for result in results) + else: + # This should be unreachable. + raise Exception("Unrecognized database engine") From 036362ede6cadc4d6f289dbcabfc5e06d370a587 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 31 May 2017 18:17:47 +0100 Subject: [PATCH 079/139] Order by if they have profile info --- synapse/storage/user_directory.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index ca2be9daf..79161f274 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -274,14 +274,20 @@ class UserDirectoryStore(SQLBaseStore): ] } """ + search_query = _parse_query(self.database_engine, search_term) + if isinstance(self.database_engine, PostgresEngine): + # We order by rank and then if they have profile info sql = """ SELECT user_id, display_name, avatar_url FROM user_directory_search INNER JOIN user_directory USING (user_id) WHERE vector @@ to_tsquery('english', ?) - ORDER BY ts_rank_cd(vector, to_tsquery('english', ?)) DESC + ORDER BY + ts_rank_cd(vector, to_tsquery('english', ?)) DESC, + display_name IS NULL, + avatar_url IS NULL LIMIT ? """ args = (search_query, search_query, limit + 1,) @@ -291,7 +297,10 @@ class UserDirectoryStore(SQLBaseStore): FROM user_directory_search INNER JOIN user_directory USING (user_id) WHERE value MATCH ? - ORDER BY rank(matchinfo(user_directory)) DESC + ORDER BY + rank(matchinfo(user_directory)) DESC, + display_name IS NULL, + avatar_url IS NULL LIMIT ? 
""" args = (search_query, limit + 1) From 0fe6f3c521498fc92c58c49d8edcc6984471da08 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Jun 2017 11:09:49 +0100 Subject: [PATCH 080/139] Bug fixes and logging - Check if room is public when a user joins before adding to user dir - Fix typo of field name "content.join_rules" -> "content.join_rule" --- synapse/handlers/user_directory.py | 22 +++++++++++++++++++++- synapse/storage/user_directory.py | 2 +- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 7130cc6ee..130ff45ec 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -170,6 +170,8 @@ class UserDirectoyHandler(object): event_id = delta["event_id"] prev_event_id = delta["prev_event_id"] + logger.debug("Handling: %r %r, %s", typ, state_key, event_id) + # For join rule and visibility changes we need to check if the room # may have become public or not and add/remove the users in said room if typ in (EventTypes.RoomHistoryVisibility, EventTypes.JoinRules): @@ -201,7 +203,14 @@ class UserDirectoyHandler(object): yield self._handle_remove_user(room_id, user_id) return + is_public = yield self.store.is_room_world_readable_or_publicly_joinable( + room_id + ) + if change: # The user joined + if not is_public: + return + event = yield self.store.get_event(event_id) profile = ProfileInfo( avatar_url=event.content.get("avatar_url"), @@ -211,7 +220,10 @@ class UserDirectoyHandler(object): yield self._handle_new_user(room_id, state_key, profile) else: # The user left yield self._handle_remove_user(room_id, state_key) + else: + logger.debug("Ignoring irrelevant type: %r", typ) + @defer.inlineCallbacks def _handle_room_publicity_change(self, room_id, prev_event_id, event_id, typ): """Handle a room having potentially changed from/to world_readable/publically joinable. @@ -222,6 +234,8 @@ class UserDirectoyHandler(object): event_id (str|None): The new event after the state change typ (str): Type of the event """ + logger.debug("Handling change for %s", typ) + if typ == EventTypes.RoomHistoryVisibility: change = yield self._get_key_change( prev_event_id, event_id, @@ -231,7 +245,7 @@ class UserDirectoyHandler(object): elif typ == EventTypes.JoinRules: change = yield self._get_key_change( prev_event_id, event_id, - key_name="join_rules", + key_name="join_rule", public_value=JoinRules.PUBLIC, ) else: @@ -239,6 +253,7 @@ class UserDirectoyHandler(object): # If change is None, no change. True => become world_readable/public, # False => was world_readable/public if change is None: + logger.debug("No change") return # There's been a change to or from being world readable. 
@@ -247,6 +262,8 @@ class UserDirectoyHandler(object): room_id ) + logger.debug("Change: %r, is_public: %r", change, is_public) + if change and not is_public: # If we became world readable but room isn't currently public then # we ignore the change @@ -326,6 +343,7 @@ class UserDirectoyHandler(object): event = yield self.store.get_event(event_id, allow_none=True) if not event and not prev_event: + logger.debug("Neither event exists: %r %r", prev_event_id, event_id) defer.returnValue(None) prev_value = None @@ -337,6 +355,8 @@ class UserDirectoyHandler(object): if event: value = event.content.get(key_name, None) + logger.debug("prev_value: %r -> value: %r", prev_value, value) + if value == public_value and prev_value != public_value: defer.returnValue(True) elif value != public_value and prev_value == public_value: diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 79161f274..7323d783a 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -38,7 +38,7 @@ class UserDirectoryStore(SQLBaseStore): if join_rules_id: join_rule_ev = yield self.get_event(join_rules_id, allow_none=True) if join_rule_ev: - if join_rule_ev.content.get("join_rules") == JoinRules.PUBLIC: + if join_rule_ev.content.get("join_rule") == JoinRules.PUBLIC: defer.returnValue(True) hist_vis_id = current_state_ids.get((EventTypes.RoomHistoryVisibility, "")) From 9c7db2491bd051ca733d9556620ba23ceb52918a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Jun 2017 11:36:50 +0100 Subject: [PATCH 081/139] Fix removing users --- synapse/handlers/user_directory.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 130ff45ec..85efd61d3 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -294,6 +294,7 @@ class UserDirectoyHandler(object): yield self.store.add_profiles_to_user_dir(room_id, {user_id: profile}) + @defer.inlineCallbacks def _handle_remove_user(self, room_id, user_id): """Called when we might need to remove user to directory @@ -301,10 +302,13 @@ class UserDirectoyHandler(object): room_id (str): room_id that user left or stopped being public that user_id (str) """ + logger.debug("Maybe removing user %r", user_id) + row = yield self.store.get_user_in_directory(user_id) if not row or row["room_id"] != room_id: # Either the user wasn't in directory or we're still in a room that # is public (i.e. the room_id in the database) + logger.debug("Not removing as row: %r", row) return # XXX: Make this faster? 
@@ -316,6 +320,7 @@ class UserDirectoyHandler(object): if is_public: yield self.store.update_user_in_user_dir(user_id, j_room_id) + logger.debug("Not removing as found other public room: %r", j_room_id) return yield self.store.remove_from_user_dir(user_id) From 59dbb470654ff812975f888a3ec41537916091ab Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Jun 2017 11:41:29 +0100 Subject: [PATCH 082/139] Remove spurious inlineCallbacks --- synapse/storage/user_directory.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 7323d783a..a251aee46 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -127,7 +127,6 @@ class UserDirectoryStore(SQLBaseStore): ) self.get_user_in_directory.invalidate((user_id,)) - @defer.inlineCallbacks def remove_from_user_dir(self, user_id): def _remove_from_user_dir_txn(txn): self._simple_delete_txn( From 8be6fd95a3a0f9d2924650ede4d19c1c22da8cd4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Jun 2017 13:05:39 +0100 Subject: [PATCH 083/139] Check if host is still in room --- synapse/handlers/user_directory.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 85efd61d3..83715e5ff 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -195,6 +195,7 @@ class UserDirectoyHandler(object): room_id, self.server_name, ) if not is_in_room: + logger.debug("Server left room: %r", room_id) # Fetch all the users that we marked as being in user # directory due to being in the room and then check if # need to remove those users or not @@ -202,6 +203,8 @@ class UserDirectoyHandler(object): for user_id in user_ids: yield self._handle_remove_user(room_id, user_id) return + else: + logger.debug("Server is still in room: %r", room_id) is_public = yield self.store.is_room_world_readable_or_publicly_joinable( room_id @@ -288,6 +291,7 @@ class UserDirectoyHandler(object): room_id (str): room_id that user joined or started being public that user_id (str) """ + logger.debug("Adding user to dir, %r", user_id) row = yield self.store.get_user_in_directory(user_id) if row: return @@ -314,6 +318,13 @@ class UserDirectoyHandler(object): # XXX: Make this faster? rooms = yield self.store.get_rooms_for_user(user_id) for j_room_id in rooms: + is_in_room = yield self.state.get_is_host_in_room( + j_room_id, self.server_name, + ) + + if not is_in_room: + continue + is_public = yield self.store.is_room_world_readable_or_publicly_joinable( j_room_id ) From 7233341eac0ad0a25dacf913d4a54d25994ea185 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Jun 2017 13:11:38 +0100 Subject: [PATCH 084/139] Comments --- synapse/handlers/user_directory.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 83715e5ff..a8525fc86 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -346,7 +346,7 @@ class UserDirectoyHandler(object): Returns: None if the field in the events either both match `public_value` - neither do, i.e. there has been no change. + or if neither do, i.e. there has been no change. 
True if it didnt match `public_value` but now does False if it did match `public_value` but now doesn't """ @@ -366,10 +366,10 @@ class UserDirectoyHandler(object): value = None if prev_event: - prev_value = prev_event.content.get(key_name, None) + prev_value = prev_event.content.get(key_name) if event: - value = event.content.get(key_name, None) + value = event.content.get(key_name) logger.debug("prev_value: %r -> value: %r", prev_value, value) From 02a6108235610304b981939bd2c74ae7f36dd929 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Jun 2017 13:16:40 +0100 Subject: [PATCH 085/139] Tweak search query --- synapse/storage/user_directory.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index a251aee46..c2ea26128 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -333,9 +333,9 @@ def _parse_query(database_engine, search_term): results = re.findall(r"([\w\-]+)", search_term, re.UNICODE) if isinstance(database_engine, PostgresEngine): - return " & ".join("%s:* & %s" % (result, result,) for result in results) + return " & ".join("(%s:* | %s)" % (result, result,) for result in results) elif isinstance(database_engine, Sqlite3Engine): - return " & ".join("%s* & %s" % (result, result,) for result in results) + return " & ".join("(%s* | %s)" % (result, result,) for result in results) else: # This should be unreachable. raise Exception("Unrecognized database engine") From d5477c7afd884f200c55a1c6a187983756f49577 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Jun 2017 13:27:28 +0100 Subject: [PATCH 086/139] Tweak search query --- synapse/storage/user_directory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index c2ea26128..4fe30ce72 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -284,7 +284,7 @@ class UserDirectoryStore(SQLBaseStore): INNER JOIN user_directory USING (user_id) WHERE vector @@ to_tsquery('english', ?) ORDER BY - ts_rank_cd(vector, to_tsquery('english', ?)) DESC, + ts_rank_cd(vector, to_tsquery('english', ?), 1) DESC, display_name IS NULL, avatar_url IS NULL LIMIT ? From 21e255a8f1948c2fd298ce2e037d20bdd25f2f69 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Jun 2017 14:50:46 +0100 Subject: [PATCH 087/139] Split the table in two --- synapse/handlers/user_directory.py | 77 +++++++++++++-------- synapse/storage/_base.py | 5 ++ synapse/storage/schema/delta/42/user_dir.py | 10 ++- synapse/storage/user_directory.py | 77 ++++++++++++++++++++- 4 files changed, 138 insertions(+), 31 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index a8525fc86..d795a9f8d 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -50,6 +50,7 @@ class UserDirectoyHandler(object): # When start up for the first time we need to populate the user_directory. 
# This is a set of user_id's we've inserted already self.initially_handled_users = set() + self.initially_handled_users_in_public = set() # The current position in the current_state_delta stream self.pos = None @@ -145,8 +146,6 @@ class UserDirectoyHandler(object): return is_public = yield self.store.is_room_world_readable_or_publicly_joinable(room_id) - if not is_public: - return users_with_profile = yield self.state.get_current_user_in_room(room_id) unhandled_users = set(users_with_profile) - self.initially_handled_users @@ -159,6 +158,13 @@ class UserDirectoyHandler(object): self.initially_handled_users |= unhandled_users + if is_public: + yield self.store.add_users_to_public_room( + room_id, + user_ids=unhandled_users - self.initially_handled_users_in_public + ) + self.initially_handled_users_in_public != unhandled_users + @defer.inlineCallbacks def _handle_deltas(self, deltas): """Called with the state deltas to process @@ -206,14 +212,7 @@ class UserDirectoyHandler(object): else: logger.debug("Server is still in room: %r", room_id) - is_public = yield self.store.is_room_world_readable_or_publicly_joinable( - room_id - ) - if change: # The user joined - if not is_public: - return - event = yield self.store.get_event(event_id) profile = ProfileInfo( avatar_url=event.content.get("avatar_url"), @@ -276,11 +275,13 @@ class UserDirectoyHandler(object): # ignore the change return - users_with_profile = yield self.state.get_current_user_in_room(room_id) - for user_id, profile in users_with_profile.iteritems(): - if change: + if change: + users_with_profile = yield self.state.get_current_user_in_room(room_id) + for user_id, profile in users_with_profile.iteritems(): yield self._handle_new_user(room_id, user_id, profile) - else: + else: + users = yield self.store.get_users_in_public_due_to_room(room_id) + for user_id in users: yield self._handle_remove_user(room_id, user_id) @defer.inlineCallbacks @@ -292,11 +293,21 @@ class UserDirectoyHandler(object): user_id (str) """ logger.debug("Adding user to dir, %r", user_id) + row = yield self.store.get_user_in_directory(user_id) - if row: + if not row: + yield self.store.add_profiles_to_user_dir(room_id, {user_id: profile}) + + is_public = yield self.store.is_room_world_readable_or_publicly_joinable( + room_id + ) + + if not is_public: return - yield self.store.add_profiles_to_user_dir(room_id, {user_id: profile}) + row = yield self.store.get_user_in_public_room(user_id) + if not row: + yield self.store.add_users_to_public_room(room_id, [user_id]) @defer.inlineCallbacks def _handle_remove_user(self, room_id, user_id): @@ -309,15 +320,20 @@ class UserDirectoyHandler(object): logger.debug("Maybe removing user %r", user_id) row = yield self.store.get_user_in_directory(user_id) - if not row or row["room_id"] != room_id: - # Either the user wasn't in directory or we're still in a room that - # is public (i.e. the room_id in the database) - logger.debug("Not removing as row: %r", row) + update_user_dir = row and row["room_id"] == room_id + + row = yield self.store.get_user_in_public_room(user_id) + update_user_in_public = row and row["room_id"] == room_id + + if not update_user_in_public and not update_user_dir: return # XXX: Make this faster? 
rooms = yield self.store.get_rooms_for_user(user_id) for j_room_id in rooms: + if not update_user_in_public and not update_user_dir: + break + is_in_room = yield self.state.get_is_host_in_room( j_room_id, self.server_name, ) @@ -325,16 +341,23 @@ class UserDirectoyHandler(object): if not is_in_room: continue - is_public = yield self.store.is_room_world_readable_or_publicly_joinable( - j_room_id - ) - - if is_public: + if update_user_dir: + update_user_dir = False yield self.store.update_user_in_user_dir(user_id, j_room_id) - logger.debug("Not removing as found other public room: %r", j_room_id) - return - yield self.store.remove_from_user_dir(user_id) + if update_user_in_public: + is_public = yield self.store.is_room_world_readable_or_publicly_joinable( + j_room_id + ) + + if is_public: + yield self.store.update_user_in_public_user_list(user_id, j_room_id) + update_user_in_public = False + + if update_user_dir: + yield self.store.remove_from_user_dir(user_id) + elif update_user_in_public: + yield self.store.remove_from_user_in_public_room(user_id) @defer.inlineCallbacks def _get_key_change(self, prev_event_id, event_id, key_name, public_value): diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 58b73af7d..db816346f 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -425,6 +425,11 @@ class SQLBaseStore(object): txn.execute(sql, vals) + def _simple_insert_many(self, table, values, desc): + return self.runInteraction( + desc, self._simple_insert_many_txn, table, values + ) + @staticmethod def _simple_insert_many_txn(txn, table, values): if not values: diff --git a/synapse/storage/schema/delta/42/user_dir.py b/synapse/storage/schema/delta/42/user_dir.py index c34aa5e7d..ea6a18196 100644 --- a/synapse/storage/schema/delta/42/user_dir.py +++ b/synapse/storage/schema/delta/42/user_dir.py @@ -31,13 +31,21 @@ INSERT INTO user_directory_stream_pos (stream_id) VALUES (null); CREATE TABLE user_directory ( user_id TEXT NOT NULL, - room_id TEXT NOT NULL, -- A room_id that we know is public + room_id TEXT NOT NULL, -- A room_id that we know the user is joined to display_name TEXT, avatar_url TEXT ); CREATE INDEX user_directory_room_idx ON user_directory(room_id); CREATE UNIQUE INDEX user_directory_user_idx ON user_directory(user_id); + +CREATE TABLE users_in_pubic_room ( + user_id TEXT NOT NULL, + room_id TEXT NOT NULL -- A room_id that we know is public +); + +CREATE INDEX users_in_pubic_room_room_idx ON users_in_pubic_room(room_id); +CREATE UNIQUE INDEX users_in_pubic_room_user_idx ON users_in_pubic_room(user_id); """ diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 4fe30ce72..cab0afc5c 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -50,12 +50,34 @@ class UserDirectoryStore(SQLBaseStore): defer.returnValue(False) - def add_profiles_to_user_dir(self, room_id, users_with_profile): - """Add profiles to the user directory + @defer.inlineCallbacks + def add_users_to_public_room(self, room_id, user_ids): + """Add user to the list of users in public rooms Args: room_id (str): A room_id that all users are in that is world_readable or publically joinable + user_ids (list(str)): Users to add + """ + yield self._simple_insert_many( + table="users_in_pubic_room", + values=[ + { + "user_id": user_id, + "room_id": room_id, + } + for user_id in user_ids + ], + desc="add_users_to_public_room" + ) + for user_id in user_ids: + self.get_user_in_public_room.invalidate((user_id,)) + + def 
add_profiles_to_user_dir(self, room_id, users_with_profile): + """Add profiles to the user directory + + Args: + room_id (str): A room_id that all users are joined to users_with_profile (dict): Users to add to directory in the form of mapping of user_id -> ProfileInfo """ @@ -125,7 +147,15 @@ class UserDirectoryStore(SQLBaseStore): updatevalues={"room_id": room_id}, desc="update_user_in_user_dir", ) - self.get_user_in_directory.invalidate((user_id,)) + + @defer.inlineCallbacks + def update_user_in_public_user_list(self, user_id, room_id): + yield self._simple_update_one( + table="users_in_pubic_room", + keyvalues={"user_id": user_id}, + updatevalues={"room_id": room_id}, + desc="update_user_in_public_user_list", + ) def remove_from_user_dir(self, user_id): def _remove_from_user_dir_txn(txn): @@ -139,13 +169,41 @@ class UserDirectoryStore(SQLBaseStore): table="user_directory_search", keyvalues={"user_id": user_id}, ) + self._simple_delete_txn( + txn, + table="users_in_pubic_room", + keyvalues={"user_id": user_id}, + ) txn.call_after( self.get_user_in_directory.invalidate, (user_id,) ) + txn.call_after( + self.get_user_in_public_room.invalidate, (user_id,) + ) return self.runInteraction( "remove_from_user_dir", _remove_from_user_dir_txn, ) + @defer.inlineCallbacks + def remove_from_user_in_public_room(self, user_id): + yield self._simple_delete( + table="users_in_pubic_room", + keyvalues={"user_id": user_id}, + desc="remove_from_user_in_public_room", + ) + self.get_user_in_public_room.invalidate((user_id,)) + + def get_users_in_public_due_to_room(self, room_id): + """Get all user_ids that are in the room directory becuase they're + in the given room_id + """ + return self._simple_select_onecol( + table="users_in_pubic_room", + keyvalues={"room_id": room_id}, + retcol="user_id", + desc="get_users_in_public_due_to_room", + ) + def get_users_in_dir_due_to_room(self, room_id): """Get all user_ids that are in the room directory becuase they're in the given room_id @@ -173,6 +231,7 @@ class UserDirectoryStore(SQLBaseStore): def _delete_all_from_user_dir_txn(txn): txn.execute("DELETE FROM user_directory") txn.execute("DELETE FROM user_directory_search") + txn.execute("DELETE FROM users_in_pubic_room") txn.call_after(self.get_user_in_directory.invalidate_all) return self.runInteraction( "delete_all_from_user_dir", _delete_all_from_user_dir_txn @@ -188,6 +247,16 @@ class UserDirectoryStore(SQLBaseStore): desc="get_user_in_directory", ) + @cached() + def get_user_in_public_room(self, user_id): + return self._simple_select_one( + table="users_in_pubic_room", + keyvalues={"user_id": user_id}, + retcols=("room_id",), + allow_none=True, + desc="get_user_in_public_room", + ) + def get_user_directory_stream_pos(self): return self._simple_select_one_onecol( table="user_directory_stream_pos", @@ -282,6 +351,7 @@ class UserDirectoryStore(SQLBaseStore): SELECT user_id, display_name, avatar_url FROM user_directory_search INNER JOIN user_directory USING (user_id) + INNER JOIN users_in_pubic_room USING (user_id) WHERE vector @@ to_tsquery('english', ?) ORDER BY ts_rank_cd(vector, to_tsquery('english', ?), 1) DESC, @@ -295,6 +365,7 @@ class UserDirectoryStore(SQLBaseStore): SELECT user_id, display_name, avatar_url FROM user_directory_search INNER JOIN user_directory USING (user_id) + INNER JOIN users_in_pubic_room USING (user_id) WHERE value MATCH ? 
ORDER BY rank(matchinfo(user_directory)) DESC, From 4d039aa2ca78730ca5f8bb9043ab75328004d7a1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Jun 2017 14:58:48 +0100 Subject: [PATCH 088/139] Fix sqlite --- synapse/storage/user_directory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index cab0afc5c..bcf24fa4d 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -368,7 +368,7 @@ class UserDirectoryStore(SQLBaseStore): INNER JOIN users_in_pubic_room USING (user_id) WHERE value MATCH ? ORDER BY - rank(matchinfo(user_directory)) DESC, + rank(matchinfo(user_directory_search)) DESC, display_name IS NULL, avatar_url IS NULL LIMIT ? From 1a01af079e97d25bbac46127b8bd069189c02c97 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Jun 2017 15:39:51 +0100 Subject: [PATCH 089/139] Handle profile updates in user directory --- synapse/handlers/user_directory.py | 25 +++++++++++++++ synapse/storage/user_directory.py | 49 ++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index d795a9f8d..0182cf86d 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -192,6 +192,8 @@ class UserDirectoyHandler(object): ) if change is None: + # Handle any profile changes + yield self._handle_profile_change(state_key, prev_event_id, event_id) continue if not change: @@ -359,6 +361,29 @@ class UserDirectoyHandler(object): elif update_user_in_public: yield self.store.remove_from_user_in_public_room(user_id) + @defer.inlineCallbacks + def _handle_profile_change(self, user_id, prev_event_id, event_id): + """Check member event changes for any profile changes and update the + database if there are. 
+ """ + if not prev_event_id or not event_id: + return + + prev_event = yield self.store.get_event(prev_event_id) + event = yield self.store.get_event(event_id) + + if event.membership != Membership.JOIN: + return + + prev_name = prev_event.content.get("displayname") + new_name = event.content.get("displayname") + + prev_avatar = prev_event.content.get("avatar_url") + new_avatar = event.content.get("avatar_url") + + if prev_name != new_name or prev_avatar != new_avatar: + yield self.store.update_profile_in_user_dir(user_id, new_name, new_avatar) + @defer.inlineCallbacks def _get_key_change(self, prev_event_id, event_id, key_name, public_value): """Given two events check if the `key_name` field in content changed diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index bcf24fa4d..6a4bf63f0 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -147,6 +147,53 @@ class UserDirectoryStore(SQLBaseStore): updatevalues={"room_id": room_id}, desc="update_user_in_user_dir", ) + self.get_user_in_directory.invalidate((user_id,)) + + def update_profile_in_user_dir(self, user_id, display_name, avatar_url): + def _update_profile_in_user_dir_txn(txn): + self._simple_update_one_txn( + txn, + table="user_directory", + keyvalues={"user_id": user_id}, + updatevalues={"display_name": display_name, "avatar_url": avatar_url}, + ) + + if isinstance(self.database_engine, PostgresEngine): + # We weight the loclpart most highly, then display name and finally + # server name + sql = """ + UPDATE user_directory_search + SET vector = setweight(to_tsvector('english', ?), 'A') + || setweight(to_tsvector('english', ?), 'D') + || setweight(to_tsvector('english', COALESCE(?, '')), 'B') + WHERE user_id = ? + """ + args = ( + get_localpart_from_id(user_id), get_domain_from_id(user_id), + display_name, + user_id, + ) + elif isinstance(self.database_engine, Sqlite3Engine): + sql = """ + UPDATE user_directory_search + set value = ? + WHERE user_id = ? + """ + args = ( + "%s %s" % (user_id, display_name,) if display_name else user_id, + user_id, + ) + else: + # This should be unreachable. 
+ raise Exception("Unrecognized database engine") + + txn.execute(sql, args) + + txn.call_after(self.get_user_in_directory.invalidate, (user_id,)) + + return self.runInteraction( + "update_profile_in_user_dir", _update_profile_in_user_dir_txn + ) @defer.inlineCallbacks def update_user_in_public_user_list(self, user_id, room_id): @@ -156,6 +203,7 @@ class UserDirectoryStore(SQLBaseStore): updatevalues={"room_id": room_id}, desc="update_user_in_public_user_list", ) + self.get_user_in_public_room.invalidate((user_id,)) def remove_from_user_dir(self, user_id): def _remove_from_user_dir_txn(txn): @@ -233,6 +281,7 @@ class UserDirectoryStore(SQLBaseStore): txn.execute("DELETE FROM user_directory_search") txn.execute("DELETE FROM users_in_pubic_room") txn.call_after(self.get_user_in_directory.invalidate_all) + txn.call_after(self.get_user_in_public_room.invalidate_all) return self.runInteraction( "delete_all_from_user_dir", _delete_all_from_user_dir_txn ) From 66b121b2fcc159065eb59dd577f8cadb74da7a9e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 6 Jun 2017 11:46:38 +0100 Subject: [PATCH 090/139] Fix wrong number of arguments --- synapse/push/pusher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/push/pusher.py b/synapse/push/pusher.py index 9385c80ce..f2c8eb870 100644 --- a/synapse/push/pusher.py +++ b/synapse/push/pusher.py @@ -57,7 +57,7 @@ class PusherFactory(object): logger.info("found pusher") return self.pusher_types[pusherdict['kind']](self.hs, pusherdict) - def _create_email_pusher(self, pusherdict): + def _create_email_pusher(self, _hs, pusherdict): app_name = self._brand_from_pusherdict mailer = self.mailers.get(app_name) if not mailer: From cca94272fa9dc47f23585e3a33484384734c62bc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 6 Jun 2017 11:50:07 +0100 Subject: [PATCH 091/139] Fix typo when getting app name --- synapse/push/pusher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/push/pusher.py b/synapse/push/pusher.py index f2c8eb870..491f27bde 100644 --- a/synapse/push/pusher.py +++ b/synapse/push/pusher.py @@ -58,7 +58,7 @@ class PusherFactory(object): return self.pusher_types[pusherdict['kind']](self.hs, pusherdict) def _create_email_pusher(self, _hs, pusherdict): - app_name = self._brand_from_pusherdict + app_name = self._app_name_from_pusherdict(pusherdict) mailer = self.mailers.get(app_name) if not mailer: mailer = Mailer( From 6f83c4537cd6b0eeef2cfa735df085bf70228131 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 7 Jun 2017 10:18:44 +0100 Subject: [PATCH 092/139] Increase size of IP cache --- synapse/storage/client_ips.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 747d2df62..014ab635b 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -20,6 +20,8 @@ from twisted.internet import defer from ._base import Cache from . import background_updates +import os + logger = logging.getLogger(__name__) # Number of msec of granularity to store the user IP 'last seen' time. 
Smaller @@ -28,12 +30,15 @@ logger = logging.getLogger(__name__) LAST_SEEN_GRANULARITY = 120 * 1000 +CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.1)) + + class ClientIpStore(background_updates.BackgroundUpdateStore): def __init__(self, hs): self.client_ip_last_seen = Cache( name="client_ip_last_seen", keylen=4, - max_entries=5000, + max_entries=50000 * CACHE_SIZE_FACTOR, ) super(ClientIpStore, self).__init__(hs) From 65f0513a3306d21aa5e6959b21642ba15dbdcad5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 7 Jun 2017 11:02:38 +0100 Subject: [PATCH 093/139] Split up device_lists_outbound_pokes table for faster updates. --- synapse/storage/devices.py | 82 ++++++------------- .../schema/delta/42/device_list_last_id.sql | 33 ++++++++ 2 files changed, 57 insertions(+), 58 deletions(-) create mode 100644 synapse/storage/schema/delta/42/device_list_last_id.sql diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index d9936c88b..77b02c8a2 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -37,10 +37,6 @@ class DeviceStore(SQLBaseStore): max_entries=10000, ) - self._clock.looping_call( - self._prune_old_outbound_device_pokes, 60 * 60 * 1000 - ) - self.register_background_index_update( "device_lists_stream_idx", index_name="device_lists_stream_user_id", @@ -368,7 +364,7 @@ class DeviceStore(SQLBaseStore): prev_sent_id_sql = """ SELECT coalesce(max(stream_id), 0) as stream_id - FROM device_lists_outbound_pokes + FROM device_lists_outbound_last_success WHERE destination = ? AND user_id = ? AND stream_id <= ? """ @@ -510,32 +506,43 @@ class DeviceStore(SQLBaseStore): ) def _mark_as_sent_devices_by_remote_txn(self, txn, destination, stream_id): - # First we DELETE all rows such that only the latest row for each - # (destination, user_id is left. We do this by selecting first and - # deleting. + # We update the device_lists_outbound_last_success with the successfully + # poked users. We do the join to see which users need to be inserted and + # which updated. sql = """ - SELECT user_id, coalesce(max(stream_id), 0) FROM device_lists_outbound_pokes - WHERE destination = ? AND stream_id <= ? + SELECT user_id, coalesce(max(o.stream_id), 0), (max(s.stream_id) IS NOT NULL) + FROM device_lists_outbound_pokes as o + LEFT JOIN device_lists_outbound_last_success as s + USING (destination, user_id) + WHERE destination = ? AND o.stream_id <= ? GROUP BY user_id - HAVING count(*) > 1 """ txn.execute(sql, (destination, stream_id,)) rows = txn.fetchall() sql = """ - DELETE FROM device_lists_outbound_pokes - WHERE destination = ? AND user_id = ? AND stream_id < ? + UPDATE device_lists_outbound_last_success + SET stream_id = ? + WHERE destination = ? AND user_id = ? """ txn.executemany( - sql, ((destination, row[0], row[1],) for row in rows) + sql, ((row[1], destination, row[0],) for row in rows if row[2]) ) - # Mark everything that is left as sent sql = """ - UPDATE device_lists_outbound_pokes SET sent = ? + INSERT INTO device_lists_outbound_last_success + (destination, user_id, stream_id) VALUES (?, ?, ?) + """ + txn.executemany( + sql, ((destination, row[0], row[1],) for row in rows if not row[2]) + ) + + # Delete all sent outbound pokes + sql = """ + DELETE FROM device_lists_outbound_pokes WHERE destination = ? AND stream_id <= ? 
""" - txn.execute(sql, (True, destination, stream_id,)) + txn.execute(sql, (destination, stream_id,)) @defer.inlineCallbacks def get_user_whose_devices_changed(self, from_key): @@ -634,44 +641,3 @@ class DeviceStore(SQLBaseStore): def get_device_stream_token(self): return self._device_list_id_gen.get_current_token() - - def _prune_old_outbound_device_pokes(self): - """Delete old entries out of the device_lists_outbound_pokes to ensure - that we don't fill up due to dead servers. We keep one entry per - (destination, user_id) tuple to ensure that the prev_ids remain correct - if the server does come back. - """ - yesterday = self._clock.time_msec() - 24 * 60 * 60 * 1000 - - def _prune_txn(txn): - select_sql = """ - SELECT destination, user_id, max(stream_id) as stream_id - FROM device_lists_outbound_pokes - GROUP BY destination, user_id - HAVING min(ts) < ? AND count(*) > 1 - """ - - txn.execute(select_sql, (yesterday,)) - rows = txn.fetchall() - - if not rows: - return - - delete_sql = """ - DELETE FROM device_lists_outbound_pokes - WHERE ts < ? AND destination = ? AND user_id = ? AND stream_id < ? - """ - - txn.executemany( - delete_sql, - ( - (yesterday, row[0], row[1], row[2]) - for row in rows - ) - ) - - logger.info("Pruned %d device list outbound pokes", txn.rowcount) - - return self.runInteraction( - "_prune_old_outbound_device_pokes", _prune_txn - ) diff --git a/synapse/storage/schema/delta/42/device_list_last_id.sql b/synapse/storage/schema/delta/42/device_list_last_id.sql new file mode 100644 index 000000000..9ab8c14fa --- /dev/null +++ b/synapse/storage/schema/delta/42/device_list_last_id.sql @@ -0,0 +1,33 @@ +/* Copyright 2017 Vector Creations Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +-- Table of last stream_id that we sent to destination for user_id. This is +-- used to fill out the `prev_id` fields of outbound device list updates. 
+CREATE TABLE device_lists_outbound_last_success ( + destination TEXT NOT NULL, + user_id TEXT NOT NULL, + stream_id BIGINT NOT NULL +); + +INSERT INTO device_lists_outbound_last_success + SELECT destination, user_id, coalesce(max(stream_id), 0) as stream_id + FROM device_lists_outbound_pokes + WHERE sent = (1 = 1) -- sqlite doesn't have inbuilt boolean values + GROUP BY destination, user_id; + +CREATE INDEX device_lists_outbound_last_success_idx ON device_lists_outbound_last_success( + destination, user_id, stream_id +); From 6e2a7ee1bc7f576592573de2300a3a2e0f82e001 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 7 Jun 2017 11:05:17 +0100 Subject: [PATCH 094/139] Remove spurious log lines --- synapse/federation/transaction_queue.py | 1 - synapse/state.py | 2 -- synapse/storage/roommember.py | 1 - 3 files changed, 4 deletions(-) diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 4c25ef110..003eaba89 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -188,7 +188,6 @@ class TransactionQueue(object): ], ) destinations = set(destinations) - logger.info("destinations: %r", destinations) if send_on_behalf_of is not None: # If we are sending the event on behalf of another server diff --git a/synapse/state.py b/synapse/state.py index 5fbe0a097..d1b1a70a9 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -179,9 +179,7 @@ class StateHandler(object): latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) logger.debug("calling resolve_state_groups from get_current_hosts_in_room") entry = yield self.resolve_state_groups(room_id, latest_event_ids) - logger.info("State: %r", entry.state_group) joined_hosts = yield self.store.get_joined_hosts(room_id, entry) - logger.info("returning: %r", joined_hosts) defer.returnValue(joined_hosts) @defer.inlineCallbacks diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 0e9e71f60..7155bfdc6 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -560,7 +560,6 @@ class RoomMemberStore(SQLBaseStore): cache = self._get_joined_hosts_cache(room_id) joined_hosts = yield cache.get_destinations(state_entry) - logger.info("returning: %r", joined_hosts) defer.returnValue(joined_hosts) From 6ba21bf2b8207e714d0d523379727bcf2c14684d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 7 Jun 2017 11:08:36 +0100 Subject: [PATCH 095/139] Comments --- synapse/storage/roommember.py | 9 +++++++++ synapse/storage/state.py | 6 ++++++ 2 files changed, 15 insertions(+) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 7155bfdc6..8656455f6 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -644,6 +644,10 @@ class RoomMemberStore(SQLBaseStore): class _JoinedHostsCache(object): + """Cache for joined hosts in a room that is optimised to handle updates + via state deltas. 
+ """ + def __init__(self, store, room_id): self.store = store self.room_id = room_id @@ -658,6 +662,11 @@ class _JoinedHostsCache(object): @defer.inlineCallbacks def get_destinations(self, state_entry): + """Get set of destinations for a state entry + + Args: + state_entry(synapse.state._StateCacheEntry) + """ if state_entry.state_group == self.state_group: defer.returnValue(frozenset(self.hosts_to_joined_users)) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 01474ff5f..c3eecbe82 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -99,6 +99,12 @@ class StateStore(SQLBaseStore): ) def get_state_group_delta(self, state_group): + """Given a state group try to return a previous group and a delta between + the old and the new. + + Returns: + (prev_group, delta_ids), where both may be None. + """ def _get_state_group_delta_txn(txn): prev_group = self._simple_select_one_onecol_txn( txn, From 1a81a1898e3ce1cae7e4a96183b6f404231bceab Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 7 Jun 2017 11:16:56 +0100 Subject: [PATCH 096/139] Keep pruning background task --- synapse/storage/devices.py | 45 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index 77b02c8a2..83b1d2eeb 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -37,6 +37,10 @@ class DeviceStore(SQLBaseStore): max_entries=10000, ) + self._clock.looping_call( + self._prune_old_outbound_device_pokes, 60 * 60 * 1000 + ) + self.register_background_index_update( "device_lists_stream_idx", index_name="device_lists_stream_user_id", @@ -641,3 +645,44 @@ class DeviceStore(SQLBaseStore): def get_device_stream_token(self): return self._device_list_id_gen.get_current_token() + + def _prune_old_outbound_device_pokes(self): + """Delete old entries out of the device_lists_outbound_pokes to ensure + that we don't fill up due to dead servers. We keep one entry per + (destination, user_id) tuple to ensure that the prev_ids remain correct + if the server does come back. + """ + yesterday = self._clock.time_msec() - 24 * 60 * 60 * 1000 + + def _prune_txn(txn): + select_sql = """ + SELECT destination, user_id, max(stream_id) as stream_id + FROM device_lists_outbound_pokes + GROUP BY destination, user_id + HAVING min(ts) < ? AND count(*) > 1 + """ + + txn.execute(select_sql, (yesterday,)) + rows = txn.fetchall() + + if not rows: + return + + delete_sql = """ + DELETE FROM device_lists_outbound_pokes + WHERE ts < ? AND destination = ? AND user_id = ? AND stream_id < ? + """ + + txn.executemany( + delete_sql, + ( + (yesterday, row[0], row[1], row[2]) + for row in rows + ) + ) + + logger.info("Pruned %d device list outbound pokes", txn.rowcount) + + return self.runInteraction( + "_prune_old_outbound_device_pokes", _prune_txn + ) From 64ed74c01efa035a2c9d95c97b1bf1b1f1c83ff6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 7 Jun 2017 11:20:47 +0100 Subject: [PATCH 097/139] When pruning, delete from device_lists_outbound_last_success --- synapse/storage/devices.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index 83b1d2eeb..bb27fd1f7 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -681,6 +681,14 @@ class DeviceStore(SQLBaseStore): ) ) + # Since we've deleted unsent deltas, we need to remove the entry + # of last successful sent so that the prev_ids are correctly set. 
+ sql = """ + DELETE FROM device_lists_outbound_last_success + WHERE destination = ? AND user_id = ? + """ + txn.executemany(sql, ((row[0], row[1]) for row in rows)) + logger.info("Pruned %d device list outbound pokes", txn.rowcount) return self.runInteraction( From 2f34ad31ac1e5bdc1ddc38ee3f5149bcc4c6e663 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 7 Jun 2017 11:50:44 +0100 Subject: [PATCH 098/139] Add some logging to user directory --- synapse/handlers/user_directory.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 0182cf86d..4e568de8c 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -130,8 +130,15 @@ class UserDirectoyHandler(object): # We process by going through each existing room at a time. room_ids = yield self.store.get_all_rooms() + logger.info("Doing initial update of user directory. %d rooms", len(room_ids)) + num_processed_rooms = 1 + for room_id in room_ids: + logger.info("Handling room %d/%d", num_processed_rooms, len(room_ids)) yield self._handle_intial_room(room_id) + num_processed_rooms += 1 + + logger.info("Processed all rooms.") self.initially_handled_users = None From ecdd2a36583ef822cbdf0d9aba4a3b2007713cb5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 7 Jun 2017 12:02:53 +0100 Subject: [PATCH 099/139] Don't start user_directory handling on workers --- synapse/handlers/user_directory.py | 3 +++ synapse/notifier.py | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 4e568de8c..43eb1c78e 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -46,6 +46,9 @@ class UserDirectoyHandler(object): self.state = hs.get_state_handler() self.server_name = hs.hostname self.clock = hs.get_clock() + self.notifier = hs.get_notifier() + + self.notifier.add_replication_callback(self.notify_new_event) # When start up for the first time we need to populate the user_directory. # This is a set of user_id's we've inserted already diff --git a/synapse/notifier.py b/synapse/notifier.py index 6b1709d70..385208b57 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -167,7 +167,6 @@ class Notifier(object): self.clock = hs.get_clock() self.appservice_handler = hs.get_application_service_handler() - self.user_directory_handler = hs.get_user_directory_handler() if hs.should_send_federation(): self.federation_sender = hs.get_federation_sender() @@ -255,8 +254,6 @@ class Notifier(object): room_stream_id ) - preserve_fn(self.user_directory_handler.notify_new_event)() - if self.federation_sender: preserve_fn(self.federation_sender.notify_new_events)( room_stream_id From 197bd126f09b0df42b2cbb0bd7e121b04ab9d670 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 7 Jun 2017 17:34:20 +0100 Subject: [PATCH 100/139] Fix bug where state_group tables got corrupted This is due to the fact that we prefilled caches using txn.call_after, which always gets called including on error. We fix this by making txn.call_after only fire when a transaction completes successfully, which is what we want most of the time anyway. 
--- synapse/storage/__init__.py | 3 ++- synapse/storage/_base.py | 29 +++++++++++++++++++++-------- synapse/storage/events.py | 2 +- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index d604e7668..349f96e24 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -225,7 +225,8 @@ class DataStore(RoomMemberStore, RoomStore, db_conn.cursor(), name="_find_stream_orderings_for_times_txn", database_engine=self.database_engine, - after_callbacks=[] + after_callbacks=[], + final_callbacks=[], ) self._find_stream_orderings_for_times_txn(cur) cur.close() diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 58b73af7d..f214b9d4c 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -52,13 +52,17 @@ class LoggingTransaction(object): """An object that almost-transparently proxies for the 'txn' object passed to the constructor. Adds logging and metrics to the .execute() method.""" - __slots__ = ["txn", "name", "database_engine", "after_callbacks"] + __slots__ = [ + "txn", "name", "database_engine", "after_callbacks", "final_callbacks", + ] - def __init__(self, txn, name, database_engine, after_callbacks): + def __init__(self, txn, name, database_engine, after_callbacks, + final_callbacks): object.__setattr__(self, "txn", txn) object.__setattr__(self, "name", name) object.__setattr__(self, "database_engine", database_engine) object.__setattr__(self, "after_callbacks", after_callbacks) + object.__setattr__(self, "final_callbacks", final_callbacks) def call_after(self, callback, *args, **kwargs): """Call the given callback on the main twisted thread after the @@ -67,6 +71,9 @@ class LoggingTransaction(object): """ self.after_callbacks.append((callback, args, kwargs)) + def call_finally(self, callback, *args, **kwargs): + self.final_callbacks.append((callback, args, kwargs)) + def __getattr__(self, name): return getattr(self.txn, name) @@ -217,8 +224,8 @@ class SQLBaseStore(object): self._clock.looping_call(loop, 10000) - def _new_transaction(self, conn, desc, after_callbacks, logging_context, - func, *args, **kwargs): + def _new_transaction(self, conn, desc, after_callbacks, final_callbacks, + logging_context, func, *args, **kwargs): start = time.time() * 1000 txn_id = self._TXN_ID @@ -237,7 +244,8 @@ class SQLBaseStore(object): try: txn = conn.cursor() txn = LoggingTransaction( - txn, name, self.database_engine, after_callbacks + txn, name, self.database_engine, after_callbacks, + final_callbacks, ) r = func(txn, *args, **kwargs) conn.commit() @@ -298,6 +306,7 @@ class SQLBaseStore(object): start_time = time.time() * 1000 after_callbacks = [] + final_callbacks = [] def inner_func(conn, *args, **kwargs): with LoggingContext("runInteraction") as context: @@ -309,7 +318,7 @@ class SQLBaseStore(object): current_context.copy_to(context) return self._new_transaction( - conn, desc, after_callbacks, current_context, + conn, desc, after_callbacks, final_callbacks, current_context, func, *args, **kwargs ) @@ -318,9 +327,13 @@ class SQLBaseStore(object): result = yield self._db_pool.runWithConnection( inner_func, *args, **kwargs ) - finally: + for after_callback, after_args, after_kwargs in after_callbacks: after_callback(*after_args, **after_kwargs) + finally: + for after_callback, after_args, after_kwargs in final_callbacks: + after_callback(*after_args, **after_kwargs) + defer.returnValue(result) @defer.inlineCallbacks @@ -936,7 +949,7 @@ class SQLBaseStore(object): # __exit__ called 
after the transaction finishes. ctx = self._cache_id_gen.get_next() stream_id = ctx.__enter__() - txn.call_after(ctx.__exit__, None, None, None) + txn.call_finally(ctx.__exit__, None, None, None) txn.call_after(self.hs.get_notifier().on_new_replication_data) self._simple_insert_txn( diff --git a/synapse/storage/events.py b/synapse/storage/events.py index c4aeb4880..73283eb4c 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1419,7 +1419,7 @@ class EventsStore(SQLBaseStore): ] rows = self._new_transaction( - conn, "do_fetch", [], None, self._fetch_event_rows, event_ids + conn, "do_fetch", [], [], None, self._fetch_event_rows, event_ids ) row_dict = { From ea11ee09f31ce239120aceaf11b38df3cd94cf69 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 8 Jun 2017 11:59:57 +0100 Subject: [PATCH 101/139] Ensure we don't use unpersisted state group as prev group --- synapse/state.py | 13 ++++++------- synapse/storage/state.py | 13 +++++++++++++ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/synapse/state.py b/synapse/state.py index a98145598..576eb6b78 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -273,7 +273,8 @@ class StateHandler(object): } elif entry.prev_group: context.prev_group = entry.prev_group - context.delta_ids = entry.delta_ids + context.delta_ids = dict(entry.delta_ids) + context.delta_ids[key] = event.event_id else: if entry.state_group is None: entry.state_group = self.store.get_next_state_group() @@ -364,12 +365,10 @@ class StateHandler(object): if new_state_event_ids == frozenset(e_id for e_id in events): state_group = sg break - if state_group is None: - # Worker instances don't have access to this method, but we want - # to set the state_group on the main instance to increase cache - # hits. - if hasattr(self.store, "get_next_state_group"): - state_group = self.store.get_next_state_group() + + # TODO: We want to create a state group for this set of events, to + # increase cache hits, but we need to make sure that it doesn't + # end up as a prev_group without being added to the database prev_group = None delta_ids = None diff --git a/synapse/storage/state.py b/synapse/storage/state.py index c3eecbe82..151223219 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -223,6 +223,19 @@ class StateStore(SQLBaseStore): # We persist as a delta if we can, while also ensuring the chain # of deltas isn't tooo long, as otherwise read performance degrades. 
if context.prev_group: + is_in_db = self._simple_select_one_onecol_txn( + txn, + table="state_groups", + keyvalues={"id": context.prev_group}, + retcol="id", + allow_none=True, + ) + if not is_in_db: + raise Exception( + "Trying to persist state with unpersisted prev_group: %r" + % (context.prev_group,) + ) + potential_hops = self._count_state_group_hops_txn( txn, context.prev_group ) From 7132e5cdfff19ce1d4dd1ca5f16672a610b60328 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 9 Jun 2017 10:08:18 +0100 Subject: [PATCH 102/139] Mark remote invite rejections as outliers --- synapse/handlers/federation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index a333acc4a..a72c92c62 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1102,6 +1102,7 @@ class FederationHandler(BaseHandler): user_id, "leave" ) + event.internal_metadata.outlier = True event = self._sign_event(event) # Try the host that we succesfully called /make_leave/ on first for From 0185b75381c160edaf2bec0b9f4def0bb0d67a02 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 9 Jun 2017 10:52:26 +0100 Subject: [PATCH 103/139] Change is_host_joined to use current_state table This bypasses a bug where using the state groups to figure out if a host is in a room sometimes errors if the servers isn't in the room. (For example when the server rejected an invite to a remote room) --- synapse/api/auth.py | 13 ++------ synapse/handlers/user_directory.py | 6 ++-- synapse/state.py | 11 ------- synapse/storage/roommember.py | 52 +++++++++++++----------------- 4 files changed, 28 insertions(+), 54 deletions(-) diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 9dbc7993d..0c297cb02 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -144,17 +144,8 @@ class Auth(object): @defer.inlineCallbacks def check_host_in_room(self, room_id, host): with Measure(self.clock, "check_host_in_room"): - latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) - - logger.debug("calling resolve_state_groups from check_host_in_room") - entry = yield self.state.resolve_state_groups( - room_id, latest_event_ids - ) - - ret = yield self.store.is_host_joined( - room_id, host, entry.state_group, entry.state - ) - defer.returnValue(ret) + latest_event_ids = yield self.store.is_host_joined(room_id, host) + defer.returnValue(latest_event_ids) def _check_joined_room(self, member, user_id, room_id): if not member or member.membership != Membership.JOIN: diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 43eb1c78e..02b720b65 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -151,7 +151,7 @@ class UserDirectoyHandler(object): def _handle_intial_room(self, room_id): """Called when we initially fill out user_directory one room at a time """ - is_in_room = yield self.state.get_is_host_in_room(room_id, self.server_name) + is_in_room = yield self.store.is_host_joined(room_id, self.server_name) if not is_in_room: return @@ -209,7 +209,7 @@ class UserDirectoyHandler(object): if not change: # Need to check if the server left the room entirely, if so # we might need to remove all the users in that room - is_in_room = yield self.state.get_is_host_in_room( + is_in_room = yield self.store.is_host_joined( room_id, self.server_name, ) if not is_in_room: @@ -346,7 +346,7 @@ class UserDirectoyHandler(object): if not update_user_in_public and not update_user_dir: 
break - is_in_room = yield self.state.get_is_host_in_room( + is_in_room = yield self.store.is_host_joined( j_room_id, self.server_name, ) diff --git a/synapse/state.py b/synapse/state.py index 576eb6b78..5b386e318 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -182,17 +182,6 @@ class StateHandler(object): joined_hosts = yield self.store.get_joined_hosts(room_id, entry) defer.returnValue(joined_hosts) - @defer.inlineCallbacks - def get_is_host_in_room(self, room_id, host, latest_event_ids=None): - if not latest_event_ids: - latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) - logger.debug("calling resolve_state_groups from get_is_host_in_room") - entry = yield self.resolve_state_groups(room_id, latest_event_ids) - is_host_joined = yield self.store.is_host_joined( - room_id, host, entry.state_id, entry.state - ) - defer.returnValue(is_host_joined) - @defer.inlineCallbacks def compute_event_context(self, event, old_state=None): """Build an EventContext structure for the event. diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 8656455f6..447c6364e 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -501,40 +501,34 @@ class RoomMemberStore(SQLBaseStore): defer.returnValue(users_in_room) - def is_host_joined(self, room_id, host, state_group, state_ids): - if not state_group: - # If state_group is None it means it has yet to be assigned a - # state group, i.e. we need to make sure that calls with a state_group - # of None don't hit previous cached calls with a None state_group. - # To do this we set the state_group to a new object as object() != object() - state_group = object() + @defer.inlineCallbacks + def is_host_joined(self, room_id, host): + if '%' in host or '_' in host: + raise Exception("Invalid host name") - return self._is_host_joined( - room_id, host, state_group, state_ids - ) + sql = """ + SELECT state_key FROM current_state_events + INNER JOIN room_memberships USING (room_id, event_id) + WHERE membership = 'join' AND room_id = ? AND state_key LIKE ? + LIMIT 1 + """ - @cachedInlineCallbacks(num_args=3) - def _is_host_joined(self, room_id, host, state_group, current_state_ids): - # We don't use `state_group`, its there so that we can cache based - # on it. However, its important that its never None, since two current_state's - # with a state_group of None are likely to be different. - # See bulk_get_push_rules_for_room for how we work around this. - assert state_group is not None + # We do need to be careful to ensure that host doesn't have any wild cards + # in it, but we checked above for known ones and we'll check below that + # the returned user actually has the correct domain. 
+        like_clause = "%:" + host
+
+        rows = yield self._execute("is_host_joined", None, sql, room_id, like_clause)
+
+        if not rows:
+            defer.returnValue(False)
+
+        user_id = rows[0][0]
+        if get_domain_from_id(user_id) != host:
+            # This can only happen if the host name has something funky in it
+            raise Exception("Invalid host name")
+
+        defer.returnValue(True)

    def get_joined_hosts(self, room_id, state_entry):
        state_group = state_entry.state_group

From 298d83b34053a45beaa5ad1d202b3ca8e3b1bafe Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Fri, 9 Jun 2017 11:01:28 +0100
Subject: [PATCH 104/139] Fix replication

---
 synapse/replication/slave/storage/events.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py
index 6cd3a843d..3f33d473c 100644
--- a/synapse/replication/slave/storage/events.py
+++ b/synapse/replication/slave/storage/events.py
@@ -154,7 +154,6 @@ class SlavedEventStore(BaseSlavedStore):
         DataStore.get_room_events_stream_for_rooms.__func__
     )
     is_host_joined = DataStore.is_host_joined.__func__
-    _is_host_joined = RoomMemberStore.__dict__["_is_host_joined"]
     get_stream_token_for_event = DataStore.get_stream_token_for_event.__func__

    _set_before_and_after = staticmethod(DataStore._set_before_and_after)

From 2cac7623a5adfa84df6bd57d74d954daba3cc149 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Fri, 9 Jun 2017 11:24:41 +0100
Subject: [PATCH 105/139] Add missing notifier

---
 synapse/replication/tcp/resource.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py
index 8b2c4c304..69c46911e 100644
--- a/synapse/replication/tcp/resource.py
+++ b/synapse/replication/tcp/resource.py
@@ -67,6 +67,7 @@ class ReplicationStreamer(object):
         self.store = hs.get_datastore()
         self.presence_handler = hs.get_presence_handler()
         self.clock = hs.get_clock()
+        self.notifier = hs.get_notifier()

         # Current connections.
         self.connections = []
@@ -99,7 +100,7 @@ class ReplicationStreamer(object):
         if not hs.config.send_federation:
             self.federation_sender = hs.get_federation_sender()

-        hs.get_notifier().add_replication_callback(self.on_notifier_poke)
+        self.notifier.add_replication_callback(self.on_notifier_poke)

         # Keeps track of whether we are currently checking for updates
         self.is_looping = False

From eed59dcc1efdda95ea2deaad6dd8b70e5d346a6e Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Fri, 9 Jun 2017 11:39:35 +0100
Subject: [PATCH 106/139] Fix has_any_entity_changed

Occasionally has_any_entity_changed would throw the error: "Set changed
size during iteration" when taking the max of the `sorteddict`. While
it's uncertain how that happens, it's quite inefficient to iterate over
the entire dict anyway, so we switch to the more traditional
`bisect_*` functions.
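For illustration only, the same "has anything changed since stream_pos" check
expressed with the standard library's bisect module over a plain sorted list
(the cache itself uses a sorteddict, but the behaviour is the same):

    import bisect

    def has_entity_changed_since(cached_stream_positions, stream_pos):
        # cached_stream_positions must be sorted ascending.
        # Find the first cached position strictly greater than stream_pos;
        # if one exists, something has changed after that position.
        i = bisect.bisect_right(cached_stream_positions, stream_pos)
        return i < len(cached_stream_positions)

    assert has_entity_changed_since([10, 15, 20], 9)
    assert not has_entity_changed_since([10, 15, 20], 20)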
--- synapse/util/caches/stream_change_cache.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py index c498aee46..b723e3364 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py @@ -96,10 +96,10 @@ class StreamChangeCache(object): if stream_pos >= self._earliest_known_stream_pos: self.metrics.inc_hits() - if stream_pos >= max(self._cache): - return False - else: - return True + keys = self._cache.keys() + i = keys.bisect_right(stream_pos) + + return len(keys[i:]) > 0 else: self.metrics.inc_misses() return True From 935e588799cac738d808faa8024bcba5fd5c6c06 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 9 Jun 2017 13:01:23 +0100 Subject: [PATCH 107/139] Tweak SQL --- synapse/storage/roommember.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 447c6364e..e38bbd22a 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -507,9 +507,12 @@ class RoomMemberStore(SQLBaseStore): raise Exception("Invalid host name") sql = """ - SELECT state_key FROM current_state_events - INNER JOIN room_memberships USING (room_id, event_id) - WHERE membership = 'join' AND room_id = ? AND state_key LIKE ? + SELECT state_key FROM current_state_events AS c + INNER JOIN room_memberships USING (event_id) + WHERE membership = 'join' + AND type = 'm.room.member' + AND c.room_id = ? + AND state_key LIKE ? LIMIT 1 """ From 1664948e419f20e527c300e634246641e3b9bc66 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 9 Jun 2017 13:05:05 +0100 Subject: [PATCH 108/139] Comment --- synapse/handlers/federation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index a72c92c62..39d2bee8d 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1102,6 +1102,8 @@ class FederationHandler(BaseHandler): user_id, "leave" ) + # Mark as outlier as we don't have any state for this event; we're not + # even in the room. 
event.internal_metadata.outlier = True event = self._sign_event(event) From efc2b7db95c78f658d3719862702b85d5d9d4a76 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 9 Jun 2017 13:35:15 +0100 Subject: [PATCH 109/139] Rewrite conditional --- synapse/util/caches/stream_change_cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py index b723e3364..609625b32 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py @@ -99,7 +99,7 @@ class StreamChangeCache(object): keys = self._cache.keys() i = keys.bisect_right(stream_pos) - return len(keys[i:]) > 0 + return i < len(keys) else: self.metrics.inc_misses() return True From e54d7d536ef58271a0fbbf28d9bc1aeaa5428a4b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 9 Jun 2017 16:24:00 +0100 Subject: [PATCH 110/139] Cache state deltas --- synapse/storage/state.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 151223219..0bea7374f 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -20,6 +20,7 @@ from synapse.util.stringutils import to_ascii from synapse.storage.engines import PostgresEngine from twisted.internet import defer +from collections import namedtuple import logging @@ -29,6 +30,13 @@ logger = logging.getLogger(__name__) MAX_STATE_DELTA_HOPS = 100 +class _GetStateGroupDelta(namedtuple("_GetStateGroupDelta", ("prev_group", "delta_ids"))): + __slots__ = [] + + def __len__(self): + return len(self.delta_ids) if self.delta_ids else None + + class StateStore(SQLBaseStore): """ Keeps track of the state at a given event. @@ -98,6 +106,7 @@ class StateStore(SQLBaseStore): _get_current_state_ids_txn, ) + @cached(max_entries=10000, iterable=True) def get_state_group_delta(self, state_group): """Given a state group try to return a previous group and a delta between the old and the new. 
@@ -117,7 +126,7 @@ class StateStore(SQLBaseStore): ) if not prev_group: - return None, None + return _GetStateGroupDelta(None, None) delta_ids = self._simple_select_list_txn( txn, @@ -128,10 +137,10 @@ class StateStore(SQLBaseStore): retcols=("type", "state_key", "event_id",) ) - return prev_group, { + return _GetStateGroupDelta(prev_group, { (row["type"], row["state_key"]): row["event_id"] for row in delta_ids - } + }) return self.runInteraction( "get_state_group_delta", _get_state_group_delta_txn, From b0d975e21619b6eb1dbcf24a5ed68629298bfdce Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 9 Jun 2017 16:25:42 +0100 Subject: [PATCH 111/139] Comments --- synapse/storage/state.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 0bea7374f..24503cd5a 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -31,6 +31,9 @@ MAX_STATE_DELTA_HOPS = 100 class _GetStateGroupDelta(namedtuple("_GetStateGroupDelta", ("prev_group", "delta_ids"))): + """Return type of get_state_group_delta that implements __len__, which lets + us use the itrable flag when caching + """ __slots__ = [] def __len__(self): From 80609743440467a69c4bdcd111c60d84f617d6c4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 9 Jun 2017 16:40:52 +0100 Subject: [PATCH 112/139] Fix replication --- synapse/replication/slave/storage/events.py | 2 +- synapse/storage/state.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index 6cd3a843d..f0a367aa9 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -108,7 +108,7 @@ class SlavedEventStore(BaseSlavedStore): get_current_state_ids = ( StateStore.__dict__["get_current_state_ids"] ) - get_state_group_delta = DataStore.get_state_group_delta.__func__ + get_state_group_delta = StateStore.__dict__["get_state_group_delta"] _get_joined_hosts_cache = RoomMemberStore.__dict__["_get_joined_hosts_cache"] has_room_changed_since = DataStore.has_room_changed_since.__func__ diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 24503cd5a..d1e679719 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -37,7 +37,7 @@ class _GetStateGroupDelta(namedtuple("_GetStateGroupDelta", ("prev_group", "delt __slots__ = [] def __len__(self): - return len(self.delta_ids) if self.delta_ids else None + return len(self.delta_ids) if self.delta_ids else 0 class StateStore(SQLBaseStore): From d53fe399ebb497f67efc99ebb12d96486503629a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Jun 2017 09:56:18 +0100 Subject: [PATCH 113/139] Add cache for is_host_joined --- synapse/replication/slave/storage/events.py | 2 +- synapse/storage/events.py | 5 +++++ synapse/storage/roommember.py | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index 7034f48b5..94ebbffc1 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -153,7 +153,7 @@ class SlavedEventStore(BaseSlavedStore): get_room_events_stream_for_rooms = ( DataStore.get_room_events_stream_for_rooms.__func__ ) - is_host_joined = DataStore.is_host_joined.__func__ + is_host_joined = RoomMemberStore.__dict__["is_host_joined"] get_stream_token_for_event = DataStore.get_stream_token_for_event.__func__ _set_before_and_after = 
staticmethod(DataStore._set_before_and_after) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index c80d181fc..72ce84b0b 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -776,6 +776,11 @@ class EventsStore(SQLBaseStore): txn, self.get_rooms_for_user, (member,) ) + for host in set(get_domain_from_id(u) for u in members_changed): + self._invalidate_cache_and_stream( + txn, self.is_host_joined, (room_id, host) + ) + self._invalidate_cache_and_stream( txn, self.get_users_in_room, (room_id,) ) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index e38bbd22a..457ca288d 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -501,7 +501,7 @@ class RoomMemberStore(SQLBaseStore): defer.returnValue(users_in_room) - @defer.inlineCallbacks + @cachedInlineCallbacks(max_entries=10000) def is_host_joined(self, room_id, host): if '%' in host or '_' in host: raise Exception("Invalid host name") From b58e24cc3c328e0577e6a8a44889d3639c0b289e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Jun 2017 10:16:31 +0100 Subject: [PATCH 114/139] Tweak the ranking of PG user dir search --- synapse/storage/user_directory.py | 53 +++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 6a4bf63f0..0b874e022 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -391,11 +391,14 @@ class UserDirectoryStore(SQLBaseStore): ] } """ - - search_query = _parse_query(self.database_engine, search_term) - if isinstance(self.database_engine, PostgresEngine): + full_query, exact_query, prefix_query = _parse_query_postgres(search_term) + # We order by rank and then if they have profile info + # The ranking algorithm is hand tweaked for "best" results. Broadly + # the idea is we give a higher weight to exact matches. + # The array of numbers are the weights for the various part of the + # search: (domain, _, display name, localpart) sql = """ SELECT user_id, display_name, avatar_url FROM user_directory_search @@ -403,13 +406,27 @@ class UserDirectoryStore(SQLBaseStore): INNER JOIN users_in_pubic_room USING (user_id) WHERE vector @@ to_tsquery('english', ?) ORDER BY - ts_rank_cd(vector, to_tsquery('english', ?), 1) DESC, + 2 * ts_rank_cd( + '{0.1, 0.1, 0.9, 1.0}', + vector, + to_tsquery('english', ?), + 8 + ) + + ts_rank_cd( + '{0.1, 0.1, 0.9, 1.0}', + vector, + to_tsquery('english', ?), + 8 + ) + DESC, display_name IS NULL, avatar_url IS NULL LIMIT ? """ - args = (search_query, search_query, limit + 1,) + args = (full_query, exact_query, prefix_query, limit + 1,) elif isinstance(self.database_engine, Sqlite3Engine): + search_query = _parse_query_sqlite(search_term) + sql = """ SELECT user_id, display_name, avatar_url FROM user_directory_search @@ -439,7 +456,7 @@ class UserDirectoryStore(SQLBaseStore): }) -def _parse_query(database_engine, search_term): +def _parse_query_sqlite(search_term): """Takes a plain unicode string from the user and converts it into a form that can be passed to database. We use this so that we can add prefix matching, which isn't something @@ -451,11 +468,21 @@ def _parse_query(database_engine, search_term): # Pull out the individual words, discarding any non-word characters. 
results = re.findall(r"([\w\-]+)", search_term, re.UNICODE) + return " & ".join("(%s* | %s)" % (result, result,) for result in results) - if isinstance(database_engine, PostgresEngine): - return " & ".join("(%s:* | %s)" % (result, result,) for result in results) - elif isinstance(database_engine, Sqlite3Engine): - return " & ".join("(%s* | %s)" % (result, result,) for result in results) - else: - # This should be unreachable. - raise Exception("Unrecognized database engine") + +def _parse_query_postgres(search_term): + """Takes a plain unicode string from the user and converts it into a form + that can be passed to database. + We use this so that we can add prefix matching, which isn't something + that is supported by default. + """ + + # Pull out the individual words, discarding any non-word characters. + results = re.findall(r"([\w\-]+)", search_term, re.UNICODE) + + both = " & ".join("(%s:* | %s)" % (result, result,) for result in results) + exact = " & ".join("%s" % (result,) for result in results) + prefix = " & ".join("%s:*" % (result,) for result in results) + + return both, exact, prefix From 6ae8373d40f9008ab80dd7894c4c79beb7ceca48 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Jun 2017 10:19:26 +0100 Subject: [PATCH 115/139] Don't assume existance of events when updating user directory --- synapse/handlers/user_directory.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 02b720b65..ce5a506b7 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -225,7 +225,7 @@ class UserDirectoyHandler(object): logger.debug("Server is still in room: %r", room_id) if change: # The user joined - event = yield self.store.get_event(event_id) + event = yield self.store.get_event(event_id, allow_none=True) profile = ProfileInfo( avatar_url=event.content.get("avatar_url"), display_name=event.content.get("displayname"), @@ -379,8 +379,11 @@ class UserDirectoyHandler(object): if not prev_event_id or not event_id: return - prev_event = yield self.store.get_event(prev_event_id) - event = yield self.store.get_event(event_id) + prev_event = yield self.store.get_event(prev_event_id, allow_none=True) + event = yield self.store.get_event(event_id, allow_none=True) + + if not prev_event or not event: + return if event.membership != Membership.JOIN: return From 48d29494165f2b12e2d2b6206766828e2a542ef4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Jun 2017 10:23:14 +0100 Subject: [PATCH 116/139] Throw exception when not retrying when downloading media --- synapse/rest/media/v1/media_repository.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index caca96c22..bae2b4c75 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -184,6 +184,7 @@ class MediaRepository(object): raise except NotRetryingDestination: logger.warn("Not retrying destination %r", server_name) + raise SynapseError(502, "Failed to fetch remote media") except Exception: logger.exception("Failed to fetch remote media %s/%s", server_name, media_id) From 6fd7e6db3d8c896d2e1b92efaad29596fd9cb39c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Jun 2017 11:11:26 +0100 Subject: [PATCH 117/139] Fix user dir to not assume existence of user --- synapse/storage/user_directory.py | 59 +++++++++++++++++++------------ 1 file changed, 37 
insertions(+), 22 deletions(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 0b874e022..3f3eee862 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -151,37 +151,52 @@ class UserDirectoryStore(SQLBaseStore): def update_profile_in_user_dir(self, user_id, display_name, avatar_url): def _update_profile_in_user_dir_txn(txn): - self._simple_update_one_txn( + new_entry = self._simple_upsert_txn( txn, table="user_directory", keyvalues={"user_id": user_id}, - updatevalues={"display_name": display_name, "avatar_url": avatar_url}, + values={"display_name": display_name, "avatar_url": avatar_url}, + lock=False, # We're only inserter ) if isinstance(self.database_engine, PostgresEngine): # We weight the loclpart most highly, then display name and finally # server name - sql = """ - UPDATE user_directory_search - SET vector = setweight(to_tsvector('english', ?), 'A') - || setweight(to_tsvector('english', ?), 'D') - || setweight(to_tsvector('english', COALESCE(?, '')), 'B') - WHERE user_id = ? - """ - args = ( - get_localpart_from_id(user_id), get_domain_from_id(user_id), - display_name, - user_id, - ) + if new_entry: + sql = """ + INSERT INTO user_directory_search(user_id, vector) + VALUES (?, + setweight(to_tsvector('english', ?), 'A') + || setweight(to_tsvector('english', ?), 'D') + || setweight(to_tsvector('english', COALESCE(?, '')), 'B') + ) + """ + args = ( + user_id, + get_localpart_from_id(user_id), get_domain_from_id(user_id), + display_name, + ) + else: + sql = """ + UPDATE user_directory_search + SET vector = setweight(to_tsvector('english', ?), 'A') + || setweight(to_tsvector('english', ?), 'D') + || setweight(to_tsvector('english', COALESCE(?, '')), 'B') + WHERE user_id = ? + """ + args = ( + get_localpart_from_id(user_id), get_domain_from_id(user_id), + display_name, + user_id, + ) elif isinstance(self.database_engine, Sqlite3Engine): - sql = """ - UPDATE user_directory_search - set value = ? - WHERE user_id = ? - """ - args = ( - "%s %s" % (user_id, display_name,) if display_name else user_id, - user_id, + value = "%s %s" % (user_id, display_name,) if display_name else user_id + self._simple_upsert_txn( + txn, + table="user_directory_search", + keyvalues={"user_id": user_id}, + values={"value": value}, + lock=False, # We're only inserter ) else: # This should be unreachable. From 505e7e8b9d51a2529790632799e47a047be1f7d2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Jun 2017 11:19:18 +0100 Subject: [PATCH 118/139] Fix up sql --- synapse/storage/user_directory.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 3f3eee862..67b14cf26 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -171,10 +171,12 @@ class UserDirectoryStore(SQLBaseStore): || setweight(to_tsvector('english', COALESCE(?, '')), 'B') ) """ - args = ( - user_id, - get_localpart_from_id(user_id), get_domain_from_id(user_id), - display_name, + txn.execute( + sql, + ( + user_id, get_localpart_from_id(user_id), + get_domain_from_id(user_id), display_name, + ) ) else: sql = """ @@ -184,10 +186,12 @@ class UserDirectoryStore(SQLBaseStore): || setweight(to_tsvector('english', COALESCE(?, '')), 'B') WHERE user_id = ? 
""" - args = ( - get_localpart_from_id(user_id), get_domain_from_id(user_id), - display_name, - user_id, + txn.execute( + sql, + ( + get_localpart_from_id(user_id), get_domain_from_id(user_id), + display_name, user_id, + ) ) elif isinstance(self.database_engine, Sqlite3Engine): value = "%s %s" % (user_id, display_name,) if display_name else user_id @@ -202,8 +206,6 @@ class UserDirectoryStore(SQLBaseStore): # This should be unreachable. raise Exception("Unrecognized database engine") - txn.execute(sql, args) - txn.call_after(self.get_user_in_directory.invalidate, (user_id,)) return self.runInteraction( From d9fd937e39cf04a3da15412bbdbd352bb175750a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Jun 2017 11:49:39 +0100 Subject: [PATCH 119/139] Fix user directory insertion due to missing room_id --- synapse/handlers/user_directory.py | 10 +++++++--- synapse/storage/user_directory.py | 3 ++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index ce5a506b7..f4451e5df 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -203,7 +203,9 @@ class UserDirectoyHandler(object): if change is None: # Handle any profile changes - yield self._handle_profile_change(state_key, prev_event_id, event_id) + yield self._handle_profile_change( + state_key, room_id, prev_event_id, event_id, + ) continue if not change: @@ -372,7 +374,7 @@ class UserDirectoyHandler(object): yield self.store.remove_from_user_in_public_room(user_id) @defer.inlineCallbacks - def _handle_profile_change(self, user_id, prev_event_id, event_id): + def _handle_profile_change(self, user_id, room_id, prev_event_id, event_id): """Check member event changes for any profile changes and update the database if there are. 
""" @@ -395,7 +397,9 @@ class UserDirectoyHandler(object): new_avatar = event.content.get("avatar_url") if prev_name != new_name or prev_avatar != new_avatar: - yield self.store.update_profile_in_user_dir(user_id, new_name, new_avatar) + yield self.store.update_profile_in_user_dir( + user_id, new_name, new_avatar, room_id, + ) @defer.inlineCallbacks def _get_key_change(self, prev_event_id, event_id, key_name, public_value): diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 67b14cf26..137aca288 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -149,12 +149,13 @@ class UserDirectoryStore(SQLBaseStore): ) self.get_user_in_directory.invalidate((user_id,)) - def update_profile_in_user_dir(self, user_id, display_name, avatar_url): + def update_profile_in_user_dir(self, user_id, display_name, avatar_url, room_id): def _update_profile_in_user_dir_txn(txn): new_entry = self._simple_upsert_txn( txn, table="user_directory", keyvalues={"user_id": user_id}, + insertion_values={"room_id": room_id}, values={"display_name": display_name, "avatar_url": avatar_url}, lock=False, # We're only inserter ) From ba502fb89a4c57c57de81669bfaa5ef02b4af904 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 14 Jun 2017 02:23:06 +0100 Subject: [PATCH 120/139] add notes on running out of FDs --- README.rst | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/README.rst b/README.rst index 35141ac71..12f0c0c51 100644 --- a/README.rst +++ b/README.rst @@ -528,6 +528,30 @@ fix try re-installing from PyPI or directly from # Install from github pip install --user https://github.com/pyca/pynacl/tarball/master +Running out of File Handles +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If synapse runs out of filehandles, it typically fails badly - live-locking +at 100% CPU, and/or failing to accept new TCP connections (blocking the +connecting client). Matrix currently can legitimately use a lot of file handles, +thanks to busy rooms like #matrix:matrix.org containing hundreds of participating +servers. The first time a server talks in a room it will try to connect +simultaneously to all participating servers, which could exhaust the available +file descriptors between DNS queries & HTTPS sockets, especially if DNS is slow +to respond. (We need to improve the routing algorithm used to be better than +full mesh, but as of June 2017 this hasn't happened yet). + +If you hit this failure mode, we recommend increasing the maximum number of +open file handles to be at least 4096 (assuming a default of 1024 or 256). +This is typically done by editing ``/etc/security/limits.conf`` + +Separately, Synapse may leak file handles if inbound HTTP requests get stuck +during processing - e.g. blocked behind a lock or talking to a remote server etc. +This is best diagnosed by matching up the 'Received request' and 'Processed request' +log lines and looking for any 'Processed request' lines which take more than +a few seconds to execute. Please let us know at #matrix-dev:matrix.org if +you see this failure mode so we can help debug it, however. 
+ ArchLinux ~~~~~~~~~ From 617304b2cf72460006a7eda26dba9fef14642305 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Jun 2017 19:37:17 +0100 Subject: [PATCH 121/139] Fix phone home stats --- synapse/app/homeserver.py | 49 ++++++--------------- synapse/storage/__init__.py | 26 ++++++----- synapse/storage/events.py | 88 ++++++++++++------------------------- 3 files changed, 56 insertions(+), 107 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 345740259..c9a2f148d 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -35,7 +35,7 @@ from synapse.storage.prepare_database import UpgradeDatabaseException, prepare_d from synapse.server import HomeServer -from twisted.internet import reactor, task, defer +from twisted.internet import reactor, defer from twisted.application import service from twisted.web.resource import Resource, EncodingResourceWrapper from twisted.web.static import File @@ -53,7 +53,7 @@ from synapse.api.urls import ( from synapse.config.homeserver import HomeServerConfig from synapse.crypto import context_factory from synapse.util.logcontext import LoggingContext, PreserveLoggingContext -from synapse.metrics import register_memory_metrics, get_metrics_for +from synapse.metrics import register_memory_metrics from synapse.metrics.resource import MetricsResource, METRICS_PREFIX from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory from synapse.federation.transport.server import TransportLayerServer @@ -398,7 +398,8 @@ def run(hs): ThreadPool._worker = profile(ThreadPool._worker) reactor.run = profile(reactor.run) - start_time = hs.get_clock().time() + clock = hs.get_clock() + start_time = clock.time() stats = {} @@ -410,41 +411,14 @@ def run(hs): if uptime < 0: uptime = 0 - # If the stats directory is empty then this is the first time we've - # reported stats. - first_time = not stats - stats["homeserver"] = hs.config.server_name stats["timestamp"] = now stats["uptime_seconds"] = uptime stats["total_users"] = yield hs.get_datastore().count_all_users() - - room_count = yield hs.get_datastore().get_room_count() - stats["total_room_count"] = room_count - stats["daily_active_users"] = yield hs.get_datastore().count_daily_users() - daily_messages = yield hs.get_datastore().count_daily_messages() - if daily_messages is not None: - stats["daily_messages"] = daily_messages - else: - stats.pop("daily_messages", None) - - if first_time: - # Add callbacks to report the synapse stats as metrics whenever - # prometheus requests them, typically every 30s. - # As some of the stats are expensive to calculate we only update - # them when synapse phones home to matrix.org every 24 hours. 
- metrics = get_metrics_for("synapse.usage") - metrics.add_callback("timestamp", lambda: stats["timestamp"]) - metrics.add_callback("uptime_seconds", lambda: stats["uptime_seconds"]) - metrics.add_callback("total_users", lambda: stats["total_users"]) - metrics.add_callback("total_room_count", lambda: stats["total_room_count"]) - metrics.add_callback( - "daily_active_users", lambda: stats["daily_active_users"] - ) - metrics.add_callback( - "daily_messages", lambda: stats.get("daily_messages", 0) - ) + stats["daily_messages"] = yield hs.get_datastore().count_daily_messages() + daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() + stats["daily_sent_messages"] = daily_sent_messages logger.info("Reporting stats to matrix.org: %s" % (stats,)) try: @@ -456,9 +430,12 @@ def run(hs): logger.warn("Error reporting stats: %s", e) if hs.config.report_stats: - phone_home_task = task.LoopingCall(phone_stats_home) - logger.info("Scheduling stats reporting for 24 hour intervals") - phone_home_task.start(60 * 60 * 24, now=False) + logger.info("Scheduling stats reporting for 3 hour intervals") + clock.looping_call(phone_stats_home, 3 * 60 * 60 * 1000) + + # We wait 5 minutes to send the first set of stats as the server can + # be quite busy the first few minutes + clock.call_later(5 * 60, phone_stats_home) def in_thread(): # Uncomment to enable tracing of log context changes. diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index d604e7668..2970df138 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -231,7 +231,7 @@ class DataStore(RoomMemberStore, RoomStore, cur.close() self.find_stream_orderings_looping_call = self._clock.looping_call( - self._find_stream_orderings_for_times, 60 * 60 * 1000 + self._find_stream_orderings_for_times, 10 * 60 * 1000 ) self._stream_order_on_start = self.get_room_max_stream_ordering() @@ -272,17 +272,19 @@ class DataStore(RoomMemberStore, RoomStore, Counts the number of users who used this homeserver in the last 24 hours. """ def _count_users(txn): - txn.execute( - "SELECT COUNT(DISTINCT user_id) AS users" - " FROM user_ips" - " WHERE last_seen > ?", - # This is close enough to a day for our purposes. - (int(self._clock.time_msec()) - (1000 * 60 * 60 * 24),) - ) - rows = self.cursor_to_dict(txn) - if rows: - return rows[0]["users"] - return 0 + yesterday = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24), + + sql = """ + SELECT COALESCE(count(*), 0) FROM ( + SELECT user_id FROM user_ips + WHERE last_seen > ? + GROUP BY user_id + ) u + """ + + txn.execute(sql, (yesterday,)) + count, = txn.fetchone() + return count ret = yield self.runInteraction("count_users", _count_users) defer.returnValue(ret) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index c4aeb4880..8e7ae73a7 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -38,7 +38,6 @@ from functools import wraps import synapse.metrics import logging -import math import ujson as json # these are only included to make the type annotations work @@ -1599,68 +1598,39 @@ class EventsStore(SQLBaseStore): call to this function, it will return None. 
""" def _count_messages(txn): - now = self.hs.get_clock().time() - - txn.execute( - "SELECT reported_stream_token, reported_time FROM stats_reporting" - ) - last_reported = self.cursor_to_dict(txn) - - txn.execute( - "SELECT stream_ordering" - " FROM events" - " ORDER BY stream_ordering DESC" - " LIMIT 1" - ) - now_reporting = self.cursor_to_dict(txn) - if not now_reporting: - logger.info("Calculating daily messages skipped; no now_reporting") - return None - now_reporting = now_reporting[0]["stream_ordering"] - - txn.execute("DELETE FROM stats_reporting") - txn.execute( - "INSERT INTO stats_reporting" - " (reported_stream_token, reported_time)" - " VALUES (?, ?)", - (now_reporting, now,) - ) - - if not last_reported: - logger.info("Calculating daily messages skipped; no last_reported") - return None - - # Close enough to correct for our purposes. - yesterday = (now - 24 * 60 * 60) - since_yesterday_seconds = yesterday - last_reported[0]["reported_time"] - any_since_yesterday = math.fabs(since_yesterday_seconds) > 60 * 60 - if any_since_yesterday: - logger.info( - "Calculating daily messages skipped; since_yesterday_seconds: %d" % - (since_yesterday_seconds,) - ) - return None - - txn.execute( - "SELECT COUNT(*) as messages" - " FROM events NATURAL JOIN event_json" - " WHERE json like '%m.room.message%'" - " AND stream_ordering > ?" - " AND stream_ordering <= ?", - ( - last_reported[0]["reported_stream_token"], - now_reporting, - ) - ) - rows = self.cursor_to_dict(txn) - if not rows: - logger.info("Calculating daily messages skipped; messages count missing") - return None - return rows[0]["messages"] + sql = """ + SELECT COALESCE(COUNT(*), 0) FROM events + WHERE type = 'm.room.message' + AND stream_ordering > ? + """ + txn.execute(sql, (self.stream_ordering_day_ago,)) + count, = txn.fetchone() + return count ret = yield self.runInteraction("count_messages", _count_messages) defer.returnValue(ret) + @defer.inlineCallbacks + def count_daily_sent_messages(self): + def _count_messages(txn): + # This is good enough as if you have silly characters in your own + # hostname then thats your own fault. + like_clause = "%:" + self.hs.hostname + + sql = """ + SELECT COALESCE(COUNT(*), 0) FROM events + WHERE type = 'm.room.message' + AND sender LIKE ? + AND stream_ordering > ? + """ + + txn.execute(sql, (like_clause, self.stream_ordering_day_ago,)) + count, = txn.fetchone() + return count + + ret = yield self.runInteraction("count_daily_sent_messages", _count_messages) + defer.returnValue(ret) + @defer.inlineCallbacks def _background_reindex_fields_sender(self, progress, batch_size): target_min_stream_id = progress["target_min_stream_id_inclusive"] From 93e7a38370cfadb9dc65e18b16e7d76c05546e48 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Jun 2017 09:30:54 +0100 Subject: [PATCH 122/139] Remove unhelpful test --- tests/storage/test_events.py | 115 ----------------------------------- 1 file changed, 115 deletions(-) delete mode 100644 tests/storage/test_events.py diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py deleted file mode 100644 index 14443b53b..000000000 --- a/tests/storage/test_events.py +++ /dev/null @@ -1,115 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2015, 2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from mock import Mock -from synapse.types import RoomID, UserID - -from tests import unittest -from twisted.internet import defer -from tests.storage.event_injector import EventInjector - -from tests.utils import setup_test_homeserver - - -class EventsStoreTestCase(unittest.TestCase): - - @defer.inlineCallbacks - def setUp(self): - self.hs = yield setup_test_homeserver( - resource_for_federation=Mock(), - http_client=None, - ) - self.store = self.hs.get_datastore() - self.db_pool = self.hs.get_db_pool() - self.message_handler = self.hs.get_handlers().message_handler - self.event_injector = EventInjector(self.hs) - - @defer.inlineCallbacks - def test_count_daily_messages(self): - yield self.db_pool.runQuery("DELETE FROM stats_reporting") - - self.hs.clock.now = 100 - - # Never reported before, and nothing which could be reported - count = yield self.store.count_daily_messages() - self.assertIsNone(count) - count = yield self.db_pool.runQuery("SELECT COUNT(*) FROM stats_reporting") - self.assertEqual([(0,)], count) - - # Create something to report - room = RoomID.from_string("!abc123:test") - user = UserID.from_string("@raccoonlover:test") - yield self.event_injector.create_room(room, user) - - self.base_event = yield self._get_last_stream_token() - - yield self.event_injector.inject_message(room, user, "Raccoons are really cute") - - # Never reported before, something could be reported, but isn't because - # it isn't old enough. - count = yield self.store.count_daily_messages() - self.assertIsNone(count) - yield self._assert_stats_reporting(1, self.hs.clock.now) - - # Already reported yesterday, two new events from today. - yield self.event_injector.inject_message(room, user, "Yeah they are!") - yield self.event_injector.inject_message(room, user, "Incredibly!") - self.hs.clock.now += 60 * 60 * 24 - count = yield self.store.count_daily_messages() - self.assertEqual(2, count) # 2 since yesterday - yield self._assert_stats_reporting(3, self.hs.clock.now) # 3 ever - - # Last reported too recently. 
- yield self.event_injector.inject_message(room, user, "Who could disagree?") - self.hs.clock.now += 60 * 60 * 22 - count = yield self.store.count_daily_messages() - self.assertIsNone(count) - yield self._assert_stats_reporting(4, self.hs.clock.now) - - # Last reported too long ago - yield self.event_injector.inject_message(room, user, "No one.") - self.hs.clock.now += 60 * 60 * 26 - count = yield self.store.count_daily_messages() - self.assertIsNone(count) - yield self._assert_stats_reporting(5, self.hs.clock.now) - - # And now let's actually report something - yield self.event_injector.inject_message(room, user, "Indeed.") - yield self.event_injector.inject_message(room, user, "Indeed.") - yield self.event_injector.inject_message(room, user, "Indeed.") - # A little over 24 hours is fine :) - self.hs.clock.now += (60 * 60 * 24) + 50 - count = yield self.store.count_daily_messages() - self.assertEqual(3, count) - yield self._assert_stats_reporting(8, self.hs.clock.now) - - @defer.inlineCallbacks - def _get_last_stream_token(self): - rows = yield self.db_pool.runQuery( - "SELECT stream_ordering" - " FROM events" - " ORDER BY stream_ordering DESC" - " LIMIT 1" - ) - if not rows: - defer.returnValue(0) - else: - defer.returnValue(rows[0][0]) - - @defer.inlineCallbacks - def _assert_stats_reporting(self, messages, time): - rows = yield self.db_pool.runQuery( - "SELECT reported_stream_token, reported_time FROM stats_reporting" - ) - self.assertEqual([(self.base_event + messages, time,)], rows) From 4b461a69311fc96ce13ed75ebe388fd6718286d1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Jun 2017 09:39:39 +0100 Subject: [PATCH 123/139] Add some more stats --- synapse/app/homeserver.py | 7 +++++++ synapse/storage/events.py | 15 +++++++++++++++ synapse/storage/registration.py | 13 +++++++++++++ 3 files changed, 35 insertions(+) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index c9a2f148d..6af8259be 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -415,8 +415,15 @@ def run(hs): stats["timestamp"] = now stats["uptime_seconds"] = uptime stats["total_users"] = yield hs.get_datastore().count_all_users() + stats["total_users"] = yield hs.get_datastore().count_nonbridged_users() + + room_count = yield hs.get_datastore().get_room_count() + stats["total_room_count"] = room_count + stats["daily_active_users"] = yield hs.get_datastore().count_daily_users() + stats["daily_active_rooms"] = yield hs.get_datastore().count_daily_active_rooms() stats["daily_messages"] = yield hs.get_datastore().count_daily_messages() + daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() stats["daily_sent_messages"] = daily_sent_messages diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 8e7ae73a7..f29d71589 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1631,6 +1631,21 @@ class EventsStore(SQLBaseStore): ret = yield self.runInteraction("count_daily_sent_messages", _count_messages) defer.returnValue(ret) + @defer.inlineCallbacks + def count_daily_active_rooms(self): + def _count(txn): + sql = """ + SELECT COALESCE(COUNT(DISTINCT room_id), 0) FROM events + WHERE type = 'm.room.message' + AND stream_ordering > ? 
+ """ + txn.execute(sql, (self.stream_ordering_day_ago,)) + count, = txn.fetchone() + return count + + ret = yield self.runInteraction("count_daily_active_rooms", _count) + defer.returnValue(ret) + @defer.inlineCallbacks def _background_reindex_fields_sender(self, progress, batch_size): target_min_stream_id = progress["target_min_stream_id_inclusive"] diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index ec2c52ab9..20acd58fc 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -437,6 +437,19 @@ class RegistrationStore(background_updates.BackgroundUpdateStore): ret = yield self.runInteraction("count_users", _count_users) defer.returnValue(ret) + @defer.inlineCallbacks + def count_nonbridged_users(self): + def _count_users(txn): + txn.execute(""" + SELECT COALESCE(COUNT(*), 0) FROM users + WHERE appservice_id IS NULL + """) + count, = txn.fetchone() + return count + + ret = yield self.runInteraction("count_users", _count_users) + defer.returnValue(ret) + @defer.inlineCallbacks def find_next_generated_user_id_localpart(self): """ From ebcd55d641345467d583836ce1f06ffe5d36f98d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Jun 2017 09:45:48 +0100 Subject: [PATCH 124/139] Add DB schema for tracking users who share rooms --- synapse/storage/prepare_database.py | 2 +- .../storage/schema/delta/43/user_share.sql | 32 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 synapse/storage/schema/delta/43/user_share.sql diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index eaba699e2..72b670b83 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 42 +SCHEMA_VERSION = 43 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/schema/delta/43/user_share.sql b/synapse/storage/schema/delta/43/user_share.sql new file mode 100644 index 000000000..f552b6eb7 --- /dev/null +++ b/synapse/storage/schema/delta/43/user_share.sql @@ -0,0 +1,32 @@ +/* Copyright 2017 Vector Creations Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Table keeping track of who shares a room with who. We only keep track +-- of this for local users, so `user_id` is local users only (but we do keep track +-- of which remote users share a room) +CREATE TABLE users_who_share_rooms ( + user_id TEXT NOT NULL, + other_user_id TEXT NOT NULL, + room_id TEXT NOT NULL, + share_private BOOLEAN NOT NULL -- is the shared room private? i.e. 
they share a private room +); + + +CREATE UNIQUE INDEX users_who_share_rooms_u_idx ON users_who_share_rooms(user_id, other_user_id); +CREATE INDEX users_who_share_rooms_r_idx ON users_who_share_rooms(room_id, user_id); + + +-- Make sure that we popualte the table initially +UPDATE user_directory_stream_pos SET stream_id = NULL; From 72613bc3798d34a7bf93defd6624b84669078e2a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Jun 2017 09:59:04 +0100 Subject: [PATCH 125/139] Implement initial population of users who share rooms table --- synapse/handlers/user_directory.py | 78 +++++++++++++++++- synapse/storage/user_directory.py | 124 +++++++++++++++++++++++++++-- 2 files changed, 193 insertions(+), 9 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index f4451e5df..581c078bb 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -14,12 +14,12 @@ # limitations under the License. import logging - from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules, Membership from synapse.storage.roommember import ProfileInfo from synapse.util.metrics import Measure +from synapse.util.async import sleep logger = logging.getLogger(__name__) @@ -41,12 +41,15 @@ class UserDirectoyHandler(object): one public room. """ + INITIAL_SLEEP_MS = 50 + def __init__(self, hs): self.store = hs.get_datastore() self.state = hs.get_state_handler() self.server_name = hs.hostname self.clock = hs.get_clock() self.notifier = hs.get_notifier() + self.is_mine_id = hs.is_mine_id self.notifier.add_replication_callback(self.notify_new_event) @@ -55,6 +58,9 @@ class UserDirectoyHandler(object): self.initially_handled_users = set() self.initially_handled_users_in_public = set() + self.initially_handled_users_share = set() + self.initially_handled_users_share_private_room = set() + # The current position in the current_state_delta stream self.pos = None @@ -140,10 +146,14 @@ class UserDirectoyHandler(object): logger.info("Handling room %d/%d", num_processed_rooms, len(room_ids)) yield self._handle_intial_room(room_id) num_processed_rooms += 1 + yield sleep(self.INITIAL_SLEEP_MS / 1000.) logger.info("Processed all rooms.") self.initially_handled_users = None + self.initially_handled_users_in_public = None + self.initially_handled_users_share = None + self.initially_handled_users_share_private_room = None yield self.store.update_user_directory_stream_pos(new_pos) @@ -158,7 +168,8 @@ class UserDirectoyHandler(object): is_public = yield self.store.is_room_world_readable_or_publicly_joinable(room_id) users_with_profile = yield self.state.get_current_user_in_room(room_id) - unhandled_users = set(users_with_profile) - self.initially_handled_users + user_ids = set(users_with_profile) + unhandled_users = user_ids - self.initially_handled_users yield self.store.add_profiles_to_user_dir( room_id, { @@ -175,6 +186,69 @@ class UserDirectoyHandler(object): ) self.initially_handled_users_in_public != unhandled_users + # We now go and figure out the new users who share rooms with user entries + # We sleep aggressively here as otherwise it can starve resources. + # We also batch up inserts/updates, but try to avoid too many at once. + to_insert = set() + to_update = set() + count = 0 + for user_id in user_ids: + if count % 100 == 0: + yield sleep(self.INITIAL_SLEEP_MS / 1000.) 
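+            # Only pairs whose first element is a local user get recorded;
+            # remote users can only ever appear in the `other_user_id` column.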
+ + if not self.is_mine_id(user_id): + count += 1 + continue + + for other_user_id in user_ids: + if user_id == other_user_id: + continue + + if count % 100 == 0: + yield sleep(self.INITIAL_SLEEP_MS / 1000.) + count += 1 + + user_set = (user_id, other_user_id) + + if user_set in self.initially_handled_users_share_private_room: + continue + + if user_set in self.initially_handled_users_share: + if is_public: + continue + to_update.add(user_set) + else: + to_insert.add(user_set) + + if is_public: + self.initially_handled_users_share.add(user_set) + else: + self.initially_handled_users_share_private_room.add(user_set) + + if len(to_insert) > 100: + yield self.store.add_users_who_share_room( + room_id, not is_public, to_insert, + ) + to_insert.clear() + + if len(to_update) > 100: + yield self.store.update_users_who_share_room( + room_id, not is_public, to_update, + ) + to_update.clear() + + if to_insert: + yield self.store.add_users_who_share_room( + room_id, not is_public, to_insert, + ) + to_insert.clear() + + if to_update: + yield self.store.update_users_who_share_room( + room_id, not is_public, to_update, + ) + to_update.clear() + @defer.inlineCallbacks def _handle_deltas(self, deltas): """Called with the state deltas to process diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 137aca288..0123e28f9 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -16,16 +16,19 @@ from twisted.internet import defer from ._base import SQLBaseStore + from synapse.util.caches.descriptors import cached, cachedInlineCallbacks from synapse.api.constants import EventTypes, JoinRules from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.types import get_domain_from_id, get_localpart_from_id import re +import logging + +logger = logging.getLogger(__name__) class UserDirectoryStore(SQLBaseStore): - @cachedInlineCallbacks(cache_context=True) def is_room_world_readable_or_publicly_joinable(self, room_id, cache_context): """Check if the room is either world_readable or publically joinable @@ -281,14 +284,118 @@ class UserDirectoryStore(SQLBaseStore): desc="get_users_in_dir_due_to_room", ) + @defer.inlineCallbacks def get_all_rooms(self): - """Get all room_ids we've ever known about + """Get all room_ids we've ever known about, in ascending order of "size" """ - return self._simple_select_onecol( - table="current_state_events", - keyvalues={}, - retcol="DISTINCT room_id", - desc="get_all_rooms", + sql = """ + SELECT room_id FROM current_state_events + GROUP BY room_id + ORDER BY count(*) ASC + """ + rows = yield self._execute("get_all_rooms", None, sql) + defer.returnValue([room_id for room_id, in rows]) + + def add_users_who_share_room(self, room_id, share_private, user_id_tuples): + """Insert entries into the users_who_share_rooms table. The first + user should be a local user. + + Args: + room_id (str) + share_private (bool): Is the room private + user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. 
+ """ + def _add_users_who_share_room_txn(txn): + self._simple_insert_many_txn( + txn, + table="users_who_share_rooms", + values=[ + { + "user_id": user_id, + "other_user_id": other_user_id, + "room_id": room_id, + "share_private": share_private, + } + for user_id, other_user_id in user_id_tuples + ], + ) + for user_id, other_user_id in user_id_tuples: + txn.call_after( + self.get_users_who_share_room_from_dir.invalidate, + (user_id,), + ) + txn.call_after( + self.get_if_users_share_a_room.invalidate, + (user_id, other_user_id), + ) + return self.runInteraction( + "add_users_who_share_room", _add_users_who_share_room_txn + ) + + def update_users_who_share_room(self, room_id, share_private, user_id_sets): + """Updates entries in the users_who_share_rooms table. The first + user should be a local user. + + Args: + room_id (str) + share_private (bool): Is the room private + user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. + """ + def _update_users_who_share_room_txn(txn): + sql = """ + UPDATE users_who_share_rooms + SET room_id = ?, share_private = ? + WHERE user_id = ? AND other_user_id = ? + """ + txn.executemany( + sql, + ( + (room_id, share_private, uid, oid) + for uid, oid in user_id_sets + ) + ) + for user_id, other_user_id in user_id_sets: + txn.call_after( + self.get_users_who_share_room_from_dir.invalidate, + (user_id,), + ) + txn.call_after( + self.get_if_users_share_a_room.invalidate, + (user_id, other_user_id), + ) + return self.runInteraction( + "update_users_who_share_room", _update_users_who_share_room_txn + ) + + def remove_user_who_share_room(self, user_id, other_user_id): + """Deletes entries in the users_who_share_rooms table. The first + user should be a local user. + + Args: + room_id (str) + share_private (bool): Is the room private + user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. 
+ """ + def _remove_user_who_share_room_txn(txn): + self._simple_delete_txn( + txn, + table="users_who_share_rooms", + keyvalues={ + "user_id": user_id, + "other_user_id": other_user_id, + }, + ) + txn.call_after( + self.get_users_who_share_room_from_dir.invalidate, + (user_id,), + ) + txn.call_after( + self.get_if_users_share_a_room.invalidate, + (user_id, other_user_id), + ) + + return self.runInteraction( + "remove_user_who_share_room", _remove_user_who_share_room_txn ) def delete_all_from_user_dir(self): @@ -298,8 +405,11 @@ class UserDirectoryStore(SQLBaseStore): txn.execute("DELETE FROM user_directory") txn.execute("DELETE FROM user_directory_search") txn.execute("DELETE FROM users_in_pubic_room") + txn.execute("DELETE FROM users_who_share_rooms") txn.call_after(self.get_user_in_directory.invalidate_all) txn.call_after(self.get_user_in_public_room.invalidate_all) + txn.call_after(self.get_users_who_share_room_from_dir.invalidate_all) + txn.call_after(self.get_if_users_share_a_room.invalidate_all) return self.runInteraction( "delete_all_from_user_dir", _delete_all_from_user_dir_txn ) From 4564b05483c8568b5435cfc527a73a6a7696fa61 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Jun 2017 10:15:00 +0100 Subject: [PATCH 126/139] Implement updating users who share rooms on the fly --- synapse/handlers/user_directory.py | 148 ++++++++++++++++++++++++----- synapse/storage/user_directory.py | 111 +++++++++++++++++++++- 2 files changed, 235 insertions(+), 24 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 581c078bb..aa8af9517 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -390,12 +390,77 @@ class UserDirectoyHandler(object): room_id ) - if not is_public: - return + if is_public: + row = yield self.store.get_user_in_public_room(user_id) + if not row: + yield self.store.add_users_to_public_room(room_id, [user_id]) - row = yield self.store.get_user_in_public_room(user_id) - if not row: - yield self.store.add_users_to_public_room(room_id, [user_id]) + # Now we update users who share rooms with users. We do this by getting + # all the current users in the room and seeing which aren't already + # marked in the database as sharing with `user_id` + + users_with_profile = yield self.state.get_current_user_in_room(room_id) + + to_insert = set() + to_update = set() + + # First, if they're our user then we need to update for every user + if self.is_mine_id(user_id): + # Returns a map of other_user_id -> shared_private. We only need + # to update mappings if for users that either don't share a room + # already (aren't in the map) or, if the room is private, those that + # only share a public room. 
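+            # e.g. if this room is private and we previously only shared a
+            # public room with them (shared_is_private is False) the pair is
+            # added to to_update; if we have never shared a room with them
+            # (not in the map) it is added to to_insert instead.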
+ user_ids_shared = yield self.store.get_users_who_share_room_from_dir( + user_id + ) + + for other_user_id in users_with_profile: + if user_id == other_user_id: + continue + + shared_is_private = user_ids_shared.get(other_user_id) + if shared_is_private is True: + # We've already marked in the database they share a private room + continue + elif shared_is_private is False: + # They already share a public room, so only update if this is + # a private room + if not is_public: + to_update.add((user_id, other_user_id)) + elif shared_is_private is None: + # This is the first time they both share a room + to_insert.add((user_id, other_user_id)) + + # Next we need to update for every local user in the room + for other_user_id in users_with_profile: + if user_id == other_user_id: + continue + + if self.is_mine_id(other_user_id): + shared_is_private = yield self.store.get_if_users_share_a_room( + other_user_id, user_id, + ) + if shared_is_private is True: + # We've already marked in the database they share a private room + continue + elif shared_is_private is False: + # They already share a public room, so only update if this is + # a private room + if not is_public: + to_update.add((other_user_id, user_id)) + elif shared_is_private is None: + # This is the first time they both share a room + to_insert.add((other_user_id, user_id)) + + if to_insert: + yield self.store.add_users_who_share_room( + room_id, not is_public, to_insert, + ) + + if to_update: + yield self.store.update_users_who_share_room( + room_id, not is_public, to_update, + ) @defer.inlineCallbacks def _handle_remove_user(self, room_id, user_id): @@ -413,32 +478,29 @@ class UserDirectoyHandler(object): row = yield self.store.get_user_in_public_room(user_id) update_user_in_public = row and row["room_id"] == room_id - if not update_user_in_public and not update_user_dir: - return + if (update_user_in_public or update_user_dir): + # XXX: Make this faster? + rooms = yield self.store.get_rooms_for_user(user_id) + for j_room_id in rooms: + if (not update_user_in_public and not update_user_dir): + break - # XXX: Make this faster? - rooms = yield self.store.get_rooms_for_user(user_id) - for j_room_id in rooms: - if not update_user_in_public and not update_user_dir: - break + is_in_room = yield self.store.is_host_joined( + j_room_id, self.server_name, + ) - is_in_room = yield self.store.is_host_joined( - j_room_id, self.server_name, - ) + if not is_in_room: + continue - if not is_in_room: - continue + if update_user_dir: + update_user_dir = False + yield self.store.update_user_in_user_dir(user_id, j_room_id) - if update_user_dir: - update_user_dir = False - yield self.store.update_user_in_user_dir(user_id, j_room_id) - - if update_user_in_public: is_public = yield self.store.is_room_world_readable_or_publicly_joinable( j_room_id ) - if is_public: + if update_user_in_public and is_public: yield self.store.update_user_in_public_user_list(user_id, j_room_id) update_user_in_public = False @@ -447,6 +509,46 @@ class UserDirectoyHandler(object): elif update_user_in_public: yield self.store.remove_from_user_in_public_room(user_id) + # Now handle users_who_share_rooms. 
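+        # For each pair that this room was justifying, we re-check the rooms
+        # the two users still share, preferring a private one, and update or
+        # delete the row in users_who_share_rooms accordingly.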
+ + # Get a list of user tuples that were in the DB due to this room and + # users (this includes tuples where the other user matches `user_id`) + user_tuples = yield self.store.get_users_in_share_dir_with_room_id( + user_id, room_id, + ) + + for user_id, other_user_id in user_tuples: + # For each user tuple get a list of rooms that they still share, + # trying to find a private room, and update the entry in the DB + rooms = yield self.store.get_rooms_in_common_for_users(user_id, other_user_id) + + # If they dont share a room anymore, remove the mapping + if not rooms: + yield self.store.remove_user_who_share_room( + user_id, other_user_id, + ) + continue + + found_public_share = None + for j_room_id in rooms: + is_public = yield self.store.is_room_world_readable_or_publicly_joinable( + j_room_id + ) + + if is_public: + found_public_share = j_room_id + else: + found_public_share = None + yield self.store.update_users_who_share_room( + room_id, not is_public, [(user_id, other_user_id)], + ) + break + + if found_public_share: + yield self.store.update_users_who_share_room( + room_id, not is_public, [(user_id, other_user_id)], + ) + @defer.inlineCallbacks def _handle_profile_change(self, user_id, room_id, prev_event_id, event_id): """Check member event changes for any profile changes and update the diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 0123e28f9..2a17cbc9e 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -273,17 +273,38 @@ class UserDirectoryStore(SQLBaseStore): desc="get_users_in_public_due_to_room", ) + @defer.inlineCallbacks def get_users_in_dir_due_to_room(self, room_id): """Get all user_ids that are in the room directory becuase they're in the given room_id """ - return self._simple_select_onecol( + user_ids_dir = yield self._simple_select_onecol( table="user_directory", keyvalues={"room_id": room_id}, retcol="user_id", desc="get_users_in_dir_due_to_room", ) + user_ids_pub = yield self._simple_select_onecol( + table="users_in_pubic_room", + keyvalues={"room_id": room_id}, + retcol="user_id", + desc="get_users_in_dir_due_to_room", + ) + + user_ids_share = yield self._simple_select_onecol( + table="users_who_share_rooms", + keyvalues={"room_id": room_id}, + retcol="user_id", + desc="get_users_in_dir_due_to_room", + ) + + user_ids = set(user_ids_dir) + user_ids.update(user_ids_pub) + user_ids.update(user_ids_share) + + defer.returnValue(user_ids) + @defer.inlineCallbacks def get_all_rooms(self): """Get all room_ids we've ever known about, in ascending order of "size" @@ -398,6 +419,94 @@ class UserDirectoryStore(SQLBaseStore): "remove_user_who_share_room", _remove_user_who_share_room_txn ) + @cached(max_entries=500000) + def get_if_users_share_a_room(self, user_id, other_user_id): + """Gets if users share a room. + + Args: + user_id (str): Must be a local user_id + other_user_id (str) + + Returns: + bool|None: None if they don't share a room, otherwise whether they + share a private room or not. 
+ """ + return self._simple_select_one_onecol( + table="users_who_share_rooms", + keyvalues={ + "user_id": user_id, + "other_user_id": other_user_id, + }, + retcol="share_private", + allow_none=True, + ) + + @cachedInlineCallbacks(max_entries=500000, iterable=True) + def get_users_who_share_room_from_dir(self, user_id): + """Returns the set of users who share a room with `user_id` + + Args: + user_id(str): Must be a local user + + Returns: + dict: user_id -> share_private mapping + """ + rows = yield self._simple_select_list( + table="users_who_share_rooms", + keyvalues={ + "user_id": user_id, + }, + retcols=("other_user_id", "share_private",), + desc="get_users_who_share_room_with_user", + ) + + defer.returnValue({ + row["other_user_id"]: row["share_private"] + for row in rows + }) + + def get_users_in_share_dir_with_room_id(self, user_id, room_id): + """Get all user tuples that are in the users_who_share_rooms due to the + given room_id. + + Returns: + [(user_id, other_user_id)]: where one of the two will match the given + user_id. + """ + sql = """ + SELECT user_id, other_user_id FROM users_who_share_rooms + WHERE room_id = ? AND (user_id = ? OR other_user_id = ?) + """ + return self._execute( + "get_users_in_share_dir_with_room_id", None, sql, room_id, user_id, user_id + ) + + @defer.inlineCallbacks + def get_rooms_in_common_for_users(self, user_id, other_user_id): + """Given two user_ids find out the list of rooms they share. + """ + sql = """ + SELECT room_id FROM ( + SELECT c.room_id FROM current_state_events AS c + INNER JOIN room_memberships USING (event_id) + WHERE type = 'm.room.member' + AND membership = 'join' + AND state_key = ? + ) AS f1 INNER JOIN ( + SELECT c.room_id FROM current_state_events AS c + INNER JOIN room_memberships USING (event_id) + WHERE type = 'm.room.member' + AND membership = 'join' + AND state_key = ? 
+ ) f2 USING (room_id) + """ + + rows = yield self._execute( + "get_rooms_in_common_for_users", None, sql, user_id, other_user_id + ) + + defer.returnValue([room_id for room_id, in rows]) + def delete_all_from_user_dir(self): """Delete the entire user directory """ From a9d6fa8b2b31096b8f9fdb01b8fb5a2c6386e61f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Jun 2017 10:00:28 +0100 Subject: [PATCH 127/139] Include users who share room with requester in user directory --- synapse/handlers/user_directory.py | 4 +- .../rest/client/v2_alpha/user_directory.py | 8 ++- synapse/storage/user_directory.py | 61 ++++++++++++------- 3 files changed, 47 insertions(+), 26 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index aa8af9517..8928786fd 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -71,7 +71,7 @@ class UserDirectoyHandler(object): # we start populating the user directory self.clock.call_later(0, self.notify_new_event) - def search_users(self, search_term, limit): + def search_users(self, user_id, search_term, limit): """Searches for users in directory Returns: @@ -88,7 +88,7 @@ class UserDirectoyHandler(object): ] } """ - return self.store.search_user_dir(search_term, limit) + return self.store.search_user_dir(user_id, search_term, limit) @defer.inlineCallbacks def notify_new_event(self): diff --git a/synapse/rest/client/v2_alpha/user_directory.py b/synapse/rest/client/v2_alpha/user_directory.py index 17d3dffc8..6e012da4a 100644 --- a/synapse/rest/client/v2_alpha/user_directory.py +++ b/synapse/rest/client/v2_alpha/user_directory.py @@ -55,7 +55,9 @@ class UserDirectorySearchRestServlet(RestServlet): ] } """ - yield self.auth.get_user_by_req(request, allow_guest=False) + requester = yield self.auth.get_user_by_req(request, allow_guest=False) + user_id = requester.user.to_string() + body = parse_json_object_from_request(request) limit = body.get("limit", 10) @@ -66,7 +68,9 @@ class UserDirectorySearchRestServlet(RestServlet): except: raise SynapseError(400, "`search_term` is required field") - results = yield self.user_directory_handler.search_users(search_term, limit) + results = yield self.user_directory_handler.search_users( + user_id, search_term, limit, + ) defer.returnValue((200, results)) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 2a17cbc9e..52b184fe7 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -611,7 +611,7 @@ class UserDirectoryStore(SQLBaseStore): ) @defer.inlineCallbacks - def search_user_dir(self, search_term, limit): + def search_user_dir(self, user_id, search_term, limit): """Searches for users in directory Returns: @@ -637,46 +637,63 @@ class UserDirectoryStore(SQLBaseStore): # The array of numbers are the weights for the various part of the # search: (domain, _, display name, localpart) sql = """ - SELECT user_id, display_name, avatar_url + SELECT d.user_id, display_name, avatar_url FROM user_directory_search - INNER JOIN user_directory USING (user_id) - INNER JOIN users_in_pubic_room USING (user_id) - WHERE vector @@ to_tsquery('english', ?) + INNER JOIN user_directory AS d USING (user_id) + LEFT JOIN users_in_pubic_room AS p USING (user_id) + LEFT JOIN ( + SELECT other_user_id AS user_id FROM users_who_share_rooms + WHERE user_id = ? AND share_private + ) AS s USING (user_id) + WHERE + (s.user_id IS NOT NULL OR p.user_id IS NOT NULL) + AND vector @@ to_tsquery('english', ?) 
ORDER BY - 2 * ts_rank_cd( - '{0.1, 0.1, 0.9, 1.0}', - vector, - to_tsquery('english', ?), - 8 - ) - + ts_rank_cd( - '{0.1, 0.1, 0.9, 1.0}', - vector, - to_tsquery('english', ?), - 8 + (CASE WHEN s.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END) + * (CASE WHEN display_name IS NOT NULL THEN 1.2 ELSE 1.0 END) + * (CASE WHEN avatar_url IS NOT NULL THEN 1.2 ELSE 1.0 END) + * ( + 3 * ts_rank_cd( + '{0.1, 0.1, 0.9, 1.0}', + vector, + to_tsquery('english', ?), + 8 + ) + + ts_rank_cd( + '{0.1, 0.1, 0.9, 1.0}', + vector, + to_tsquery('english', ?), + 8 + ) ) DESC, display_name IS NULL, avatar_url IS NULL LIMIT ? """ - args = (full_query, exact_query, prefix_query, limit + 1,) + args = (user_id, full_query, exact_query, prefix_query, limit + 1,) elif isinstance(self.database_engine, Sqlite3Engine): search_query = _parse_query_sqlite(search_term) sql = """ - SELECT user_id, display_name, avatar_url + SELECT d.user_id, display_name, avatar_url FROM user_directory_search - INNER JOIN user_directory USING (user_id) - INNER JOIN users_in_pubic_room USING (user_id) - WHERE value MATCH ? + INNER JOIN user_directory AS d USING (user_id) + LEFT JOIN users_in_pubic_room AS p USING (user_id) + LEFT JOIN ( + SELECT other_user_id AS user_id FROM users_who_share_rooms + WHERE user_id = ? AND share_private + ) AS s USING (user_id) + WHERE + (s.user_id IS NOT NULL OR p.user_id IS NOT NULL) + AND value MATCH ? ORDER BY rank(matchinfo(user_directory_search)) DESC, display_name IS NULL, avatar_url IS NULL LIMIT ? """ - args = (search_query, limit + 1) + args = (user_id, search_query, limit + 1) else: # This should be unreachable. raise Exception("Unrecognized database engine") From 5ddd199870e4ecaa60541779f8aa64bce4ca95cd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Jun 2017 10:49:10 +0100 Subject: [PATCH 128/139] Typo --- synapse/app/homeserver.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 6af8259be..081e7cce5 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -415,7 +415,9 @@ def run(hs): stats["timestamp"] = now stats["uptime_seconds"] = uptime stats["total_users"] = yield hs.get_datastore().count_all_users() - stats["total_users"] = yield hs.get_datastore().count_nonbridged_users() + + total_nonbridged_users = yield hs.get_datastore().count_nonbridged_users() + stats["total_nonbridged_users"] = total_nonbridged_users room_count = yield hs.get_datastore().get_room_count() stats["total_room_count"] = room_count From 1ff419d34317c3141f378750d051def05ce9341c Mon Sep 17 00:00:00 2001 From: Krombel Date: Fri, 16 Jun 2017 11:17:10 +0200 Subject: [PATCH 129/139] allow Authorization header which handling got implemented in #1098 Signed-off-by: Matthias Kesler --- synapse/http/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 14715878c..7ef3d526b 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -412,7 +412,7 @@ def set_cors_headers(request): ) request.setHeader( "Access-Control-Allow-Headers", - "Origin, X-Requested-With, Content-Type, Accept" + "Origin, X-Requested-With, Content-Type, Accept, Authorization" ) From 6aa5bc86351a617546f0adacfebab3388716be3f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Jun 2017 12:47:05 +0100 Subject: [PATCH 130/139] Initial worker impl --- synapse/app/federation_sender.py | 2 +- synapse/app/user_dir.py | 270 +++++++++++++++++++++++++++++ synapse/config/server.py | 4 
+ synapse/handlers/user_directory.py | 19 +- synapse/replication/tcp/streams.py | 22 +++ synapse/storage/events.py | 18 ++ 6 files changed, 328 insertions(+), 7 deletions(-) create mode 100644 synapse/app/user_dir.py diff --git a/synapse/app/federation_sender.py b/synapse/app/federation_sender.py index e51a69074..03327dc47 100644 --- a/synapse/app/federation_sender.py +++ b/synapse/app/federation_sender.py @@ -51,7 +51,7 @@ import sys import logging import gc -logger = logging.getLogger("synapse.app.appservice") +logger = logging.getLogger("synapse.app.federation_sender") class FederationSenderSlaveStore( diff --git a/synapse/app/user_dir.py b/synapse/app/user_dir.py new file mode 100644 index 000000000..9d8edaa8e --- /dev/null +++ b/synapse/app/user_dir.py @@ -0,0 +1,270 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright 2017 Vector Creations Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import synapse + +from synapse.server import HomeServer +from synapse.config._base import ConfigError +from synapse.config.logger import setup_logging +from synapse.config.homeserver import HomeServerConfig +from synapse.crypto import context_factory +from synapse.http.site import SynapseSite +from synapse.http.server import JsonResource +from synapse.metrics.resource import MetricsResource, METRICS_PREFIX +from synapse.replication.slave.storage._base import BaseSlavedStore +from synapse.replication.slave.storage.events import SlavedEventStore +from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore +from synapse.replication.slave.storage.registration import SlavedRegistrationStore +from synapse.replication.tcp.client import ReplicationClientHandler +from synapse.rest.client.v2_alpha import user_directory +from synapse.storage.engines import create_engine +from synapse.storage.client_ips import ClientIpStore +from synapse.storage.user_directory import UserDirectoryStore +from synapse.util.httpresourcetree import create_resource_tree +from synapse.util.logcontext import LoggingContext, PreserveLoggingContext, preserve_fn +from synapse.util.manhole import manhole +from synapse.util.rlimit import change_resource_limit +from synapse.util.versionstring import get_version_string +from synapse.util.caches.stream_change_cache import StreamChangeCache + +from synapse import events + +from twisted.internet import reactor +from twisted.web.resource import Resource + +from daemonize import Daemonize + +import sys +import logging +import gc + +logger = logging.getLogger("synapse.app.user_dir") + + +class UserDirectorySlaveStore( + SlavedEventStore, + SlavedApplicationServiceStore, + SlavedRegistrationStore, + UserDirectoryStore, + BaseSlavedStore, + ClientIpStore, # After BaseSlavedStore because the constructor is different +): + def __init__(self, db_conn, hs): + super(UserDirectorySlaveStore, self).__init__(db_conn, hs) + + events_max = self._stream_id_gen.get_current_token() + curr_state_delta_prefill, min_curr_state_delta_id = self._get_cache_dict( + db_conn, 
"current_state_delta_stream", + entity_column="room_id", + stream_column="stream_id", + max_value=events_max, # As we share the stream id with events token + limit=1000, + ) + self._curr_state_delta_stream_cache = StreamChangeCache( + "_curr_state_delta_stream_cache", min_curr_state_delta_id, + prefilled_cache=curr_state_delta_prefill, + ) + + self._current_state_delta_pos = events_max + + def stream_positions(self): + result = super(UserDirectorySlaveStore, self).stream_positions() + result["current_state_deltas"] = self._current_state_delta_pos + return result + + def process_replication_rows(self, stream_name, token, rows): + if stream_name == "current_state_deltas": + self._current_state_delta_pos = token + for row in rows: + self._curr_state_delta_stream_cache.entity_has_changed( + row.room_id, token + ) + return super(UserDirectorySlaveStore, self).process_replication_rows( + stream_name, token, rows + ) + + +class UserDirectoryServer(HomeServer): + def get_db_conn(self, run_new_connection=True): + # Any param beginning with cp_ is a parameter for adbapi, and should + # not be passed to the database engine. + db_params = { + k: v for k, v in self.db_config.get("args", {}).items() + if not k.startswith("cp_") + } + db_conn = self.database_engine.module.connect(**db_params) + + if run_new_connection: + self.database_engine.on_new_connection(db_conn) + return db_conn + + def setup(self): + logger.info("Setting up.") + self.datastore = UserDirectorySlaveStore(self.get_db_conn(), self) + logger.info("Finished setting up.") + + def _listen_http(self, listener_config): + port = listener_config["port"] + bind_addresses = listener_config["bind_addresses"] + site_tag = listener_config.get("tag", port) + resources = {} + for res in listener_config["resources"]: + for name in res["names"]: + if name == "metrics": + resources[METRICS_PREFIX] = MetricsResource(self) + elif name == "client": + resource = JsonResource(self, canonical_json=False) + user_directory.register_servlets(self, resource) + resources.update({ + "/_matrix/client/r0": resource, + "/_matrix/client/unstable": resource, + "/_matrix/client/v2_alpha": resource, + "/_matrix/client/api/v1": resource, + }) + + root_resource = create_resource_tree(resources, Resource()) + + for address in bind_addresses: + reactor.listenTCP( + port, + SynapseSite( + "synapse.access.http.%s" % (site_tag,), + site_tag, + listener_config, + root_resource, + ), + interface=address + ) + + logger.info("Synapse user_dir now listening on port %d", port) + + def start_listening(self, listeners): + for listener in listeners: + if listener["type"] == "http": + self._listen_http(listener) + elif listener["type"] == "manhole": + bind_addresses = listener["bind_addresses"] + + for address in bind_addresses: + reactor.listenTCP( + listener["port"], + manhole( + username="matrix", + password="rabbithole", + globals={"hs": self}, + ), + interface=address + ) + else: + logger.warn("Unrecognized listener type: %s", listener["type"]) + + self.get_tcp_replication().start_replication(self) + + def build_tcp_replication(self): + return UserDirectoryReplicationHandler(self) + + +class UserDirectoryReplicationHandler(ReplicationClientHandler): + def __init__(self, hs): + super(UserDirectoryReplicationHandler, self).__init__(hs.get_datastore()) + self.user_directory = hs.get_user_directory_handler() + + def on_rdata(self, stream_name, token, rows): + super(UserDirectoryReplicationHandler, self).on_rdata( + stream_name, token, rows + ) + if stream_name == "current_state_deltas": 
+ preserve_fn(self.user_directory.notify_new_event)() + + +def start(config_options): + try: + config = HomeServerConfig.load_config( + "Synapse user directory", config_options + ) + except ConfigError as e: + sys.stderr.write("\n" + e.message + "\n") + sys.exit(1) + + assert config.worker_app == "synapse.app.user_dir" + + setup_logging(config, use_worker_options=True) + + events.USE_FROZEN_DICTS = config.use_frozen_dicts + + database_engine = create_engine(config.database_config) + + if config.update_user_directory: + sys.stderr.write( + "\nThe update_user_directory must be disabled in the main synapse process" + "\nbefore they can be run in a separate worker." + "\nPlease add ``update_user_directory: false`` to the main config" + "\n" + ) + sys.exit(1) + + # Force the pushers to start since they will be disabled in the main config + config.update_user_directory = True + + tls_server_context_factory = context_factory.ServerContextFactory(config) + + ps = UserDirectoryServer( + config.server_name, + db_config=config.database_config, + tls_server_context_factory=tls_server_context_factory, + config=config, + version_string="Synapse/" + get_version_string(synapse), + database_engine=database_engine, + ) + + ps.setup() + ps.start_listening(config.worker_listeners) + + def run(): + # make sure that we run the reactor with the sentinel log context, + # otherwise other PreserveLoggingContext instances will get confused + # and complain when they see the logcontext arbitrarily swapping + # between the sentinel and `run` logcontexts. + with PreserveLoggingContext(): + logger.info("Running") + change_resource_limit(config.soft_file_limit) + if config.gc_thresholds: + gc.set_threshold(*config.gc_thresholds) + reactor.run() + + def start(): + ps.get_datastore().start_profiling() + ps.get_state_handler().start_caching() + + reactor.callWhenRunning(start) + + if config.worker_daemonize: + daemon = Daemonize( + app="synapse-user-dir", + pid=config.worker_pid_file, + action=run, + auto_close_fds=False, + verbose=True, + logger=logger, + ) + daemon.start() + else: + run() + + +if __name__ == '__main__': + with LoggingContext("main"): + start(sys.argv[1:]) diff --git a/synapse/config/server.py b/synapse/config/server.py index 3910b9dc3..28b4e5f50 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -35,6 +35,10 @@ class ServerConfig(Config): # "disable" federation self.send_federation = config.get("send_federation", True) + # Whether to update the user directory or not. This should be set to + # false only if we are updating the user directory in a worker + self.update_user_directory = config.get("update_user_directory", True) + self.filter_timeline_limit = config.get("filter_timeline_limit", -1) if self.public_baseurl is not None: diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 8928786fd..d33a20a1f 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -50,8 +50,7 @@ class UserDirectoyHandler(object): self.clock = hs.get_clock() self.notifier = hs.get_notifier() self.is_mine_id = hs.is_mine_id - - self.notifier.add_replication_callback(self.notify_new_event) + self.update_user_directory = hs.config.update_user_directory # When start up for the first time we need to populate the user_directory. 
# This is a set of user_id's we've inserted already @@ -67,9 +66,12 @@ class UserDirectoyHandler(object): # Guard to ensure we only process deltas one at a time self._is_processing = False - # We kick this off so that we don't have to wait for a change before - # we start populating the user directory - self.clock.call_later(0, self.notify_new_event) + if self.update_user_directory: + self.notifier.add_replication_callback(self.notify_new_event) + + # We kick this off so that we don't have to wait for a change before + # we start populating the user directory + self.clock.call_later(0, self.notify_new_event) def search_users(self, user_id, search_term, limit): """Searches for users in directory @@ -94,6 +96,9 @@ class UserDirectoyHandler(object): def notify_new_event(self): """Called when there may be more deltas to process """ + if not self.update_user_directory: + return + if self._is_processing: return @@ -324,7 +329,7 @@ class UserDirectoyHandler(object): event_id (str|None): The new event after the state change typ (str): Type of the event """ - logger.debug("Handling change for %s", typ) + logger.debug("Handling change for %s: %s", typ, room_id) if typ == EventTypes.RoomHistoryVisibility: change = yield self._get_key_change( @@ -394,6 +399,8 @@ class UserDirectoyHandler(object): row = yield self.store.get_user_in_public_room(user_id) if not row: yield self.store.add_users_to_public_room(room_id, [user_id]) + else: + logger.debug("Not adding user to public dir, %r", user_id) # Now we update users who share rooms with users. We do this by getting # all the current users in the room and seeing which aren't already diff --git a/synapse/replication/tcp/streams.py b/synapse/replication/tcp/streams.py index 369d5f242..fbafe12cc 100644 --- a/synapse/replication/tcp/streams.py +++ b/synapse/replication/tcp/streams.py @@ -112,6 +112,12 @@ AccountDataStreamRow = namedtuple("AccountDataStream", ( "data_type", # str "data", # dict )) +CurrentStateDeltaStreamRow = namedtuple("CurrentStateDeltaStream", ( + "room_id", # str + "type", # str + "state_key", # str + "event_id", # str, optional +)) class Stream(object): @@ -443,6 +449,21 @@ class AccountDataStream(Stream): defer.returnValue(results) +class CurrentStateDeltaStream(Stream): + """Current state for a room was changed + """ + NAME = "current_state_deltas" + ROW_TYPE = CurrentStateDeltaStreamRow + + def __init__(self, hs): + store = hs.get_datastore() + + self.current_token = store.get_max_current_state_delta_stream_id + self.update_function = store.get_all_updated_current_state_deltas + + super(CurrentStateDeltaStream, self).__init__(hs) + + STREAMS_MAP = { stream.NAME: stream for stream in ( @@ -460,5 +481,6 @@ STREAMS_MAP = { FederationStream, TagAccountDataStream, AccountDataStream, + CurrentStateDeltaStream, ) } diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 72ce84b0b..90041b0da 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2284,6 +2284,24 @@ class EventsStore(SQLBaseStore): defer.returnValue((int(res["topological_ordering"]), int(res["stream_ordering"]))) + def get_max_current_state_delta_stream_id(self): + return self._stream_id_gen.get_current_token() + + def get_all_updated_current_state_deltas(self, from_token, to_token, limit): + def get_all_updated_current_state_deltas_txn(txn): + sql = """ + SELECT stream_id, room_id, type, state_key, event_id + FROM current_state_delta_stream + WHERE ? < stream_id AND stream_id <= ? + ORDER BY stream_id ASC LIMIT ? 
+ """ + txn.execute(sql, (from_token, to_token, limit)) + return txn.fetchall() + return self.runInteraction( + "get_all_updated_current_state_deltas", + get_all_updated_current_state_deltas_txn, + ) + AllNewEventsResult = namedtuple("AllNewEventsResult", [ "new_forward_events", "new_backfill_events", From 776a0704212d9f709093af70c79e7c04ace9cdc1 Mon Sep 17 00:00:00 2001 From: Matthew Date: Fri, 16 Jun 2017 20:24:14 +0100 Subject: [PATCH 131/139] fix synapse_port script --- scripts/synapse_port_db | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db index 2e5d66670..6782b663e 100755 --- a/scripts/synapse_port_db +++ b/scripts/synapse_port_db @@ -121,7 +121,7 @@ class Store(object): try: txn = conn.cursor() return func( - LoggingTransaction(txn, desc, self.database_engine, []), + LoggingTransaction(txn, desc, self.database_engine, [], []), *args, **kwargs ) except self.database_engine.module.DatabaseError as e: From 3fafb7b1893075f6a8ffabb191192395bb367710 Mon Sep 17 00:00:00 2001 From: Matthew Date: Fri, 16 Jun 2017 20:51:19 +0100 Subject: [PATCH 132/139] add missing boolean to synapse_port_db --- scripts/synapse_port_db | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db index 6782b663e..7d158a46a 100755 --- a/scripts/synapse_port_db +++ b/scripts/synapse_port_db @@ -41,6 +41,7 @@ BOOLEAN_COLUMNS = { "presence_stream": ["currently_active"], "public_room_list_stream": ["visibility"], "device_lists_outbound_pokes": ["sent"], + "users_who_share_rooms": ["share_private"], } From d7fe6b356c5b74ffc5681f85a0d6100f4b4f2295 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 19 Jun 2017 12:36:28 +0100 Subject: [PATCH 133/139] Add shutdown room API --- synapse/handlers/federation.py | 4 ++ synapse/handlers/room_member.py | 5 ++ synapse/rest/client/v1/admin.py | 67 ++++++++++++++++++- synapse/storage/directory.py | 14 ++++ synapse/storage/room.py | 24 +++++++ .../storage/schema/delta/43/blocked_rooms.sql | 21 ++++++ 6 files changed, 134 insertions(+), 1 deletion(-) create mode 100644 synapse/storage/schema/delta/43/blocked_rooms.sql diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 39d2bee8d..f7ae369a1 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1068,6 +1068,10 @@ class FederationHandler(BaseHandler): """ event = pdu + is_blocked = yield self.store.is_room_blocked(event.room_id) + if is_blocked: + raise SynapseError(403, "This room has been blocked on this server") + event.internal_metadata.outlier = True event.internal_metadata.invite_from_remote = True diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 1ca88517a..c0d9c0836 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -203,6 +203,11 @@ class RoomMemberHandler(BaseHandler): if not remote_room_hosts: remote_room_hosts = [] + if effective_membership_state not in ("leave", "ban",): + is_blocked = yield self.store.is_room_blocked(room_id) + if is_blocked: + raise SynapseError(403, "This room has been blocked on this server") + latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) current_state_ids = yield self.state_handler.get_current_state_ids( room_id, latest_event_ids=latest_event_ids, diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 29fcd7237..086f7a098 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py 
@@ -15,8 +15,9 @@ from twisted.internet import defer +from synapse.api.constants import Membership from synapse.api.errors import AuthError, SynapseError -from synapse.types import UserID +from synapse.types import UserID, create_requester from synapse.http.servlet import parse_json_object_from_request from .base import ClientV1RestServlet, client_path_patterns @@ -157,6 +158,69 @@ class DeactivateAccountRestServlet(ClientV1RestServlet): defer.returnValue((200, {})) +class ShutdownRoomRestServlet(ClientV1RestServlet): + """Shuts down a room by removing all local users from the room and blocking + all future invites and joins to the room. Any local aliases will be repointed + to a given room id. + """ + PATTERNS = client_path_patterns("/admin/shutdown_room/(?P[^/]+)") + + def __init__(self, hs): + super(ShutdownRoomRestServlet, self).__init__(hs) + self.store = hs.get_datastore() + self.handlers = hs.get_handlers() + self.state = hs.get_state_handler() + + @defer.inlineCallbacks + def on_POST(self, request, room_id): + requester = yield self.auth.get_user_by_req(request) + is_admin = yield self.auth.is_server_admin(requester.user) + if not is_admin: + raise AuthError(403, "You are not a server admin") + + content = parse_json_object_from_request(request) + + repoint_aliases_to_room_id = content.get("repoint_aliases_to_room_id") + if not repoint_aliases_to_room_id: + raise SynapseError(400, "Please provide field `repoint_aliases_to_room_id`") + + requester_user_id = requester.user.to_string() + + logger.info("Shutting down room %r", room_id) + + yield self.store.block_room(room_id, requester_user_id) + + users = yield self.state.get_current_user_in_room(room_id) + kicked_users = [] + for user_id in users: + if not self.hs.is_mine_id(user_id): + continue + + logger.info("Kicking %r from %r...", user_id, room_id) + + target_requester = create_requester(user_id) + yield self.handlers.room_member_handler.update_membership( + requester=target_requester, + target=target_requester.user, + room_id=room_id, + action=Membership.LEAVE, + content={}, + ) + + kicked_users.append(user_id) + + aliases_for_room = yield self.store.get_aliases_for_room(room_id) + + yield self.store.update_aliases_for_room( + room_id, repoint_aliases_to_room_id, requester_user_id + ) + + defer.returnValue((200, { + "kicked_users": kicked_users, + "local_aliases": aliases_for_room, + })) + + class ResetPasswordRestServlet(ClientV1RestServlet): """Post request to allow an administrator reset password for a user. This need a user have a administrator access in Synapse. @@ -353,3 +417,4 @@ def register_servlets(hs, http_server): ResetPasswordRestServlet(hs).register(http_server) GetUsersPaginatedRestServlet(hs).register(http_server) SearchUsersRestServlet(hs).register(http_server) + ShutdownRoomRestServlet(hs).register(http_server) diff --git a/synapse/storage/directory.py b/synapse/storage/directory.py index 9caaf81f2..79e7c540a 100644 --- a/synapse/storage/directory.py +++ b/synapse/storage/directory.py @@ -170,3 +170,17 @@ class DirectoryStore(SQLBaseStore): "room_alias", desc="get_aliases_for_room", ) + + def update_aliases_for_room(self, old_room_id, new_room_id, creator): + def _update_aliases_for_room_txn(txn): + sql = "UPDATE room_aliases SET room_id = ?, creator = ? WHERE room_id = ?" 
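+            # Repoints every alias currently targeting old_room_id at
+            # new_room_id, recording `creator` against each repointed alias.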
+ txn.execute(sql, (new_room_id, creator, old_room_id,)) + self._invalidate_cache_and_stream( + txn, self.get_aliases_for_room, (old_room_id,) + ) + self._invalidate_cache_and_stream( + txn, self.get_aliases_for_room, (new_room_id,) + ) + return self.runInteraction( + "_update_aliases_for_room_txn", _update_aliases_for_room_txn + ) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 5d543652b..07366f66b 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -507,3 +507,27 @@ class RoomStore(SQLBaseStore): )) else: defer.returnValue(None) + + @cached(max_entries=10000) + def is_room_blocked(self, room_id): + return self._simple_select_one_onecol( + table="blocked_rooms", + keyvalues={ + "room_id": room_id, + }, + retcol="1", + allow_none=True, + desc="is_room_blocked", + ) + + @defer.inlineCallbacks + def block_room(self, room_id, user_id): + yield self._simple_insert( + table="blocked_rooms", + values={ + "room_id": room_id, + "user_id": user_id, + }, + desc="block_room", + ) + self.is_room_blocked.invalidate((room_id,)) diff --git a/synapse/storage/schema/delta/43/blocked_rooms.sql b/synapse/storage/schema/delta/43/blocked_rooms.sql new file mode 100644 index 000000000..0e3cd143f --- /dev/null +++ b/synapse/storage/schema/delta/43/blocked_rooms.sql @@ -0,0 +1,21 @@ +/* Copyright 2017 Vector Creations Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CREATE TABLE blocked_rooms ( + room_id TEXT NOT NULL, + user_id TEXT NOT NULL -- Admin who blocked the room +); + +CREATE UNIQUE INDEX blocked_rooms_idx ON blocked_rooms(room_id); From 5db7070dd1585dabc7dc09cd31147c926bd18a87 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 19 Jun 2017 12:40:29 +0100 Subject: [PATCH 134/139] Forget room --- synapse/rest/client/v1/admin.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 086f7a098..f2e1b56c7 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -207,6 +207,8 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): content={}, ) + yield self.handlers.room_member_handler.forget(target_requester.user, room_id) + kicked_users.append(user_id) aliases_for_room = yield self.store.get_aliases_for_room(room_id) From b490299a3b588ad35874e556eab3ad4e707d8bf4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 19 Jun 2017 14:10:13 +0100 Subject: [PATCH 135/139] Change to create new room and join other users --- synapse/handlers/room.py | 21 +++++++++--- synapse/rest/client/v1/admin.py | 59 +++++++++++++++++++++++++++++---- 2 files changed, 70 insertions(+), 10 deletions(-) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index d2a0d6520..5698d2808 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -61,7 +61,7 @@ class RoomCreationHandler(BaseHandler): } @defer.inlineCallbacks - def create_room(self, requester, config): + def create_room(self, requester, config, ratelimit=True): """ Creates a new room. 
Args: @@ -75,7 +75,8 @@ class RoomCreationHandler(BaseHandler): """ user_id = requester.user.to_string() - yield self.ratelimit(requester) + if ratelimit: + yield self.ratelimit(requester) if "room_alias_name" in config: for wchar in string.whitespace: @@ -167,6 +168,7 @@ class RoomCreationHandler(BaseHandler): initial_state=initial_state, creation_content=creation_content, room_alias=room_alias, + power_level_content_override=config.get("power_level_content_override", {}) ) if "name" in config: @@ -245,7 +247,8 @@ class RoomCreationHandler(BaseHandler): invite_list, initial_state, creation_content, - room_alias + room_alias, + power_level_content_override, ): def create(etype, content, **kwargs): e = { @@ -291,7 +294,15 @@ class RoomCreationHandler(BaseHandler): ratelimit=False, ) - if (EventTypes.PowerLevels, '') not in initial_state: + # We treat the power levels override specially as this needs to be one + # of the first events that get sent into a room. + pl_content = initial_state.pop((EventTypes.PowerLevels, ''), None) + if pl_content is not None: + yield send( + etype=EventTypes.PowerLevels, + content=pl_content, + ) + else: power_level_content = { "users": { creator_id: 100, @@ -316,6 +327,8 @@ class RoomCreationHandler(BaseHandler): for invitee in invite_list: power_level_content["users"][invitee] = 100 + power_level_content.update(power_level_content_override) + yield send( etype=EventTypes.PowerLevels, content=power_level_content, diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index f2e1b56c7..d62418782 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -15,7 +15,7 @@ from twisted.internet import defer -from synapse.api.constants import Membership +from synapse.api.constants import Membership, EventTypes from synapse.api.errors import AuthError, SynapseError from synapse.types import UserID, create_requester from synapse.http.servlet import parse_json_object_from_request @@ -161,10 +161,16 @@ class DeactivateAccountRestServlet(ClientV1RestServlet): class ShutdownRoomRestServlet(ClientV1RestServlet): """Shuts down a room by removing all local users from the room and blocking all future invites and joins to the room. Any local aliases will be repointed - to a given room id. + to a new room created by `new_room_user_id` and kicked users will be auto + joined to the new room. """ PATTERNS = client_path_patterns("/admin/shutdown_room/(?P[^/]+)") + DEFAULT_MESSAGE = ( + "Sharing illegal content on this server is not permitted and rooms in" + " violatation will be blocked." 
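+ # Callers may override this default via the optional `message` field of the
+ # POST body; the new room's name can likewise be set via `room_name`.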
+ ) + def __init__(self, hs): super(ShutdownRoomRestServlet, self).__init__(hs) self.store = hs.get_datastore() @@ -180,9 +186,39 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): content = parse_json_object_from_request(request) - repoint_aliases_to_room_id = content.get("repoint_aliases_to_room_id") - if not repoint_aliases_to_room_id: - raise SynapseError(400, "Please provide field `repoint_aliases_to_room_id`") + new_room_user_id = content.get("new_room_user_id") + if not new_room_user_id: + raise SynapseError(400, "Please provide field `new_room_user_id`") + + room_creator_requester = create_requester(new_room_user_id) + + message = content.get("message", self.DEFAULT_MESSAGE) + room_name = content.get("room_name", "Content Violation Notification") + + info = yield self.handlers.room_creation_handler.create_room( + room_creator_requester, + config={ + "preset": "public_chat", + "name": room_name, + "power_level_content_override": { + "users_default": -10, + }, + }, + ratelimit=False, + ) + new_room_id = info["room_id"] + + msg_handler = self.handlers.message_handler + yield msg_handler.create_and_send_nonmember_event( + room_creator_requester, + { + "type": "m.room.message", + "content": {"body": message, "msgtype": "m.text"}, + "room_id": new_room_id, + "sender": new_room_user_id, + }, + ratelimit=False, + ) requester_user_id = requester.user.to_string() @@ -205,21 +241,32 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): room_id=room_id, action=Membership.LEAVE, content={}, + ratelimit=False ) yield self.handlers.room_member_handler.forget(target_requester.user, room_id) + yield self.handlers.room_member_handler.update_membership( + requester=target_requester, + target=target_requester.user, + room_id=new_room_id, + action=Membership.JOIN, + content={}, + ratelimit=False + ) + kicked_users.append(user_id) aliases_for_room = yield self.store.get_aliases_for_room(room_id) yield self.store.update_aliases_for_room( - room_id, repoint_aliases_to_room_id, requester_user_id + room_id, new_room_id, requester_user_id ) defer.returnValue((200, { "kicked_users": kicked_users, "local_aliases": aliases_for_room, + "new_room_id": new_room_id, })) From 36e51aad3c5f2477bdfe0ca77e09e40d7c9b6291 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 19 Jun 2017 14:42:21 +0100 Subject: [PATCH 136/139] Remove unused import --- synapse/rest/client/v1/admin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index d62418782..aaa3dffb1 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -15,7 +15,7 @@ from twisted.internet import defer -from synapse.api.constants import Membership, EventTypes +from synapse.api.constants import Membership from synapse.api.errors import AuthError, SynapseError from synapse.types import UserID, create_requester from synapse.http.servlet import parse_json_object_from_request From e5ae386ea4112ec91b47de339a3c8a4e034898c0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 19 Jun 2017 16:07:54 +0100 Subject: [PATCH 137/139] Handle all cases of sending membership events --- synapse/handlers/room_member.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index c0d9c0836..b3f979b24 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -374,6 +374,11 @@ class RoomMemberHandler(BaseHandler): # so don't really fit into the general auth process. 
raise AuthError(403, "Guest access not allowed") + if event.membership not in (Membership.LEAVE, Membership.BAN): + is_blocked = yield self.store.is_room_blocked(room_id) + if is_blocked: + raise SynapseError(403, "This room has been blocked on this server") + yield message_handler.handle_new_client_event( requester, event, From b8b936a6eab46cec2460fb723124bb3a750d3c83 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 19 Jun 2017 17:39:21 +0100 Subject: [PATCH 138/139] Add API to quarantine media --- synapse/rest/client/v1/admin.py | 25 +++++++ synapse/rest/media/v1/download_resource.py | 2 +- synapse/rest/media/v1/media_repository.py | 2 + synapse/rest/media/v1/thumbnail_resource.py | 4 +- synapse/storage/media_repository.py | 4 +- synapse/storage/room.py | 70 +++++++++++++++++++ .../schema/delta/43/quarantine_media.sql | 17 +++++ 7 files changed, 119 insertions(+), 5 deletions(-) create mode 100644 synapse/storage/schema/delta/43/quarantine_media.sql diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index aaa3dffb1..7d786e8de 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -270,6 +270,30 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): })) +class QuarantineMediaInRoom(ClientV1RestServlet): + """Quarantines all media in a room so that no one can download it via + this server. + """ + PATTERNS = client_path_patterns("/admin/quarantine_media/(?P[^/]+)") + + def __init__(self, hs): + super(QuarantineMediaInRoom, self).__init__(hs) + self.store = hs.get_datastore() + + @defer.inlineCallbacks + def on_POST(self, request, room_id): + requester = yield self.auth.get_user_by_req(request) + is_admin = yield self.auth.is_server_admin(requester.user) + if not is_admin: + raise AuthError(403, "You are not a server admin") + + num_quarantined = yield self.store.quarantine_media_ids_in_room( + room_id, requester.user.to_string(), + ) + + defer.returnValue((200, {"num_quarantined": num_quarantined})) + + class ResetPasswordRestServlet(ClientV1RestServlet): """Post request to allow an administrator reset password for a user. This need a user have a administrator access in Synapse. 
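Usage sketch (not part of the patches): the two admin endpoints added in this series can be driven with any HTTP client. The snippet below assumes a made-up homeserver URL and admin access token, the third-party `requests` library, and the `/_matrix/client/api/v1` prefix that `client_path_patterns` normally maps these servlets to; adjust all of these for a real deployment.

# Illustrative only -- URLs, token and room/user IDs below are placeholders.
import requests

BASE = "https://homeserver.example.com/_matrix/client/api/v1"  # assumed prefix
ADMIN_TOKEN = "ADMIN_ACCESS_TOKEN"  # must belong to a server admin

def shutdown_room(room_id, new_room_user_id):
    # Kicks local users, blocks the room, repoints its aliases at a freshly
    # created "Content Violation" room and joins the kicked users to it.
    resp = requests.post(
        "%s/admin/shutdown_room/%s" % (BASE, room_id),
        params={"access_token": ADMIN_TOKEN},
        json={
            "new_room_user_id": new_room_user_id,           # required
            "room_name": "Content Violation Notification",  # optional
            "message": "This room has been shut down.",     # optional
        },
    )
    resp.raise_for_status()
    # -> {"kicked_users": [...], "local_aliases": [...], "new_room_id": "..."}
    return resp.json()

def quarantine_media(room_id):
    # Marks every piece of media referenced by events in the room as
    # quarantined so it can no longer be downloaded from this server.
    resp = requests.post(
        "%s/admin/quarantine_media/%s" % (BASE, room_id),
        params={"access_token": ADMIN_TOKEN},
        json={},
    )
    resp.raise_for_status()
    return resp.json()  # -> {"num_quarantined": N}

if __name__ == "__main__":
    print(shutdown_room("!badroom:example.com", "@abuse:example.com"))
    print(quarantine_media("!badroom:example.com"))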
@@ -467,3 +491,4 @@ def register_servlets(hs, http_server): GetUsersPaginatedRestServlet(hs).register(http_server) SearchUsersRestServlet(hs).register(http_server) ShutdownRoomRestServlet(hs).register(http_server) + QuarantineMediaInRoom(hs).register(http_server) diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index 6788375e8..39a286b83 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -66,7 +66,7 @@ class DownloadResource(Resource): @defer.inlineCallbacks def _respond_local_file(self, request, media_id, name): media_info = yield self.store.get_local_media(media_id) - if not media_info: + if not media_info or media_info["quarantined_by"]: respond_404(request) return diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index bae2b4c75..0718f7524 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -135,6 +135,8 @@ class MediaRepository(object): media_info = yield self._download_remote_file( server_name, media_id ) + elif media_info["quarantined_by"]: + raise NotFoundError() else: self.recently_accessed_remotes.add((server_name, media_id)) yield self.store.update_cached_last_access_time( diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index d8f54adc9..59b2c39b2 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -81,7 +81,7 @@ class ThumbnailResource(Resource): method, m_type): media_info = yield self.store.get_local_media(media_id) - if not media_info: + if not media_info or media_info["quarantined_by"]: respond_404(request) return @@ -117,7 +117,7 @@ class ThumbnailResource(Resource): desired_type): media_info = yield self.store.get_local_media(media_id) - if not media_info: + if not media_info or media_info["quarantined_by"]: respond_404(request) return diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index 4c0f82353..5f0f18ee6 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -30,7 +30,7 @@ class MediaRepositoryStore(SQLBaseStore): return self._simple_select_one( "local_media_repository", {"media_id": media_id}, - ("media_type", "media_length", "upload_name", "created_ts"), + ("media_type", "media_length", "upload_name", "created_ts", "quarantined_by"), allow_none=True, desc="get_local_media", ) @@ -138,7 +138,7 @@ class MediaRepositoryStore(SQLBaseStore): {"media_origin": origin, "media_id": media_id}, ( "media_type", "media_length", "upload_name", "created_ts", - "filesystem_id", + "filesystem_id", "quarantined_by", ), allow_none=True, desc="get_cached_remote_media", diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 07366f66b..e9c1549c0 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -24,6 +24,7 @@ from .engines import PostgresEngine, Sqlite3Engine import collections import logging import ujson as json +import re logger = logging.getLogger(__name__) @@ -531,3 +532,72 @@ class RoomStore(SQLBaseStore): desc="block_room", ) self.is_room_blocked.invalidate((room_id,)) + + def quarantine_media_ids_in_room(self, room_id, quarantined_by): + """For a room loops through all events with media and quarantines + the associated media + """ + def _get_media_ids_in_room(txn): + mxc_re = re.compile("^mxc://([^/]+)/([^/#?]+)") + + next_token = 
self.get_current_events_token() + 1 + + total_media_quarantined = 0 + + while next_token: + sql = """ + SELECT stream_ordering, content FROM events + WHERE room_id = ? + AND stream_ordering < ? + AND contains_url = ? AND outlier = ? + ORDER BY stream_ordering DESC + LIMIT ? + """ + txn.execute(sql, (room_id, next_token, True, False, 100)) + + next_token = None + local_media_mxcs = [] + remote_media_mxcs = [] + for stream_ordering, content_json in txn: + next_token = stream_ordering + content = json.loads(content_json) + + url = content.get("url") + if not url: + continue + + matches = mxc_re.match(url) + if matches: + hostname = matches.group(1) + media_id = matches.group(2) + if hostname == self.hostname: + local_media_mxcs.append(media_id) + else: + remote_media_mxcs.append((hostname, media_id)) + + # Now update all the tables to set the quarantined_by flag + + txn.executemany(""" + UPDATE local_media_repository + SET quarantined_by = ? + WHERE media_id = ? + """, ((quarantined_by, media_id) for media_id in local_media_mxcs)) + + txn.executemany( + """ + UPDATE remote_media_cache + SET quarantined_by = ? + WHERE media_origin AND media_id = ? + """, + ( + (quarantined_by, origin, media_id) + for origin, media_id in remote_media_mxcs + ) + ) + + total_media_quarantined += len(local_media_mxcs) + total_media_quarantined += len(remote_media_mxcs) + + return total_media_quarantined + + return self.runInteraction("get_media_ids_in_room", _get_media_ids_in_room) diff --git a/synapse/storage/schema/delta/43/quarantine_media.sql b/synapse/storage/schema/delta/43/quarantine_media.sql new file mode 100644 index 000000000..630907ec4 --- /dev/null +++ b/synapse/storage/schema/delta/43/quarantine_media.sql @@ -0,0 +1,17 @@ +/* Copyright 2017 Vector Creations Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +ALTER TABLE local_media_repository ADD COLUMN quarantined_by TEXT; +ALTER TABLE remote_media_cache ADD COLUMN quarantined_by TEXT; From 385dcb7c60a0762f29779e9c2ab6a984636092df Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 19 Jun 2017 17:47:55 +0100 Subject: [PATCH 139/139] Handle thumbnail urls --- synapse/storage/room.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index e9c1549c0..23688430b 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -562,18 +562,20 @@ class RoomStore(SQLBaseStore): next_token = stream_ordering content = json.loads(content_json) - url = content.get("url") - if not url: - continue + content_url = content.get("url") + thumbnail_url = content.get("info", {}).get("thumbnail_url") - matches = mxc_re.match(url) - if matches: - hostname = matches.group(1) - media_id = matches.group(2) - if hostname == self.hostname: - local_media_mxcs.append(media_id) - else: - remote_media_mxcs.append((hostname, media_id)) + for url in (content_url, thumbnail_url): + if not url: + continue + matches = mxc_re.match(url) + if matches: + hostname = matches.group(1) + media_id = matches.group(2) + if hostname == self.hostname: + local_media_mxcs.append(media_id) + else: + remote_media_mxcs.append((hostname, media_id)) # Now update all the tables to set the quarantined_by flag
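For reference, the URL-matching logic above is easy to exercise on its own. The following standalone sketch (not the storage method itself) mirrors it: it walks event `content` dicts and collects local and remote media IDs from both `url` and `info.thumbnail_url`, with a made-up `LOCAL_HOSTNAME` standing in for `self.hostname`. Note, separately, that the `remote_media_cache` update earlier in this series supplies three values per row to a statement with only two placeholders; a corrected clause would read `WHERE media_origin = ? AND media_id = ?`.

# Standalone sketch mirroring the quarantine media-ID extraction above.
# LOCAL_HOSTNAME is a made-up stand-in for the homeserver's self.hostname.
import re

MXC_RE = re.compile("^mxc://([^/]+)/([^/#?]+)")
LOCAL_HOSTNAME = "example.com"

def collect_media_ids(event_contents):
    local_media_ids = []
    remote_media = []
    for content in event_contents:
        content_url = content.get("url")
        thumbnail_url = content.get("info", {}).get("thumbnail_url")
        for url in (content_url, thumbnail_url):
            if not url:
                continue
            match = MXC_RE.match(url)
            if not match:
                continue
            hostname, media_id = match.group(1), match.group(2)
            if hostname == LOCAL_HOSTNAME:
                local_media_ids.append(media_id)
            else:
                remote_media.append((hostname, media_id))
    return local_media_ids, remote_media

if __name__ == "__main__":
    events = [
        {"url": "mxc://example.com/abc123",
         "info": {"thumbnail_url": "mxc://other.org/thumb456"}},
        {"body": "no media here"},
    ]
    print(collect_media_ids(events))
    # -> (['abc123'], [('other.org', 'thumb456')])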