From f3de4f8cb7383a71bbbdd4c3c45dd803ee45e415 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Fri, 30 Dec 2016 20:21:04 +0000 Subject: [PATCH 01/12] Bump version and changelog --- CHANGES.rst | 10 ++++++++++ synapse/__init__.py | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 108f827cf..dfa5d8748 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,13 @@ +Changes in synapse v0.18.6-rc2 (2016-12-30) +=========================================== + +Bug fixes: + +* Fix memory leak in twisted by initialising logging correctly (PR #1731) +* Fix bug where fetching missing events took an unacceptable amount of time in + large rooms (PR #1734) + + Changes in synapse v0.18.6-rc1 (2016-12-29) =========================================== diff --git a/synapse/__init__.py b/synapse/__init__.py index 84592f53e..fbb2bb57b 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.18.6-rc1" +__version__ = "0.18.6-rc2" From b6b67715edc496ea95fd8005ee5b5685adcf2601 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 4 Jan 2017 13:34:35 +0000 Subject: [PATCH 02/12] Send ALL membership events to the server that was affected. Send all membership changes to the server that was affected. This ensures that if the last member of a room on a server was kicked or banned they get told about it. --- synapse/federation/transaction_queue.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 51b656d74..b00dda290 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -19,7 +19,7 @@ from twisted.internet import defer from .persistence import TransactionActions from .units import Transaction, Edu -from synapse.api.constants import EventTypes, Membership +from synapse.api.constants import EventTypes from synapse.api.errors import HttpResponseException from synapse.util.async import run_on_reactor from synapse.util.logcontext import preserve_context_over_fn @@ -161,9 +161,11 @@ class TransactionQueue(object): get_domain_from_id(user_id) for user_id in users_in_room ) + # Send all membership changes to the server that was affected. + # This ensures that if the last member of a room on a server + # was kicked or banned they get told about it. if event.type == EventTypes.Member: - if event.content["membership"] == Membership.JOIN: - destinations.add(get_domain_from_id(event.state_key)) + destinations.add(get_domain_from_id(event.state_key)) logger.debug("Sending %s to %r", event, destinations) From e02bdaf08ba12a5c1ef15d721a7505ffd870c608 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 4 Jan 2017 15:13:40 +0000 Subject: [PATCH 03/12] Get the destinations from the state from before the event Rather than the state after then event. --- synapse/federation/transaction_queue.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index b00dda290..891c245bb 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -19,7 +19,6 @@ from twisted.internet import defer from .persistence import TransactionActions from .units import Transaction, Edu -from synapse.api.constants import EventTypes from synapse.api.errors import HttpResponseException from synapse.util.async import run_on_reactor from synapse.util.logcontext import preserve_context_over_fn @@ -153,20 +152,22 @@ class TransactionQueue(object): break for event in events: + # Get the state from before the event. + # We need to make sure that this is the state from before + # the event and not from after it. + # Otherwise if the last member on a server in a room is + # banned then it won't receive the event because it won't + # be in the room after the ban. users_in_room = yield self.state.get_current_user_in_room( - event.room_id, latest_event_ids=[event.event_id], + event.room_id, latest_event_ids=[ + prev_id for prev_id, _ in event.prev_events + ], ) destinations = set( get_domain_from_id(user_id) for user_id in users_in_room ) - # Send all membership changes to the server that was affected. - # This ensures that if the last member of a room on a server - # was kicked or banned they get told about it. - if event.type == EventTypes.Member: - destinations.add(get_domain_from_id(event.state_key)) - logger.debug("Sending %s to %r", event, destinations) self._send_pdu(event, destinations) From f784980d2b2bd3827bfef94b0360582b2ef228ba Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 5 Jan 2017 11:26:30 +0000 Subject: [PATCH 04/12] Only send events that originate on this server. Or events that are sent via the federation "send_join" API. This should match the behaviour from before v0.18.5 and #1635 landed. --- synapse/events/__init__.py | 9 +++++++++ synapse/federation/transaction_queue.py | 12 ++++++++++++ synapse/handlers/federation.py | 4 ++++ 3 files changed, 25 insertions(+) diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index bcb8f33a5..8c71aeb5e 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -36,6 +36,15 @@ class _EventInternalMetadata(object): def is_invite_from_remote(self): return getattr(self, "invite_from_remote", False) + def get_send_on_behalf_of(self): + """Whether this server should send the event on behalf of another server. + This is used by the federation "send_join" API to forward the initial join + event for a server in the room. + + returns a str with the name of the server this event is sent on behalf of. + """ + return getattr(self, "get_send_on_behalf_of", None) + def _event_dict_property(key): def getter(self): diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 891c245bb..7db7b806d 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -61,6 +61,7 @@ class TransactionQueue(object): self.transport_layer = hs.get_federation_transport_client() self.clock = hs.get_clock() + self.is_mine_id = hs.is_mine_id # Is a mapping from destinations -> deferreds. Used to keep track # of which destinations have transactions in flight and when they are @@ -152,6 +153,12 @@ class TransactionQueue(object): break for event in events: + # Only send events for this server. + send_on_behalf_of = event.internal_metadata.get_send_on_behalf_of() + is_mine = self.is_mine_id(event.event_id) + if not is_mine and send_on_behalf_of is None: + continue + # Get the state from before the event. # We need to make sure that this is the state from before # the event and not from after it. @@ -167,6 +174,11 @@ class TransactionQueue(object): destinations = set( get_domain_from_id(user_id) for user_id in users_in_room ) + if send_on_behalf_of is not None: + # If we are sending the event on behalf of another server + # then it already has the event and there is no reason to + # send the event to it. + destinations.discard(send_on_behalf_of) logger.debug("Sending %s to %r", event, destinations) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 1d07e4d02..8c93d6d39 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -790,6 +790,10 @@ class FederationHandler(BaseHandler): ) event.internal_metadata.outlier = False + # Send this event on behalf of the origin server since they may not + # have an up to data view of the state of the room at this event so + # will not know which servers to send the event to. + event.internal_metadata.send_on_behalf_of = origin context, event_stream_id, max_stream_id = yield self._handle_new_event( origin, event From eedf400d05ba72c2c21b55a64f67104af54e90bd Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Sat, 31 Dec 2016 15:21:37 +0000 Subject: [PATCH 05/12] limit total timeout for get_missing_events to 10s --- synapse/federation/federation_client.py | 4 +++- synapse/federation/federation_server.py | 5 +++++ synapse/federation/transport/client.py | 5 +++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 6851f2376..b4bcec77e 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -707,7 +707,7 @@ class FederationClient(FederationBase): @defer.inlineCallbacks def get_missing_events(self, destination, room_id, earliest_events_ids, - latest_events, limit, min_depth): + latest_events, limit, min_depth, timeout): """Tries to fetch events we are missing. This is called when we receive an event without having received all of its ancestors. @@ -721,6 +721,7 @@ class FederationClient(FederationBase): have all previous events for. limit (int): Maximum number of events to return. min_depth (int): Minimum depth of events tor return. + timeout (int): Max time to wait in ms """ try: content = yield self.transport_layer.get_missing_events( @@ -730,6 +731,7 @@ class FederationClient(FederationBase): latest_events=[e.event_id for e in latest_events], limit=limit, min_depth=min_depth, + timeout=timeout, ) events = [ diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index f4c60e67e..6d76e6f91 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -425,6 +425,7 @@ class FederationServer(FederationBase): " limit: %d, min_depth: %d", earliest_events, latest_events, limit, min_depth ) + missing_events = yield self.handler.on_get_missing_events( origin, room_id, earliest_events, latest_events, limit, min_depth ) @@ -567,6 +568,9 @@ class FederationServer(FederationBase): len(prevs - seen), pdu.room_id, list(prevs - seen)[:5] ) + # XXX: we set timeout to 10s to help workaround + # https://github.com/matrix-org/synapse/issues/1733 + missing_events = yield self.get_missing_events( origin, pdu.room_id, @@ -574,6 +578,7 @@ class FederationServer(FederationBase): latest_events=[pdu], limit=10, min_depth=min_depth, + timeout=10000, ) # We want to sort these by depth so we process them and diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 491cdc29e..915af3440 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -386,7 +386,7 @@ class TransportLayerClient(object): @defer.inlineCallbacks @log_function def get_missing_events(self, destination, room_id, earliest_events, - latest_events, limit, min_depth): + latest_events, limit, min_depth, timeout): path = PREFIX + "/get_missing_events/%s" % (room_id,) content = yield self.client.post_json( @@ -397,7 +397,8 @@ class TransportLayerClient(object): "min_depth": int(min_depth), "earliest_events": earliest_events, "latest_events": latest_events, - } + }, + timeout=timeout, ) defer.returnValue(content) From 468749c9fca61fabc9dc5da5521ead84b4825783 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 5 Jan 2017 11:44:44 +0000 Subject: [PATCH 06/12] fix comment --- synapse/federation/federation_server.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 6d76e6f91..800f04189 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -569,7 +569,23 @@ class FederationServer(FederationBase): ) # XXX: we set timeout to 10s to help workaround - # https://github.com/matrix-org/synapse/issues/1733 + # https://github.com/matrix-org/synapse/issues/1733. + # The reason is to avoid holding the linearizer lock + # whilst processing inbound /send transactions, causing + # FDs to stack up and block other inbound transactions + # which empirically can currently take up to 30 minutes. + # + # N.B. this explicitly disables retry attempts. + # + # N.B. this also increases our chances of falling back to + # fetching fresh state for the room if the missing event + # can't be found, which slightly reduces our security. + # it may also increase our DAG extremity count for the room, + # causing additional state resolution? See #1760. + # However, fetching state doesn't hold the linearizer lock + # apparently. + # + # see https://github.com/matrix-org/synapse/pull/1744 missing_events = yield self.get_missing_events( origin, From dd3df11c5524441db3c9dd638ea59432d6f3d118 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 5 Jan 2017 12:32:47 +0000 Subject: [PATCH 07/12] More logging for the linearizer and for get_events --- synapse/storage/events.py | 2 ++ synapse/util/async.py | 13 +++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index ecb79c07e..04dbdac3f 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1084,8 +1084,10 @@ class EventsStore(SQLBaseStore): self._do_fetch ) + logger.info("Loading %d events", len(events)) with PreserveLoggingContext(): rows = yield events_d + logger.info("Loaded %d events (%d rows)", len(events), len(rows)) if not allow_rejected: rows[:] = [r for r in rows if not r["rejects"]] diff --git a/synapse/util/async.py b/synapse/util/async.py index 4280455cb..83875edc8 100644 --- a/synapse/util/async.py +++ b/synapse/util/async.py @@ -166,7 +166,11 @@ class Linearizer(object): # do some work. """ - def __init__(self): + def __init__(self, name=None): + if name is None: + self.name = id(self) + else: + self.name = name self.key_to_defer = {} @defer.inlineCallbacks @@ -185,15 +189,20 @@ class Linearizer(object): self.key_to_defer[key] = new_defer if current_defer: - logger.info("Waiting to acquire linearizer lock for key %r", key) + logger.info( + "Waiting to acquire linearizer lock %r for key %r", self.name, key + ) with PreserveLoggingContext(): yield current_defer + logger.info("Acquired linearizer lock %r for key %r", self.name, key) + @contextmanager def _ctx_manager(): try: yield finally: + logger.info("Releasing linearizer lock %r for key %r", self.name, key) new_defer.callback(None) current_d = self.key_to_defer.get(key) if current_d is new_defer: From 62451800e7a35719bd152c957390a70d0d305707 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 5 Jan 2017 13:36:10 +0000 Subject: [PATCH 08/12] Bump version and changelog to v0.18.6-rc3 --- CHANGES.rst | 15 +++++++++++++++ synapse/__init__.py | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index dfa5d8748..82c8e38cc 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,18 @@ +Changes in synapse v0.18.6-rc3 (2017-01-05) +=========================================== + +Bug fixes: + +* Fix bug where we failed to send ban events to the banned server (PR #1758) +* Fix bug where we sent event that didn't originate on this server to + other servers (PR #1764) +* Fix bug where processing an event from a remote server took a long time + because we were making long HTTP requests (PR #1765, PR #1744) + +Changes: + +* Improve logging for debugging deadlocks (PR #1766) + Changes in synapse v0.18.6-rc2 (2016-12-30) =========================================== diff --git a/synapse/__init__.py b/synapse/__init__.py index fbb2bb57b..a1da92ef9 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.18.6-rc2" +__version__ = "0.18.6-rc3" From 8cfc0165e913a140782538003fd61c2e01dcf81d Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 5 Jan 2017 13:39:43 +0000 Subject: [PATCH 09/12] fix annoying typos --- synapse/storage/roommember.py | 4 ++-- synapse/storage/state.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 946d5a81c..5d18037c7 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -393,8 +393,8 @@ class RoomMemberStore(SQLBaseStore): @cachedInlineCallbacks(num_args=2, cache_context=True) def _get_joined_users_from_context(self, room_id, state_group, current_state_ids, cache_context, event=None): - # We don't use `state_group`, its there so that we can cache based - # on it. However, its important that its never None, since two current_state's + # We don't use `state_group`, it's there so that we can cache based + # on it. However, it's important that it's never None, since two current_states # with a state_group of None are likely to be different. # See bulk_get_push_rules_for_room for how we work around this. assert state_group is not None diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 23e7ad992..7f466c40a 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -384,7 +384,7 @@ class StateStore(SQLBaseStore): # We did this before by getting the list of group ids, and # then passing that list to sqlite to get latest event for # each (type, state_key). However, that was terribly slow - # without the right indicies (which we can't add until + # without the right indices (which we can't add until # after we finish deduping state, which requires this func) args = [next_group] if types: From d79d1657617e5c050292b6676a9851c5674576da Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 5 Jan 2017 13:40:39 +0000 Subject: [PATCH 10/12] add logging for all the places we call resolve_state_groups. my kingdom for a backtrace that actually works. --- synapse/api/auth.py | 1 + synapse/handlers/federation.py | 1 + synapse/state.py | 4 ++++ 3 files changed, 6 insertions(+) diff --git a/synapse/api/auth.py b/synapse/api/auth.py index a99986714..641f2bb06 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -290,6 +290,7 @@ class Auth(object): with Measure(self.clock, "check_host_in_room"): latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) + logger.info("calling resolve_state_groups from check_host_in_room"); entry = yield self.state.resolve_state_groups( room_id, latest_event_ids ) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 8c93d6d39..9ed708bf3 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -591,6 +591,7 @@ class FederationHandler(BaseHandler): event_ids = list(extremities.keys()) + logger.info("Calling resolve_state_groups in _maybe_backfill") states = yield preserve_context_over_deferred(defer.gatherResults([ preserve_fn(self.state_handler.resolve_state_groups)(room_id, [e]) for e in event_ids diff --git a/synapse/state.py b/synapse/state.py index b4eca0e5d..bbca59b1e 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -123,6 +123,7 @@ class StateHandler(object): if not latest_event_ids: latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) + logger.info("calling resolve_state_groups from get_current_state"); ret = yield self.resolve_state_groups(room_id, latest_event_ids) state = ret.state @@ -147,6 +148,7 @@ class StateHandler(object): if not latest_event_ids: latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) + logger.info("calling resolve_state_groups from get_current_state_ids"); ret = yield self.resolve_state_groups(room_id, latest_event_ids) state = ret.state @@ -158,6 +160,7 @@ class StateHandler(object): @defer.inlineCallbacks def get_current_user_in_room(self, room_id, latest_event_ids=None): + logger.info("calling resolve_state_groups from get_current_user_in_room"); if not latest_event_ids: latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) entry = yield self.resolve_state_groups(room_id, latest_event_ids) @@ -223,6 +226,7 @@ class StateHandler(object): context.prev_state_events = [] defer.returnValue(context) + logger.info("calling resolve_state_groups from compute_event_context"); if event.is_state(): entry = yield self.resolve_state_groups( event.room_id, [e for e, _ in event.prev_events], From 883ff92a7fe2c81639a521470854b198845aff0f Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 5 Jan 2017 13:45:02 +0000 Subject: [PATCH 11/12] Fix case --- synapse/handlers/federation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 9ed708bf3..1021bcc40 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -591,7 +591,7 @@ class FederationHandler(BaseHandler): event_ids = list(extremities.keys()) - logger.info("Calling resolve_state_groups in _maybe_backfill") + logger.info("calling resolve_state_groups in _maybe_backfill") states = yield preserve_context_over_deferred(defer.gatherResults([ preserve_fn(self.state_handler.resolve_state_groups)(room_id, [e]) for e in event_ids From c18f7fc41019be6a5c08df3f4976bd94435677c7 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 5 Jan 2017 13:50:22 +0000 Subject: [PATCH 12/12] Fix flake8 and update changelog --- CHANGES.rst | 2 +- synapse/api/auth.py | 2 +- synapse/state.py | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 82c8e38cc..c1a8dd761 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -11,7 +11,7 @@ Bug fixes: Changes: -* Improve logging for debugging deadlocks (PR #1766) +* Improve logging for debugging deadlocks (PR #1766, PR #1767) Changes in synapse v0.18.6-rc2 (2016-12-30) =========================================== diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 641f2bb06..f93e45a74 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -290,7 +290,7 @@ class Auth(object): with Measure(self.clock, "check_host_in_room"): latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) - logger.info("calling resolve_state_groups from check_host_in_room"); + logger.info("calling resolve_state_groups from check_host_in_room") entry = yield self.state.resolve_state_groups( room_id, latest_event_ids ) diff --git a/synapse/state.py b/synapse/state.py index bbca59b1e..ba0d2a39a 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -123,7 +123,7 @@ class StateHandler(object): if not latest_event_ids: latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) - logger.info("calling resolve_state_groups from get_current_state"); + logger.info("calling resolve_state_groups from get_current_state") ret = yield self.resolve_state_groups(room_id, latest_event_ids) state = ret.state @@ -148,7 +148,7 @@ class StateHandler(object): if not latest_event_ids: latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) - logger.info("calling resolve_state_groups from get_current_state_ids"); + logger.info("calling resolve_state_groups from get_current_state_ids") ret = yield self.resolve_state_groups(room_id, latest_event_ids) state = ret.state @@ -160,7 +160,7 @@ class StateHandler(object): @defer.inlineCallbacks def get_current_user_in_room(self, room_id, latest_event_ids=None): - logger.info("calling resolve_state_groups from get_current_user_in_room"); + logger.info("calling resolve_state_groups from get_current_user_in_room") if not latest_event_ids: latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) entry = yield self.resolve_state_groups(room_id, latest_event_ids) @@ -226,7 +226,7 @@ class StateHandler(object): context.prev_state_events = [] defer.returnValue(context) - logger.info("calling resolve_state_groups from compute_event_context"); + logger.info("calling resolve_state_groups from compute_event_context") if event.is_state(): entry = yield self.resolve_state_groups( event.room_id, [e for e, _ in event.prev_events],