From b5d1c68beb24c314006398052070448d67bb4983 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 15 Mar 2017 02:21:07 +0000 Subject: [PATCH 1/2] Implement reset_context_after_deferred to correctly reset the context when we fire off a deferred we aren't going to wait for. --- synapse/util/logcontext.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index 6c83eb213..d73670f9f 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -308,6 +308,31 @@ def preserve_context_over_deferred(deferred, context=None): return d +def reset_context_after_deferred(deferred): + """If the deferred is incomplete, add a callback which will reset the + context. + + This is useful when you want to fire off a deferred, but don't want to + wait for it to complete. (The deferred will restore the current log context + when it completes, so if you don't do anything, it will leak log context.) + + (If this feels asymmetric, consider it this way: we are effectively forking + a new thread of execution. We are probably currently within a + ``with LoggingContext()`` block, which is supposed to have a single entry + and exit point. But by spawning off another deferred, we are effectively + adding a new exit point.) + + Args: + deferred (defer.Deferred): deferred + """ + def reset_context(result): + LoggingContext.set_current_context(LoggingContext.sentinel) + return result + + if not deferred.called: + deferred.addBoth(reset_context) + + def preserve_fn(f): """Ensures that function is called with correct context and that context is restored after return. Useful for wrapping functions that return a deferred From 9ce53a3861881e1da54d87d2db875f53eafef8ac Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 14 Mar 2017 11:26:57 +0000 Subject: [PATCH 2/2] Queue up federation PDUs while a room join is in progress This just takes the existing `room_queues` logic and moves it out to `on_receive_pdu` instead of `_process_received_pdu`, which ensures that we don't start trying to fetch prev_events and whathaveyou until the join has completed. --- synapse/handlers/federation.py | 68 ++++++++++++++++++++++------------ 1 file changed, 44 insertions(+), 24 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index d0c2b4d6e..0cd5501b0 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -14,6 +14,7 @@ # limitations under the License. """Contains handlers for federation events.""" +import synapse.util.logcontext from signedjson.key import decode_verify_key_bytes from signedjson.sign import verify_signed_json from unpaddedbase64 import decode_base64 @@ -114,6 +115,14 @@ class FederationHandler(BaseHandler): logger.debug("Already seen pdu %s", pdu.event_id) return + # If we are currently in the process of joining this room, then we + # queue up events for later processing. + if pdu.room_id in self.room_queues: + logger.info("Ignoring PDU %s for room %s from %s for now; join " + "in progress", pdu.event_id, pdu.room_id, origin) + self.room_queues[pdu.room_id].append((pdu, origin)) + return + state = None auth_chain = [] @@ -274,26 +283,13 @@ class FederationHandler(BaseHandler): @log_function @defer.inlineCallbacks - def _process_received_pdu(self, origin, pdu, state=None, auth_chain=None): + def _process_received_pdu(self, origin, pdu, state, auth_chain): """ Called when we have a new pdu. We need to do auth checks and put it through the StateHandler. - - auth_chain and state are None if we already have the necessary state - and prev_events in the db """ event = pdu - logger.debug("Got event: %s", event.event_id) - - # If we are currently in the process of joining this room, then we - # queue up events for later processing. - if event.room_id in self.room_queues: - self.room_queues[event.room_id].append((pdu, origin)) - return - - logger.debug("Processing event: %s", event.event_id) - - logger.debug("Event: %s", event) + logger.debug("Processing event: %s", event) # FIXME (erikj): Awful hack to make the case where we are not currently # in the room work @@ -862,8 +858,6 @@ class FederationHandler(BaseHandler): """ logger.debug("Joining %s to %s", joinee, room_id) - yield self.store.clean_room_for_join(room_id) - origin, event = yield self._make_and_verify_event( target_hosts, room_id, @@ -872,7 +866,15 @@ class FederationHandler(BaseHandler): content, ) + # This shouldn't happen, because the RoomMemberHandler has a + # linearizer lock which only allows one operation per user per room + # at a time - so this is just paranoia. + assert (room_id not in self.room_queues) + self.room_queues[room_id] = [] + + yield self.store.clean_room_for_join(room_id) + handled_events = set() try: @@ -925,17 +927,35 @@ class FederationHandler(BaseHandler): room_queue = self.room_queues[room_id] del self.room_queues[room_id] - for p, origin in room_queue: - if p.event_id in handled_events: - continue + # we don't need to wait for the queued events to be processed - + # it's just a best-effort thing at this point. We do want to do + # them roughly in order, though, otherwise we'll end up making + # lots of requests for missing prev_events which we do actually + # have. Hence we fire off the deferred, but don't wait for it. - try: - self._process_received_pdu(origin, p) - except: - logger.exception("Couldn't handle pdu") + synapse.util.logcontext.reset_context_after_deferred( + self._handle_queued_pdus(room_queue)) defer.returnValue(True) + @defer.inlineCallbacks + def _handle_queued_pdus(self, room_queue): + """Process PDUs which got queued up while we were busy send_joining. + + Args: + room_queue (list[FrozenEvent, str]): list of PDUs to be processed + and the servers that sent them + """ + for p, origin in room_queue: + try: + logger.info("Processing queued PDU %s which was received " + "while we were joining %s", p.event_id, p.room_id) + yield self.on_receive_pdu(origin, p) + except Exception as e: + logger.warn( + "Error handling queued PDU %s from %s: %s", + p.event_id, origin, e) + @defer.inlineCallbacks @log_function def on_make_join_request(self, room_id, user_id):