mirror of
https://git.anonymousland.org/anonymousland/synapse-product.git
synced 2025-05-31 15:44:22 -04:00
Handle race between persisting an event and un-partial stating a room (#13100)
Whenever we want to persist an event, we first compute an event context, which includes the state at the event and a flag indicating whether the state is partial. After a lot of processing, we finally try to store the event in the database, which can fail for partial state events when the containing room has been un-partial stated in the meantime. We detect the race as a foreign key constraint failure in the data store layer and turn it into a special `PartialStateConflictError` exception, which makes its way up to the method in which we computed the event context. To make things difficult, the exception needs to cross a replication request: `/fed_send_events` for events coming over federation and `/send_event` for events from clients. We transport the `PartialStateConflictError` as a `409 Conflict` over replication and turn `409`s back into `PartialStateConflictError`s on the worker making the request. All client events go through `EventCreationHandler.handle_new_client_event`, which is called in *a lot* of places. Instead of trying to update all the code which creates client events, we turn the `PartialStateConflictError` into a `429 Too Many Requests` in `EventCreationHandler.handle_new_client_event` and hope that clients take it as a hint to retry their request. On the federation event side, there are 7 places which compute event contexts. 4 of them use outlier event contexts: `FederationEventHandler._auth_and_persist_outliers_inner`, `FederationHandler.do_knock`, `FederationHandler.on_invite_request` and `FederationHandler.do_remotely_reject_invite`. These events won't have the partial state flag, so we do not need to do anything for then. The remaining 3 paths which create events are `FederationEventHandler.process_remote_join`, `FederationEventHandler.on_send_membership_event` and `FederationEventHandler._process_received_pdu`. We can't experience the race in `process_remote_join`, unless we're handling an additional join into a partial state room, which currently blocks, so we make no attempt to handle it correctly. `on_send_membership_event` is only called by `FederationServer._on_send_membership_event`, so we catch the `PartialStateConflictError` there and retry just once. `_process_received_pdu` is called by `on_receive_pdu` for incoming events and `_process_pulled_event` for backfill. The latter should never try to persist partial state events, so we ignore it. We catch the `PartialStateConflictError` in `on_receive_pdu` and retry just once. Refering to the graph of code paths in https://github.com/matrix-org/synapse/issues/12988#issuecomment-1156857648 may make the above make more sense. Signed-off-by: Sean Quah <seanq@matrix.org>
This commit is contained in:
parent
6ba732fefe
commit
68db233f0c
10 changed files with 234 additions and 74 deletions
|
@ -16,6 +16,7 @@
|
|||
import itertools
|
||||
import logging
|
||||
from collections import OrderedDict
|
||||
from http import HTTPStatus
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
|
@ -35,6 +36,7 @@ from prometheus_client import Counter
|
|||
|
||||
import synapse.metrics
|
||||
from synapse.api.constants import EventContentFields, EventTypes, RelationTypes
|
||||
from synapse.api.errors import Codes, SynapseError
|
||||
from synapse.api.room_versions import RoomVersions
|
||||
from synapse.events import EventBase, relation_from_event
|
||||
from synapse.events.snapshot import EventContext
|
||||
|
@ -69,6 +71,24 @@ event_counter = Counter(
|
|||
)
|
||||
|
||||
|
||||
class PartialStateConflictError(SynapseError):
|
||||
"""An internal error raised when attempting to persist an event with partial state
|
||||
after the room containing the event has been un-partial stated.
|
||||
|
||||
This error should be handled by recomputing the event context and trying again.
|
||||
|
||||
This error has an HTTP status code so that it can be transported over replication.
|
||||
It should not be exposed to clients.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__(
|
||||
HTTPStatus.CONFLICT,
|
||||
msg="Cannot persist partial state event in un-partial stated room",
|
||||
errcode=Codes.UNKNOWN,
|
||||
)
|
||||
|
||||
|
||||
@attr.s(slots=True, auto_attribs=True)
|
||||
class DeltaState:
|
||||
"""Deltas to use to update the `current_state_events` table.
|
||||
|
@ -154,6 +174,10 @@ class PersistEventsStore:
|
|||
|
||||
Returns:
|
||||
Resolves when the events have been persisted
|
||||
|
||||
Raises:
|
||||
PartialStateConflictError: if attempting to persist a partial state event in
|
||||
a room that has been un-partial stated.
|
||||
"""
|
||||
|
||||
# We want to calculate the stream orderings as late as possible, as
|
||||
|
@ -354,6 +378,9 @@ class PersistEventsStore:
|
|||
For each room, a list of the event ids which are the forward
|
||||
extremities.
|
||||
|
||||
Raises:
|
||||
PartialStateConflictError: if attempting to persist a partial state event in
|
||||
a room that has been un-partial stated.
|
||||
"""
|
||||
state_delta_for_room = state_delta_for_room or {}
|
||||
new_forward_extremities = new_forward_extremities or {}
|
||||
|
@ -1304,6 +1331,10 @@ class PersistEventsStore:
|
|||
|
||||
Returns:
|
||||
new list, without events which are already in the events table.
|
||||
|
||||
Raises:
|
||||
PartialStateConflictError: if attempting to persist a partial state event in
|
||||
a room that has been un-partial stated.
|
||||
"""
|
||||
txn.execute(
|
||||
"SELECT event_id, outlier FROM events WHERE event_id in (%s)"
|
||||
|
@ -2215,6 +2246,11 @@ class PersistEventsStore:
|
|||
txn: LoggingTransaction,
|
||||
events_and_contexts: Collection[Tuple[EventBase, EventContext]],
|
||||
) -> None:
|
||||
"""
|
||||
Raises:
|
||||
PartialStateConflictError: if attempting to persist a partial state event in
|
||||
a room that has been un-partial stated.
|
||||
"""
|
||||
state_groups = {}
|
||||
for event, context in events_and_contexts:
|
||||
if event.internal_metadata.is_outlier():
|
||||
|
@ -2239,19 +2275,37 @@ class PersistEventsStore:
|
|||
# if we have partial state for these events, record the fact. (This happens
|
||||
# here rather than in _store_event_txn because it also needs to happen when
|
||||
# we de-outlier an event.)
|
||||
self.db_pool.simple_insert_many_txn(
|
||||
txn,
|
||||
table="partial_state_events",
|
||||
keys=("room_id", "event_id"),
|
||||
values=[
|
||||
(
|
||||
event.room_id,
|
||||
event.event_id,
|
||||
)
|
||||
for event, ctx in events_and_contexts
|
||||
if ctx.partial_state
|
||||
],
|
||||
)
|
||||
try:
|
||||
self.db_pool.simple_insert_many_txn(
|
||||
txn,
|
||||
table="partial_state_events",
|
||||
keys=("room_id", "event_id"),
|
||||
values=[
|
||||
(
|
||||
event.room_id,
|
||||
event.event_id,
|
||||
)
|
||||
for event, ctx in events_and_contexts
|
||||
if ctx.partial_state
|
||||
],
|
||||
)
|
||||
except self.db_pool.engine.module.IntegrityError:
|
||||
logger.info(
|
||||
"Cannot persist events %s in rooms %s: room has been un-partial stated",
|
||||
[
|
||||
event.event_id
|
||||
for event, ctx in events_and_contexts
|
||||
if ctx.partial_state
|
||||
],
|
||||
list(
|
||||
{
|
||||
event.room_id
|
||||
for event, ctx in events_and_contexts
|
||||
if ctx.partial_state
|
||||
}
|
||||
),
|
||||
)
|
||||
raise PartialStateConflictError()
|
||||
|
||||
self.db_pool.simple_upsert_many_txn(
|
||||
txn,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue