synapse-product/synapse/storage/databases/main/events.py

# Copyright 2014-2016 OpenMarket Ltd
# Copyright 2018-2019 New Vector Ltd
# Copyright 2019 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import itertools
import logging
from collections import OrderedDict, namedtuple
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    Generator,
    Iterable,
    List,
    Optional,
    Set,
    Tuple,
)

import attr
from prometheus_client import Counter

import synapse.metrics
from synapse.api.constants import EventContentFields, EventTypes, RelationTypes
from synapse.api.room_versions import RoomVersions
from synapse.crypto.event_signing import compute_event_reference_hash
from synapse.events import EventBase  # noqa: F401
from synapse.events.snapshot import EventContext  # noqa: F401
from synapse.logging.utils import log_function
from synapse.storage._base import db_to_json, make_in_list_sql_clause
from synapse.storage.database import DatabasePool, LoggingTransaction
from synapse.storage.databases.main.search import SearchEntry
from synapse.storage.types import Connection
from synapse.storage.util.id_generators import MultiWriterIdGenerator
from synapse.storage.util.sequence import SequenceGenerator
from synapse.types import StateMap, get_domain_from_id
from synapse.util import json_encoder
from synapse.util.iterutils import batch_iter, sorted_topologically

if TYPE_CHECKING:
    from synapse.server import HomeServer
    from synapse.storage.databases.main import DataStore


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Running total of persisted events; bumped by the size of each batch once the
# batch has been written.
persist_event_counter = Counter("synapse_storage_events_persisted_events", "")
# Persisted events broken down by event type and by where the event came from
# (incremented once per event in a persisted batch).
event_counter = Counter(
    "synapse_storage_events_persisted_events_sep",
    "",
    ["type", "origin_type", "origin_entity"],
)


# Lightweight pairing of an event with its redacted form.
_EventCacheEntry = namedtuple("_EventCacheEntry", ("event", "redacted_event"))


@attr.s(slots=True)
class DeltaState:
    """Deltas to use to update the `current_state_events` table.

    Attributes:
        to_delete: List of type/state_keys to delete from current state
        to_insert: Map of state to upsert into current state
        no_longer_in_room: The server is no longer in the room, so the room
            should e.g. be removed from `current_state_events` table.
            Defaults to False.
    """

    to_delete = attr.ib(type=List[Tuple[str, str]])
    to_insert = attr.ib(type=StateMap[str])
    no_longer_in_room = attr.ib(type=bool, default=False)


class PersistEventsStore:
    """Contains all the functions for writing events to the database.

    Should only be instantiated on one process (when using a worker mode setup).

    Note: This is not part of the `DataStore` mixin.
    """

    def __init__(
        self,
        hs: "HomeServer",
        db: DatabasePool,
        main_data_store: "DataStore",
        db_conn: Connection,
    ):
        """Wire this store up to the homeserver and the main data store.

        Args:
            hs: The homeserver; supplies the clock, the instance name, the
                config and the `is_mine_id` check.
            db: The database pool that events are written through.
            main_data_store: The main data store, whose stream ID generators
                are shared with this class.
            db_conn: Not used by this constructor.
        """
        self.hs, self.db_pool, self.store = hs, db, main_data_store
        self.database_engine = db.engine
        self._clock = hs.get_clock()
        self._instance_name = hs.get_instance_name()

        self._ephemeral_messages_enabled = hs.config.enable_ephemeral_messages
        self.is_mine_id = hs.is_mine_id

        # The ID generators stay on the main data store (other generators are
        # chained off them, so moving them here would be painful); we simply
        # borrow references to them.
        self._backfill_id_gen = (
            main_data_store._backfill_id_gen
        )  # type: MultiWriterIdGenerator
        self._stream_id_gen = (
            main_data_store._stream_id_gen
        )  # type: MultiWriterIdGenerator

        # Only instances configured as event writers may create this class.
        assert (
            hs.get_instance_name() in hs.config.worker.writers.events
        ), "Can only instantiate EventsStore on master"

    async def _persist_events_and_state_updates(
        self,
        events_and_contexts: List[Tuple[EventBase, EventContext]],
        current_state_for_room: Dict[str, StateMap[str]],
        state_delta_for_room: Dict[str, DeltaState],
        new_forward_extremeties: Dict[str, List[str]],
        backfilled: bool = False,
    ) -> None:
        """Write a batch of events to the database together with the matching
        current-state and forward-extremity updates.

        Args:
            events_and_contexts: The events to persist, with their contexts.
            current_state_for_room: Map from room_id to the current state of
                the room based on forward extremities. Used to prefill the
                `get_current_state_ids` cache once the write has completed.
            state_delta_for_room: Map from room_id to the delta to apply to
                room state.
            new_forward_extremeties: Map from room_id to list of event IDs
                that are the new forward extremities of the room.
            backfilled: Whether the events were backfilled. Backfilled events
                take their stream orderings from the backfill ID generator.

        Returns:
            Resolves when the events have been persisted
        """

        # Stream orderings are allocated as late as possible: nothing with a
        # higher stream ordering is notified about until everything with a
        # lower one has been persisted, so time spent inside the `async with`
        # below delays every later event. That is why only the persistence
        # step is wrapped rather than the whole function.
        #
        # Doing this after the state deltas have been calculated is safe, as
        # only the *persistence* of the events needs protecting. The aim is
        # that "fetch events since X" never returns events/stream positions
        # beyond events that are still in flight, since a later "fetch events
        # since Y" would then miss them.
        #
        # Note: Multiple instances of this function cannot be in flight at
        # the same time for the same room.
        id_gen = self._backfill_id_gen if backfilled else self._stream_id_gen
        stream_ordering_manager = id_gen.get_next_mult(len(events_and_contexts))

        async with stream_ordering_manager as stream_orderings:
            for (event, _), stream in zip(events_and_contexts, stream_orderings):
                event.internal_metadata.stream_ordering = stream

            await self.db_pool.runInteraction(
                "persist_events",
                self._persist_events_txn,
                events_and_contexts=events_and_contexts,
                backfilled=backfilled,
                state_delta_for_room=state_delta_for_room,
                new_forward_extremeties=new_forward_extremeties,
            )
            persist_event_counter.inc(len(events_and_contexts))

            if not backfilled:
                # backfilled events have negative stream orderings, so we don't
                # want to set the event_persisted_position to that.
                last_event = events_and_contexts[-1][0]
                synapse.metrics.event_persisted_position.set(
                    last_event.internal_metadata.stream_ordering
                )

            # Record where each persisted event originated.
            for event, context in events_and_contexts:
                if context.app_service:
                    origin = ("local", context.app_service.id)
                elif self.hs.is_mine_id(event.sender):
                    origin = ("local", "*client*")
                else:
                    origin = ("remote", get_domain_from_id(event.sender))

                event_counter.labels(event.type, *origin).inc()

            # Warm the caches with the values we have just written.
            prefill_state = self.store.get_current_state_ids.prefill
            for room_id, new_state in current_state_for_room.items():
                prefill_state((room_id,), new_state)

            for room_id, latest_event_ids in new_forward_extremeties.items():
                self.store.get_latest_event_ids_in_room.prefill(
                    (room_id,), list(latest_event_ids)
                )

    async def _get_events_which_are_prevs(self, event_ids: Iterable[str]) -> List[str]:
        """Filter the supplied list of event_ids to get those which are prev_events of
        existing (non-outlier/rejected) events.

        Referencing events that are marked as soft-failed in their internal
        metadata are ignored as well.

        Args:
            event_ids: event ids to filter

        Returns:
            Filtered event ids. An ID appears once per matching edge, so the
            list may contain duplicates.
        """

        def _get_events_which_are_prevs_txn(txn, batch):
            sql = """
            SELECT prev_event_id, internal_metadata
            FROM event_edges
                INNER JOIN events USING (event_id)
                LEFT JOIN rejections USING (event_id)
                LEFT JOIN event_json USING (event_id)
            WHERE
                NOT events.outlier
                AND rejections.event_id IS NULL
                AND
            """

            clause, args = make_in_list_sql_clause(
                self.database_engine, "prev_event_id", batch
            )

            txn.execute(sql + clause, args)

            # Return the rows rather than appending to a list owned by the
            # enclosing function: the interaction may be run again after a
            # database error, and appending here would then record the same
            # rows twice.
            return [r[0] for r in txn if not db_to_json(r[1]).get("soft_failed")]

        results = []  # type: List[str]

        # Query in batches of 100 to bound the size of each IN clause.
        for chunk in batch_iter(event_ids, 100):
            results.extend(
                await self.db_pool.runInteraction(
                    "_get_events_which_are_prevs",
                    _get_events_which_are_prevs_txn,
                    chunk,
                )
            )

        return results

    async def _get_prevs_before_rejected(self, event_ids: Iterable[str]) -> Set[str]:
        """Get soft-failed ancestors to remove from the extremities.

        Given a set of events, find all those that have been soft-failed or
        rejected and collect their prev events (whether soft-failed/rejected
        or not). The collected prev events are then checked in the same way,
        walking up the prev-event graph until no more soft-failed/rejected
        events are found. Outlier events are not followed.

        This is used to find extremities that are ancestors of new events, but
        are separated by soft failed events.

        Args:
            event_ids: Events to find prev events for. Note that these must have
                already been persisted.

        Returns:
            The previous events.
        """

        # The set of event_ids to return, accumulated across batches. It is
        # only ever updated from this coroutine, never from inside the
        # transaction function.
        existing_prevs = set()  # type: Set[str]

        def _get_prevs_before_rejected_txn(txn, batch):
            # Prev events discovered by *this* attempt. They are kept separate
            # from `existing_prevs` so that, should the interaction be run
            # again after a database error, a half-finished earlier attempt
            # cannot make us skip events that still need to be recursed into.
            found = set()  # type: Set[str]
            to_recursively_check = batch

            while to_recursively_check:
                sql = """
                SELECT
                    event_id, prev_event_id, internal_metadata,
                    rejections.event_id IS NOT NULL
                FROM event_edges
                    INNER JOIN events USING (event_id)
                    LEFT JOIN rejections USING (event_id)
                    LEFT JOIN event_json USING (event_id)
                WHERE
                    NOT events.outlier
                    AND
                """

                clause, args = make_in_list_sql_clause(
                    self.database_engine, "event_id", to_recursively_check
                )

                txn.execute(sql + clause, args)
                to_recursively_check = []

                for _, prev_event_id, metadata, rejected in txn:
                    # Already handled, either by an earlier batch or earlier
                    # in this one.
                    if prev_event_id in existing_prevs or prev_event_id in found:
                        continue

                    soft_failed = db_to_json(metadata).get("soft_failed")
                    if soft_failed or rejected:
                        to_recursively_check.append(prev_event_id)
                        found.add(prev_event_id)

            return found

        # Query in batches of 100 to bound the size of each IN clause.
        for chunk in batch_iter(event_ids, 100):
            existing_prevs.update(
                await self.db_pool.runInteraction(
                    "_get_prevs_before_rejected",
                    _get_prevs_before_rejected_txn,
                    chunk,
                )
            )

        return existing_prevs

    @log_function
    def _persist_events_txn(
        self,
        txn: LoggingTransaction,
        events_and_contexts: List[Tuple[EventBase, EventContext]],
        backfilled: bool,
        state_delta_for_room: Optional[Dict[str, DeltaState]] = None,
        new_forward_extremeties: Optional[Dict[str, List[str]]] = None,
    ) -> None:
        """Insert some number of room events into the necessary database tables.

        Rejected events are only inserted into the events table, the events_json table,
        and the rejections table. Things reading from those table will need to check
        whether the event was rejected.

        NOTE(review): several helpers below (transaction IDs, state group
        mappings, auth chain) run before rejected events are filtered out of
        the list, so the statement above may be incomplete - confirm against
        those helpers.

        Args:
            txn: The database transaction to write in.
            events_and_contexts: events to persist. Must be non-empty, and
                every event must already have a stream ordering assigned.
            backfilled: True if the events were backfilled
            state_delta_for_room: The current-state delta for each room.
                Treated as empty if not given.
            new_forward_extremeties: The new forward extremities for each room.
                For each room, a list of the event ids which are the forward
                extremities. Treated as empty if not given.

        """
        state_delta_for_room = state_delta_for_room or {}
        new_forward_extremeties = new_forward_extremeties or {}

        # Keep a handle on the unfiltered list: `events_and_contexts` is
        # progressively narrowed below, but the metadata update wants both.
        all_events_and_contexts = events_and_contexts

        # Taken from the first and last entries of the batch as supplied.
        min_stream_order = events_and_contexts[0][0].internal_metadata.stream_ordering
        max_stream_order = events_and_contexts[-1][0].internal_metadata.stream_ordering

        # stream orderings should have been assigned by now
        assert min_stream_order
        assert max_stream_order

        self._update_forward_extremities_txn(
            txn,
            new_forward_extremities=new_forward_extremeties,
            max_stream_order=max_stream_order,
        )

        # Ensure that we don't have the same event twice.
        events_and_contexts = self._filter_events_and_contexts_for_duplicates(
            events_and_contexts
        )

        self._update_room_depths_txn(
            txn, events_and_contexts=events_and_contexts, backfilled=backfilled
        )

        # _update_outliers_txn filters out any events which have already been
        # persisted, and returns the filtered list.
        events_and_contexts = self._update_outliers_txn(
            txn, events_and_contexts=events_and_contexts
        )

        # From this point onwards the events are only events that we haven't
        # seen before.

        self._store_event_txn(txn, events_and_contexts=events_and_contexts)

        self._persist_transaction_ids_txn(txn, events_and_contexts)

        # Insert into event_to_state_groups.
        self._store_event_state_mappings_txn(txn, events_and_contexts)

        # Only the events themselves are needed here, not their contexts.
        self._persist_event_auth_chain_txn(txn, [e for e, _ in events_and_contexts])

        # _store_rejected_events_txn filters out any events which were
        # rejected, and returns the filtered list.
        events_and_contexts = self._store_rejected_events_txn(
            txn, events_and_contexts=events_and_contexts
        )

        # From this point onwards the events are only ones that weren't
        # rejected.

        self._update_metadata_tables_txn(
            txn,
            events_and_contexts=events_and_contexts,
            all_events_and_contexts=all_events_and_contexts,
            backfilled=backfilled,
        )

        # We call this last as it assumes we've inserted the events into
        # room_memberships, where applicable.
        self._update_current_state_txn(txn, state_delta_for_room, min_stream_order)

    def _persist_event_auth_chain_txn(
        self,
        txn: LoggingTransaction,
        events: List[EventBase],
    ) -> None:
        """Store the auth event mappings of the given state events and update
        the chain cover index for those in rooms that use it.

        Non-state events are ignored entirely.

        Args:
            txn: The database transaction to write in.
            events: The events being persisted (state and non-state alike).
        """

        # We only care about state events, so pick them out once up front and
        # bail out early if there are none.
        events_with_state = [e for e in events if e.is_state()]
        if not events_with_state:
            return

        # We want to store event_auth mappings for rejected events, as they're
        # used in state res v2.
        # This is only necessary if the rejected event appears in an accepted
        # event's auth chain, but its easier for now just to store them (and
        # it doesn't take much storage compared to storing the entire event
        # anyway).
        self.db_pool.simple_insert_many_txn(
            txn,
            table="event_auth",
            values=[
                {
                    "event_id": event.event_id,
                    "room_id": event.room_id,
                    "auth_id": auth_id,
                }
                for event in events_with_state
                for auth_id in event.auth_event_ids()
            ],
        )

        # We now calculate chain ID/sequence numbers for any state events we're
        # persisting. We ignore out of band memberships as we're not in the room
        # and won't have their auth chain (we'll fix it up later if we join the
        # room).
        #
        # See: docs/auth_chain_difference_algorithm.md

        # We ignore legacy rooms that we aren't filling the chain cover index
        # for.
        rows = self.db_pool.simple_select_many_txn(
            txn,
            table="rooms",
            column="room_id",
            iterable={event.room_id for event in events_with_state},
            keyvalues={},
            retcols=("room_id", "has_auth_chain_index"),
        )
        rooms_using_chain_index = {
            row["room_id"] for row in rows if row["has_auth_chain_index"]
        }

        state_events = {
            event.event_id: event
            for event in events_with_state
            if event.room_id in rooms_using_chain_index
        }

        if not state_events:
            return

        # We need to know the type/state_key and auth events of the events we're
        # calculating chain IDs for. We don't rely on having the full Event
        # instances as we'll potentially be pulling more events from the DB and
        # we don't need the overhead of fetching/parsing the full event JSON.
        event_to_types = {
            e.event_id: (e.type, e.state_key) for e in state_events.values()
        }
        event_to_auth_chain = {
            e.event_id: e.auth_event_ids() for e in state_events.values()
        }
        event_to_room_id = {e.event_id: e.room_id for e in state_events.values()}

        self._add_chain_cover_index(
            txn,
            self.db_pool,
            self.store.event_chain_id_gen,
            event_to_room_id,
            event_to_types,
            event_to_auth_chain,
        )

    @classmethod
    def _add_chain_cover_index(
        cls,
        txn,
        db_pool: DatabasePool,
        event_chain_id_gen: SequenceGenerator,
        event_to_room_id: Dict[str, str],
        event_to_types: Dict[str, Tuple[str, str]],
        event_to_auth_chain: Dict[str, List[str]],
    ) -> None:
        """Calculate the chain cover index for the given events.

        Allocates a chain ID/sequence number to each event that can be
        indexed, writes them to `event_auth_chains`, and inserts any new links
        between chains into `event_auth_chain_links`. Events that cannot be
        indexed yet, because one of their auth events has no chain position,
        are recorded in `event_auth_chain_to_calculate` so that a later call
        can pick them up.

        Note that `event_to_types` and `event_to_auth_chain` are updated in
        place with entries for any extra events pulled in from the database.

        Args:
            txn: The database transaction to run the queries in.
            db_pool: The database pool whose `simple_*_txn` helpers are used.
            event_chain_id_gen: Sequence generator handed on to
                `_allocate_chain_ids`.
            event_to_room_id: Event ID to the room ID of the event
            event_to_types: Event ID to type and state_key of the event
            event_to_auth_chain: Event ID to list of auth event IDs of the
                event (events with no auth events can be excluded).
        """

        # Map from event ID to chain ID/sequence number.
        chain_map = {}  # type: Dict[str, Tuple[int, int]]

        # Set of event IDs to calculate chain ID/seq numbers for.
        events_to_calc_chain_id_for = set(event_to_room_id)

        # We check if there are any events that need to be handled in the rooms
        # we're looking at. These should just be out of band memberships, where
        # we didn't have the auth chain when we first persisted.
        rows = db_pool.simple_select_many_txn(
            txn,
            table="event_auth_chain_to_calculate",
            keyvalues={},
            column="room_id",
            iterable=set(event_to_room_id.values()),
            retcols=("event_id", "type", "state_key"),
        )
        for row in rows:
            event_id = row["event_id"]
            event_type = row["type"]
            state_key = row["state_key"]

            # (We could pull out the auth events for all rows at once using
            # simple_select_many, but this case happens rarely and almost always
            # with a single row.)
            auth_events = db_pool.simple_select_onecol_txn(
                txn,
                "event_auth",
                keyvalues={"event_id": event_id},
                retcol="auth_id",
            )

            events_to_calc_chain_id_for.add(event_id)
            event_to_types[event_id] = (event_type, state_key)
            event_to_auth_chain[event_id] = auth_events

        # First we get the chain ID and sequence numbers for the events'
        # auth events (that aren't also currently being persisted).
        #
        # Note that there is an edge case here where we might not have
        # calculated chains and sequence numbers for events that were "out
        # of band". We handle this case by fetching the necessary info and
        # adding it to the set of events to calculate chain IDs for.

        missing_auth_chains = {
            a_id
            for auth_events in event_to_auth_chain.values()
            for a_id in auth_events
            if a_id not in events_to_calc_chain_id_for
        }

        # We loop here in case we find an out of band membership and need to
        # fetch their auth event info.
        while missing_auth_chains:
            sql = """
                SELECT event_id, events.type, state_key, chain_id, sequence_number
                FROM events
                INNER JOIN state_events USING (event_id)
                LEFT JOIN event_auth_chains USING (event_id)
                WHERE
            """
            clause, args = make_in_list_sql_clause(
                txn.database_engine,
                "event_id",
                missing_auth_chains,
            )
            txn.execute(sql + clause, args)

            missing_auth_chains.clear()

            # Read every row before looping: the loop body issues further
            # queries on the same `txn`, which would otherwise throw away the
            # rows we had not got to yet.
            fetched_rows = txn.fetchall()

            for auth_id, event_type, state_key, chain_id, sequence_number in fetched_rows:
                event_to_types[auth_id] = (event_type, state_key)

                if chain_id is None:
                    # No chain ID, so the event was persisted out of band.
                    # We add to list of events to calculate auth chains for.

                    events_to_calc_chain_id_for.add(auth_id)

                    event_to_auth_chain[auth_id] = db_pool.simple_select_onecol_txn(
                        txn,
                        "event_auth",
                        keyvalues={"event_id": auth_id},
                        retcol="auth_id",
                    )

                    # Anything we have no type information for has not been
                    # looked up yet, so queue it for the next iteration.
                    missing_auth_chains.update(
                        e
                        for e in event_to_auth_chain[auth_id]
                        if e not in event_to_types
                    )
                else:
                    chain_map[auth_id] = (chain_id, sequence_number)

        # Now we check if we have any events where we don't have auth chain,
        # this should only be out of band memberships.
        for event_id in sorted_topologically(event_to_auth_chain, event_to_auth_chain):
            for auth_id in event_to_auth_chain[event_id]:
                # An auth event that neither has a chain position already nor
                # is getting one in this call means we cannot index `event_id`
                # this time round.
                if (
                    auth_id not in chain_map
                    and auth_id not in events_to_calc_chain_id_for
                ):
                    events_to_calc_chain_id_for.discard(event_id)

                    # If this is an event we're trying to persist we add it to
                    # the list of events to calculate chain IDs for next time
                    # around. (Otherwise we will have already added it to the
                    # table).
                    room_id = event_to_room_id.get(event_id)
                    if room_id:
                        e_type, state_key = event_to_types[event_id]
                        db_pool.simple_insert_txn(
                            txn,
                            table="event_auth_chain_to_calculate",
                            values={
                                "event_id": event_id,
                                "room_id": room_id,
                                "type": e_type,
                                "state_key": state_key,
                            },
                        )

                    # We stop checking the event's auth events since we've
                    # discarded it.
                    break

        if not events_to_calc_chain_id_for:
            return

        # Allocate chain ID/sequence numbers to each new event.
        new_chain_tuples = cls._allocate_chain_ids(
            txn,
            db_pool,
            event_chain_id_gen,
            event_to_room_id,
            event_to_types,
            event_to_auth_chain,
            events_to_calc_chain_id_for,
            chain_map,
        )
        chain_map.update(new_chain_tuples)

        db_pool.simple_insert_many_txn(
            txn,
            table="event_auth_chains",
            values=[
                {"event_id": event_id, "chain_id": c_id, "sequence_number": seq}
                for event_id, (c_id, seq) in new_chain_tuples.items()
            ],
        )

        # These events now have a chain position, so they no longer need to be
        # remembered for a later calculation.
        db_pool.simple_delete_many_txn(
            txn,
            table="event_auth_chain_to_calculate",
            keyvalues={},
            column="event_id",
            iterable=new_chain_tuples,
        )

        # Now we need to calculate any new links between chains caused by
        # the new events.
        #
        # Links are pairs of chain ID/sequence numbers such that for any
        # event A (CA, SA) and any event B (CB, SB), B is in A's auth chain
        # if and only if there is at least one link (CA, S1) -> (CB, S2)
        # where SA >= S1 and S2 >= SB.
        #
        # We try and avoid adding redundant links to the table, e.g. if we
        # have two links between two chains which both start/end at the
        # sequence number event (or cross) then one can be safely dropped.
        #
        # To calculate new links we look at every new event and:
        #   1. Fetch the chain ID/sequence numbers of its auth events,
        #      discarding any that are reachable by other auth events, or
        #      that have the same chain ID as the event.
        #   2. For each retained auth event we:
        #       a. Add a link from the event's to the auth event's chain
        #          ID/sequence number; and
        #       b. Add a link from the event to every chain reachable by the
        #          auth event.

        # Step 1, fetch all existing links from all the chains we've seen
        # referenced.
        chain_links = _LinkMap()
        rows = db_pool.simple_select_many_txn(
            txn,
            table="event_auth_chain_links",
            column="origin_chain_id",
            iterable={chain_id for chain_id, _ in chain_map.values()},
            keyvalues={},
            retcols=(
                "origin_chain_id",
                "origin_sequence_number",
                "target_chain_id",
                "target_sequence_number",
            ),
        )
        for row in rows:
            chain_links.add_link(
                (row["origin_chain_id"], row["origin_sequence_number"]),
                (row["target_chain_id"], row["target_sequence_number"]),
                new=False,
            )

        # We do this in topological order to avoid adding redundant links.
        for event_id in sorted_topologically(
            events_to_calc_chain_id_for, event_to_auth_chain
        ):
            chain_id, sequence_number = chain_map[event_id]

            # Filter out auth events that are reachable by other auth
            # events. We do this by looking at every permutation of pairs of
            # auth events (A, B) to check if B is reachable from A.
            reduction = {
                a_id
                for a_id in event_to_auth_chain.get(event_id, [])
                if chain_map[a_id][0] != chain_id
            }
            for start_auth_id, end_auth_id in itertools.permutations(
                event_to_auth_chain.get(event_id, []),
                r=2,
            ):
                if chain_links.exists_path_from(
                    chain_map[start_auth_id], chain_map[end_auth_id]
                ):
                    reduction.discard(end_auth_id)

            # Step 2, figure out what the new links are from the reduced
            # list of auth events.
            for auth_id in reduction:
                auth_chain_id, auth_sequence_number = chain_map[auth_id]

                # Step 2a, add link between the event and auth event
                chain_links.add_link(
                    (chain_id, sequence_number), (auth_chain_id, auth_sequence_number)
                )

                # Step 2b, add a link to chains reachable from the auth
                # event.
                for target_id, target_seq in chain_links.get_links_from(
                    (auth_chain_id, auth_sequence_number)
                ):
                    # No point linking the event's chain back to itself.
                    if target_id == chain_id:
                        continue

                    chain_links.add_link(
                        (chain_id, sequence_number), (target_id, target_seq)
                    )

        # Persist only the links that were added above.
        db_pool.simple_insert_many_txn(
            txn,
            table="event_auth_chain_links",
            values=[
                {
                    "origin_chain_id": source_id,
                    "origin_sequence_number": source_seq,
                    "target_chain_id": target_id,
                    "target_sequence_number": target_seq,
                }
                for (
                    source_id,
                    source_seq,
                    target_id,
                    target_seq,
                ) in chain_links.get_additions()
            ],
        )

    @staticmethod
    def _allocate_chain_ids(
        txn,
        db_pool: DatabasePool,
        event_chain_id_gen: SequenceGenerator,
        event_to_room_id: Dict[str, str],
        event_to_types: Dict[str, Tuple[str, str]],
        event_to_auth_chain: Dict[str, List[str]],
        events_to_calc_chain_id_for: Set[str],
        chain_map: Dict[str, Tuple[int, int]],
    ) -> Dict[str, Tuple[int, int]]:
        """Allocates, but does not persist, chain ID/sequence numbers for the
        events in `events_to_calc_chain_id_for`. (c.f. _add_chain_cover_index
        for info on args)

        Args:
            txn: The database transaction. Used to read the current maximum
                sequence numbers from `event_auth_chains` and to draw new
                chain IDs from `event_chain_id_gen`.
            db_pool: Only `db_pool.engine` is used here, to build the
                `chain_id` IN-list clause.
            event_chain_id_gen: Generator that hands out new chain IDs.
            event_to_room_id: Not read by this function.
            event_to_types: Map from event ID to a pair that is compared
                between an event and each of its auth events (presumably
                (type, state_key), per the comment below).
            event_to_auth_chain: Map from event ID to its auth event IDs.
            events_to_calc_chain_id_for: The event IDs to allocate for.
            chain_map: Map from event ID to its already-known
                (chain ID, sequence number).

        Returns:
            A map from event ID to the (chain ID, sequence number) allocated
            for it, with all temporary chain IDs replaced by real ones.
        """

        # We now calculate the chain IDs/sequence numbers for the events. We do
        # this by looking at the chain ID and sequence number of any auth event
        # with the same type/state_key and incrementing the sequence number by
        # one. If there was no match or the chain ID/sequence number is already
        # taken we generate a new chain.
        #
        # We try to reduce the number of times that we hit the database by
        # batching up calls, to make this more efficient when persisting large
        # numbers of state events (e.g. during joins).
        #
        # We do this by:
        #   1. Calculating for each event which auth event will be used to
        #      inherit the chain ID, i.e. converting the auth chain graph to a
        #      tree that we can allocate chains on. We also keep track of which
        #      existing chain IDs have been referenced.
        #   2. Fetching the max allocated sequence number for each referenced
        #      existing chain ID, generating a map from chain ID to the max
        #      allocated sequence number.
        #   3. Iterating over the tree and allocating a chain ID/seq no. to the
        #      new event, by incrementing the sequence number from the
        #      referenced event's chain ID/seq no. and checking that the
        #      incremented sequence number hasn't already been allocated (by
        #      looking in the map generated in the previous step). We generate a
        #      new chain if the sequence number has already been allocated.
        #

        # Chain IDs taken from `chain_map` that at least one new event wants to
        # extend.
        existing_chains = set()  # type: Set[int]
        # (event ID, auth event ID to inherit the chain from, or None).
        tree = []  # type: List[Tuple[str, Optional[str]]]

        # We need to do this in a topologically sorted order as we want to
        # generate chain IDs/sequence numbers of an event's auth events before
        # the event itself.
        for event_id in sorted_topologically(
            events_to_calc_chain_id_for, event_to_auth_chain
        ):
            for auth_id in event_to_auth_chain.get(event_id, []):
                if event_to_types.get(event_id) == event_to_types.get(auth_id):
                    existing_chain_id = chain_map.get(auth_id)
                    if existing_chain_id:
                        existing_chains.add(existing_chain_id[0])

                    # Only the first matching auth event is used.
                    tree.append((event_id, auth_id))
                    break
            else:
                # No auth event matched, so there is no chain to inherit from.
                tree.append((event_id, None))

        # Fetch the current max sequence number for each existing referenced chain.
        sql = """
            SELECT chain_id, MAX(sequence_number) FROM event_auth_chains
            WHERE %s
            GROUP BY chain_id
        """
        clause, args = make_in_list_sql_clause(
            db_pool.engine, "chain_id", existing_chains
        )
        txn.execute(sql % (clause,), args)

        chain_to_max_seq_no = {row[0]: row[1] for row in txn}  # type: Dict[Any, int]

        # Allocate the new events chain ID/sequence numbers.
        #
        # To reduce the number of calls to the database we don't allocate a
        # chain ID number in the loop, instead we use a temporary `object()` for
        # each new chain ID. Once we've done the loop we generate the necessary
        # number of new chain IDs in one call, replacing all temporary
        # objects with real allocated chain IDs.

        unallocated_chain_ids = set()  # type: Set[object]
        new_chain_tuples = {}  # type: Dict[str, Tuple[Any, int]]
        for event_id, auth_event_id in tree:
            # If we reference an auth_event_id we fetch the allocated chain ID,
            # either from the existing `chain_map` or the newly generated
            # `new_chain_tuples` map.
            existing_chain_id = None
            if auth_event_id:
                existing_chain_id = new_chain_tuples.get(auth_event_id)
                if not existing_chain_id:
                    # Raises KeyError if the auth event is in neither map.
                    existing_chain_id = chain_map[auth_event_id]

            new_chain_tuple = None  # type: Optional[Tuple[Any, int]]
            if existing_chain_id:
                # We found a chain ID/sequence number candidate, check its
                # not already taken.
                proposed_new_id = existing_chain_id[0]
                proposed_new_seq = existing_chain_id[1] + 1

                # NOTE(review): this lookup raises KeyError if the chain has
                # no row in `event_auth_chains` and was not added by an
                # earlier iteration of this loop -- presumably every chain in
                # `chain_map` has been persisted; confirm.
                if chain_to_max_seq_no[proposed_new_id] < proposed_new_seq:
                    new_chain_tuple = (
                        proposed_new_id,
                        proposed_new_seq,
                    )

            # If we need to start a new chain we allocate a temporary chain ID.
            if not new_chain_tuple:
                new_chain_tuple = (object(), 1)
                unallocated_chain_ids.add(new_chain_tuple[0])

            new_chain_tuples[event_id] = new_chain_tuple
            # Record the allocation so later events in this batch cannot reuse
            # the same sequence number on this chain.
            chain_to_max_seq_no[new_chain_tuple[0]] = new_chain_tuple[1]

        # Generate new chain IDs for all unallocated chain IDs.
        newly_allocated_chain_ids = event_chain_id_gen.get_next_mult_txn(
            txn, len(unallocated_chain_ids)
        )

        # Map from potentially temporary chain ID to real chain ID
        chain_id_to_allocated_map = dict(
            zip(unallocated_chain_ids, newly_allocated_chain_ids)
        )  # type: Dict[Any, int]
        # Existing chain IDs are already real, so they map to themselves.
        chain_id_to_allocated_map.update((c, c) for c in existing_chains)

        return {
            event_id: (chain_id_to_allocated_map[chain_id], seq)
            for event_id, (chain_id, seq) in new_chain_tuples.items()
        }

    def _persist_transaction_ids_txn(
        self,
        txn: LoggingTransaction,
        events_and_contexts: List[Tuple[EventBase, EventContext]],
    ):
        """Persist the mapping from transaction IDs to event IDs (if defined)."""

        to_insert = []
        for event, _ in events_and_contexts:
            token_id = getattr(event.internal_metadata, "token_id", None)
            txn_id = getattr(event.internal_metadata, "txn_id", None)
            if token_id and txn_id:
                to_insert.append(
                    {
                        "event_id": event.event_id,
                        "room_id": event.room_id,
                        "user_id": event.sender,
                        "token_id": token_id,
                        "txn_id": txn_id,
                        "inserted_ts": self._clock.time_msec(),
                    }
                )

        if to_insert:
            self.db_pool.simple_insert_many_txn(
                txn,
                table="event_txn_id",
                values=to_insert,
            )

    def _update_current_state_txn(
        self,
        txn: LoggingTransaction,
        state_delta_by_room: Dict[str, DeltaState],
        stream_id: int,
    ):
        """Apply per-room current-state deltas to the current-state tables.

        For each room this writes the change to `current_state_delta_stream`,
        updates `current_state_events` and `local_current_membership`, and
        schedules the related cache invalidations.

        Args:
            txn: The transaction to run the updates in.
            state_delta_by_room: Map from room ID to the delta to apply. The
                `to_delete`, `to_insert` and `no_longer_in_room` attributes of
                each delta are read.
            stream_id: The stream ID written to every
                `current_state_delta_stream` row and passed to the stream
                change cache.
        """
        for room_id, delta_state in state_delta_by_room.items():
            # `to_insert` is used as a mapping from (type, state_key) to event
            # ID; iterating either collection yields (type, state_key) pairs.
            to_delete = delta_state.to_delete
            to_insert = delta_state.to_insert

            if delta_state.no_longer_in_room:
                # Server is no longer in the room so we delete the room from
                # current_state_events, being careful we've already updated the
                # rooms.room_version column (which gets populated in a
                # background task).
                self._upsert_room_version_txn(txn, room_id)

                # Before deleting we populate the current_state_delta_stream
                # so that async background tasks get told what happened.
                sql = """
                    INSERT INTO current_state_delta_stream
                        (stream_id, instance_name, room_id, type, state_key, event_id, prev_event_id)
                    SELECT ?, ?, room_id, type, state_key, null, event_id
                        FROM current_state_events
                        WHERE room_id = ?
                """
                txn.execute(sql, (stream_id, self._instance_name, room_id))

                self.db_pool.simple_delete_txn(
                    txn,
                    table="current_state_events",
                    keyvalues={"room_id": room_id},
                )
            else:
                # We're still in the room, so we update the current state as normal.

                # First we add entries to the current_state_delta_stream. We
                # do this before updating the current_state_events table so
                # that we can use it to calculate the `prev_event_id`. (This
                # allows us to not have to pull out the existing state
                # unnecessarily).
                #
                # The stream_id for the update is chosen to be the minimum of the stream_ids
                # for the batch of the events that we are persisting; that means we do not
                # end up in a situation where workers see events before the
                # current_state_delta updates.
                #
                sql = """
                    INSERT INTO current_state_delta_stream
                    (stream_id, instance_name, room_id, type, state_key, event_id, prev_event_id)
                    SELECT ?, ?, ?, ?, ?, ?, (
                        SELECT event_id FROM current_state_events
                        WHERE room_id = ? AND type = ? AND state_key = ?
                    )
                """
                txn.execute_batch(
                    sql,
                    (
                        (
                            stream_id,
                            self._instance_name,
                            room_id,
                            etype,
                            state_key,
                            # None (SQL NULL) for keys that are only deleted.
                            to_insert.get((etype, state_key)),
                            room_id,
                            etype,
                            state_key,
                        )
                        for etype, state_key in itertools.chain(to_delete, to_insert)
                    ),
                )
                # Now we actually update the current_state_events table

                # Rows for keys being replaced are deleted too, then
                # re-inserted with the new event ID below.
                txn.execute_batch(
                    "DELETE FROM current_state_events"
                    " WHERE room_id = ? AND type = ? AND state_key = ?",
                    (
                        (room_id, etype, state_key)
                        for etype, state_key in itertools.chain(to_delete, to_insert)
                    ),
                )

                # We include the membership in the current state table, hence we do
                # a lookup when we insert. This assumes that all events have already
                # been inserted into room_memberships.
                txn.execute_batch(
                    """INSERT INTO current_state_events
                        (room_id, type, state_key, event_id, membership)
                    VALUES (?, ?, ?, ?, (SELECT membership FROM room_memberships WHERE event_id = ?))
                    """,
                    [
                        (room_id, key[0], key[1], ev_id, ev_id)
                        for key, ev_id in to_insert.items()
                    ],
                )

            # We now update `local_current_membership`. We do this regardless
            # of whether we're still in the room or not to handle the case where
            # e.g. we just got banned (where we need to record that fact here).

            # Note: Do we really want to delete rows here (that we do not
            # subsequently reinsert below)? While technically correct it means
            # we have no record of the fact the user *was* a member of the
            # room but got, say, state reset out of it.
            if to_delete or to_insert:
                # Only membership events for users that `is_mine_id` accepts
                # are touched.
                txn.execute_batch(
                    "DELETE FROM local_current_membership"
                    " WHERE room_id = ? AND user_id = ?",
                    (
                        (room_id, state_key)
                        for etype, state_key in itertools.chain(to_delete, to_insert)
                        if etype == EventTypes.Member and self.is_mine_id(state_key)
                    ),
                )

            if to_insert:
                txn.execute_batch(
                    """INSERT INTO local_current_membership
                        (room_id, user_id, event_id, membership)
                    VALUES (?, ?, ?, (SELECT membership FROM room_memberships WHERE event_id = ?))
                    """,
                    [
                        (room_id, key[1], ev_id, ev_id)
                        for key, ev_id in to_insert.items()
                        if key[0] == EventTypes.Member and self.is_mine_id(key[1])
                    ],
                )

            # Registered via `call_after` rather than run inline.
            txn.call_after(
                self.store._curr_state_delta_stream_cache.entity_has_changed,
                room_id,
                stream_id,
            )

            # Invalidate the various caches

            # Figure out the changes of membership to invalidate the
            # `get_rooms_for_user` cache.
            # We find out which membership events we may have deleted
            # and which we have added, then we invalidate the caches for all
            # those users.
            members_changed = {
                state_key
                for ev_type, state_key in itertools.chain(to_delete, to_insert)
                if ev_type == EventTypes.Member
            }

            for member in members_changed:
                txn.call_after(
                    self.store.get_rooms_for_user_with_stream_ordering.invalidate,
                    (member,),
                )

            self.store._invalidate_state_caches_and_stream(
                txn, room_id, members_changed
            )

    def _upsert_room_version_txn(self, txn: LoggingTransaction, room_id: str):
        """Bring `rooms.room_version` up to date from the room's create event.

        Called just before the current state of a room is deleted, while the
        create event can still be found via `current_state_events`. Does
        nothing if no create event is found there.
        """

        query = """
            SELECT json FROM event_json
            INNER JOIN current_state_events USING (room_id, event_id)
            WHERE room_id = ? AND type = ? AND state_key = ?
        """
        txn.execute(query, (room_id, EventTypes.Create, ""))

        create_row = txn.fetchone()
        if not create_row:
            return

        create_content = db_to_json(create_row[0]).get("content", {})
        creator = create_content.get("creator")
        # Fall back to room version 1 when the create event names none.
        room_version_id = create_content.get(
            "room_version", RoomVersions.V1.identifier
        )

        self.db_pool.simple_upsert_txn(
            txn,
            table="rooms",
            keyvalues={"room_id": room_id},
            values={"room_version": room_version_id},
            insertion_values={"is_public": False, "creator": creator},
        )

    def _update_forward_extremities_txn(
        self, txn, new_forward_extremities, max_stream_order
    ):
        for room_id in new_forward_extremities.keys():
            self.db_pool.simple_delete_txn(
                txn, table="event_forward_extremities", keyvalues={"room_id": room_id}
            )
            txn.call_after(
                self.store.get_latest_event_ids_in_room.invalidate, (room_id,)
            )

        self.db_pool.simple_insert_many_txn(
            txn,
            table="event_forward_extremities",
            values=[
                {"event_id": ev_id, "room_id": room_id}
                for room_id, new_extrem in new_forward_extremities.items()
                for ev_id in new_extrem
            ],
        )
        # We now insert into stream_ordering_to_exterm a mapping from room_id,
        # new stream_ordering to new forward extremeties in the room.
        # This allows us to later efficiently look up the forward extremeties
        # for a room before a given stream_ordering
        self.db_pool.simple_insert_many_txn(
            txn,
            table="stream_ordering_to_exterm",
            values=[
                {
                    "room_id": room_id,
                    "event_id": event_id,
                    "stream_ordering": max_stream_order,
                }
                for room_id, new_extrem in new_forward_extremities.items()
                for event_id in new_extrem
            ],
        )

    @classmethod
    def _filter_events_and_contexts_for_duplicates(
        cls, events_and_contexts: List[Tuple[EventBase, EventContext]]
    ) -> List[Tuple[EventBase, EventContext]]:
        """Ensure that we don't have the same event twice.

        Pick the earliest non-outlier if there is one, else the earliest one.

        Args:
            events_and_contexts (list[(EventBase, EventContext)]):
        Returns:
            list[(EventBase, EventContext)]: filtered list
        """
        new_events_and_contexts = (
            OrderedDict()
        )  # type: OrderedDict[str, Tuple[EventBase, EventContext]]
        for event, context in events_and_contexts:
            prev_event_context = new_events_and_contexts.get(event.event_id)
            if prev_event_context:
                if not event.internal_metadata.is_outlier():
                    if prev_event_context[0].internal_metadata.is_outlier():
                        # To ensure correct ordering we pop, as OrderedDict is
                        # ordered by first insertion.
                        new_events_and_contexts.pop(event.event_id, None)
                        new_events_and_contexts[event.event_id] = (event, context)
            else:
                new_events_and_contexts[event.event_id] = (event, context)
        return list(new_events_and_contexts.values())

    def _update_room_depths_txn(
        self,
        txn,
        events_and_contexts: List[Tuple[EventBase, EventContext]],
        backfilled: bool,
    ):
        """Update min_depth for each room

        Args:
            txn (twisted.enterprise.adbapi.Connection): db connection
            events_and_contexts (list[(EventBase, EventContext)]): events
                we are persisting
            backfilled (bool): True if the events were backfilled
        """
        depth_updates = {}  # type: Dict[str, int]
        for event, context in events_and_contexts:
            # Remove the any existing cache entries for the event_ids
            txn.call_after(self.store._invalidate_get_event_cache, event.event_id)
            if not backfilled:
                txn.call_after(
                    self.store._events_stream_cache.entity_has_changed,
                    event.room_id,
                    event.internal_metadata.stream_ordering,
                )

            if not event.internal_metadata.is_outlier() and not context.rejected:
                depth_updates[event.room_id] = max(
                    event.depth, depth_updates.get(event.room_id, event.depth)
                )

        for room_id, depth in depth_updates.items():
            self._update_min_depth_for_room_txn(txn, room_id, depth)

    def _update_outliers_txn(self, txn, events_and_contexts):
        """Update any outliers with new event info.

        This turns outliers into ex-outliers (unless the new event was
        rejected).

        Args:
            txn (twisted.enterprise.adbapi.Connection): db connection
            events_and_contexts (list[(EventBase, EventContext)]): events
                we are persisting

        Returns:
            list[(EventBase, EventContext)] new list, without events which
            are already in the events table.
        """
        txn.execute(
            "SELECT event_id, outlier FROM events WHERE event_id in (%s)"
            % (",".join(["?"] * len(events_and_contexts)),),
            [event.event_id for event, _ in events_and_contexts],
        )

        have_persisted = {event_id: outlier for event_id, outlier in txn}

        to_remove = set()
        for event, context in events_and_contexts:
            if event.event_id not in have_persisted:
                continue

            to_remove.add(event)

            if context.rejected:
                # If the event is rejected then we don't care if the event
                # was an outlier or not.
                continue

            outlier_persisted = have_persisted[event.event_id]
            if not event.internal_metadata.is_outlier() and outlier_persisted:
                # We received a copy of an event that we had already stored as
                # an outlier in the database. We now have some state at that
                # so we need to update the state_groups table with that state.

                # insert into event_to_state_groups.
                try:
                    self._store_event_state_mappings_txn(txn, ((event, context),))
                except Exception:
                    logger.exception("")
                    raise

                # update the stored internal_metadata to update the "outlier" flag.
                # TODO: This is unused as of Synapse 1.31. Remove it once we are happy
                #  to drop backwards-compatibility with 1.30.
                metadata_json = json_encoder.encode(event.internal_metadata.get_dict())
                sql = "UPDATE event_json SET internal_metadata = ? WHERE event_id = ?"
                txn.execute(sql, (metadata_json, event.event_id))

                # Add an entry to the ex_outlier_stream table to replicate the
                # change in outlier status to our workers.
                stream_order = event.internal_metadata.stream_ordering
                state_group_id = context.state_group
                self.db_pool.simple_insert_txn(
                    txn,
                    table="ex_outlier_stream",
                    values={
                        "event_stream_ordering": stream_order,
                        "event_id": event.event_id,
                        "state_group": state_group_id,
                        "instance_name": self._instance_name,
                    },
                )

                sql = "UPDATE events SET outlier = ? WHERE event_id = ?"
                txn.execute(sql, (False, event.event_id))

                # Update the event_backward_extremities table now that this
                # event isn't an outlier any more.
                self._update_backward_extremeties(txn, [event])

        return [ec for ec in events_and_contexts if ec[0] not in to_remove]

    def _store_event_txn(self, txn, events_and_contexts):
        """Insert new events into the event, event_json, redaction and
        state_events tables.

        Args:
            txn (twisted.enterprise.adbapi.Connection): db connection
            events_and_contexts (list[(EventBase, EventContext)]): events
                we are persisting
        """

        if not events_and_contexts:
            # nothing to do here
            return

        def event_dict(event):
            d = event.get_dict()
            d.pop("redacted", None)
            d.pop("redacted_because", None)
            return d

        def get_internal_metadata(event):
            im = event.internal_metadata.get_dict()

            # temporary hack for database compatibility with Synapse 1.30 and earlier:
            # store the `outlier` flag inside the internal_metadata json as well as in
            # the `events` table, so that if anyone rolls back to an older Synapse,
            # things keep working. This can be removed once we are happy to drop support
            # for that
            if event.internal_metadata.is_outlier():
                im["outlier"] = True

            return im

        self.db_pool.simple_insert_many_txn(
            txn,
            table="event_json",
            values=[
                {
                    "event_id": event.event_id,
                    "room_id": event.room_id,
                    "internal_metadata": json_encoder.encode(
                        get_internal_metadata(event)
                    ),
                    "json": json_encoder.encode(event_dict(event)),
                    "format_version": event.format_version,
                }
                for event, _ in events_and_contexts
            ],
        )

        self.db_pool.simple_insert_many_txn(
            txn,
            table="events",
            values=[
                {
                    "instance_name": self._instance_name,
                    "stream_ordering": event.internal_metadata.stream_ordering,
                    "topological_ordering": event.depth,
                    "depth": event.depth,
                    "event_id": event.event_id,
                    "room_id": event.room_id,
                    "type": event.type,
                    "processed": True,
                    "outlier": event.internal_metadata.is_outlier(),
                    "origin_server_ts": int(event.origin_server_ts),
                    "received_ts": self._clock.time_msec(),
                    "sender": event.sender,
                    "contains_url": (
                        "url" in event.content and isinstance(event.content["url"], str)
                    ),
                }
                for event, _ in events_and_contexts
            ],
        )

        # If we're persisting an unredacted event we go and ensure
        # that we mark any redactions that reference this event as
        # requiring censoring.
        sql = "UPDATE redactions SET have_censored = ? WHERE redacts = ?"
        txn.execute_batch(
            sql,
            (
                (
                    False,
                    event.event_id,
                )
                for event, _ in events_and_contexts
                if not event.internal_metadata.is_redacted()
            ),
        )

        state_events_and_contexts = [
            ec for ec in events_and_contexts if ec[0].is_state()
        ]

        state_values = []
        for event, _ in state_events_and_contexts:
            vals = {
                "event_id": event.event_id,
                "room_id": event.room_id,
                "type": event.type,
                "state_key": event.state_key,
            }

            # TODO: How does this work with backfilling?
            if hasattr(event, "replaces_state"):
                vals["prev_state"] = event.replaces_state

            state_values.append(vals)

        self.db_pool.simple_insert_many_txn(
            txn, table="state_events", values=state_values
        )

    def _store_rejected_events_txn(self, txn, events_and_contexts):
        """Add rows to the 'rejections' table for received events which were
        rejected

        Args:
            txn (twisted.enterprise.adbapi.Connection): db connection
            events_and_contexts (list[(EventBase, EventContext)]): events
                we are persisting

        Returns:
            list[(EventBase, EventContext)] new list, without the rejected
                events.
        """
        # Remove the rejected events from the list now that we've added them
        # to the events table and the events_json table.
        to_remove = set()
        for event, context in events_and_contexts:
            if context.rejected:
                # Insert the event_id into the rejections table
                self._store_rejections_txn(txn, event.event_id, context.rejected)
                to_remove.add(event)

        return [ec for ec in events_and_contexts if ec[0] not in to_remove]

    def _update_metadata_tables_txn(
        self, txn, events_and_contexts, all_events_and_contexts, backfilled
    ):
        """Update all the miscellaneous tables for new events

        Args:
            txn (twisted.enterprise.adbapi.Connection): db connection
            events_and_contexts (list[(EventBase, EventContext)]): events
                we are persisting
            all_events_and_contexts (list[(EventBase, EventContext)]): all
                events that we were going to persist. This includes events
                we've already persisted, etc, that wouldn't appear in
                events_and_context.
            backfilled (bool): True if the events were backfilled
        """

        # Insert all the push actions into the event_push_actions table.
        self._set_push_actions_for_event_and_users_txn(
            txn,
            events_and_contexts=events_and_contexts,
            all_events_and_contexts=all_events_and_contexts,
        )

        if not events_and_contexts:
            # nothing to do here
            return

        for event, _ in events_and_contexts:
            if event.type == EventTypes.Redaction and event.redacts is not None:
                # Remove the entries in the event_push_actions table for the
                # redacted event.
                self._remove_push_actions_for_event_id_txn(
                    txn, event.room_id, event.redacts
                )

                # Remove from relations table.
                self._handle_redaction(txn, event.redacts)

        # Update the event_forward_extremities, event_backward_extremities and
        # event_edges tables.
        self._handle_mult_prev_events(
            txn, events=[event for event, _ in events_and_contexts]
        )

        for event, _ in events_and_contexts:
            if event.type == EventTypes.Name:
                # Insert into the event_search table.
                self._store_room_name_txn(txn, event)
            elif event.type == EventTypes.Topic:
                # Insert into the event_search table.
                self._store_room_topic_txn(txn, event)
            elif event.type == EventTypes.Message:
                # Insert into the event_search table.
                self._store_room_message_txn(txn, event)
            elif event.type == EventTypes.Redaction and event.redacts is not None:
                # Insert into the redactions table.
                self._store_redaction(txn, event)
            elif event.type == EventTypes.Retention:
                # Update the room_retention table.
                self._store_retention_policy_for_room_txn(txn, event)

            self._handle_event_relations(txn, event)

            # Store the labels for this event.
            labels = event.content.get(EventContentFields.LABELS)
            if labels:
                self.insert_labels_for_event_txn(
                    txn, event.event_id, labels, event.room_id, event.depth
                )

            if self._ephemeral_messages_enabled:
                # If there's an expiry timestamp on the event, store it.
                expiry_ts = event.content.get(EventContentFields.SELF_DESTRUCT_AFTER)
                if isinstance(expiry_ts, int) and not event.is_state():
                    self._insert_event_expiry_txn(txn, event.event_id, expiry_ts)

        # Insert into the room_memberships table.
        self._store_room_members_txn(
            txn,
            [
                event
                for event, _ in events_and_contexts
                if event.type == EventTypes.Member
            ],
            backfilled=backfilled,
        )

        # Insert event_reference_hashes table.
        self._store_event_reference_hashes_txn(
            txn, [event for event, _ in events_and_contexts]
        )

        # Prefill the event cache
        self._add_to_cache(txn, events_and_contexts)

    def _add_to_cache(self, txn, events_and_contexts):
        to_prefill = []

        rows = []
        N = 200
        for i in range(0, len(events_and_contexts), N):
            ev_map = {e[0].event_id: e[0] for e in events_and_contexts[i : i + N]}
            if not ev_map:
                break

            sql = (
                "SELECT "
                " e.event_id as event_id, "
                " r.redacts as redacts,"
                " rej.event_id as rejects "
                " FROM events as e"
                " LEFT JOIN rejections as rej USING (event_id)"
                " LEFT JOIN redactions as r ON e.event_id = r.redacts"
                " WHERE "
            )

            clause, args = make_in_list_sql_clause(
                self.database_engine, "e.event_id", list(ev_map)
            )

            txn.execute(sql + clause, args)
            rows = self.db_pool.cursor_to_dict(txn)
            for row in rows:
                event = ev_map[row["event_id"]]
                if not row["rejects"] and not row["redacts"]:
                    to_prefill.append(
                        _EventCacheEntry(event=event, redacted_event=None)
                    )

        def prefill():
            for cache_entry in to_prefill:
                self.store._get_event_cache.set((cache_entry[0].event_id,), cache_entry)

        txn.call_after(prefill)

    def _store_redaction(self, txn, event):
        """Record a redaction event in the `redactions` table.

        Args:
            txn (LoggingTransaction): The transaction to execute.
            event (EventBase): The redaction event; `event.redacts` is the ID
                of the event it redacts.
        """
        redacted_id = event.redacts

        # Any cached copy of the redacted event is stale, so queue up an
        # invalidation for it.
        txn.call_after(self.store._invalidate_get_event_cache, redacted_id)

        row = {
            "event_id": event.event_id,
            "redacts": redacted_id,
            "received_ts": self._clock.time_msec(),
        }
        self.db_pool.simple_insert_txn(txn, table="redactions", values=row)

    def insert_labels_for_event_txn(
        self, txn, event_id, labels, room_id, topological_ordering
    ):
        """Persist an event's labels, writing one `event_labels` row per label.

        Args:
            txn (LoggingTransaction): The transaction to execute.
            event_id (str): The event's ID.
            labels (list[str]): A list of text labels.
            room_id (str): The ID of the room the event was sent to.
            topological_ordering (int): The position of the event in the room's topology.

        Returns:
            Whatever the underlying `simple_insert_many_txn` call returns.
        """
        rows = []
        for label in labels:
            rows.append(
                {
                    "event_id": event_id,
                    "label": label,
                    "room_id": room_id,
                    "topological_ordering": topological_ordering,
                }
            )

        return self.db_pool.simple_insert_many_txn(
            txn=txn, table="event_labels", values=rows
        )

    def _insert_event_expiry_txn(self, txn, event_id, expiry_ts):
        """Write a row to `event_expiry` recording when an event expires.

        Args:
            txn (LoggingTransaction): The database transaction to use.
            event_id (str): The event ID the expiry timestamp is associated with.
            expiry_ts (int): The timestamp at which to expire (delete) the event.

        Returns:
            Whatever the underlying `simple_insert_txn` call returns.
        """
        row = {"event_id": event_id, "expiry_ts": expiry_ts}
        return self.db_pool.simple_insert_txn(
            txn=txn, table="event_expiry", values=row
        )

    def _store_event_reference_hashes_txn(self, txn, events):
        """Store the reference hash of each of the given events.

        Args:
            txn (cursor):
            events (list): list of Events.
        """

        def to_row(event):
            # The hash is computed first, then wrapped in a memoryview before
            # being handed to the database layer.
            algorithm, digest = compute_event_reference_hash(event)
            return {
                "event_id": event.event_id,
                "algorithm": algorithm,
                "hash": memoryview(digest),
            }

        rows = [to_row(event) for event in events]

        self.db_pool.simple_insert_many_txn(
            txn, table="event_reference_hashes", values=rows
        )

    def _store_room_members_txn(self, txn, events, backfilled):
        """Store the given membership events in the `room_memberships` table.

        Also queues the related cache updates and, for "out of band"
        memberships of local users, updates `local_current_membership`.

        Args:
            txn (LoggingTransaction): The transaction to execute.
            events (list[EventBase]): The membership events to store. Iterated
                more than once.
            backfilled (bool): Whether the events are being backfilled.
        """

        def text_or_none(value: Any) -> Optional[str]:
            # Profile fields are only stored when they are actually strings.
            if isinstance(value, str):
                return value
            return None

        membership_rows = []
        for event in events:
            membership_rows.append(
                {
                    "event_id": event.event_id,
                    "user_id": event.state_key,
                    "sender": event.user_id,
                    "room_id": event.room_id,
                    "membership": event.membership,
                    "display_name": text_or_none(event.content.get("displayname")),
                    "avatar_url": text_or_none(event.content.get("avatar_url")),
                }
            )

        self.db_pool.simple_insert_many_txn(
            txn, table="room_memberships", values=membership_rows
        )

        for event in events:
            target_user = event.state_key
            metadata = event.internal_metadata

            txn.call_after(
                self.store._membership_stream_cache.entity_has_changed,
                target_user,
                metadata.stream_ordering,
            )
            txn.call_after(
                self.store.get_invited_rooms_for_local_user.invalidate,
                (target_user,),
            )

            # The local_current_membership table is only touched here when the
            # event is "current", i.e. something that has just happened.
            #
            # Normally the `current_state_events` handling takes care of that
            # table. The exception is outliers: an outlier only counts as
            # "current" when it is an "out of band membership", such as a
            # remote invite or the rejection of a remote invite.
            if not self.is_mine_id(target_user):
                continue
            if backfilled:
                continue
            if not metadata.is_outlier():
                continue
            if not metadata.is_out_of_band_membership():
                continue

            self.db_pool.simple_upsert_txn(
                txn,
                table="local_current_membership",
                keyvalues={"room_id": event.room_id, "user_id": target_user},
                values={
                    "event_id": event.event_id,
                    "membership": event.membership,
                },
            )

    def _handle_event_relations(self, txn, event):
        """Handles inserting relation data during persistence of events

        Reads the `m.relates_to` field of the event content. If it describes
        an annotation, reference or replacement of another event, a row is
        added to `event_relations` and the relation caches for the parent
        event are queued for invalidation. Anything malformed is ignored.

        Args:
            txn (LoggingTransaction): The transaction to execute.
            event (EventBase): The event being persisted.
        """
        # Imported locally so that this method does not rely on a new
        # module-level import.
        from collections.abc import Mapping

        relation = event.content.get("m.relates_to")
        if not relation:
            # No relations
            return

        if not isinstance(relation, Mapping):
            # Event content is arbitrary JSON, so `m.relates_to` may be a
            # string, list, number, etc. Treat that as "no relation" rather
            # than blowing up on the `.get` calls below.
            return

        rel_type = relation.get("rel_type")
        if rel_type not in (
            RelationTypes.ANNOTATION,
            RelationTypes.REFERENCE,
            RelationTypes.REPLACE,
        ):
            # Unknown relation type
            return

        parent_id = relation.get("event_id")
        if not parent_id or not isinstance(parent_id, str):
            # Invalid relation: the parent must be identified by a non-empty
            # string.
            return

        aggregation_key = relation.get("key")

        self.db_pool.simple_insert_txn(
            txn,
            table="event_relations",
            values={
                "event_id": event.event_id,
                "relates_to_id": parent_id,
                "relation_type": rel_type,
                "aggregation_key": aggregation_key,
            },
        )

        txn.call_after(self.store.get_relations_for_event.invalidate, (parent_id,))
        txn.call_after(
            self.store.get_aggregation_groups_for_event.invalidate, (parent_id,)
        )

        if rel_type == RelationTypes.REPLACE:
            txn.call_after(self.store.get_applicable_edit.invalidate, (parent_id,))

    def _handle_redaction(self, txn, redacted_event_id):
        """Remove the relation rows of an event that has been redacted.

        Deletes every `event_relations` row whose `event_id` is the redacted
        event.

        Args:
            txn
            redacted_event_id (str): The event that was redacted.
        """
        match = {"event_id": redacted_event_id}
        self.db_pool.simple_delete_txn(txn, table="event_relations", keyvalues=match)

    def _store_room_topic_txn(self, txn, event):
        """Add the event's `topic` to the search table, if it has a usable one.

        Args:
            txn (LoggingTransaction): The transaction to execute.
            event (EventBase): The event whose content may carry a `topic`.
        """
        content = getattr(event, "content", None)
        if not content:
            return

        topic = content.get("topic")
        # Event content is arbitrary JSON: only index the value when it is
        # actually text, rather than handing e.g. a dict or number to the
        # search table.
        if isinstance(topic, str):
            self.store_event_search_txn(txn, event, "content.topic", topic)

    def _store_room_name_txn(self, txn, event):
        """Add the event's `name` to the search table, if it has a usable one.

        Args:
            txn (LoggingTransaction): The transaction to execute.
            event (EventBase): The event whose content may carry a `name`.
        """
        content = getattr(event, "content", None)
        if not content:
            return

        name = content.get("name")
        # Event content is arbitrary JSON: only index the value when it is
        # actually text, rather than handing e.g. a dict or number to the
        # search table.
        if isinstance(name, str):
            self.store_event_search_txn(txn, event, "content.name", name)

    def _store_room_message_txn(self, txn, event):
        """Add the event's `body` to the search table, if it has a usable one.

        Args:
            txn (LoggingTransaction): The transaction to execute.
            event (EventBase): The event whose content may carry a `body`.
        """
        content = getattr(event, "content", None)
        if not content:
            return

        body = content.get("body")
        # Event content is arbitrary JSON: only index the value when it is
        # actually text, rather than handing e.g. a dict or number to the
        # search table.
        if isinstance(body, str):
            self.store_event_search_txn(txn, event, "content.body", body)

    def _store_retention_policy_for_room_txn(self, txn, event):
        """Store the retention policy carried by a state event, if it is valid.

        Non-state events are ignored, as are events whose content has neither
        `min_lifetime` nor `max_lifetime`, or where either value that is
        present is not an integer. Otherwise a row is added to
        `room_retention` and the cached policy for the room is invalidated.

        Args:
            txn (LoggingTransaction): The transaction to execute.
            event (EventBase): The event to take the policy from.
        """
        if not event.is_state():
            logger.debug("Ignoring non-state m.room.retention event")
            return

        content = getattr(event, "content", None)
        if content is None:
            return

        lifetime_keys = ("min_lifetime", "max_lifetime")
        if not any(key in content for key in lifetime_keys):
            # Nothing to store.
            return

        def is_integer(value):
            # `bool` is a subclass of `int`, so a plain isinstance check would
            # let JSON `true`/`false` through as a lifetime.
            return isinstance(value, int) and not isinstance(value, bool)

        for key in lifetime_keys:
            if key in content and not is_integer(content[key]):
                # Ignore the event if one of the values isn't an integer.
                return

        self.db_pool.simple_insert_txn(
            txn=txn,
            table="room_retention",
            values={
                "room_id": event.room_id,
                "event_id": event.event_id,
                "min_lifetime": content.get("min_lifetime"),
                "max_lifetime": content.get("max_lifetime"),
            },
        )

        self.store._invalidate_cache_and_stream(
            txn, self.store.get_retention_policy_for_room, (event.room_id,)
        )

    def store_event_search_txn(self, txn, event, key, value):
        """Hand a single search entry for the event to the search store.

        Args:
            txn (cursor):
            event (EventBase):
            key (str):
            value (str):
        """
        entry = SearchEntry(
            key=key,
            value=value,
            event_id=event.event_id,
            room_id=event.room_id,
            stream_ordering=event.internal_metadata.stream_ordering,
            origin_server_ts=event.origin_server_ts,
        )

        # The store takes a collection of entries; there is only the one here.
        self.store.store_search_entries_txn(txn, (entry,))

    def _set_push_actions_for_event_and_users_txn(
        self, txn, events_and_contexts, all_events_and_contexts
    ):
        """Move push actions out of the staging table for persisted events.

        Rows in `event_push_actions_staging` for each event in
        `events_and_contexts` are copied into `event_push_actions`, and the
        unread-push-actions cache is queued for invalidation for each affected
        (room, user) pair. Finally the staging rows are deleted for every
        event in `all_events_and_contexts`.

        Args:
            txn (LoggingTransaction): The transaction to execute.
            events_and_contexts (list[(EventBase, EventContext)]): events
                we are persisting
            all_events_and_contexts (list[(EventBase, EventContext)]): all
                events that we were going to persist. This includes events
                we've already persisted, etc, that wouldn't appear in
                events_and_context.
        """

        copy_from_staging_sql = """
            INSERT INTO event_push_actions (
                room_id, event_id, user_id, actions, stream_ordering,
                topological_ordering, notif, highlight, unread
            )
            SELECT ?, event_id, user_id, actions, ?, ?, notif, highlight, unread
            FROM event_push_actions_staging
            WHERE event_id = ?
        """

        if events_and_contexts:
            copy_args = (
                (
                    ev.room_id,
                    ev.internal_metadata.stream_ordering,
                    ev.depth,
                    ev.event_id,
                )
                for ev, _ in events_and_contexts
            )
            txn.execute_batch(copy_from_staging_sql, copy_args)

            # Group the event IDs by room so that the staging table is queried
            # once per room.
            event_ids_by_room = {}  # type: Dict[str, List[str]]
            for ev, _ in events_and_contexts:
                if ev.room_id not in event_ids_by_room:
                    event_ids_by_room[ev.room_id] = []
                event_ids_by_room[ev.room_id].append(ev.event_id)

            invalidate = self.store.get_unread_event_push_actions_by_room_for_user.invalidate

            for room_id, room_event_ids in event_ids_by_room.items():
                staged = self.db_pool.simple_select_many_txn(
                    txn,
                    table="event_push_actions_staging",
                    column="event_id",
                    iterable=room_event_ids,
                    keyvalues={},
                    retcols=("user_id",),
                )

                # De-duplicate, so each user's cache entry is invalidated once.
                for user_id in {row["user_id"] for row in staged}:
                    txn.call_after(invalidate, (room_id, user_id))

        # The staging area is cleared for *all* of the events that were being
        # persisted, not just the ones handled above.
        staged_ids = ((ev.event_id,) for ev, _ in all_events_and_contexts)
        txn.execute_batch(
            "DELETE FROM event_push_actions_staging WHERE event_id = ?", staged_ids
        )

    def _remove_push_actions_for_event_id_txn(self, txn, room_id, event_id):
        """Delete the push actions stored for a single event.

        Args:
            txn (LoggingTransaction): The transaction to execute.
            room_id (str): The room the event is in.
            event_id (str): The event whose push actions should be removed.
        """
        unread_cache = self.store.get_unread_event_push_actions_by_room_for_user

        # Unfortunately the invalidation is keyed on the room alone, so the
        # cache is blown away for the whole room.
        txn.call_after(unread_cache.invalidate, (room_id,))

        delete_sql = "DELETE FROM event_push_actions WHERE room_id = ? AND event_id = ?"
        txn.execute(delete_sql, (room_id, event_id))

    def _store_rejections_txn(self, txn, event_id, reason):
        """Record in the `rejections` table that an event was rejected.

        Args:
            txn (LoggingTransaction): The transaction to execute.
            event_id (str): The ID of the rejected event.
            reason (str): Why the event was rejected.
        """
        row = {
            "event_id": event_id,
            "reason": reason,
            # Stamped with the current time from the store's clock.
            "last_check": self._clock.time_msec(),
        }
        self.db_pool.simple_insert_txn(txn, table="rejections", values=row)

    def _store_event_state_mappings_txn(
        self, txn, events_and_contexts: Iterable[Tuple[EventBase, EventContext]]
    ):
        """Record which state group each non-outlier event belongs to.

        Writes one `event_to_state_groups` row per event and queues a prefill
        of the event -> state group cache with the same mappings.

        Args:
            txn: The transaction to execute.
            events_and_contexts: The events being persisted, each with its
                context. Outliers are skipped.
        """
        group_for_event = {}
        for event, context in events_and_contexts:
            if event.internal_metadata.is_outlier():
                continue

            # A rejected event is simply given the same state as its
            # predecessor.
            group_for_event[event.event_id] = (
                context.state_group_before_event
                if context.rejected
                else context.state_group
            )

        rows = []
        for event_id, state_group_id in group_for_event.items():
            rows.append({"state_group": state_group_id, "event_id": event_id})

        self.db_pool.simple_insert_many_txn(
            txn, table="event_to_state_groups", values=rows
        )

        prefill = self.store._get_state_group_for_event.prefill
        for event_id, state_group_id in group_for_event.items():
            txn.call_after(prefill, (event_id,), state_group_id)

    def _update_min_depth_for_room_txn(self, txn, room_id, depth):
        """Lower the stored minimum depth of a room, if `depth` is smaller.

        Args:
            txn (LoggingTransaction): The transaction to execute.
            room_id (str): The room to update.
            depth (int): The candidate new minimum depth.
        """
        current_min = self.store._get_min_depth_interaction(txn, room_id)

        # Only write when there is no stored value yet, or the new depth is
        # strictly below it.
        if current_min is None or depth < current_min:
            self.db_pool.simple_upsert_txn(
                txn,
                table="room_depth",
                keyvalues={"room_id": room_id},
                values={"min_depth": depth},
            )

    def _handle_mult_prev_events(self, txn, events):
        """
        For the given events, add a row to the event edges table for each of
        their prev events, then update the backward extremities table.
        """
        edge_rows = []
        for event in events:
            for prev_id in event.prev_event_ids():
                edge_rows.append(
                    {
                        "event_id": event.event_id,
                        "prev_event_id": prev_id,
                        "room_id": event.room_id,
                        "is_state": False,
                    }
                )

        self.db_pool.simple_insert_many_txn(
            txn, table="event_edges", values=edge_rows
        )

        self._update_backward_extremeties(txn, events)

    def _update_backward_extremeties(self, txn, events):
        """Updates the event_backward_extremities tables based on the new/updated
        events being persisted.

        This is called for new events *and* for events that were outliers, but
        are now being persisted as non-outliers.

        Forward extremities are handled when we first start persisting the events.

        Args:
            txn (LoggingTransaction): The transaction to execute.
            events (Iterable[EventBase]): The events being persisted. Outliers
                are ignored.
        """
        # `events` is needed for both statements below, so filter it into a
        # list once. This also means a one-shot iterable works, and that
        # `is_outlier()` is asked once per event rather than per prev event.
        non_outliers = [ev for ev in events if not ev.internal_metadata.is_outlier()]

        # Insert each prev event as a backward extremity, unless it is already
        # listed as one or is already in `events` as a non-outlier.
        query = (
            "INSERT INTO event_backward_extremities (event_id, room_id)"
            " SELECT ?, ? WHERE NOT EXISTS ("
            " SELECT 1 FROM event_backward_extremities"
            " WHERE event_id = ? AND room_id = ?"
            " )"
            " AND NOT EXISTS ("
            " SELECT 1 FROM events WHERE event_id = ? AND room_id = ? "
            " AND outlier = ?"
            " )"
        )

        txn.execute_batch(
            query,
            [
                (e_id, ev.room_id, e_id, ev.room_id, e_id, ev.room_id, False)
                for ev in non_outliers
                for e_id in ev.prev_event_ids()
            ],
        )

        # The events being persisted are themselves no longer backward
        # extremities.
        query = (
            "DELETE FROM event_backward_extremities"
            " WHERE event_id = ? AND room_id = ?"
        )
        txn.execute_batch(
            query,
            [(ev.event_id, ev.room_id) for ev in non_outliers],
        )


@attr.s(slots=True)
class _LinkMap:
    """Keeps track of the links that exist between chains."""

    # The links themselves, held as nested dicts keyed by, in order: source
    # chain ID, target chain ID, source sequence number. The innermost value is
    # the target sequence number.
    maps = attr.ib(type=Dict[int, Dict[int, Dict[int, int]]], factory=dict)

    # The links that were added with `new` set to true, each recorded as a
    # `(source chain ID, source sequence no, target chain ID, target sequence no.)`
    # tuple.
    additions = attr.ib(type=Set[Tuple[int, int, int, int]], factory=set)

    def add_link(
        self,
        src_tuple: Tuple[int, int],
        target_tuple: Tuple[int, int],
        new: bool = True,
    ) -> bool:
        """Record a link from one chain to another, unless it is redundant.

        Callers are expected to add new links in topological order.

        Args:
            src_tuple: The chain ID/sequence number of the source of the link.
            target_tuple: The chain ID/sequence number of the target of the link.
            new: Whether this is a "new" link, i.e. should it be returned
                by `get_additions`. Only new links are checked for redundancy.

        Returns:
            True if a link was added, false if the given link was dropped as redundant
        """
        src_chain, src_seq = src_tuple
        target_chain, target_seq = target_tuple

        links_from_src = self.maps.setdefault(src_chain, {})
        current_links = links_from_src.setdefault(target_chain, {})

        assert src_chain != target_chain

        if new:
            # A link is redundant when it "crosses" a link we already have.
            # Below, link 1 (L1) is redundant, because everything reachable
            # through L1 can *also* be reached through L2.
            #
            #   Chain A     Chain B
            #      |          |
            #   L1 |------    |
            #      |     |    |
            #   L2 |---- | -->|
            #      |     |    |
            #      |     |--->|
            #      |          |
            #      |          |
            #
            # Hence the only links worth keeping are those that do *not*
            # cross, i.e. that start and end on the same side (both above or
            # both below) of each existing link.
            #
            # Because links are added in topological order, `src_seq` should
            # never be less than the source of an existing link.
            crosses_existing = any(
                known_src <= src_seq and target_seq <= known_target
                for known_src, known_target in current_links.items()
            )
            if crosses_existing:
                # Redundant, so there is nothing to record.
                return False

            self.additions.add((src_chain, src_seq, target_chain, target_seq))

        current_links[src_seq] = target_seq
        return True

    def get_links_from(
        self, src_tuple: Tuple[int, int]
    ) -> Generator[Tuple[int, int], None, None]:
        """Gets the chains reachable from the given chain/sequence number.

        Only links whose source sequence number is at or before the given
        sequence number are considered.

        Yields:
            The chain ID and sequence number the link points to.
        """
        src_chain, src_seq = src_tuple
        links_by_target = self.maps.get(src_chain, {})
        for target_id, links in links_by_target.items():
            for start_seq, end_seq in links.items():
                if start_seq > src_seq:
                    continue
                yield target_id, end_seq

    def get_links_between(
        self, source_chain: int, target_chain: int
    ) -> Generator[Tuple[int, int], None, None]:
        """Gets the links between two chains.

        Yields:
            The source and target sequence numbers.
        """
        links = self.maps.get(source_chain, {}).get(target_chain, {})
        yield from links.items()

    def get_additions(self) -> Generator[Tuple[int, int, int, int], None, None]:
        """Gets any newly added links.

        The target sequence number is read back from `maps`, and an addition
        with no matching entry there is skipped.

        Yields:
            The source chain ID/sequence number and target chain ID/sequence number
        """
        for src_chain, src_seq, target_chain, _ in self.additions:
            links = self.maps.get(src_chain, {}).get(target_chain, {})
            current_target_seq = links.get(src_seq)
            if current_target_seq is None:
                continue
            yield (src_chain, src_seq, target_chain, current_target_seq)

    def exists_path_from(
        self,
        src_tuple: Tuple[int, int],
        target_tuple: Tuple[int, int],
    ) -> bool:
        """Checks if there is a path between the source chain ID/sequence and
        target chain ID/sequence.

        Only direct links between the two chains are consulted.
        """
        src_chain, src_seq = src_tuple
        target_chain, target_seq = target_tuple

        if src_chain == target_chain:
            # Within a single chain only the sequence numbers matter.
            return target_seq <= src_seq

        # A link is usable when it starts at or before the source and ends at
        # or after the target.
        return any(
            link_start_seq <= src_seq and target_seq <= link_end_seq
            for link_start_seq, link_end_seq in self.get_links_between(
                src_chain, target_chain
            )
        )