#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2018 Vector Creations Ltd
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
#
|
|
|
|
|
|
|
|
import logging
|
2023-10-16 07:35:22 -04:00
|
|
|
from typing import List, Optional, Tuple
|
|
|
|
|
|
|
|
import attr
|
2020-03-02 11:52:15 -05:00
|
|
|
|
2024-08-06 12:43:43 -04:00
|
|
|
from synapse.logging.opentracing import trace
|
2019-03-25 05:37:08 -04:00
|
|
|
from synapse.storage._base import SQLBaseStore
|
2024-08-16 11:04:02 -04:00
|
|
|
from synapse.storage.database import LoggingTransaction, make_in_list_sql_clause
|
2024-07-29 17:45:48 -04:00
|
|
|
from synapse.storage.databases.main.stream import _filter_results_by_stream
|
2024-08-16 11:04:02 -04:00
|
|
|
from synapse.types import RoomStreamToken, StrCollection
|
2021-11-12 15:24:12 -05:00
|
|
|
from synapse.util.caches.stream_change_cache import StreamChangeCache
|
2024-08-16 11:04:02 -04:00
|
|
|
from synapse.util.iterutils import batch_iter
|
2019-03-25 05:37:08 -04:00
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
2023-10-16 07:35:22 -04:00
|
|
|
@attr.s(slots=True, frozen=True, auto_attribs=True)
class StateDelta:
    """A single row of `current_state_delta_stream`: one change to one entry
    of a room's current state, at a given stream position.
    """

    # Stream position at which this state change happened.
    stream_id: int
    # The room whose current state changed.
    room_id: str
    # Event type of the state entry that changed (`type` column in the table).
    event_type: str
    # State key of the state entry that changed.
    state_key: str

    event_id: Optional[str]
    """new event_id for this state key. None if the state has been deleted."""

    prev_event_id: Optional[str]
    """previous event_id for this state key. None if it's new state."""
|
|
|
|
|
|
|
|
|
2019-03-25 05:37:08 -04:00
|
|
|
class StateDeltasStore(SQLBaseStore):
    """Read accessors for the `current_state_delta_stream` table, which records
    one row per change to a room's current state.
    """

    # This class must be mixed in with a child class which provides the following
    # attribute. TODO: can we get static analysis to enforce this?
    _curr_state_delta_stream_cache: StreamChangeCache

    async def get_partial_current_state_deltas(
        self, prev_stream_id: int, max_stream_id: int
    ) -> Tuple[int, List[StateDelta]]:
        """Fetch a list of room state changes since the given stream id

        This may be the partial state if we're lazy joining the room.

        Args:
            prev_stream_id: point to get changes since (exclusive)
            max_stream_id: the point that we know has been correctly persisted
               - ie, an upper limit to return changes from.

        Returns:
            A tuple consisting of:
                - the stream id which these results go up to
                - list of current_state_delta_stream rows. If it is empty, we are
                  up to date.
        """
        prev_stream_id = int(prev_stream_id)

        # check we're not going backwards
        assert (
            prev_stream_id <= max_stream_id
        ), f"New stream id {max_stream_id} is smaller than prev stream id {prev_stream_id}"

        # Cheap short-circuit: if the stream-change cache says nothing at all
        # changed after prev_stream_id, we can skip the database entirely.
        if not self._curr_state_delta_stream_cache.has_any_entity_changed(
            prev_stream_id
        ):
            # if the CSDs haven't changed between prev_stream_id and now, we
            # know for certain that they haven't changed between prev_stream_id and
            # max_stream_id.
            return max_stream_id, []

        def get_current_state_deltas_txn(
            txn: LoggingTransaction,
        ) -> Tuple[int, List[StateDelta]]:
            # First we calculate the max stream id that will give us less than
            # N results.
            # We arbitrarily limit to 100 stream_id entries to ensure we don't
            # select toooo many.
            sql = """
                SELECT stream_id, count(*)
                FROM current_state_delta_stream
                WHERE stream_id > ? AND stream_id <= ?
                GROUP BY stream_id
                ORDER BY stream_id ASC
                LIMIT 100
            """
            txn.execute(sql, (prev_stream_id, max_stream_id))

            total = 0

            # Walk the per-stream_id counts in order, stopping once we would
            # exceed 100 rows in total; `clipped_stream_id` then bounds the
            # second query below. Note we clip on whole stream_id boundaries,
            # never mid-stream_id.
            for stream_id, count in txn:
                total += count
                if total > 100:
                    # We arbitrarily limit to 100 entries to ensure we don't
                    # select toooo many.
                    logger.debug(
                        "Clipping current_state_delta_stream rows to stream_id %i",
                        stream_id,
                    )
                    clipped_stream_id = stream_id
                    break
            else:
                # if there's no problem, we may as well go right up to the max_stream_id
                clipped_stream_id = max_stream_id

            # Now actually get the deltas
            sql = """
                SELECT stream_id, room_id, type, state_key, event_id, prev_event_id
                FROM current_state_delta_stream
                WHERE ? < stream_id AND stream_id <= ?
                ORDER BY stream_id ASC
            """
            txn.execute(sql, (prev_stream_id, clipped_stream_id))
            return clipped_stream_id, [
                StateDelta(
                    stream_id=row[0],
                    room_id=row[1],
                    event_type=row[2],
                    state_key=row[3],
                    event_id=row[4],
                    prev_event_id=row[5],
                )
                for row in txn.fetchall()
            ]

        return await self.db_pool.runInteraction(
            "get_current_state_deltas", get_current_state_deltas_txn
        )

    def _get_max_stream_id_in_current_state_deltas_txn(
        self, txn: LoggingTransaction
    ) -> int:
        """Transaction helper: the highest stream_id in
        `current_state_delta_stream`, or -1 if the table is empty.
        """
        return self.db_pool.simple_select_one_onecol_txn(
            txn,
            table="current_state_delta_stream",
            keyvalues={},
            # COALESCE so an empty table yields -1 rather than NULL.
            retcol="COALESCE(MAX(stream_id), -1)",
        )

    async def get_max_stream_id_in_current_state_deltas(self) -> int:
        """Return the highest stream_id in `current_state_delta_stream`
        (-1 if the table is empty).
        """
        return await self.db_pool.runInteraction(
            "get_max_stream_id_in_current_state_deltas",
            self._get_max_stream_id_in_current_state_deltas_txn,
        )

    @trace
    async def get_current_state_deltas_for_room(
        self, room_id: str, from_token: RoomStreamToken, to_token: RoomStreamToken
    ) -> List[StateDelta]:
        """Get the state deltas between two tokens.

        Args:
            room_id: the room to fetch deltas for
            from_token: lower bound of the stream range (exclusive)
            to_token: upper bound of the stream range (inclusive)

        Returns:
            The matching deltas in ascending stream order; empty if nothing
            in the room changed since `from_token`.
        """

        # Cheap short-circuit via the stream-change cache: if the room
        # definitely hasn't changed since from_token, skip the database.
        if not self._curr_state_delta_stream_cache.has_entity_changed(
            room_id, from_token.stream
        ):
            return []

        def get_current_state_deltas_for_room_txn(
            txn: LoggingTransaction,
        ) -> List[StateDelta]:
            # The plain stream_id bounds here are a superset of the token
            # range; each row is re-checked against the tokens below using
            # its (instance_name, stream_id) — see `_filter_results_by_stream`.
            sql = """
                SELECT instance_name, stream_id, type, state_key, event_id, prev_event_id
                FROM current_state_delta_stream
                WHERE room_id = ? AND ? < stream_id AND stream_id <= ?
                ORDER BY stream_id ASC
            """
            txn.execute(
                sql, (room_id, from_token.stream, to_token.get_max_stream_pos())
            )

            return [
                StateDelta(
                    stream_id=row[1],
                    room_id=room_id,
                    event_type=row[2],
                    state_key=row[3],
                    event_id=row[4],
                    prev_event_id=row[5],
                )
                for row in txn
                if _filter_results_by_stream(from_token, to_token, row[0], row[1])
            ]

        return await self.db_pool.runInteraction(
            "get_current_state_deltas_for_room", get_current_state_deltas_for_room_txn
        )

    @trace
    async def get_current_state_deltas_for_rooms(
        self,
        room_ids: StrCollection,
        from_token: RoomStreamToken,
        to_token: RoomStreamToken,
    ) -> List[StateDelta]:
        """Get the state deltas between two tokens for the set of rooms.

        Args:
            room_ids: the rooms to fetch deltas for
            from_token: lower bound of the stream range (exclusive)
            to_token: upper bound of the stream range (inclusive)

        Returns:
            The matching deltas across all requested rooms.
        """

        # Narrow the query to rooms the stream-change cache says may have
        # changed since from_token; the rest definitely have no deltas.
        room_ids = self._curr_state_delta_stream_cache.get_entities_changed(
            room_ids, from_token.stream
        )
        if not room_ids:
            return []

        def get_current_state_deltas_for_rooms_txn(
            txn: LoggingTransaction,
            room_ids: StrCollection,
        ) -> List[StateDelta]:
            clause, args = make_in_list_sql_clause(
                self.database_engine, "room_id", room_ids
            )

            # As in the single-room variant, the stream_id bounds are a
            # superset of the token range; rows are re-filtered per
            # (instance_name, stream_id) below.
            sql = f"""
                SELECT instance_name, stream_id, room_id, type, state_key, event_id, prev_event_id
                FROM current_state_delta_stream
                WHERE {clause} AND ? < stream_id AND stream_id <= ?
                ORDER BY stream_id ASC
            """
            args.append(from_token.stream)
            args.append(to_token.get_max_stream_pos())

            txn.execute(sql, args)

            return [
                StateDelta(
                    stream_id=row[1],
                    room_id=row[2],
                    event_type=row[3],
                    state_key=row[4],
                    event_id=row[5],
                    prev_event_id=row[6],
                )
                for row in txn
                if _filter_results_by_stream(from_token, to_token, row[0], row[1])
            ]

        results = []
        # Batch the rooms to keep the size of the IN-list clause bounded.
        for batch in batch_iter(room_ids, 1000):
            deltas = await self.db_pool.runInteraction(
                "get_current_state_deltas_for_rooms",
                get_current_state_deltas_for_rooms_txn,
                batch,
            )

            results.extend(deltas)

        return results
|