Prevent expired events from being filtered out when retention is disabled (#12611)

Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>
This commit is contained in:
Brendan Abolivier 2022-05-23 19:18:23 +02:00 committed by GitHub
parent a608ac847b
commit 4cc4229cd7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 71 additions and 32 deletions

View file

@ -239,7 +239,7 @@ class PaginationHandler:
# defined in the server's configuration, we can safely assume that's the
# case and use it for this room.
max_lifetime = (
retention_policy["max_lifetime"] or self._retention_default_max_lifetime
retention_policy.max_lifetime or self._retention_default_max_lifetime
)
# Cap the effective max_lifetime to be within the range allowed in the

View file

@ -45,7 +45,7 @@ from synapse.storage.database import (
from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore
from synapse.storage.types import Cursor
from synapse.storage.util.id_generators import IdGenerator
from synapse.types import JsonDict, ThirdPartyInstanceID
from synapse.types import JsonDict, RetentionPolicy, ThirdPartyInstanceID
from synapse.util import json_encoder
from synapse.util.caches.descriptors import cached
from synapse.util.stringutils import MXC_REGEX
@ -699,7 +699,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
await self.db_pool.runInteraction("delete_ratelimit", delete_ratelimit_txn)
@cached()
async def get_retention_policy_for_room(self, room_id: str) -> Dict[str, int]:
async def get_retention_policy_for_room(self, room_id: str) -> RetentionPolicy:
"""Get the retention policy for a given room.
If no retention policy has been found for this room, returns a policy defined
@ -707,12 +707,20 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
the 'max_lifetime' if no default policy has been defined in the server's
configuration).
If support for retention policies is disabled, a policy with a 'min_lifetime' and
'max_lifetime' of None is returned.
Args:
room_id: The ID of the room to get the retention policy of.
Returns:
A RetentionPolicy holding the "min_lifetime" and "max_lifetime" for this room.
"""
# If the room retention feature is disabled, return a policy with no minimum nor
# maximum. This prevents incorrectly filtering out events when sending to
# the client.
if not self.config.retention.retention_enabled:
return RetentionPolicy()
def get_retention_policy_for_room_txn(
txn: LoggingTransaction,
@ -736,10 +744,10 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
# If we don't know this room ID, ret will be None, in this case return the default
# policy.
if not ret:
return {
"min_lifetime": self.config.retention.retention_default_min_lifetime,
"max_lifetime": self.config.retention.retention_default_max_lifetime,
}
return RetentionPolicy(
min_lifetime=self.config.retention.retention_default_min_lifetime,
max_lifetime=self.config.retention.retention_default_max_lifetime,
)
min_lifetime = ret[0]["min_lifetime"]
max_lifetime = ret[0]["max_lifetime"]
@ -754,10 +762,10 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
if max_lifetime is None:
max_lifetime = self.config.retention.retention_default_max_lifetime
return {
"min_lifetime": min_lifetime,
"max_lifetime": max_lifetime,
}
return RetentionPolicy(
min_lifetime=min_lifetime,
max_lifetime=max_lifetime,
)
async def get_media_mxcs_in_room(self, room_id: str) -> Tuple[List[str], List[str]]:
"""Retrieves all the local and remote media MXC URIs in a given room
@ -994,7 +1002,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
async def get_rooms_for_retention_period_in_range(
self, min_ms: Optional[int], max_ms: Optional[int], include_null: bool = False
) -> Dict[str, Dict[str, Optional[int]]]:
) -> Dict[str, RetentionPolicy]:
"""Retrieves all of the rooms within the given retention range.
Optionally includes the rooms which don't have a retention policy.
@ -1016,7 +1024,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
def get_rooms_for_retention_period_in_range_txn(
txn: LoggingTransaction,
) -> Dict[str, Dict[str, Optional[int]]]:
) -> Dict[str, RetentionPolicy]:
range_conditions = []
args = []
@ -1047,10 +1055,10 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
rooms_dict = {}
for row in rows:
rooms_dict[row["room_id"]] = {
"min_lifetime": row["min_lifetime"],
"max_lifetime": row["max_lifetime"],
}
rooms_dict[row["room_id"]] = RetentionPolicy(
min_lifetime=row["min_lifetime"],
max_lifetime=row["max_lifetime"],
)
if include_null:
# If required, do a second query that retrieves all of the rooms we know
@ -1065,10 +1073,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
# policy in its state), add it with a null policy.
for row in rows:
if row["room_id"] not in rooms_dict:
rooms_dict[row["room_id"]] = {
"min_lifetime": None,
"max_lifetime": None,
}
rooms_dict[row["room_id"]] = RetentionPolicy()
return rooms_dict

View file

@ -932,3 +932,9 @@ class UserProfile(TypedDict):
user_id: str
display_name: Optional[str]
avatar_url: Optional[str]
# Immutable value object describing a room's message retention policy.
# Replaces the plain {"min_lifetime": ..., "max_lifetime": ...} dicts that the
# storage layer previously returned. Constructing it with no arguments yields
# a policy with neither a minimum nor a maximum lifetime.
@attr.s(auto_attribs=True, frozen=True, slots=True)
class RetentionPolicy:
# Minimum lifetime of events in the room; None means no minimum is set.
# NOTE(review): presumably milliseconds, like max_lifetime -- confirm.
min_lifetime: Optional[int] = None
# Maximum lifetime of events in the room; None means no maximum is set.
# Callers subtract this from a millisecond clock value to find the oldest
# allowed event timestamp, so it is expressed in milliseconds.
max_lifetime: Optional[int] = None

View file

@ -22,7 +22,7 @@ from synapse.events import EventBase
from synapse.events.utils import prune_event
from synapse.storage import Storage
from synapse.storage.state import StateFilter
from synapse.types import StateMap, get_domain_from_id
from synapse.types import RetentionPolicy, StateMap, get_domain_from_id
logger = logging.getLogger(__name__)
@ -94,7 +94,7 @@ async def filter_events_for_client(
if filter_send_to_client:
room_ids = {e.room_id for e in events}
retention_policies = {}
retention_policies: Dict[str, RetentionPolicy] = {}
for room_id in room_ids:
retention_policies[
@ -137,7 +137,7 @@ async def filter_events_for_client(
# events.
if not event.is_state():
retention_policy = retention_policies[event.room_id]
max_lifetime = retention_policy.get("max_lifetime")
max_lifetime = retention_policy.max_lifetime
if max_lifetime is not None:
oldest_allowed_ts = storage.main.clock.time_msec() - max_lifetime