#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright (C) The Matrix.org Foundation C.I.C. 2022
# Copyright 2014 - 2016 OpenMarket Ltd
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
#
import logging
from enum import Enum, auto
from typing import (
    Collection,
    Dict,
    Final,
    FrozenSet,
    List,
    Mapping,
    Optional,
    Sequence,
    Set,
    Tuple,
)

import attr

from synapse.api.constants import (
    EventTypes,
    EventUnsignedContentFields,
    HistoryVisibility,
    Membership,
)
from synapse.events import EventBase
from synapse.events.snapshot import EventContext
from synapse.events.utils import clone_event, prune_event
from synapse.logging.opentracing import trace
from synapse.storage.controllers import StorageControllers
from synapse.storage.databases.main import DataStore
from synapse.types import RetentionPolicy, StateMap, StrCollection, get_domain_from_id
from synapse.types.state import StateFilter
from synapse.util import Clock

logger = logging.getLogger(__name__)
filtered_event_logger = logging.getLogger("synapse.visibility.filtered_event_debug")

# History visibility values, ordered from least to most restrictive. The index in
# this tuple is used to pick the "least restrictive" of two settings.
VISIBILITY_PRIORITY = (
    HistoryVisibility.WORLD_READABLE,
    HistoryVisibility.SHARED,
    HistoryVisibility.INVITED,
    HistoryVisibility.JOINED,
)

# Membership values, ordered from "most joined" to "least joined". The index in
# this tuple is used to pick the "most joined" of two memberships.
MEMBERSHIP_PRIORITY = (
    Membership.JOIN,
    Membership.INVITE,
    Membership.KNOCK,
    Membership.LEAVE,
    Membership.BAN,
)

# State key of the room's history visibility event.
_HISTORY_VIS_KEY: Final[Tuple[str, str]] = (EventTypes.RoomHistoryVisibility, "")


@trace
async def filter_events_for_client(
    storage: StorageControllers,
    user_id: str,
    events: List[EventBase],
    is_peeking: bool = False,
    always_include_ids: FrozenSet[str] = frozenset(),
    filter_send_to_client: bool = True,
) -> List[EventBase]:
    """
    Check which events a user is allowed to see. If the user can see the event but its
    sender asked for their data to be erased, prune the content of the event.

    Args:
        storage
        user_id: user id to be checked
        events: sequence of events to be checked
        is_peeking: should be True if:
          * the user is not currently a member of the room, and:
          * the user has not been a member of the room since the given
            events
        always_include_ids: set of event ids to specifically include, if present
            in events (unless sender is ignored)
        filter_send_to_client: Whether we're checking an event that's going to be
            sent to a client. This might not always be the case since this function can
            also be called to check whether a user can see the state at a given point.

    Returns:
        The filtered events. The `unsigned` data is annotated with the membership state
        of `user_id` at each event.
    """
    # Filter out events that have been soft failed so that we don't relay them
    # to clients.
    events_before_filtering = events
    events = [e for e in events if not e.internal_metadata.is_soft_failed()]
    if len(events_before_filtering) != len(events):
        if filtered_event_logger.isEnabledFor(logging.DEBUG):
            filtered_event_logger.debug(
                "filter_events_for_client: Filtered out soft-failed events: Before=%s, After=%s",
                [event.event_id for event in events_before_filtering],
                [event.event_id for event in events],
            )

    types = (_HISTORY_VIS_KEY, (EventTypes.Member, user_id))

    # we exclude outliers at this point, and then handle them separately later
    event_id_to_state = await storage.state.get_state_for_events(
        frozenset(e.event_id for e in events if not e.internal_metadata.outlier),
        state_filter=StateFilter.from_types(types),
    )

    # Get the users who are ignored by the requesting user.
    ignore_list = await storage.main.ignored_users(user_id)

    erased_senders = await storage.main.are_users_erased(e.sender for e in events)

    # `allowed` below looks up the retention policy of every event's room
    # unconditionally (the policy is only *applied* when `filter_send_to_client`
    # is set). The map therefore has to be populated regardless of
    # `filter_send_to_client`: binding it only inside `if filter_send_to_client:`
    # left it unbound for `filter_send_to_client=False`, making `allowed` raise
    # a NameError.
    retention_policies: Dict[str, RetentionPolicy] = {}
    for room_id in {e.room_id for e in events}:
        retention_policies[room_id] = (
            await storage.main.get_retention_policy_for_room(room_id)
        )

    # meow: let admins see secret events like org.matrix.dummy_event, m.room.aliases
    # and events expired by the retention policy.
    filter_override = user_id in storage.hs.config.meow.filter_override

    def allowed(event: EventBase) -> Optional[EventBase]:
        # Returns None if the event must be hidden, otherwise a clone of the
        # (possibly pruned) event annotated with the user's membership.
        state_after_event = event_id_to_state.get(event.event_id)
        filtered = _check_client_allowed_to_see_event(
            user_id=user_id,
            event=event,
            clock=storage.main.clock,
            filter_send_to_client=filter_send_to_client,
            sender_ignored=event.sender in ignore_list,
            always_include_ids=always_include_ids,
            retention_policy=retention_policies[event.room_id],
            state=state_after_event,
            is_peeking=is_peeking,
            sender_erased=erased_senders.get(event.sender, False),
            filter_override=filter_override,
        )
        if filtered is None:
            return None

        # Annotate the event with the user's membership after the event.
        #
        # Normally we just look in `state_after_event`, but if the event is an outlier
        # we won't have such a state. The only outliers that are returned here are the
        # user's own membership event, so we can just inspect that.

        user_membership_event: Optional[EventBase]
        if event.type == EventTypes.Member and event.state_key == user_id:
            user_membership_event = event
        elif state_after_event is not None:
            user_membership_event = state_after_event.get((EventTypes.Member, user_id))
        else:
            # unreachable!
            raise Exception("Missing state for event that is not user's own membership")

        user_membership = (
            user_membership_event.membership
            if user_membership_event
            else Membership.LEAVE
        )

        # Copy the event before updating the unsigned data: this shouldn't be persisted
        # to the cache!
        cloned = clone_event(filtered)
        cloned.unsigned[EventUnsignedContentFields.MEMBERSHIP] = user_membership

        return cloned

    # Check each event: gives an iterable of None or (a modified) EventBase.
    filtered_events = map(allowed, events)

    # Turn it into a list and remove None entries before returning.
    return [ev for ev in filtered_events if ev]


async def filter_event_for_clients_with_state(
    store: DataStore,
    user_ids: StrCollection,
    event: EventBase,
    context: EventContext,
    is_peeking: bool = False,
    filter_send_to_client: bool = True,
) -> StrCollection:
    """
    Checks to see if an event is visible to the users in the list at the time of
    the event.

    Note: This does *not* check if the sender of the event was erased.

    Args:
        store: databases
        user_ids: user_ids to be checked
        event: the event to be checked
        context: EventContext for the event to be checked
        is_peeking: Whether the users are peeking into the room, ie not
            currently joined
        filter_send_to_client: Whether we're checking an event that's going to be
            sent to a client. This might not always be the case since this function can
            also be called to check whether a user can see the state at a given point.

    Returns:
        Collection of user IDs for whom the event is visible
    """
    # None of the users should see the event if it is soft_failed
    if event.internal_metadata.is_soft_failed():
        return []

    # Fast path if we don't have any user IDs to check.
    if not user_ids:
        return ()

    # Make a set for all user IDs that haven't been filtered out by a check.
    allowed_user_ids = set(user_ids)

    # Only run some checks if these events aren't about to be sent to clients. This is
    # because, if this is not the case, we're probably only checking if the users can
    # see events in the room at that point in the DAG, and that shouldn't be decided
    # on those checks.
    if filter_send_to_client:
        ignored_by = await store.ignored_by(event.sender)
        retention_policy = await store.get_retention_policy_for_room(event.room_id)

        for user_id in user_ids:
            if (
                _check_filter_send_to_client(
                    event,
                    store.clock,
                    retention_policy,
                    sender_ignored=user_id in ignored_by,
                )
                == _CheckFilter.DENIED
            ):
                allowed_user_ids.discard(user_id)

    if event.internal_metadata.outlier:
        # Normally these can't be seen by clients, but we make an exception for
        # for out-of-band membership events (eg, incoming invites, or rejections of
        # said invite) for the user themselves.
        if event.type == EventTypes.Member and event.state_key in allowed_user_ids:
            logger.debug("Returning out-of-band-membership event %s", event)
            return {event.state_key}

        return set()

    # First we get just the history visibility in case its shared/world-readable
    # room.
    visibility_state_map = await _get_state_map(
        store, event, context, StateFilter.from_types([_HISTORY_VIS_KEY])
    )

    visibility = get_effective_room_visibility_from_state(visibility_state_map)
    if (
        _check_history_visibility(event, visibility, is_peeking=is_peeking)
        == _CheckVisibility.ALLOWED
    ):
        return allowed_user_ids

    # The history visibility isn't lax, so we now need to fetch the membership
    # events of all the users.

    filter_list = []
    for user_id in allowed_user_ids:
        filter_list.append((EventTypes.Member, user_id))
    filter_list.append((EventTypes.RoomHistoryVisibility, ""))

    state_filter = StateFilter.from_types(filter_list)
    state_map = await _get_state_map(store, event, context, state_filter)

    # Now we check whether the membership allows each user to see the event.
    return {
        user_id
        for user_id in allowed_user_ids
        if _check_membership(user_id, event, visibility, state_map, is_peeking).allowed
    }


async def _get_state_map(
    store: DataStore, event: EventBase, context: EventContext, state_filter: StateFilter
) -> StateMap[EventBase]:
    """Helper function for getting a `StateMap[EventBase]` from an `EventContext`

    Args:
        store: used to load the state events by ID
        event: the event the context belongs to; if it is a state event it is
            added to the returned map under its own `(type, state_key)`
        context: the `EventContext` whose previous state IDs are read
        state_filter: restricts which state entries are fetched

    Returns:
        Map from `(type, state_key)` to the state event. Entries whose event
        could not be loaded from the store are omitted.
    """
    state_map = await context.get_prev_state_ids(state_filter)

    # Use events rather than event ids as content from the events are needed in
    # _check_visibility
    event_map = await store.get_events(state_map.values(), get_prev_content=False)

    updated_state_map = {}
    for state_key, event_id in state_map.items():
        state_event = event_map.get(event_id)
        if state_event:
            updated_state_map[state_key] = state_event

    if event.is_state():
        current_state_key = (event.type, event.state_key)
        # Add current event to updated_state_map, we need to do this here as it
        # may not have been persisted to the db yet
        updated_state_map[current_state_key] = event

    return updated_state_map


def _check_client_allowed_to_see_event(
    user_id: str,
    event: EventBase,
    clock: Clock,
    filter_send_to_client: bool,
    is_peeking: bool,
    always_include_ids: FrozenSet[str],
    sender_ignored: bool,
    retention_policy: RetentionPolicy,
    state: Optional[StateMap[EventBase]],
    sender_erased: bool,
    filter_override: bool,
) -> Optional[EventBase]:
    """Check whether the given user is allowed to see the given event

    See `filter_events_for_client` for details about args

    Args:
        user_id
        event
        clock
        filter_send_to_client
        is_peeking
        always_include_ids
        sender_ignored: Whether the user is ignoring the event sender
        retention_policy: The retention policy of the room
        state: The state at the event, unless its an outlier
        sender_erased: Whether the event sender has been marked as "erased"
        filter_override: meow; when True the `_check_filter_send_to_client`
            checks are skipped, but non-state events from ignored senders are
            still filtered out

    Returns:
        None if the user cannot see this event at all

        a redacted copy of the event if they can only see a redacted version

        the original event if they can see it as normal.

    Raises:
        Exception: if `state` is None for an event that is not an outlier.
    """
    # Only run some checks if these events aren't about to be sent to clients. This is
    # because, if this is not the case, we're probably only checking if the users can
    # see events in the room at that point in the DAG, and that shouldn't be decided
    # on those checks.
    if filter_send_to_client and not filter_override:
        if (
            _check_filter_send_to_client(event, clock, retention_policy, sender_ignored)
            == _CheckFilter.DENIED
        ):
            filtered_event_logger.debug(
                "_check_client_allowed_to_see_event(event=%s): Filtered out event because `_check_filter_send_to_client` returned `_CheckFilter.DENIED`",
                event.event_id,
            )
            return None
    # meow: even with filter_override, we want to filter ignored users
    elif filter_send_to_client and not event.is_state() and sender_ignored:
        return None

    if event.event_id in always_include_ids:
        return event

    # we need to handle outliers separately, since we don't have the room state.
    if event.internal_metadata.outlier:
        # Normally these can't be seen by clients, but we make an exception for
        # for out-of-band membership events (eg, incoming invites, or rejections of
        # said invite) for the user themselves.
        if event.type == EventTypes.Member and event.state_key == user_id:
            logger.debug(
                "_check_client_allowed_to_see_event(event=%s): Returning out-of-band-membership event %s",
                event.event_id,
                event,
            )
            return event

        filtered_event_logger.debug(
            "_check_client_allowed_to_see_event(event=%s): Filtered out event because it's an outlier",
            event.event_id,
        )
        return None

    if state is None:
        raise Exception("Missing state for non-outlier event")

    # get the room_visibility at the time of the event.
    visibility = get_effective_room_visibility_from_state(state)

    # Check if the room has lax history visibility, allowing us to skip
    # membership checks.
    #
    # We can only do this check if the sender has *not* been erased, as if they
    # have we need to check the user's membership.
    if (
        not sender_erased
        and _check_history_visibility(event, visibility, is_peeking)
        == _CheckVisibility.ALLOWED
    ):
        return event

    membership_result = _check_membership(user_id, event, visibility, state, is_peeking)
    if not membership_result.allowed:
        filtered_event_logger.debug(
            "_check_client_allowed_to_see_event(event=%s): Filtered out event because the user can't see the event because of their membership, membership_result.allowed=%s membership_result.joined=%s",
            event.event_id,
            membership_result.allowed,
            membership_result.joined,
        )
        return None

    # If the sender has been erased and the user was not joined at the time, we
    # must only return the redacted form.
    if sender_erased and not membership_result.joined:
        filtered_event_logger.debug(
            "_check_client_allowed_to_see_event(event=%s): Returning pruned event because `sender_erased` and the user was not joined at the time",
            event.event_id,
        )
        event = prune_event(event)

    return event


@attr.s(frozen=True, slots=True, auto_attribs=True)
class _CheckMembershipReturn:
    """Return value of `_check_membership`.

    Attributes:
        allowed: Whether the user should be allowed to see the event.
        joined: Whether the user was joined to the room at the event.
    """

    allowed: bool
    joined: bool


def _check_membership(
    user_id: str,
    event: EventBase,
    visibility: str,
    state: StateMap[EventBase],
    is_peeking: bool,
) -> _CheckMembershipReturn:
    """Check whether the user can see the event due to their membership

    Args:
        user_id: the user whose access is being checked
        event: the event being checked
        visibility: the effective history visibility at the event
        state: state map in which the user's membership event is looked up
        is_peeking: whether the user is peeking into the room

    Returns:
        Whether the user may see the event, and whether they were joined to
        the room at the event.
    """
    # If the event is the user's own membership event, use the 'most joined'
    # membership
    membership = None
    if event.type == EventTypes.Member and event.state_key == user_id:
        membership = event.content.get("membership", None)
        if membership not in MEMBERSHIP_PRIORITY:
            membership = "leave"

        prev_content = event.unsigned.get("prev_content", {})
        prev_membership = prev_content.get("membership", None)
        if prev_membership not in MEMBERSHIP_PRIORITY:
            prev_membership = "leave"

        # Always allow the user to see their own leave events, otherwise
        # they won't see the room disappear if they reject the invite
        #
        # (Note this doesn't work for out-of-band invite rejections, which don't
        # have prev_state populated. They are handled above in the outlier code.)
        if membership == "leave" and (
            prev_membership == "join" or prev_membership == "invite"
        ):
            return _CheckMembershipReturn(True, False)

        # A lower index in MEMBERSHIP_PRIORITY means "more joined"; keep
        # whichever of the old and new membership is more joined.
        new_priority = MEMBERSHIP_PRIORITY.index(membership)
        old_priority = MEMBERSHIP_PRIORITY.index(prev_membership)
        if old_priority < new_priority:
            membership = prev_membership

    # otherwise, get the user's membership at the time of the event.
    if membership is None:
        membership_event = state.get((EventTypes.Member, user_id), None)
        if membership_event:
            membership = membership_event.membership

    # if the user was a member of the room at the time of the event,
    # they can see it.
    if membership == Membership.JOIN:
        return _CheckMembershipReturn(True, True)

    # otherwise, it depends on the room visibility.

    if visibility == HistoryVisibility.JOINED:
        # we weren't a member at the time of the event, so we can't
        # see this event.
        return _CheckMembershipReturn(False, False)

    elif visibility == HistoryVisibility.INVITED:
        # user can also see the event if they were *invited* at the time
        # of the event.
        return _CheckMembershipReturn(membership == Membership.INVITE, False)

    elif visibility == HistoryVisibility.SHARED and is_peeking:
        # if the visibility is shared, users cannot see the event unless
        # they have *subsequently* joined the room (or were members at the
        # time, of course)
        #
        # XXX: if the user has subsequently joined and then left again,
        # ideally we would share history up to the point they left. But
        # we don't know when they left. We just treat it as though they
        # never joined, and restrict access.
        return _CheckMembershipReturn(False, False)

    # The visibility is either shared or world_readable, and the user was
    # not a member at the time. We allow it.
    return _CheckMembershipReturn(True, False)


class _CheckFilter(Enum):
    """Result of `_check_filter_send_to_client`."""

    MAYBE_ALLOWED = auto()
    DENIED = auto()


def _check_filter_send_to_client(
    event: EventBase,
    clock: Clock,
    retention_policy: RetentionPolicy,
    sender_ignored: bool,
) -> _CheckFilter:
    """Apply checks for sending events to client

    Args:
        event: the event being checked
        clock: used to get the current time for the retention check
        retention_policy: the retention policy of the event's room
        sender_ignored: whether the receiving user ignores the event's sender

    Returns:
        `_CheckFilter.MAYBE_ALLOWED` if the event might be allowed to be sent to
        clients, `_CheckFilter.DENIED` if definitely not.
    """

    if event.type == EventTypes.Dummy:
        return _CheckFilter.DENIED

    if not event.is_state() and sender_ignored:
        return _CheckFilter.DENIED

    # Until MSC2261 has landed we can't redact malicious alias events, so for
    # now we temporarily filter out m.room.aliases entirely to mitigate
    # abuse, while we spec a better solution to advertising aliases
    # on rooms.
    if event.type == EventTypes.Aliases:
        return _CheckFilter.DENIED

    # Don't try to apply the room's retention policy if the event is a state
    # event, as MSC1763 states that retention is only considered for non-state
    # events.
    if not event.is_state():
        max_lifetime = retention_policy.max_lifetime

        if max_lifetime is not None:
            oldest_allowed_ts = clock.time_msec() - max_lifetime

            if event.origin_server_ts < oldest_allowed_ts:
                return _CheckFilter.DENIED

    return _CheckFilter.MAYBE_ALLOWED


class _CheckVisibility(Enum):
    """Result of `_check_history_visibility`."""

    ALLOWED = auto()
    MAYBE_DENIED = auto()


def _check_history_visibility(
    event: EventBase, visibility: str, is_peeking: bool
) -> _CheckVisibility:
    """Check if event is allowed to be seen due to lax history visibility.

    Args:
        event: the event being checked
        visibility: the effective history visibility at the event
        is_peeking: whether the user is peeking into the room

    Returns:
        `_CheckVisibility.ALLOWED` if the user can definitely see the event,
        `_CheckVisibility.MAYBE_DENIED` if maybe not.
    """
    # Always allow history visibility events on boundaries. This is done
    # by setting the effective visibility to the least restrictive
    # of the old vs new.
    if event.type == EventTypes.RoomHistoryVisibility:
        prev_content = event.unsigned.get("prev_content", {})
        prev_visibility = prev_content.get("history_visibility", None)

        if prev_visibility not in VISIBILITY_PRIORITY:
            prev_visibility = HistoryVisibility.SHARED

        new_priority = VISIBILITY_PRIORITY.index(visibility)
        old_priority = VISIBILITY_PRIORITY.index(prev_visibility)
        if old_priority < new_priority:
            visibility = prev_visibility

    if visibility == HistoryVisibility.SHARED and not is_peeking:
        return _CheckVisibility.ALLOWED
    elif visibility == HistoryVisibility.WORLD_READABLE:
        return _CheckVisibility.ALLOWED

    return _CheckVisibility.MAYBE_DENIED


def get_effective_room_visibility_from_state(state: StateMap[EventBase]) -> str:
    """Get the actual history vis, from a state map including the history_visibility event

    Handles missing and invalid history visibility events: both fall back to
    `HistoryVisibility.SHARED`.

    Args:
        state: state map that may contain the history visibility event

    Returns:
        One of the values in `VISIBILITY_PRIORITY`.
    """
    visibility_event = state.get(_HISTORY_VIS_KEY, None)
    if not visibility_event:
        return HistoryVisibility.SHARED

    visibility = visibility_event.content.get(
        "history_visibility", HistoryVisibility.SHARED
    )
    if visibility not in VISIBILITY_PRIORITY:
        visibility = HistoryVisibility.SHARED
    return visibility


async def filter_events_for_server(
    storage: StorageControllers,
    target_server_name: str,
    local_server_name: str,
    events: Sequence[EventBase],
    *,
    redact: bool,
    filter_out_erased_senders: bool,
    filter_out_remote_partial_state_events: bool,
) -> List[EventBase]:
    """Filter a list of events based on whether the target server is allowed to
    see them.

    For a fully stated room, the target server is allowed to see an event E if:
      - the state at E has world readable or shared history vis, OR
      - the state at E says that the target server is in the room.

    For a partially stated room, the target server is allowed to see E if:
      - E was created by this homeserver, AND:
          - the partial state at E has world readable or shared history vis, OR
          - the partial state at E says that the target server is in the room.

    TODO: state before or state after?

    Args:
        storage
        target_server_name
        local_server_name
        events
        redact: Controls what to do with events which have been filtered out.
            If True, include their redacted forms; if False, omit them entirely.
        filter_out_erased_senders: If true, also filter out events whose sender has been
            erased. This is used e.g. during pagination to decide whether to
            backfill or not.
        filter_out_remote_partial_state_events: If True, also filter out events in
            partial state rooms created by other homeservers.

    Returns:
        The filtered events.
    """

    def is_sender_erased(event: EventBase, erased_senders: Mapping[str, bool]) -> bool:
        # An empty mapping means "erasure is not being checked".
        if erased_senders and erased_senders[event.sender]:
            logger.info("Sender of %s has been erased, redacting", event.event_id)
            return True
        return False

    def check_event_is_visible(
        visibility: str, memberships: StateMap[EventBase]
    ) -> bool:
        if visibility not in (HistoryVisibility.INVITED, HistoryVisibility.JOINED):
            return True

        # We now loop through all membership events looking for
        # membership states for the requesting server to determine
        # if the server is either in the room or has been invited
        # into the room.
        for ev in memberships.values():
            assert get_domain_from_id(ev.state_key) == target_server_name

            memtype = ev.membership
            if memtype == Membership.JOIN:
                return True
            elif memtype == Membership.INVITE:
                if visibility == HistoryVisibility.INVITED:
                    return True

        # server has no users in the room: redact
        return False

    if filter_out_erased_senders:
        erased_senders = await storage.main.are_users_erased(e.sender for e in events)
    else:
        # We don't want to check whether users are erased, which is equivalent
        # to no users having been erased.
        erased_senders = {}

    # Filter out non-local events when we are in the middle of a partial join, since our servers
    # list can be out of date and we could leak events to servers not in the room anymore.
    # This can also be true for local events but we consider it to be an acceptable risk.

    # We do this check as a first step and before retrieving membership events because
    # otherwise a room could be fully joined after we retrieve those, which would then bypass
    # this check but would base the filtering on an outdated view of the membership events.

    partial_state_invisible_event_ids: Set[str] = set()
    if filter_out_remote_partial_state_events:
        for e in events:
            sender_domain = get_domain_from_id(e.sender)
            if (
                sender_domain != local_server_name
                and await storage.main.is_partial_state_room(e.room_id)
            ):
                partial_state_invisible_event_ids.add(e.event_id)

    # Let's check to see if all the events have a history visibility
    # of "shared" or "world_readable". If that's the case then we don't
    # need to check membership (as we know the server is in the room).
    event_to_history_vis = await _event_to_history_vis(storage, events)

    # for any with restricted vis, we also need the memberships
    event_to_memberships = await _event_to_memberships(
        storage,
        [
            e
            for e in events
            if event_to_history_vis[e.event_id]
            not in (HistoryVisibility.SHARED, HistoryVisibility.WORLD_READABLE)
        ],
        target_server_name,
    )

    def include_event_in_output(e: EventBase) -> bool:
        erased = is_sender_erased(e, erased_senders)
        visible = check_event_is_visible(
            event_to_history_vis[e.event_id], event_to_memberships.get(e.event_id, {})
        )

        if e.event_id in partial_state_invisible_event_ids:
            visible = False

        return visible and not erased

    to_return = []
    for e in events:
        if include_event_in_output(e):
            to_return.append(e)
        elif redact:
            to_return.append(prune_event(e))

    return to_return


async def _event_to_history_vis(
    storage: StorageControllers, events: Collection[EventBase]
) -> Dict[str, str]:
    """Get the history visibility at each of the given events

    Returns a map from event id to history_visibility setting
    """

    # outliers get special treatment here. We don't have the state at that point in the
    # room (and attempting to look it up will raise an exception), so all we can really
    # do is assume that the requesting server is allowed to see the event. That's
    # equivalent to there not being a history_visibility event, so we just exclude
    # any outliers from the query.
    event_to_state_ids = await storage.state.get_state_ids_for_events(
        frozenset(e.event_id for e in events if not e.internal_metadata.is_outlier()),
        state_filter=StateFilter.from_types(types=(_HISTORY_VIS_KEY,)),
    )

    visibility_ids = {
        vis_event_id
        for vis_event_id in (
            state_ids.get(_HISTORY_VIS_KEY) for state_ids in event_to_state_ids.values()
        )
        if vis_event_id
    }
    vis_events = await storage.main.get_events(visibility_ids)

    result: Dict[str, str] = {}
    for event in events:
        vis = HistoryVisibility.SHARED
        state_ids = event_to_state_ids.get(event.event_id)

        # if we didn't find any state for this event, it's an outlier, and we assume
        # it's open
        visibility_id = None
        if state_ids:
            visibility_id = state_ids.get(_HISTORY_VIS_KEY)

        if visibility_id:
            vis_event = vis_events[visibility_id]
            vis = vis_event.content.get("history_visibility", HistoryVisibility.SHARED)
            assert isinstance(vis, str)

        result[event.event_id] = vis
    return result


async def _event_to_memberships(
    storage: StorageControllers, events: Collection[EventBase], server_name: str
) -> Dict[str, StateMap[EventBase]]:
    """Get the remote membership list at each of the given events

    Returns a map from event id to state map, which will contain only membership events
    for the given server.
    """

    if not events:
        return {}

    # for each event, get the event_ids of the membership state at those events.
    #
    # TODO: this means that we request the entire membership list. If there are only
    #   one or two users on this server, and the room is huge, this is very wasteful
    #   (it means more db work, and churns the *stateGroupMembersCache*).
    #   It might be that we could extend StateFilter to specify "give me keys matching
    #   *:<server_name>", to avoid this.

    event_to_state_ids = await storage.state.get_state_ids_for_events(
        frozenset(e.event_id for e in events),
        state_filter=StateFilter.from_types(types=((EventTypes.Member, None),)),
    )

    # We only want to pull out member events that correspond to the
    # server's domain.
    #
    # event_to_state_ids contains lots of duplicates, so it turns out to be
    # cheaper to build a complete event_id => (type, state_key) dict, and then
    # filter out the ones we don't want
    #
    event_id_to_state_key = {
        event_id: key
        for key_to_eid in event_to_state_ids.values()
        for key, event_id in key_to_eid.items()
    }

    def include(state_key: str) -> bool:
        # we avoid using get_domain_from_id here for efficiency.
        idx = state_key.find(":")
        if idx == -1:
            return False
        return state_key[idx + 1 :] == server_name

    event_map = await storage.main.get_events(
        [
            e_id
            for e_id, (_, state_key) in event_id_to_state_key.items()
            if include(state_key)
        ]
    )

    return {
        e_id: {
            key: event_map[inner_e_id]
            for key, inner_e_id in key_to_eid.items()
            if inner_e_id in event_map
        }
        for e_id, key_to_eid in event_to_state_ids.items()
    }