2016-05-11 08:42:37 -04:00
#
2023-11-21 15:29:58 -05:00
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
2024-01-23 06:26:48 -05:00
# Copyright (C) The Matrix.org Foundation C.I.C. 2022
# Copyright 2014 - 2016 OpenMarket Ltd
2023-11-21 15:29:58 -05:00
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
2016-05-11 08:42:37 -04:00
#
#
2018-06-07 06:37:10 -04:00
import logging
2022-07-11 09:14:09 -04:00
from enum import Enum , auto
2023-03-10 10:31:25 -05:00
from typing import (
Collection ,
Dict ,
2023-08-15 08:11:20 -04:00
Final ,
2023-03-10 10:31:25 -05:00
FrozenSet ,
List ,
Mapping ,
Optional ,
Sequence ,
Set ,
Tuple ,
)
2022-03-31 13:39:34 -04:00
2022-07-11 09:14:09 -04:00
import attr
2018-06-07 06:37:10 -04:00
2022-03-15 14:06:05 -04:00
from synapse . api . constants import EventTypes , HistoryVisibility , Membership
2021-03-24 06:49:01 -04:00
from synapse . events import EventBase
2022-07-11 09:14:09 -04:00
from synapse . events . snapshot import EventContext
2018-06-25 08:42:55 -04:00
from synapse . events . utils import prune_event
2022-08-03 11:57:38 -04:00
from synapse . logging . opentracing import trace
2022-05-31 08:17:50 -04:00
from synapse . storage . controllers import StorageControllers
2022-07-11 09:14:09 -04:00
from synapse . storage . databases . main import DataStore
2023-09-13 07:57:19 -04:00
from synapse . types import RetentionPolicy , StateMap , StrCollection , get_domain_from_id
2022-12-12 11:19:30 -05:00
from synapse . types . state import StateFilter
2022-07-11 09:14:09 -04:00
from synapse . util import Clock
2016-05-11 08:42:37 -04:00
logger = logging.getLogger(__name__)

# Separate logger used only for messages explaining why an individual event was
# filtered out (or pruned) for a client.
filtered_event_logger = logging.getLogger("synapse.visibility.filtered_event_debug")

# History visibility settings, ordered from least restrictive to most
# restrictive. A lower index means more people may see the event.
VISIBILITY_PRIORITY = (
    HistoryVisibility.WORLD_READABLE,
    HistoryVisibility.SHARED,
    HistoryVisibility.INVITED,
    HistoryVisibility.JOINED,
)

# Membership states, ordered from "most joined" to "least joined". A lower
# index wins when choosing between an old and a new membership.
MEMBERSHIP_PRIORITY = (
    Membership.JOIN,
    Membership.INVITE,
    Membership.KNOCK,
    Membership.LEAVE,
    Membership.BAN,
)

# (event type, state key) under which a room's history visibility is stored in
# a state map.
_HISTORY_VIS_KEY: Final[Tuple[str, str]] = (EventTypes.RoomHistoryVisibility, "")
2016-05-11 08:42:37 -04:00
2022-08-03 11:57:38 -04:00
@trace
async def filter_events_for_client(
    storage: StorageControllers,
    user_id: str,
    events: List[EventBase],
    is_peeking: bool = False,
    always_include_ids: FrozenSet[str] = frozenset(),
    filter_send_to_client: bool = True,
) -> List[EventBase]:
    """
    Check which events a user is allowed to see. If the user can see the event but its
    sender asked for their data to be erased, prune the content of the event.

    Args:
        storage
        user_id: user id to be checked
        events: sequence of events to be checked
        is_peeking: should be True if:
          * the user is not currently a member of the room, and:
          * the user has not been a member of the room since the given
            events
        always_include_ids: set of event ids to specifically include, if present
            in events (unless sender is ignored)
        filter_send_to_client: Whether we're checking an event that's going to be
            sent to a client. This might not always be the case since this function can
            also be called to check whether a user can see the state at a given point.

    Returns:
        The filtered events, in their original order. Events the user may only
        see in redacted form are replaced by a pruned copy.
    """
    # Filter out events that have been soft failed so that we don't relay them
    # to clients.
    events_before_filtering = events
    events = [e for e in events if not e.internal_metadata.is_soft_failed()]
    if len(events_before_filtering) != len(events):
        # Building the two ID lists is wasted work unless debug logging is on.
        if filtered_event_logger.isEnabledFor(logging.DEBUG):
            filtered_event_logger.debug(
                "filter_events_for_client: Filtered out soft-failed events: Before=%s, After=%s",
                [event.event_id for event in events_before_filtering],
                [event.event_id for event in events],
            )

    # We only need the history visibility and the user's own membership at
    # each event.
    types = (_HISTORY_VIS_KEY, (EventTypes.Member, user_id))

    # we exclude outliers at this point, and then handle them separately later
    event_id_to_state = await storage.state.get_state_for_events(
        frozenset(e.event_id for e in events if not e.internal_metadata.outlier),
        state_filter=StateFilter.from_types(types),
    )

    # Get the users who are ignored by the requesting user.
    ignore_list = await storage.main.ignored_users(user_id)

    erased_senders = await storage.main.are_users_erased(e.sender for e in events)

    # Look up the retention policy of every room that appears in `events`.
    #
    # This is done regardless of `filter_send_to_client` so that the closure
    # below always has a policy to hand over; the policy is only *consulted*
    # by `_check_client_allowed_to_see_event` when `filter_send_to_client` is
    # true. (Previously the map only existed when `filter_send_to_client` was
    # true, which made the closure fail with a NameError otherwise.)
    retention_policies: Dict[str, RetentionPolicy] = {}
    for room_id in {e.room_id for e in events}:
        retention_policies[room_id] = (
            await storage.main.get_retention_policy_for_room(room_id)
        )

    def allowed(event: EventBase) -> Optional[EventBase]:
        return _check_client_allowed_to_see_event(
            user_id=user_id,
            event=event,
            clock=storage.main.clock,
            filter_send_to_client=filter_send_to_client,
            sender_ignored=event.sender in ignore_list,
            always_include_ids=always_include_ids,
            # Index by the event's own room: using the leftover loop variable
            # from above would apply one arbitrary room's policy to all events.
            retention_policy=retention_policies[event.room_id],
            state=event_id_to_state.get(event.event_id),
            is_peeking=is_peeking,
            sender_erased=erased_senders.get(event.sender, False),
        )

    # Check each event: gives an iterable of None or (a potentially modified)
    # EventBase.
    filtered_events = map(allowed, events)

    # Turn it into a list and remove None entries before returning.
    return [ev for ev in filtered_events if ev]
2022-03-04 05:31:19 -05:00
2016-05-11 08:42:37 -04:00
2022-07-11 09:14:09 -04:00
async def filter_event_for_clients_with_state(
    store: DataStore,
    user_ids: StrCollection,
    event: EventBase,
    context: EventContext,
    is_peeking: bool = False,
    filter_send_to_client: bool = True,
) -> StrCollection:
    """
    Work out which of the given users can see an event, based on the state at
    the time of the event.

    Note: This does *not* check if the sender of the event was erased.

    Args:
        store: databases
        user_ids: user_ids to be checked
        event: the event to be checked
        context: EventContext for the event to be checked
        is_peeking: Whether the users are peeking into the room, ie not
            currently joined
        filter_send_to_client: Whether we're checking an event that's going to be
            sent to a client. This might not always be the case since this function can
            also be called to check whether a user can see the state at a given point.

    Returns:
        Collection of user IDs for whom the event is visible
    """
    # Soft-failed events are never shown to anybody.
    if event.internal_metadata.is_soft_failed():
        return []

    # Nothing to do if we were not asked about any users.
    if not user_ids:
        return ()

    # Start by assuming everyone can see the event, then whittle the set down.
    allowed_user_ids = set(user_ids)

    # The "send to client" checks only apply when the event really is about to
    # be sent to clients. Otherwise we are probably only asking whether the
    # users can see events at that point in the DAG, which these checks should
    # not influence.
    if filter_send_to_client:
        ignored_by = await store.ignored_by(event.sender)
        retention_policy = await store.get_retention_policy_for_room(event.room_id)

        denied_user_ids = {
            candidate
            for candidate in user_ids
            if _check_filter_send_to_client(
                event,
                store.clock,
                retention_policy,
                sender_ignored=candidate in ignored_by,
            )
            == _CheckFilter.DENIED
        }
        allowed_user_ids -= denied_user_ids

    if event.internal_metadata.outlier:
        # Outliers are normally invisible to clients. The exception is an
        # out-of-band membership event (eg, an incoming invite, or the
        # rejection of one), which is shown to the user it is about.
        is_own_membership = (
            event.type == EventTypes.Member and event.state_key in allowed_user_ids
        )
        if not is_own_membership:
            return set()

        logger.debug("Returning out-of-band-membership event %s", event)
        return {event.state_key}

    # Fetch only the history visibility first: for a shared/world-readable room
    # we can answer without looking at any memberships.
    visibility_state_map = await _get_state_map(
        store, event, context, StateFilter.from_types([_HISTORY_VIS_KEY])
    )
    visibility = get_effective_room_visibility_from_state(visibility_state_map)

    lax_result = _check_history_visibility(event, visibility, is_peeking=is_peeking)
    if lax_result == _CheckVisibility.ALLOWED:
        return allowed_user_ids

    # The history visibility is restrictive, so we need the membership event of
    # every remaining user (plus the history visibility event again).
    wanted_state_keys = [
        (EventTypes.Member, candidate) for candidate in allowed_user_ids
    ]
    wanted_state_keys.append(_HISTORY_VIS_KEY)

    state_map = await _get_state_map(
        store, event, context, StateFilter.from_types(wanted_state_keys)
    )

    # Keep only the users whose membership lets them see the event.
    visible_to = set()
    for candidate in allowed_user_ids:
        membership_result = _check_membership(
            candidate, event, visibility, state_map, is_peeking
        )
        if membership_result.allowed:
            visible_to.add(candidate)
    return visible_to
async def _get_state_map(
    store: DataStore, event: EventBase, context: EventContext, state_filter: StateFilter
) -> StateMap[EventBase]:
    """Build a `StateMap[EventBase]` for `event` from its `EventContext`.

    Args:
        store: used to load the state events themselves
        event: the event whose surrounding state is wanted
        context: the event's context, which supplies the previous state IDs
        state_filter: restricts which state entries are looked up

    Returns:
        A map from (type, state_key) to the state event. Entries whose event
        could not be loaded are left out. If `event` is itself a state event
        it is included under its own key.
    """
    prev_state_ids = await context.get_prev_state_ids(state_filter)

    # Callers need the content of the state events, not just their IDs, so
    # load the full events.
    loaded_events = await store.get_events(
        prev_state_ids.values(), get_prev_content=False
    )

    resolved_state = {
        key: loaded_events[state_event_id]
        for key, state_event_id in prev_state_ids.items()
        if loaded_events.get(state_event_id)
    }

    if event.is_state():
        # The event may not have been persisted yet, so it cannot be relied on
        # to come back from the store: add it by hand.
        resolved_state[(event.type, event.state_key)] = event

    return resolved_state
2016-05-11 08:42:37 -04:00
2022-07-11 09:14:09 -04:00
def _check_client_allowed_to_see_event(
    user_id: str,
    event: EventBase,
    clock: Clock,
    filter_send_to_client: bool,
    is_peeking: bool,
    always_include_ids: FrozenSet[str],
    sender_ignored: bool,
    retention_policy: RetentionPolicy,
    state: Optional[StateMap[EventBase]],
    sender_erased: bool,
) -> Optional[EventBase]:
    """Check whether the given user is allowed to see the given event

    See `filter_events_for_client` for details about args

    Args:
        user_id
        event
        clock
        filter_send_to_client
        is_peeking
        always_include_ids
        sender_ignored: Whether the user is ignoring the event sender
        retention_policy: The retention policy of the room
        state: The state at the event, unless its an outlier
        sender_erased: Whether the event sender has been marked as "erased"

    Returns:
        None if the user cannot see this event at all

        a redacted copy of the event if they can only see a redacted
        version

        the original event if they can see it as normal.

    Raises:
        Exception: if `state` is None for an event that is not an outlier.
    """
    # The "send to client" checks only apply when the event really is about to
    # be sent to a client. Otherwise we are probably only asking whether the
    # user can see events at that point in the DAG, which these checks should
    # not influence.
    if (
        filter_send_to_client
        and _check_filter_send_to_client(
            event, clock, retention_policy, sender_ignored
        )
        == _CheckFilter.DENIED
    ):
        filtered_event_logger.debug(
            "_check_client_allowed_to_see_event(event=%s): Filtered out event because `_check_filter_send_to_client` returned `_CheckFilter.DENIED`",
            event.event_id,
        )
        return None

    # Explicitly requested events skip every remaining check.
    if event.event_id in always_include_ids:
        return event

    # Outliers have no room state, so they get their own rule.
    if event.internal_metadata.outlier:
        # Normally invisible, except for out-of-band membership events (eg,
        # incoming invites, or rejections of said invite) about the user
        # themselves.
        if event.type != EventTypes.Member or event.state_key != user_id:
            filtered_event_logger.debug(
                "_check_client_allowed_to_see_event(event=%s): Filtered out event because it's an outlier",
                event.event_id,
            )
            return None

        logger.debug(
            "_check_client_allowed_to_see_event(event=%s): Returning out-of-band-membership event %s",
            event.event_id,
            event,
        )
        return event

    if state is None:
        raise Exception("Missing state for non-outlier event")

    # The room's history visibility at the time of the event.
    visibility = get_effective_room_visibility_from_state(state)

    # With lax history visibility the membership checks can be skipped, but
    # only when the sender has *not* been erased: for an erased sender we
    # still need to know whether the user was joined.
    if not sender_erased:
        lax_result = _check_history_visibility(event, visibility, is_peeking)
        if lax_result == _CheckVisibility.ALLOWED:
            return event

    membership_result = _check_membership(user_id, event, visibility, state, is_peeking)
    if not membership_result.allowed:
        filtered_event_logger.debug(
            "_check_client_allowed_to_see_event(event=%s): Filtered out event because the user can't see the event because of their membership, membership_result.allowed=%s membership_result.joined=%s",
            event.event_id,
            membership_result.allowed,
            membership_result.joined,
        )
        return None

    # The user may see the event. If the sender has been erased and the user
    # was not joined at the time, they only get the redacted form.
    if sender_erased and not membership_result.joined:
        filtered_event_logger.debug(
            "_check_client_allowed_to_see_event(event=%s): Returning pruned event because `sender_erased` and the user was not joined at the time",
            event.event_id,
        )
        return prune_event(event)

    return event
@attr.s(auto_attribs=True, frozen=True, slots=True)
class _CheckMembershipReturn:
    """Return value of _check_membership"""

    # Whether the user's membership permits them to see the event.
    allowed: bool
    # Whether the user counts as joined to the room at the event.
    joined: bool
def _check_membership(
    user_id: str,
    event: EventBase,
    visibility: str,
    state: StateMap[EventBase],
    is_peeking: bool,
) -> _CheckMembershipReturn:
    """Check whether the user can see the event due to their membership

    Args:
        user_id: the user being checked
        event: the event being checked
        visibility: the effective history visibility at the event
        state: the state at the event; the user's membership event is read
            from it
        is_peeking: whether the user is peeking into the room

    Returns:
        A `_CheckMembershipReturn`: `allowed` says whether the user may see
        the event, `joined` says whether they count as joined at the event.
    """
    effective_membership = None

    if event.type == EventTypes.Member and event.state_key == user_id:
        # The event is the user's own membership event: use the 'most joined'
        # of the old and new memberships.
        effective_membership = event.content.get("membership", None)
        if effective_membership not in MEMBERSHIP_PRIORITY:
            effective_membership = "leave"

        previous_content = event.unsigned.get("prev_content", {})
        previous_membership = previous_content.get("membership", None)
        if previous_membership not in MEMBERSHIP_PRIORITY:
            previous_membership = "leave"

        # Always let users see their own leave events, otherwise they won't
        # see the room disappear if they reject the invite.
        #
        # (This doesn't cover out-of-band invite rejections, which don't have
        # prev_state populated; callers deal with those as outliers before
        # getting here.)
        if effective_membership == "leave" and previous_membership in (
            "join",
            "invite",
        ):
            return _CheckMembershipReturn(
                True, effective_membership == Membership.JOIN
            )

        new_priority = MEMBERSHIP_PRIORITY.index(effective_membership)
        old_priority = MEMBERSHIP_PRIORITY.index(previous_membership)
        if old_priority < new_priority:
            effective_membership = previous_membership

    if effective_membership is None:
        # Not the user's own membership event: take their membership from the
        # state at the event, if there is one.
        own_member_event = state.get((EventTypes.Member, user_id), None)
        if own_member_event:
            effective_membership = own_member_event.membership

    # Members of the room at the time of the event can always see it.
    if effective_membership == Membership.JOIN:
        return _CheckMembershipReturn(True, True)

    # From here on the user was not joined, so the room's history visibility
    # decides.
    if visibility == HistoryVisibility.JOINED:
        # Only members at the time may see the event.
        return _CheckMembershipReturn(False, False)

    if visibility == HistoryVisibility.INVITED:
        # Being *invited* at the time of the event is also enough.
        return _CheckMembershipReturn(
            effective_membership == Membership.INVITE, False
        )

    if visibility == HistoryVisibility.SHARED and is_peeking:
        # With shared visibility, users cannot see the event unless they have
        # *subsequently* joined the room (or were members at the time).
        #
        # XXX: if the user has subsequently joined and then left again,
        # ideally we would share history up to the point they left. But we
        # don't know when they left, so we treat it as though they never
        # joined, and restrict access.
        return _CheckMembershipReturn(False, False)

    # Shared or world_readable visibility, and the user was not a member at
    # the time: allow it.
    return _CheckMembershipReturn(True, False)
class _CheckFilter ( Enum ) :
MAYBE_ALLOWED = auto ( )
DENIED = auto ( )
def _check_filter_send_to_client(
    event: EventBase,
    clock: Clock,
    retention_policy: RetentionPolicy,
    sender_ignored: bool,
) -> _CheckFilter:
    """Apply the checks that only matter when sending an event to a client

    Args:
        event: the event being checked
        clock: supplies the current time for the retention check
        retention_policy: the retention policy of the event's room
        sender_ignored: whether the receiving user ignores the event's sender

    Returns:
        `_CheckFilter.MAYBE_ALLOWED` if the event might be allowed to be sent
        to clients, `_CheckFilter.DENIED` if it definitely must not be.
    """
    # Dummy events are never sent to clients.
    if event.type == EventTypes.Dummy:
        return _CheckFilter.DENIED

    is_state_event = event.is_state()

    # Non-state events from ignored senders are dropped; state events from
    # them are still let through.
    if sender_ignored and not is_state_event:
        return _CheckFilter.DENIED

    # Until MSC2261 has landed we can't redact malicious alias events, so for
    # now m.room.aliases is filtered out entirely to mitigate abuse, while a
    # better way of advertising aliases on rooms is specced.
    if event.type == EventTypes.Aliases:
        return _CheckFilter.DENIED

    # MSC1763 says retention only applies to non-state events, so state events
    # skip the retention check.
    if not is_state_event and retention_policy.max_lifetime is not None:
        oldest_allowed_ts = clock.time_msec() - retention_policy.max_lifetime
        if event.origin_server_ts < oldest_allowed_ts:
            return _CheckFilter.DENIED

    return _CheckFilter.MAYBE_ALLOWED
class _CheckVisibility ( Enum ) :
ALLOWED = auto ( )
MAYBE_DENIED = auto ( )
def _check_history_visibility(
    event: EventBase, visibility: str, is_peeking: bool
) -> _CheckVisibility:
    """Check if event is allowed to be seen due to lax history visibility.

    Args:
        event: the event being checked
        visibility: the effective history visibility at the event
        is_peeking: whether the user is peeking into the room

    Returns:
        `_CheckVisibility.ALLOWED` if the user can definitely see the event,
        `_CheckVisibility.MAYBE_DENIED` if that depends on their membership.
    """
    if event.type == EventTypes.RoomHistoryVisibility:
        # History visibility events sit on a boundary between two settings.
        # They are always judged by the least restrictive of the old and the
        # new setting.
        previous_visibility = event.unsigned.get("prev_content", {}).get(
            "history_visibility", None
        )
        if previous_visibility not in VISIBILITY_PRIORITY:
            previous_visibility = HistoryVisibility.SHARED

        new_priority = VISIBILITY_PRIORITY.index(visibility)
        old_priority = VISIBILITY_PRIORITY.index(previous_visibility)
        if old_priority < new_priority:
            visibility = previous_visibility

    if visibility == HistoryVisibility.WORLD_READABLE:
        return _CheckVisibility.ALLOWED

    # Shared history is only open to users who are not merely peeking.
    if visibility == HistoryVisibility.SHARED and not is_peeking:
        return _CheckVisibility.ALLOWED

    return _CheckVisibility.MAYBE_DENIED
2018-07-16 06:38:45 -04:00
2022-06-01 07:29:51 -04:00
def get_effective_room_visibility_from_state(state: StateMap[EventBase]) -> str:
    """Work out the history visibility in force for a given state map.

    A missing history visibility event, a missing `history_visibility` field
    and an unrecognised value are all treated as "shared".

    Args:
        state: a state map which may contain the history visibility event

    Returns:
        One of the values in `VISIBILITY_PRIORITY`.
    """
    history_vis_event = state.get(_HISTORY_VIS_KEY, None)
    if not history_vis_event:
        return HistoryVisibility.SHARED

    declared = history_vis_event.content.get(
        "history_visibility", HistoryVisibility.SHARED
    )
    # Anything we do not recognise falls back to the default.
    return declared if declared in VISIBILITY_PRIORITY else HistoryVisibility.SHARED
2020-07-27 12:32:08 -04:00
async def filter_events_for_server(
    storage: StorageControllers,
    target_server_name: str,
    local_server_name: str,
    events: Sequence[EventBase],
    *,
    redact: bool,
    filter_out_erased_senders: bool,
    filter_out_remote_partial_state_events: bool,
) -> List[EventBase]:
    """Filter a list of events based on whether the target server is allowed to
    see them.

    For a fully stated room, the target server is allowed to see an event E if:
      - the state at E has world readable or shared history vis, OR
      - the state at E says that the target server is in the room.

    For a partially stated room, the target server is allowed to see E if:
      - E was created by this homeserver, AND:
          - the partial state at E has world readable or shared history vis, OR
          - the partial state at E says that the target server is in the room.

    TODO: state before or state after?

    Args:
        storage
        target_server_name
        local_server_name
        events
        redact: Controls what to do with events which have been filtered out.
            If True, include their redacted forms; if False, omit them entirely.
        filter_out_erased_senders: If true, also filter out events whose sender has been
            erased. This is used e.g. during pagination to decide whether to
            backfill or not.
        filter_out_remote_partial_state_events: If True, also filter out events in
            partial state rooms created by other homeservers.

    Returns:
        The filtered events, in their original order.
    """

    def is_sender_erased(event: EventBase, erased_senders: Mapping[str, bool]) -> bool:
        # An empty mapping means "nobody has been erased".
        if not erased_senders or not erased_senders[event.sender]:
            return False
        logger.info("Sender of %s has been erased, redacting", event.event_id)
        return True

    def check_event_is_visible(
        visibility: str, memberships: StateMap[EventBase]
    ) -> bool:
        restricted = (HistoryVisibility.INVITED, HistoryVisibility.JOINED)
        if visibility not in restricted:
            return True

        # With restricted visibility the target server needs a user who is in
        # the room, or, for "invited" visibility, who has been invited to it.
        if visibility == HistoryVisibility.INVITED:
            sufficient_memberships = (Membership.JOIN, Membership.INVITE)
        else:
            sufficient_memberships = (Membership.JOIN,)

        for member_event in memberships.values():
            assert get_domain_from_id(member_event.state_key) == target_server_name
            if member_event.membership in sufficient_memberships:
                return True

        # server has no users in the room: redact
        return False

    if not filter_out_erased_senders:
        # Not checking for erasure is equivalent to no users having been
        # erased.
        erased_senders = {}
    else:
        erased_senders = await storage.main.are_users_erased(e.sender for e in events)

    # Filter out non-local events when we are in the middle of a partial join,
    # since our servers list can be out of date and we could leak events to
    # servers not in the room anymore. This can also be true for local events
    # but we consider it to be an acceptable risk.
    #
    # This check comes first, before the membership events are retrieved:
    # otherwise the room could become fully joined after we retrieve them,
    # which would bypass this check while the filtering was still based on an
    # outdated view of the memberships.
    partial_state_invisible_event_ids: Set[str] = set()
    if filter_out_remote_partial_state_events:
        for candidate in events:
            is_remote = get_domain_from_id(candidate.sender) != local_server_name
            if is_remote and await storage.main.is_partial_state_room(
                candidate.room_id
            ):
                partial_state_invisible_event_ids.add(candidate.event_id)

    # Find the history visibility at each event. Where it is "shared" or
    # "world_readable" there is no need to look at memberships (we know the
    # server is in the room).
    event_to_history_vis = await _event_to_history_vis(storage, events)

    # Only events with restricted visibility need the memberships as well.
    open_visibilities = (HistoryVisibility.SHARED, HistoryVisibility.WORLD_READABLE)
    restricted_events = [
        candidate
        for candidate in events
        if event_to_history_vis[candidate.event_id] not in open_visibilities
    ]
    event_to_memberships = await _event_to_memberships(
        storage,
        restricted_events,
        target_server_name,
    )

    def include_event_in_output(e: EventBase) -> bool:
        erased = is_sender_erased(e, erased_senders)
        visible = check_event_is_visible(
            event_to_history_vis[e.event_id], event_to_memberships.get(e.event_id, {})
        )
        hidden_by_partial_state = e.event_id in partial_state_invisible_event_ids
        return visible and not hidden_by_partial_state and not erased

    filtered: List[EventBase] = []
    for candidate in events:
        if not include_event_in_output(candidate):
            if not redact:
                # Filtered-out events are dropped entirely.
                continue
            # Filtered-out events are kept, but only in redacted form.
            candidate = prune_event(candidate)
        filtered.append(candidate)

    return filtered
async def _event_to_history_vis(
    storage: StorageControllers, events: Collection[EventBase]
) -> Dict[str, str]:
    """Get the history visibility at each of the given events

    Args:
        storage
        events: the events to look up

    Returns:
        A map from event id to history_visibility setting, with one entry for
        every event in `events`.
    """
    # Outliers get special treatment. We don't have the state at that point in
    # the room (and attempting to look it up will raise an exception), so all
    # we can do is assume that the requesting server may see the event. That
    # is equivalent to there being no history_visibility event, so outliers
    # are simply left out of the query.
    non_outlier_ids = frozenset(
        e.event_id for e in events if not e.internal_metadata.is_outlier()
    )
    event_to_state_ids = await storage.state.get_state_ids_for_events(
        non_outlier_ids,
        state_filter=StateFilter.from_types(types=(_HISTORY_VIS_KEY,)),
    )

    # Collect the distinct history visibility events that are in force.
    visibility_ids = set()
    for state_ids in event_to_state_ids.values():
        vis_event_id = state_ids.get(_HISTORY_VIS_KEY)
        if vis_event_id:
            visibility_ids.add(vis_event_id)

    vis_events = await storage.main.get_events(visibility_ids)

    result: Dict[str, str] = {}
    for event in events:
        # No state for the event means it is an outlier, which we assume to be
        # open; no history visibility event also means the default.
        state_ids = event_to_state_ids.get(event.event_id)
        visibility_id = state_ids.get(_HISTORY_VIS_KEY) if state_ids else None

        if visibility_id:
            vis = vis_events[visibility_id].content.get(
                "history_visibility", HistoryVisibility.SHARED
            )
        else:
            vis = HistoryVisibility.SHARED

        assert isinstance(vis, str)
        result[event.event_id] = vis

    return result
async def _event_to_memberships(
    storage: StorageControllers, events: Collection[EventBase], server_name: str
) -> Dict[str, StateMap[EventBase]]:
    """Get the remote membership list at each of the given events

    Args:
        storage
        events: the events to look up
        server_name: only membership events of users on this server are kept

    Returns:
        A map from event id to state map, which will contain only membership
        events for the given server.
    """
    if not events:
        return {}

    # For each event, get the event_ids of the membership state at that event.
    #
    # TODO: this means that we request the entire membership list. If there are only
    #   one or two users on this server, and the room is huge, this is very wasteful
    #   (it means more db work, and churns the *stateGroupMembersCache*).
    #   It might be that we could extend StateFilter to specify "give me keys matching
    #   *:<server_name>", to avoid this.
    event_to_state_ids = await storage.state.get_state_ids_for_events(
        frozenset(e.event_id for e in events),
        state_filter=StateFilter.from_types(types=((EventTypes.Member, None),)),
    )

    # We only want the member events that belong to the server's domain.
    #
    # event_to_state_ids contains lots of duplicates, so it turns out to be
    # cheaper to build one complete event_id => (type, state_key) dict first
    # and then filter out the entries we don't want.
    event_id_to_state_key = {}
    for key_to_eid in event_to_state_ids.values():
        for key, member_event_id in key_to_eid.items():
            event_id_to_state_key[member_event_id] = key

    def include(state_key: str) -> bool:
        # Split on the first colon by hand rather than calling
        # get_domain_from_id, for efficiency.
        _, separator, domain = state_key.partition(":")
        if not separator:
            return False
        return domain == server_name

    wanted_event_ids = []
    for member_event_id, (_, state_key) in event_id_to_state_key.items():
        if include(state_key):
            wanted_event_ids.append(member_event_id)

    event_map = await storage.main.get_events(wanted_event_ids)

    # Rebuild the per-event state maps, keeping only the entries whose
    # membership event was fetched above.
    memberships_by_event: Dict[str, StateMap[EventBase]] = {}
    for e_id, key_to_eid in event_to_state_ids.items():
        memberships_by_event[e_id] = {
            key: event_map[inner_e_id]
            for key, inner_e_id in key_to_eid.items()
            if inner_e_id in event_map
        }
    return memberships_by_event