forked-synapse/synapse/handlers/room_list.py
Erik Johnston 23740eaa3d
Correctly mention previous copyright (#16820)
During the migration the automated script to update the copyright
headers accidentally got rid of some of the existing copyright lines.
Reinstate them.
2024-01-23 11:26:48 +00:00

623 lines
23 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2014 - 2016 OpenMarket Ltd
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
#
import logging
from typing import TYPE_CHECKING, Any, List, Optional, Tuple
import attr
import msgpack
from unpaddedbase64 import decode_base64, encode_base64
from synapse.api.constants import (
EventContentFields,
EventTypes,
GuestAccess,
HistoryVisibility,
JoinRules,
PublicRoomsFilterFields,
)
from synapse.api.errors import (
Codes,
HttpResponseException,
RequestSendFailed,
SynapseError,
)
from synapse.storage.databases.main.room import LargestRoomStats
from synapse.types import JsonDict, JsonMapping, ThirdPartyInstanceID
from synapse.util.caches.descriptors import _CacheContext, cached
from synapse.util.caches.response_cache import ResponseCache
if TYPE_CHECKING:
from synapse.server import HomeServer
logger = logging.getLogger(__name__)
REMOTE_ROOM_LIST_POLL_INTERVAL = 60 * 1000
# This is used to indicate we should only return rooms published to the main list.
EMPTY_THIRD_PARTY_ID = ThirdPartyInstanceID(None, None)
# Maximum number of local public rooms returned over the CS or SS API
MAX_PUBLIC_ROOMS_IN_RESPONSE = 100
class RoomListHandler:
def __init__(self, hs: "HomeServer"):
self.store = hs.get_datastores().main
self._storage_controllers = hs.get_storage_controllers()
self.hs = hs
self.enable_room_list_search = hs.config.roomdirectory.enable_room_list_search
self.response_cache: ResponseCache[
Tuple[Optional[int], Optional[str], Optional[ThirdPartyInstanceID]]
] = ResponseCache(hs.get_clock(), "room_list")
self.remote_response_cache: ResponseCache[
Tuple[str, Optional[int], Optional[str], bool, Optional[str]]
] = ResponseCache(hs.get_clock(), "remote_room_list", timeout_ms=30 * 1000)
async def get_local_public_room_list(
self,
limit: Optional[int] = None,
since_token: Optional[str] = None,
search_filter: Optional[dict] = None,
network_tuple: Optional[ThirdPartyInstanceID] = EMPTY_THIRD_PARTY_ID,
from_federation_origin: Optional[str] = None,
) -> JsonDict:
"""Generate a local public room list.
There are multiple different lists: the main one plus one per third
party network. A client can ask for a specific list or to return all.
Args:
limit
since_token
search_filter
network_tuple: Which public list to use.
This can be (None, None) to indicate the main list, or a particular
appservice and network id to use an appservice specific one.
Setting to None returns all public rooms across all lists.
from_federation_origin: the server name of the requester, or None
if the request is not from federation.
"""
if not self.enable_room_list_search:
return {"chunk": [], "total_room_count_estimate": 0}
logger.info(
"Getting public room list: limit=%r, since=%r, search=%r, network=%r",
limit,
since_token,
bool(search_filter),
network_tuple,
)
capped_limit: int = (
MAX_PUBLIC_ROOMS_IN_RESPONSE
if limit is None or limit > MAX_PUBLIC_ROOMS_IN_RESPONSE
else limit
)
if search_filter or from_federation_origin is not None:
# We explicitly don't bother caching searches or requests for
# appservice specific lists.
# We also don't bother caching requests from federated homeservers.
logger.debug("Bypassing cache as search or federation request.")
return await self._get_public_room_list(
capped_limit,
since_token,
search_filter,
network_tuple=network_tuple,
from_federation_origin=from_federation_origin,
)
key = (capped_limit, since_token, network_tuple)
return await self.response_cache.wrap(
key,
self._get_public_room_list,
capped_limit,
since_token,
network_tuple=network_tuple,
from_federation_origin=from_federation_origin,
)
async def _get_public_room_list(
self,
limit: int,
since_token: Optional[str] = None,
search_filter: Optional[dict] = None,
network_tuple: Optional[ThirdPartyInstanceID] = EMPTY_THIRD_PARTY_ID,
from_federation_origin: Optional[str] = None,
) -> JsonDict:
"""Generate a public room list.
Args:
limit: Maximum amount of rooms to return.
since_token:
search_filter: Dictionary to filter rooms by.
network_tuple: Which public list to use.
This can be (None, None) to indicate the main list, or a particular
appservice and network id to use an appservice specific one.
Setting to None returns all public rooms across all lists.
from_federation_origin: the server name of the requester, or None
if the request is not from federation.
"""
# Pagination tokens work by storing the room ID sent in the last batch,
# plus the direction (forwards or backwards). Next batch tokens always
# go forwards, prev batch tokens always go backwards.
if since_token:
batch_token = RoomListNextBatch.from_token(since_token)
bounds: Optional[Tuple[int, str]] = (
batch_token.last_joined_members,
batch_token.last_room_id,
)
forwards = batch_token.direction_is_forward
has_batch_token = True
else:
bounds = None
forwards = True
has_batch_token = False
if from_federation_origin is None:
# Client-Server API:
# we request one more than wanted to see if there are more pages to come
probing_limit = limit + 1
else:
# Federation API:
# we request a handful more in case any get filtered out by ACLs
# as a best easy effort attempt to return the full number of entries
# specified by `limit`.
probing_limit = limit + 10
results = await self.store.get_largest_public_rooms(
network_tuple,
search_filter,
probing_limit,
bounds=bounds,
forwards=forwards,
ignore_non_federatable=from_federation_origin is not None,
)
def build_room_entry(room: LargestRoomStats) -> JsonDict:
entry = {
"room_id": room.room_id,
"name": room.name,
"topic": room.topic,
"canonical_alias": room.canonical_alias,
"num_joined_members": room.joined_members,
"avatar_url": room.avatar,
"world_readable": room.history_visibility
== HistoryVisibility.WORLD_READABLE,
"guest_can_join": room.guest_access == "can_join",
"join_rule": room.join_rules,
"room_type": room.room_type,
}
# Filter out Nones rather omit the field altogether
return {k: v for k, v in entry.items() if v is not None}
# Build a list of up to `limit` entries.
room_entries: List[JsonDict] = []
rooms_iterator = results if forwards else reversed(results)
# Track the first and last 'considered' rooms so that we can provide correct
# next_batch/prev_batch tokens.
# This is needed because the loop might finish early when
# `len(room_entries) >= limit` and we might be left with rooms we didn't
# 'consider' (iterate over) and we should save those rooms for the next
# batch.
first_considered_room: Optional[LargestRoomStats] = None
last_considered_room: Optional[LargestRoomStats] = None
cut_off_due_to_limit: bool = False
for room_result in rooms_iterator:
if len(room_entries) >= limit:
cut_off_due_to_limit = True
break
if first_considered_room is None:
first_considered_room = room_result
last_considered_room = room_result
if from_federation_origin is not None:
# If this is a federated request, apply server ACLs if the room has any set
acl_evaluator = (
await self._storage_controllers.state.get_server_acl_for_room(
room_result.room_id
)
)
if acl_evaluator is not None:
if not acl_evaluator.server_matches_acl_event(
from_federation_origin
):
# the requesting server is ACL blocked by the room,
# don't show in directory
continue
room_entries.append(build_room_entry(room_result))
if not forwards:
# If we are paginating backwards, we still return the chunk in
# biggest-first order, so reverse again.
room_entries.reverse()
# Swap the order of first/last considered rooms.
first_considered_room, last_considered_room = (
last_considered_room,
first_considered_room,
)
response: JsonDict = {
"chunk": room_entries,
}
num_results = len(results)
more_to_come_from_database = num_results == probing_limit
if forwards and has_batch_token:
# If there was a token given then we assume that there
# must be previous results, even if there were no results in this batch.
if first_considered_room is not None:
response["prev_batch"] = RoomListNextBatch(
last_joined_members=first_considered_room.joined_members,
last_room_id=first_considered_room.room_id,
direction_is_forward=False,
).to_token()
else:
# If we didn't find any results this time,
# we don't have an actual room ID to put in the token.
# But since `first_considered_room` is None, we know that we have
# reached the end of the results.
# So we can use a token of (0, empty room ID) to paginate from the end
# next time.
response["prev_batch"] = RoomListNextBatch(
last_joined_members=0,
last_room_id="",
direction_is_forward=False,
).to_token()
if num_results > 0:
assert first_considered_room is not None
assert last_considered_room is not None
if forwards:
if more_to_come_from_database or cut_off_due_to_limit:
response["next_batch"] = RoomListNextBatch(
last_joined_members=last_considered_room.joined_members,
last_room_id=last_considered_room.room_id,
direction_is_forward=True,
).to_token()
else: # backwards
if has_batch_token:
response["next_batch"] = RoomListNextBatch(
last_joined_members=last_considered_room.joined_members,
last_room_id=last_considered_room.room_id,
direction_is_forward=True,
).to_token()
if more_to_come_from_database or cut_off_due_to_limit:
response["prev_batch"] = RoomListNextBatch(
last_joined_members=first_considered_room.joined_members,
last_room_id=first_considered_room.room_id,
direction_is_forward=False,
).to_token()
# We can't efficiently count the total number of rooms that are not
# blocked by ACLs, but this is just an estimate so that should be
# good enough.
response["total_room_count_estimate"] = await self.store.count_public_rooms(
network_tuple,
ignore_non_federatable=from_federation_origin is not None,
search_filter=search_filter,
)
return response
@cached(num_args=1, cache_context=True)
async def generate_room_entry(
self,
room_id: str,
num_joined_users: int,
cache_context: _CacheContext,
with_alias: bool = True,
allow_private: bool = False,
) -> Optional[JsonMapping]:
"""Returns the entry for a room
Args:
room_id: The room's ID.
num_joined_users: Number of users in the room.
cache_context: Information for cached responses.
with_alias: Whether to return the room's aliases in the result.
allow_private: Whether invite-only rooms should be shown.
Returns:
Returns a room entry as a dictionary, or None if this
room was determined not to be shown publicly.
"""
result = {"room_id": room_id, "num_joined_members": num_joined_users}
if with_alias:
aliases = await self.store.get_aliases_for_room(
room_id, on_invalidate=cache_context.invalidate
)
if aliases:
result["aliases"] = aliases
current_state_ids = await self._storage_controllers.state.get_current_state_ids(
room_id, on_invalidate=cache_context.invalidate
)
if not current_state_ids:
# We're not in the room, so may as well bail out here.
return result
event_map = await self.store.get_events(
[
event_id
for key, event_id in current_state_ids.items()
if key[0]
in (
EventTypes.Create,
EventTypes.JoinRules,
EventTypes.Name,
EventTypes.Topic,
EventTypes.CanonicalAlias,
EventTypes.RoomHistoryVisibility,
EventTypes.GuestAccess,
"m.room.avatar",
)
]
)
current_state = {(ev.type, ev.state_key): ev for ev in event_map.values()}
# Double check that this is actually a public room.
join_rules_event = current_state.get((EventTypes.JoinRules, ""))
if join_rules_event:
join_rule = join_rules_event.content.get("join_rule", None)
if not allow_private and join_rule and join_rule != JoinRules.PUBLIC:
return None
# Return whether this room is open to federation users or not
create_event = current_state[EventTypes.Create, ""]
result["m.federate"] = create_event.content.get(
EventContentFields.FEDERATE, True
)
name_event = current_state.get((EventTypes.Name, ""))
if name_event:
name = name_event.content.get("name", None)
if name:
result["name"] = name
topic_event = current_state.get((EventTypes.Topic, ""))
if topic_event:
topic = topic_event.content.get("topic", None)
if topic:
result["topic"] = topic
canonical_event = current_state.get((EventTypes.CanonicalAlias, ""))
if canonical_event:
canonical_alias = canonical_event.content.get("alias", None)
if canonical_alias:
result["canonical_alias"] = canonical_alias
visibility_event = current_state.get((EventTypes.RoomHistoryVisibility, ""))
visibility = None
if visibility_event:
visibility = visibility_event.content.get("history_visibility", None)
result["world_readable"] = visibility == HistoryVisibility.WORLD_READABLE
guest_event = current_state.get((EventTypes.GuestAccess, ""))
guest = None
if guest_event:
guest = guest_event.content.get(EventContentFields.GUEST_ACCESS)
result["guest_can_join"] = guest == GuestAccess.CAN_JOIN
avatar_event = current_state.get(("m.room.avatar", ""))
if avatar_event:
avatar_url = avatar_event.content.get("url", None)
if avatar_url:
result["avatar_url"] = avatar_url
return result
async def get_remote_public_room_list(
self,
server_name: str,
limit: Optional[int] = None,
since_token: Optional[str] = None,
search_filter: Optional[dict] = None,
include_all_networks: bool = False,
third_party_instance_id: Optional[str] = None,
) -> JsonDict:
"""Get the public room list from remote server
Raises:
SynapseError
"""
if not self.enable_room_list_search:
return {"chunk": [], "total_room_count_estimate": 0}
if search_filter:
# Searching across federation is defined in MSC2197.
# However, the remote homeserver may or may not actually support it.
# So we first try an MSC2197 remote-filtered search, then fall back
# to a locally-filtered search if we must.
try:
res = await self._get_remote_list_cached(
server_name,
limit=limit,
since_token=since_token,
include_all_networks=include_all_networks,
third_party_instance_id=third_party_instance_id,
search_filter=search_filter,
)
return res
except HttpResponseException as hre:
syn_err = hre.to_synapse_error()
if hre.code in (404, 405) or syn_err.errcode in (
Codes.UNRECOGNIZED,
Codes.NOT_FOUND,
):
logger.debug("Falling back to locally-filtered /publicRooms")
else:
# Not an error that should trigger a fallback.
raise SynapseError(502, "Failed to fetch room list")
except RequestSendFailed:
# Not an error that should trigger a fallback.
raise SynapseError(502, "Failed to fetch room list")
# if we reach this point, then we fall back to the situation where
# we currently don't support searching across federation, so we have
# to do it manually without pagination
limit = None
since_token = None
try:
res = await self._get_remote_list_cached(
server_name,
limit=limit,
since_token=since_token,
include_all_networks=include_all_networks,
third_party_instance_id=third_party_instance_id,
)
except (RequestSendFailed, HttpResponseException):
raise SynapseError(502, "Failed to fetch room list")
if search_filter:
res = {
"chunk": [
entry
for entry in list(res.get("chunk", []))
if _matches_room_entry(entry, search_filter)
]
}
return res
async def _get_remote_list_cached(
self,
server_name: str,
limit: Optional[int] = None,
since_token: Optional[str] = None,
search_filter: Optional[dict] = None,
include_all_networks: bool = False,
third_party_instance_id: Optional[str] = None,
) -> JsonDict:
"""Wrapper around FederationClient.get_public_rooms that caches the
result.
"""
repl_layer = self.hs.get_federation_client()
if search_filter:
# We can't cache when asking for search
return await repl_layer.get_public_rooms(
server_name,
limit=limit,
since_token=since_token,
search_filter=search_filter,
include_all_networks=include_all_networks,
third_party_instance_id=third_party_instance_id,
)
key = (
server_name,
limit,
since_token,
include_all_networks,
third_party_instance_id,
)
return await self.remote_response_cache.wrap(
key,
repl_layer.get_public_rooms,
server_name,
limit=limit,
since_token=since_token,
search_filter=search_filter,
include_all_networks=include_all_networks,
third_party_instance_id=third_party_instance_id,
)
@attr.s(slots=True, frozen=True, auto_attribs=True)
class RoomListNextBatch:
last_joined_members: int # The count to get rooms after/before
last_room_id: str # The room_id to get rooms after/before
direction_is_forward: bool # True if this is a next_batch, false if prev_batch
KEY_DICT = {
"last_joined_members": "m",
"last_room_id": "r",
"direction_is_forward": "d",
}
REVERSE_KEY_DICT = {v: k for k, v in KEY_DICT.items()}
@classmethod
def from_token(cls, token: str) -> "RoomListNextBatch":
decoded = msgpack.loads(decode_base64(token), raw=False)
return RoomListNextBatch(
**{cls.REVERSE_KEY_DICT[key]: val for key, val in decoded.items()}
)
def to_token(self) -> str:
return encode_base64(
msgpack.dumps(
{self.KEY_DICT[key]: val for key, val in attr.asdict(self).items()}
)
)
def copy_and_replace(self, **kwds: Any) -> "RoomListNextBatch":
return attr.evolve(self, **kwds)
def _matches_room_entry(room_entry: JsonDict, search_filter: dict) -> bool:
"""Determines whether the given search filter matches a room entry returned over
federation.
Only used if the remote server does not support MSC2197 remote-filtered search, and
hence does not support MSC3827 filtering of `/publicRooms` by room type either.
In this case, we cannot apply the `room_type` filter since no `room_type` field is
returned.
"""
if search_filter and search_filter.get(
PublicRoomsFilterFields.GENERIC_SEARCH_TERM, None
):
generic_search_term = search_filter[
PublicRoomsFilterFields.GENERIC_SEARCH_TERM
].upper()
if generic_search_term in room_entry.get("name", "").upper():
return True
elif generic_search_term in room_entry.get("topic", "").upper():
return True
elif generic_search_term in room_entry.get("canonical_alias", "").upper():
return True
else:
return True
return False