forked-synapse/synapse/handlers/room_list.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

622 lines
23 KiB
Python
Raw Normal View History

#
2023-11-21 15:29:58 -05:00
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
#
2018-07-09 02:09:20 -04:00
import logging
from typing import TYPE_CHECKING, Any, List, Optional, Tuple
import attr
2018-07-09 02:09:20 -04:00
import msgpack
from unpaddedbase64 import decode_base64, encode_base64
from synapse.api.constants import (
EventContentFields,
EventTypes,
GuestAccess,
HistoryVisibility,
JoinRules,
PublicRoomsFilterFields,
)
from synapse.api.errors import (
Codes,
HttpResponseException,
RequestSendFailed,
SynapseError,
)
from synapse.storage.databases.main.room import LargestRoomStats
from synapse.types import JsonDict, JsonMapping, ThirdPartyInstanceID
from synapse.util.caches.descriptors import _CacheContext, cached
from synapse.util.caches.response_cache import ResponseCache
if TYPE_CHECKING:
from synapse.server import HomeServer
logger = logging.getLogger(__name__)
REMOTE_ROOM_LIST_POLL_INTERVAL = 60 * 1000
# This is used to indicate we should only return rooms published to the main list.
2018-07-13 07:03:34 -04:00
EMPTY_THIRD_PARTY_ID = ThirdPartyInstanceID(None, None)
# Maximum number of local public rooms returned over the CS or SS API
MAX_PUBLIC_ROOMS_IN_RESPONSE = 100
class RoomListHandler:
def __init__(self, hs: "HomeServer"):
self.store = hs.get_datastores().main
self._storage_controllers = hs.get_storage_controllers()
self.hs = hs
self.enable_room_list_search = hs.config.roomdirectory.enable_room_list_search
self.response_cache: ResponseCache[
Tuple[Optional[int], Optional[str], Optional[ThirdPartyInstanceID]]
] = ResponseCache(hs.get_clock(), "room_list")
self.remote_response_cache: ResponseCache[
Tuple[str, Optional[int], Optional[str], bool, Optional[str]]
] = ResponseCache(hs.get_clock(), "remote_room_list", timeout_ms=30 * 1000)
async def get_local_public_room_list(
self,
limit: Optional[int] = None,
since_token: Optional[str] = None,
search_filter: Optional[dict] = None,
network_tuple: Optional[ThirdPartyInstanceID] = EMPTY_THIRD_PARTY_ID,
from_federation_origin: Optional[str] = None,
) -> JsonDict:
"""Generate a local public room list.
There are multiple different lists: the main one plus one per third
party network. A client can ask for a specific list or to return all.
Args:
limit
since_token
search_filter
network_tuple: Which public list to use.
This can be (None, None) to indicate the main list, or a particular
appservice and network id to use an appservice specific one.
Setting to None returns all public rooms across all lists.
from_federation_origin: the server name of the requester, or None
if the request is not from federation.
"""
2019-03-20 10:25:58 -04:00
if not self.enable_room_list_search:
return {"chunk": [], "total_room_count_estimate": 0}
2019-03-20 10:25:28 -04:00
logger.info(
"Getting public room list: limit=%r, since=%r, search=%r, network=%r",
limit,
since_token,
bool(search_filter),
network_tuple,
)
capped_limit: int = (
MAX_PUBLIC_ROOMS_IN_RESPONSE
if limit is None or limit > MAX_PUBLIC_ROOMS_IN_RESPONSE
else limit
)
if search_filter or from_federation_origin is not None:
2016-12-07 04:58:33 -05:00
# We explicitly don't bother caching searches or requests for
# appservice specific lists.
# We also don't bother caching requests from federated homeservers.
logger.debug("Bypassing cache as search or federation request.")
return await self._get_public_room_list(
capped_limit,
since_token,
search_filter,
network_tuple=network_tuple,
from_federation_origin=from_federation_origin,
)
2016-09-16 04:05:11 -04:00
key = (capped_limit, since_token, network_tuple)
return await self.response_cache.wrap(
key,
self._get_public_room_list,
capped_limit,
since_token,
network_tuple=network_tuple,
from_federation_origin=from_federation_origin,
)
async def _get_public_room_list(
self,
limit: int,
since_token: Optional[str] = None,
search_filter: Optional[dict] = None,
network_tuple: Optional[ThirdPartyInstanceID] = EMPTY_THIRD_PARTY_ID,
from_federation_origin: Optional[str] = None,
) -> JsonDict:
"""Generate a public room list.
Args:
limit: Maximum amount of rooms to return.
since_token:
search_filter: Dictionary to filter rooms by.
network_tuple: Which public list to use.
This can be (None, None) to indicate the main list, or a particular
appservice and network id to use an appservice specific one.
Setting to None returns all public rooms across all lists.
from_federation_origin: the server name of the requester, or None
if the request is not from federation.
"""
# Pagination tokens work by storing the room ID sent in the last batch,
# plus the direction (forwards or backwards). Next batch tokens always
# go forwards, prev batch tokens always go backwards.
if since_token:
batch_token = RoomListNextBatch.from_token(since_token)
2017-03-13 05:50:10 -04:00
bounds: Optional[Tuple[int, str]] = (
batch_token.last_joined_members,
batch_token.last_room_id,
)
forwards = batch_token.direction_is_forward
has_batch_token = True
else:
bounds = None
forwards = True
has_batch_token = False
if from_federation_origin is None:
# Client-Server API:
# we request one more than wanted to see if there are more pages to come
probing_limit = limit + 1
else:
# Federation API:
# we request a handful more in case any get filtered out by ACLs
# as a best easy effort attempt to return the full number of entries
# specified by `limit`.
probing_limit = limit + 10
results = await self.store.get_largest_public_rooms(
network_tuple,
search_filter,
probing_limit,
bounds=bounds,
forwards=forwards,
ignore_non_federatable=from_federation_origin is not None,
)
def build_room_entry(room: LargestRoomStats) -> JsonDict:
entry = {
"room_id": room.room_id,
"name": room.name,
"topic": room.topic,
"canonical_alias": room.canonical_alias,
"num_joined_members": room.joined_members,
"avatar_url": room.avatar,
"world_readable": room.history_visibility
== HistoryVisibility.WORLD_READABLE,
"guest_can_join": room.guest_access == "can_join",
"join_rule": room.join_rules,
"room_type": room.room_type,
}
# Filter out Nones rather omit the field altogether
return {k: v for k, v in entry.items() if v is not None}
# Build a list of up to `limit` entries.
room_entries: List[JsonDict] = []
rooms_iterator = results if forwards else reversed(results)
# Track the first and last 'considered' rooms so that we can provide correct
# next_batch/prev_batch tokens.
# This is needed because the loop might finish early when
# `len(room_entries) >= limit` and we might be left with rooms we didn't
# 'consider' (iterate over) and we should save those rooms for the next
# batch.
first_considered_room: Optional[LargestRoomStats] = None
last_considered_room: Optional[LargestRoomStats] = None
cut_off_due_to_limit: bool = False
for room_result in rooms_iterator:
if len(room_entries) >= limit:
cut_off_due_to_limit = True
break
if first_considered_room is None:
first_considered_room = room_result
last_considered_room = room_result
if from_federation_origin is not None:
# If this is a federated request, apply server ACLs if the room has any set
acl_evaluator = (
await self._storage_controllers.state.get_server_acl_for_room(
room_result.room_id
)
)
if acl_evaluator is not None:
if not acl_evaluator.server_matches_acl_event(
from_federation_origin
):
# the requesting server is ACL blocked by the room,
# don't show in directory
continue
room_entries.append(build_room_entry(room_result))
if not forwards:
# If we are paginating backwards, we still return the chunk in
# biggest-first order, so reverse again.
room_entries.reverse()
# Swap the order of first/last considered rooms.
first_considered_room, last_considered_room = (
last_considered_room,
first_considered_room,
)
response: JsonDict = {
"chunk": room_entries,
}
num_results = len(results)
more_to_come_from_database = num_results == probing_limit
if forwards and has_batch_token:
# If there was a token given then we assume that there
# must be previous results, even if there were no results in this batch.
if first_considered_room is not None:
response["prev_batch"] = RoomListNextBatch(
last_joined_members=first_considered_room.joined_members,
last_room_id=first_considered_room.room_id,
direction_is_forward=False,
).to_token()
else:
# If we didn't find any results this time,
# we don't have an actual room ID to put in the token.
# But since `first_considered_room` is None, we know that we have
# reached the end of the results.
# So we can use a token of (0, empty room ID) to paginate from the end
# next time.
response["prev_batch"] = RoomListNextBatch(
last_joined_members=0,
last_room_id="",
direction_is_forward=False,
).to_token()
if num_results > 0:
assert first_considered_room is not None
assert last_considered_room is not None
if forwards:
if more_to_come_from_database or cut_off_due_to_limit:
response["next_batch"] = RoomListNextBatch(
last_joined_members=last_considered_room.joined_members,
last_room_id=last_considered_room.room_id,
direction_is_forward=True,
).to_token()
else: # backwards
if has_batch_token:
response["next_batch"] = RoomListNextBatch(
last_joined_members=last_considered_room.joined_members,
last_room_id=last_considered_room.room_id,
direction_is_forward=True,
).to_token()
if more_to_come_from_database or cut_off_due_to_limit:
response["prev_batch"] = RoomListNextBatch(
last_joined_members=first_considered_room.joined_members,
last_room_id=first_considered_room.room_id,
direction_is_forward=False,
).to_token()
# We can't efficiently count the total number of rooms that are not
# blocked by ACLs, but this is just an estimate so that should be
# good enough.
response["total_room_count_estimate"] = await self.store.count_public_rooms(
network_tuple,
ignore_non_federatable=from_federation_origin is not None,
search_filter=search_filter,
)
return response
@cached(num_args=1, cache_context=True)
async def generate_room_entry(
self,
room_id: str,
num_joined_users: int,
cache_context: _CacheContext,
with_alias: bool = True,
allow_private: bool = False,
) -> Optional[JsonMapping]:
2017-03-13 07:53:26 -04:00
"""Returns the entry for a room
Args:
room_id: The room's ID.
num_joined_users: Number of users in the room.
cache_context: Information for cached responses.
with_alias: Whether to return the room's aliases in the result.
allow_private: Whether invite-only rooms should be shown.
Returns:
Returns a room entry as a dictionary, or None if this
room was determined not to be shown publicly.
2017-03-13 07:53:26 -04:00
"""
result = {"room_id": room_id, "num_joined_members": num_joined_users}
if with_alias:
aliases = await self.store.get_aliases_for_room(
room_id, on_invalidate=cache_context.invalidate
)
if aliases:
result["aliases"] = aliases
current_state_ids = await self._storage_controllers.state.get_current_state_ids(
room_id, on_invalidate=cache_context.invalidate
)
if not current_state_ids:
# We're not in the room, so may as well bail out here.
return result
event_map = await self.store.get_events(
[
event_id
for key, event_id in current_state_ids.items()
if key[0]
in (
EventTypes.Create,
EventTypes.JoinRules,
EventTypes.Name,
EventTypes.Topic,
EventTypes.CanonicalAlias,
EventTypes.RoomHistoryVisibility,
EventTypes.GuestAccess,
"m.room.avatar",
)
]
)
current_state = {(ev.type, ev.state_key): ev for ev in event_map.values()}
# Double check that this is actually a public room.
join_rules_event = current_state.get((EventTypes.JoinRules, ""))
if join_rules_event:
join_rule = join_rules_event.content.get("join_rule", None)
2017-07-10 10:44:15 -04:00
if not allow_private and join_rule and join_rule != JoinRules.PUBLIC:
return None
# Return whether this room is open to federation users or not
create_event = current_state[EventTypes.Create, ""]
result["m.federate"] = create_event.content.get(
EventContentFields.FEDERATE, True
)
name_event = current_state.get((EventTypes.Name, ""))
if name_event:
name = name_event.content.get("name", None)
if name:
result["name"] = name
topic_event = current_state.get((EventTypes.Topic, ""))
if topic_event:
topic = topic_event.content.get("topic", None)
if topic:
result["topic"] = topic
canonical_event = current_state.get((EventTypes.CanonicalAlias, ""))
if canonical_event:
canonical_alias = canonical_event.content.get("alias", None)
if canonical_alias:
result["canonical_alias"] = canonical_alias
visibility_event = current_state.get((EventTypes.RoomHistoryVisibility, ""))
visibility = None
if visibility_event:
visibility = visibility_event.content.get("history_visibility", None)
result["world_readable"] = visibility == HistoryVisibility.WORLD_READABLE
guest_event = current_state.get((EventTypes.GuestAccess, ""))
guest = None
if guest_event:
guest = guest_event.content.get(EventContentFields.GUEST_ACCESS)
result["guest_can_join"] = guest == GuestAccess.CAN_JOIN
avatar_event = current_state.get(("m.room.avatar", ""))
if avatar_event:
avatar_url = avatar_event.content.get("url", None)
if avatar_url:
result["avatar_url"] = avatar_url
return result
async def get_remote_public_room_list(
self,
server_name: str,
limit: Optional[int] = None,
since_token: Optional[str] = None,
search_filter: Optional[dict] = None,
include_all_networks: bool = False,
third_party_instance_id: Optional[str] = None,
) -> JsonDict:
"""Get the public room list from remote server
Raises:
SynapseError
"""
2019-03-20 10:25:58 -04:00
if not self.enable_room_list_search:
return {"chunk": [], "total_room_count_estimate": 0}
2016-09-16 05:24:15 -04:00
if search_filter:
# Searching across federation is defined in MSC2197.
# However, the remote homeserver may or may not actually support it.
# So we first try an MSC2197 remote-filtered search, then fall back
# to a locally-filtered search if we must.
try:
res = await self._get_remote_list_cached(
server_name,
limit=limit,
since_token=since_token,
include_all_networks=include_all_networks,
third_party_instance_id=third_party_instance_id,
search_filter=search_filter,
)
return res
except HttpResponseException as hre:
syn_err = hre.to_synapse_error()
if hre.code in (404, 405) or syn_err.errcode in (
Codes.UNRECOGNIZED,
Codes.NOT_FOUND,
):
logger.debug("Falling back to locally-filtered /publicRooms")
else:
# Not an error that should trigger a fallback.
raise SynapseError(502, "Failed to fetch room list")
except RequestSendFailed:
# Not an error that should trigger a fallback.
raise SynapseError(502, "Failed to fetch room list")
# if we reach this point, then we fall back to the situation where
# we currently don't support searching across federation, so we have
2016-09-16 05:24:15 -04:00
# to do it manually without pagination
limit = None
since_token = None
try:
res = await self._get_remote_list_cached(
server_name,
limit=limit,
since_token=since_token,
include_all_networks=include_all_networks,
third_party_instance_id=third_party_instance_id,
)
except (RequestSendFailed, HttpResponseException):
raise SynapseError(502, "Failed to fetch room list")
2016-09-16 05:19:32 -04:00
if search_filter:
2016-09-16 05:24:15 -04:00
res = {
"chunk": [
2016-09-16 05:19:32 -04:00
entry
2016-09-16 05:24:15 -04:00
for entry in list(res.get("chunk", []))
2016-09-16 05:19:32 -04:00
if _matches_room_entry(entry, search_filter)
2016-09-16 05:24:15 -04:00
]
}
2016-09-16 05:19:32 -04:00
return res
async def _get_remote_list_cached(
2016-09-16 05:31:59 -04:00
self,
server_name: str,
limit: Optional[int] = None,
since_token: Optional[str] = None,
search_filter: Optional[dict] = None,
include_all_networks: bool = False,
third_party_instance_id: Optional[str] = None,
) -> JsonDict:
"""Wrapper around FederationClient.get_public_rooms that caches the
result.
"""
repl_layer = self.hs.get_federation_client()
2016-09-16 05:31:59 -04:00
if search_filter:
# We can't cache when asking for search
return await repl_layer.get_public_rooms(
server_name,
limit=limit,
since_token=since_token,
search_filter=search_filter,
include_all_networks=include_all_networks,
third_party_instance_id=third_party_instance_id,
)
2016-09-16 05:31:59 -04:00
key = (
server_name,
limit,
since_token,
include_all_networks,
third_party_instance_id,
)
return await self.remote_response_cache.wrap(
key,
repl_layer.get_public_rooms,
server_name,
limit=limit,
since_token=since_token,
search_filter=search_filter,
include_all_networks=include_all_networks,
third_party_instance_id=third_party_instance_id,
)
2016-09-16 05:31:59 -04:00
@attr.s(slots=True, frozen=True, auto_attribs=True)
class RoomListNextBatch:
last_joined_members: int # The count to get rooms after/before
last_room_id: str # The room_id to get rooms after/before
direction_is_forward: bool # True if this is a next_batch, false if prev_batch
KEY_DICT = {
"last_joined_members": "m",
"last_room_id": "r",
"direction_is_forward": "d",
}
REVERSE_KEY_DICT = {v: k for k, v in KEY_DICT.items()}
@classmethod
def from_token(cls, token: str) -> "RoomListNextBatch":
decoded = msgpack.loads(decode_base64(token), raw=False)
return RoomListNextBatch(
**{cls.REVERSE_KEY_DICT[key]: val for key, val in decoded.items()}
)
def to_token(self) -> str:
return encode_base64(
msgpack.dumps(
{self.KEY_DICT[key]: val for key, val in attr.asdict(self).items()}
)
)
def copy_and_replace(self, **kwds: Any) -> "RoomListNextBatch":
return attr.evolve(self, **kwds)
2016-09-16 05:19:32 -04:00
def _matches_room_entry(room_entry: JsonDict, search_filter: dict) -> bool:
"""Determines whether the given search filter matches a room entry returned over
federation.
Only used if the remote server does not support MSC2197 remote-filtered search, and
hence does not support MSC3827 filtering of `/publicRooms` by room type either.
In this case, we cannot apply the `room_type` filter since no `room_type` field is
returned.
"""
if search_filter and search_filter.get(
PublicRoomsFilterFields.GENERIC_SEARCH_TERM, None
):
generic_search_term = search_filter[
PublicRoomsFilterFields.GENERIC_SEARCH_TERM
].upper()
if generic_search_term in room_entry.get("name", "").upper():
2016-09-16 05:19:32 -04:00
return True
elif generic_search_term in room_entry.get("topic", "").upper():
2016-09-16 05:19:32 -04:00
return True
elif generic_search_term in room_entry.get("canonical_alias", "").upper():
2016-09-16 05:19:32 -04:00
return True
else:
return True
return False