diff --git a/changelog.d/10981.bugfix b/changelog.d/10981.bugfix new file mode 100644 index 000000000..d7bf66034 --- /dev/null +++ b/changelog.d/10981.bugfix @@ -0,0 +1 @@ +Fix a bug that could leak local users' per-room nicknames and avatars when the user directory is rebuilt. \ No newline at end of file diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index 5f538947e..5c713a732 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -228,10 +228,6 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore): is_in_room = await self.is_host_joined(room_id, self.server_name) if is_in_room: - is_public = await self.is_room_world_readable_or_publicly_joinable( - room_id - ) - users_with_profile = await self.get_users_in_room_with_profiles(room_id) # Throw away users excluded from the directory. users_with_profile = { @@ -241,22 +237,33 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore): or await self.should_include_local_user_in_dir(user_id) } - # Update each user in the user directory. + # Upsert a user_directory record for each remote user we see. for user_id, profile in users_with_profile.items(): + # Local users are processed separately in + # `_populate_user_directory_users`; there we can read from + # the `profiles` table to ensure we don't leak their per-room + # profiles. It also means we write local users to this table + # exactly once, rather than once for every room they're in. + if self.hs.is_mine_id(user_id): + continue + # TODO `users_with_profile` above reads from the `user_directory` + # table, meaning that `profile` is bespoke to this room. + # and this leaks remote users' per-room profiles to the user directory. await self.update_profile_in_user_dir( user_id, profile.display_name, profile.avatar_url ) - to_insert = set() - + # Now update the room sharing tables to include this room. + is_public = await self.is_room_world_readable_or_publicly_joinable( + room_id + ) if is_public: - for user_id in users_with_profile: - to_insert.add(user_id) - - if to_insert: - await self.add_users_in_public_rooms(room_id, to_insert) - to_insert.clear() + if users_with_profile: + await self.add_users_in_public_rooms( + room_id, users_with_profile.keys() + ) else: + to_insert = set() for user_id in users_with_profile: # We want the set of pairs (L, M) where L and M are # in `users_with_profile` and L is local. diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py index 6884ca9b7..fddfb8db2 100644 --- a/tests/storage/test_user_directory.py +++ b/tests/storage/test_user_directory.py @@ -11,17 +11,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, List, Set, Tuple +from typing import Any, Dict, List, Set, Tuple +from unittest import mock from unittest.mock import Mock, patch from twisted.test.proto_helpers import MemoryReactor -from synapse.api.constants import UserTypes +from synapse.api.constants import EventTypes, Membership, UserTypes from synapse.appservice import ApplicationService from synapse.rest import admin from synapse.rest.client import login, register, room from synapse.server import HomeServer from synapse.storage import DataStore +from synapse.storage.roommember import ProfileInfo from synapse.util import Clock from tests.test_utils.event_injection import inject_member_event @@ -52,6 +54,11 @@ class GetUserDirectoryTables: return r async def get_users_in_public_rooms(self) -> List[Tuple[str, str]]: + """Fetch the entire `users_in_public_rooms` table. + + Returns a list of tuples (user_id, room_id) where room_id is public and + contains the user with the given id. + """ r = await self.store.db_pool.simple_select_list( "users_in_public_rooms", None, ("user_id", "room_id") ) @@ -62,6 +69,13 @@ class GetUserDirectoryTables: return retval async def get_users_who_share_private_rooms(self) -> List[Dict[str, str]]: + """Fetch the entire `users_who_share_private_rooms` table. + + Returns a dict containing "user_id", "other_user_id" and "room_id" keys. + The dicts can be flattened to Tuples with the `_compress_shared` method. + (This seems a little awkward---maybe we could clean this up.) + """ + return await self.store.db_pool.simple_select_list( "users_who_share_private_rooms", None, @@ -69,6 +83,10 @@ class GetUserDirectoryTables: ) async def get_users_in_user_directory(self) -> Set[str]: + """Fetch the set of users in the `user_directory` table. + + This is useful when checking we've correctly excluded users from the directory. + """ result = await self.store.db_pool.simple_select_list( "user_directory", None, @@ -76,6 +94,25 @@ class GetUserDirectoryTables: ) return {row["user_id"] for row in result} + async def get_profiles_in_user_directory(self) -> Dict[str, ProfileInfo]: + """Fetch users and their profiles from the `user_directory` table. + + This is useful when we want to inspect display names and avatars. + It's almost the entire contents of the `user_directory` table: the only + thing missing is an unused room_id column. + """ + rows = await self.store.db_pool.simple_select_list( + "user_directory", + None, + ("user_id", "display_name", "avatar_url"), + ) + return { + row["user_id"]: ProfileInfo( + display_name=row["display_name"], avatar_url=row["avatar_url"] + ) + for row in rows + } + class UserDirectoryInitialPopulationTestcase(HomeserverTestCase): """Ensure that rebuilding the directory writes the correct data to the DB. @@ -201,20 +238,6 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase): self.helper.invite(private_room, src=u1, targ=u3, tok=u1_token) self.helper.join(private_room, user=u3, tok=u3_token) - self.get_success(self.store.update_user_directory_stream_pos(None)) - self.get_success(self.store.delete_all_from_user_dir()) - - shares_private = self.get_success( - self.user_dir_helper.get_users_who_share_private_rooms() - ) - public_users = self.get_success( - self.user_dir_helper.get_users_in_public_rooms() - ) - - # Nothing updated yet - self.assertEqual(shares_private, []) - self.assertEqual(public_users, []) - # Do the initial population of the user directory via the background update self._purge_and_rebuild_user_dir() @@ -346,6 +369,45 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase): # Check the AS user is not in the directory. self._check_room_sharing_tables(user, public, private) + def test_population_conceals_private_nickname(self) -> None: + # Make a private room, and set a nickname within + user = self.register_user("aaaa", "pass") + user_token = self.login(user, "pass") + private_room = self.helper.create_room_as(user, is_public=False, tok=user_token) + self.helper.send_state( + private_room, + EventTypes.Member, + state_key=user, + body={"membership": Membership.JOIN, "displayname": "BBBB"}, + tok=user_token, + ) + + # Rebuild the user directory. Make the rescan of the `users` table a no-op + # so we only see the effect of scanning the `room_memberships` table. + async def mocked_process_users(*args: Any, **kwargs: Any) -> int: + await self.store.db_pool.updates._end_background_update( + "populate_user_directory_process_users" + ) + return 1 + + with mock.patch.dict( + self.store.db_pool.updates._background_update_handlers, + populate_user_directory_process_users=mocked_process_users, + ): + self._purge_and_rebuild_user_dir() + + # Local users are ignored by the scan over rooms + users = self.get_success(self.user_dir_helper.get_profiles_in_user_directory()) + self.assertEqual(users, {}) + + # Do a full rebuild including the scan over the `users` table. The local + # user should appear with their profile name. + self._purge_and_rebuild_user_dir() + users = self.get_success(self.user_dir_helper.get_profiles_in_user_directory()) + self.assertEqual( + users, {user: ProfileInfo(display_name="aaaa", avatar_url=None)} + ) + class UserDirectoryStoreTestCase(HomeserverTestCase): def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: