Cache user IDs instead of profile objects (#13573)

The profile objects are never used and increase cache size significantly.
This commit is contained in:
Nick Mills-Barrett 2022-08-23 10:49:59 +01:00 committed by GitHub
parent 37f329c9ad
commit 5e7847dc92
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 57 additions and 54 deletions

View file

@ -835,9 +835,9 @@ class RoomMemberWorkerStore(EventsWorkerStore):
return shared_room_ids or frozenset()
async def get_joined_users_from_state(
async def get_joined_user_ids_from_state(
self, room_id: str, state: StateMap[str], state_entry: "_StateCacheEntry"
) -> Dict[str, ProfileInfo]:
) -> Set[str]:
state_group: Union[object, int] = state_entry.state_group
if not state_group:
# If state_group is None it means it has yet to be assigned a
@ -848,25 +848,25 @@ class RoomMemberWorkerStore(EventsWorkerStore):
assert state_group is not None
with Measure(self._clock, "get_joined_users_from_state"):
return await self._get_joined_users_from_context(
return await self._get_joined_user_ids_from_context(
room_id, state_group, state, context=state_entry
)
@cached(num_args=2, iterable=True, max_entries=100000)
async def _get_joined_users_from_context(
async def _get_joined_user_ids_from_context(
self,
room_id: str,
state_group: Union[object, int],
current_state_ids: StateMap[str],
event: Optional[EventBase] = None,
context: Optional["_StateCacheEntry"] = None,
) -> Dict[str, ProfileInfo]:
) -> Set[str]:
# We don't use `state_group`, it's there so that we can cache based
# on it. However, it's important that it's never None, since two current_states
# with a state_group of None are likely to be different.
assert state_group is not None
users_in_room = {}
users_in_room = set()
member_event_ids = [
e_id
for key, e_id in current_state_ids.items()
@ -879,11 +879,11 @@ class RoomMemberWorkerStore(EventsWorkerStore):
# If we do then we can reuse that result and simply update it with
# any membership changes in `delta_ids`
if context.prev_group and context.delta_ids:
prev_res = self._get_joined_users_from_context.cache.get_immediate(
prev_res = self._get_joined_user_ids_from_context.cache.get_immediate(
(room_id, context.prev_group), None
)
if prev_res and isinstance(prev_res, dict):
users_in_room = dict(prev_res)
if prev_res and isinstance(prev_res, set):
users_in_room = prev_res
member_event_ids = [
e_id
for key, e_id in context.delta_ids.items()
@ -891,7 +891,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
]
for etype, state_key in context.delta_ids:
if etype == EventTypes.Member:
users_in_room.pop(state_key, None)
users_in_room.discard(state_key)
# We check if we have any of the member event ids in the event cache
# before we ask the DB
@ -908,42 +908,41 @@ class RoomMemberWorkerStore(EventsWorkerStore):
ev_entry = event_map.get(event_id)
if ev_entry and not ev_entry.event.rejected_reason:
if ev_entry.event.membership == Membership.JOIN:
users_in_room[ev_entry.event.state_key] = ProfileInfo(
display_name=ev_entry.event.content.get("displayname", None),
avatar_url=ev_entry.event.content.get("avatar_url", None),
)
users_in_room.add(ev_entry.event.state_key)
else:
missing_member_event_ids.append(event_id)
if missing_member_event_ids:
event_to_memberships = await self._get_joined_profiles_from_event_ids(
event_to_memberships = await self._get_user_ids_from_membership_event_ids(
missing_member_event_ids
)
users_in_room.update(row for row in event_to_memberships.values() if row)
users_in_room.update(event_to_memberships.values())
if event is not None and event.type == EventTypes.Member:
if event.membership == Membership.JOIN:
if event.event_id in member_event_ids:
users_in_room[event.state_key] = ProfileInfo(
display_name=event.content.get("displayname", None),
avatar_url=event.content.get("avatar_url", None),
)
users_in_room.add(event.state_key)
return users_in_room
@cached(max_entries=10000)
def _get_joined_profile_from_event_id(
@cached(
max_entries=10000,
# This name matches the old function that has been replaced - the cache name
# is kept here to maintain backwards compatibility.
name="_get_joined_profile_from_event_id",
)
def _get_user_id_from_membership_event_id(
self, event_id: str
) -> Optional[Tuple[str, ProfileInfo]]:
raise NotImplementedError()
@cachedList(
cached_method_name="_get_joined_profile_from_event_id",
cached_method_name="_get_user_id_from_membership_event_id",
list_name="event_ids",
)
async def _get_joined_profiles_from_event_ids(
async def _get_user_ids_from_membership_event_ids(
self, event_ids: Iterable[str]
) -> Dict[str, Optional[Tuple[str, ProfileInfo]]]:
) -> Dict[str, str]:
"""For given set of member event_ids check if they point to a join
event and if so return the associated user and profile info.
@ -958,21 +957,13 @@ class RoomMemberWorkerStore(EventsWorkerStore):
table="room_memberships",
column="event_id",
iterable=event_ids,
retcols=("user_id", "display_name", "avatar_url", "event_id"),
retcols=("user_id", "event_id"),
keyvalues={"membership": Membership.JOIN},
batch_size=1000,
desc="_get_joined_profiles_from_event_ids",
desc="_get_user_ids_from_membership_event_ids",
)
return {
row["event_id"]: (
row["user_id"],
ProfileInfo(
avatar_url=row["avatar_url"], display_name=row["display_name"]
),
)
for row in rows
}
return {row["event_id"]: row["user_id"] for row in rows}
@cached(max_entries=10000)
async def is_host_joined(self, room_id: str, host: str) -> bool:
@ -1131,12 +1122,12 @@ class RoomMemberWorkerStore(EventsWorkerStore):
else:
# The cache doesn't match the state group or prev state group,
# so we calculate the result from first principles.
joined_users = await self.get_joined_users_from_state(
joined_user_ids = await self.get_joined_user_ids_from_state(
room_id, state, state_entry
)
cache.hosts_to_joined_users = {}
for user_id in joined_users:
for user_id in joined_user_ids:
host = intern_string(get_domain_from_id(user_id))
cache.hosts_to_joined_users.setdefault(host, set()).add(user_id)