Sliding Sync: Add more tracing (#17514)

Spawning from looking at a couple traces and wanting a little more info.

Follow-up to github.com/element-hq/synapse/pull/17501

The changes in this PR allow you to find slow Sliding Sync traces ignoring the
`wait_for_events` time. In Jaeger, you can now filter for the `current_sync_for_user`
operation with `RESULT.result=true` indicating that it actually returned non-empty results.

If you want to find traces for your own user, you can use
`RESULT.result=true ARG.sync_config.user="@madlittlemods:matrix.org"`
This commit is contained in:
Eric Eastwood 2024-08-06 11:43:43 -05:00 committed by GitHub
parent bef6568537
commit 1dfa59b238
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 351 additions and 245 deletions

1
changelog.d/17514.misc Normal file
View File

@ -0,0 +1 @@
Add more tracing to experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint.

View File

@ -51,7 +51,14 @@ from synapse.api.errors import SlidingSyncUnknownPosition
from synapse.events import EventBase, StrippedStateEvent
from synapse.events.utils import parse_stripped_state_event, strip_event
from synapse.handlers.relations import BundledAggregations
from synapse.logging.opentracing import log_kv, start_active_span, tag_args, trace
from synapse.logging.opentracing import (
SynapseTags,
log_kv,
set_tag,
start_active_span,
tag_args,
trace,
)
from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary
from synapse.storage.databases.main.state import (
ROOM_UNKNOWN_SENTINEL,
@ -534,6 +541,7 @@ class SlidingSyncHandler:
# Keep track of the rooms that we can display and need to fetch more info about
relevant_room_map: Dict[str, RoomSyncConfig] = {}
if has_lists and sync_config.lists is not None:
with start_active_span("assemble_sliding_window_lists"):
sync_room_map = await self.filter_rooms_relevant_for_sync(
user=sync_config.user,
room_membership_for_user_map=room_membership_for_user_map,
@ -544,7 +552,10 @@ class SlidingSyncHandler:
filtered_sync_room_map = sync_room_map
if list_config.filters is not None:
filtered_sync_room_map = await self.filter_rooms(
sync_config.user, sync_room_map, list_config.filters, to_token
sync_config.user,
sync_room_map,
list_config.filters,
to_token,
)
# Sort the list
@ -554,9 +565,11 @@ class SlidingSyncHandler:
# Find which rooms are partially stated and may need to be filtered out
# depending on the `required_state` requested (see below).
partial_state_room_map = await self.store.is_partial_state_room_batched(
partial_state_room_map = (
await self.store.is_partial_state_room_batched(
filtered_sync_room_map.keys()
)
)
# Since creating the `RoomSyncConfig` takes some work, let's just do it
# once and make a copy whenever we need it.
@ -590,14 +603,19 @@ class SlidingSyncHandler:
# Exclude partially-stated rooms unless the `required_state`
# only has `["m.room.member", "$LAZY"]` for membership
# (lazy-loading room members).
if partial_state_room_map.get(room_id) and not lazy_loading:
if (
partial_state_room_map.get(room_id)
and not lazy_loading
):
continue
# Take the superset of the `RoomSyncConfig` for each room.
#
# Update our `relevant_room_map` with the room we're going
# to display and need to fetch more info about.
existing_room_sync_config = relevant_room_map.get(room_id)
existing_room_sync_config = relevant_room_map.get(
room_id
)
if existing_room_sync_config is not None:
existing_room_sync_config.combine_room_sync_config(
room_sync_config
@ -626,7 +644,11 @@ class SlidingSyncHandler:
# Handle room subscriptions
if has_room_subscriptions and sync_config.room_subscriptions is not None:
for room_id, room_subscription in sync_config.room_subscriptions.items():
with start_active_span("assemble_room_subscriptions"):
for (
room_id,
room_subscription,
) in sync_config.room_subscriptions.items():
room_membership_for_user_at_to_token = (
await self.check_room_subscription_allowed_for_user(
room_id=room_id,
@ -647,10 +669,14 @@ class SlidingSyncHandler:
#
# Update our `relevant_room_map` with the room we're going to display
# and need to fetch more info about.
room_sync_config = RoomSyncConfig.from_room_config(room_subscription)
room_sync_config = RoomSyncConfig.from_room_config(
room_subscription
)
existing_room_sync_config = relevant_room_map.get(room_id)
if existing_room_sync_config is not None:
existing_room_sync_config.combine_room_sync_config(room_sync_config)
existing_room_sync_config.combine_room_sync_config(
room_sync_config
)
else:
relevant_room_map[room_id] = room_sync_config
@ -661,6 +687,7 @@ class SlidingSyncHandler:
# previously.
# Keep track of the rooms that we're going to display and need to fetch more info about
relevant_rooms_to_send_map = relevant_room_map
with start_active_span("filter_relevant_rooms_to_send"):
if from_token:
rooms_should_send = set()
@ -754,13 +781,20 @@ class SlidingSyncHandler:
# Initial sync without a `from_token` starts at `0`
connection_position = 0
return SlidingSyncResult(
sliding_sync_result = SlidingSyncResult(
next_pos=SlidingSyncStreamToken(to_token, connection_position),
lists=lists,
rooms=rooms,
extensions=extensions,
)
# Make it easy to find traces for syncs that aren't empty
set_tag(SynapseTags.RESULT_PREFIX + "result", bool(sliding_sync_result))
set_tag(SynapseTags.FUNC_ARG_PREFIX + "sync_config.user", user_id)
return sliding_sync_result
@trace
async def get_room_membership_for_user_at_to_token(
self,
user: UserID,
@ -1099,6 +1133,7 @@ class SlidingSyncHandler:
return sync_room_id_set
@trace
async def filter_rooms_relevant_for_sync(
self,
user: UserID,
@ -1209,6 +1244,7 @@ class SlidingSyncHandler:
# return None
@trace
async def _bulk_get_stripped_state_for_rooms_from_sync_room_map(
self,
room_ids: StrCollection,
@ -1299,6 +1335,7 @@ class SlidingSyncHandler:
return room_id_to_stripped_state_map
@trace
async def _bulk_get_partial_current_state_content_for_rooms(
self,
content_type: Literal[
@ -1498,6 +1535,7 @@ class SlidingSyncHandler:
# Filter for Direct-Message (DM) rooms
if filters.is_dm is not None:
with start_active_span("filters.is_dm"):
if filters.is_dm:
# Only DM rooms please
filtered_room_id_set = {
@ -1514,10 +1552,12 @@ class SlidingSyncHandler:
}
if filters.spaces is not None:
with start_active_span("filters.spaces"):
raise NotImplementedError()
# Filter for encrypted rooms
if filters.is_encrypted is not None:
with start_active_span("filters.is_encrypted"):
room_id_to_encryption = (
await self._bulk_get_partial_current_state_content_for_rooms(
content_type="room_encryption",
@ -1531,7 +1571,9 @@ class SlidingSyncHandler:
# Make a copy so we don't run into an error: `Set changed size during
# iteration`, when we filter out and remove items
for room_id in filtered_room_id_set.copy():
encryption = room_id_to_encryption.get(room_id, ROOM_UNKNOWN_SENTINEL)
encryption = room_id_to_encryption.get(
room_id, ROOM_UNKNOWN_SENTINEL
)
# Just remove rooms if we can't determine their encryption status
if encryption is ROOM_UNKNOWN_SENTINEL:
@ -1548,6 +1590,7 @@ class SlidingSyncHandler:
# Filter for rooms that the user has been invited to
if filters.is_invite is not None:
with start_active_span("filters.is_invite"):
# Make a copy so we don't run into an error: `Set changed size during
# iteration`, when we filter out and remove items
for room_id in filtered_room_id_set.copy():
@ -1555,7 +1598,8 @@ class SlidingSyncHandler:
# If we're looking for invite rooms, filter out rooms that the user is
# not invited to and vice versa
if (
filters.is_invite and room_for_user.membership != Membership.INVITE
filters.is_invite
and room_for_user.membership != Membership.INVITE
) or (
not filters.is_invite
and room_for_user.membership == Membership.INVITE
@ -1566,6 +1610,7 @@ class SlidingSyncHandler:
# provided in the list. `None` is a valid type for rooms which do not have a
# room type.
if filters.room_types is not None or filters.not_room_types is not None:
with start_active_span("filters.room_types"):
room_id_to_type = (
await self._bulk_get_partial_current_state_content_for_rooms(
content_type="room_type",
@ -1599,6 +1644,7 @@ class SlidingSyncHandler:
filtered_room_id_set.remove(room_id)
if filters.room_name_like is not None:
with start_active_span("filters.room_name_like"):
# TODO: The room name is a bit more sensitive to leak than the
# create/encryption event. Maybe we should consider a better way to fetch
# historical state before implementing this.
@ -1612,10 +1658,8 @@ class SlidingSyncHandler:
# )
raise NotImplementedError()
if filters.tags is not None:
raise NotImplementedError()
if filters.not_tags is not None:
if filters.tags is not None or filters.not_tags is not None:
with start_active_span("filters.tags"):
raise NotImplementedError()
# Assemble a new sync room map but only with the `filtered_room_id_set`
@ -1678,6 +1722,7 @@ class SlidingSyncHandler:
reverse=True,
)
@trace
async def get_current_state_ids_at(
self,
room_id: str,
@ -1742,6 +1787,7 @@ class SlidingSyncHandler:
return state_ids
@trace
async def get_current_state_at(
self,
room_id: str,
@ -1803,6 +1849,15 @@ class SlidingSyncHandler:
"""
user = sync_config.user
set_tag(
SynapseTags.FUNC_ARG_PREFIX + "membership",
room_membership_for_user_at_to_token.membership,
)
set_tag(
SynapseTags.FUNC_ARG_PREFIX + "timeline_limit",
room_sync_config.timeline_limit,
)
# Determine whether we should limit the timeline to the token range.
#
# We should return historical messages (before token range) in the
@ -2070,6 +2125,10 @@ class SlidingSyncHandler:
if StateValues.WILDCARD in room_sync_config.required_state_map.get(
StateValues.WILDCARD, set()
):
set_tag(
SynapseTags.FUNC_ARG_PREFIX + "required_state_wildcard",
True,
)
required_state_filter = StateFilter.all()
# TODO: `StateFilter` currently doesn't support wildcard event types. We're
# currently working around this by returning all state to the client but it
@ -2079,6 +2138,10 @@ class SlidingSyncHandler:
room_sync_config.required_state_map.get(StateValues.WILDCARD)
is not None
):
set_tag(
SynapseTags.FUNC_ARG_PREFIX + "required_state_wildcard_event_type",
True,
)
required_state_filter = StateFilter.all()
else:
required_state_types: List[Tuple[str, Optional[str]]] = []
@ -2086,8 +2149,12 @@ class SlidingSyncHandler:
state_type,
state_key_set,
) in room_sync_config.required_state_map.items():
num_wild_state_keys = 0
lazy_load_room_members = False
num_others = 0
for state_key in state_key_set:
if state_key == StateValues.WILDCARD:
num_wild_state_keys += 1
# `None` is a wildcard in the `StateFilter`
required_state_types.append((state_type, None))
# We need to fetch all relevant people when we're lazy-loading membership
@ -2095,6 +2162,7 @@ class SlidingSyncHandler:
state_type == EventTypes.Member
and state_key == StateValues.LAZY
):
lazy_load_room_members = True
# Everyone in the timeline is relevant
timeline_membership: Set[str] = set()
if timeline_events is not None:
@ -2109,10 +2177,26 @@ class SlidingSyncHandler:
# FIXME: We probably also care about invite, ban, kick, targets, etc
# but the spec only mentions "senders".
elif state_key == StateValues.ME:
num_others += 1
required_state_types.append((state_type, user.to_string()))
else:
num_others += 1
required_state_types.append((state_type, state_key))
set_tag(
SynapseTags.FUNC_ARG_PREFIX
+ "required_state_wildcard_state_key_count",
num_wild_state_keys,
)
set_tag(
SynapseTags.FUNC_ARG_PREFIX + "required_state_lazy",
lazy_load_room_members,
)
set_tag(
SynapseTags.FUNC_ARG_PREFIX + "required_state_other_count",
num_others,
)
required_state_filter = StateFilter.from_types(required_state_types)
# We need this base set of info for the response so let's just fetch it along
@ -2208,6 +2292,8 @@ class SlidingSyncHandler:
if new_bump_event_pos.stream > 0:
bump_stamp = new_bump_event_pos.stream
set_tag(SynapseTags.RESULT_PREFIX + "initial", initial)
return SlidingSyncResult.RoomResult(
name=room_name,
avatar=room_avatar,
@ -2863,6 +2949,7 @@ class SlidingSyncConnectionStore:
return room_status
@trace
async def record_rooms(
self,
sync_config: SlidingSyncConfig,
@ -2938,6 +3025,7 @@ class SlidingSyncConnectionStore:
return new_store_token
@trace
async def mark_token_seen(
self,
sync_config: SlidingSyncConfig,

View File

@ -899,6 +899,9 @@ class SlidingSyncRestServlet(RestServlet):
body = parse_and_validate_json_object_from_request(request, SlidingSyncBody)
# Tag and log useful data to differentiate requests.
set_tag(
"sliding_sync.sync_type", "initial" if from_token is None else "incremental"
)
set_tag("sliding_sync.conn_id", body.conn_id or "")
log_kv(
{
@ -912,6 +915,12 @@ class SlidingSyncRestServlet(RestServlet):
"sliding_sync.room_subscriptions": list(
(body.room_subscriptions or {}).keys()
),
# We also include the number of room subscriptions because logs are
# limited to 1024 characters and the large room ID list above can be cut
# off.
"sliding_sync.num_room_subscriptions": len(
(body.room_subscriptions or {}).keys()
),
}
)

View File

@ -39,6 +39,7 @@ from typing import (
import attr
from synapse.api.constants import EventTypes, Membership
from synapse.logging.opentracing import trace
from synapse.metrics import LaterGauge
from synapse.metrics.background_process_metrics import wrap_as_background_process
from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
@ -422,6 +423,7 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore):
return invite
return None
@trace
async def get_rooms_for_local_user_where_membership_is(
self,
user_id: str,

View File

@ -24,6 +24,7 @@ from typing import List, Optional, Tuple
import attr
from synapse.logging.opentracing import trace
from synapse.storage._base import SQLBaseStore
from synapse.storage.database import LoggingTransaction
from synapse.storage.databases.main.stream import _filter_results_by_stream
@ -159,6 +160,7 @@ class StateDeltasStore(SQLBaseStore):
self._get_max_stream_id_in_current_state_deltas_txn,
)
@trace
async def get_current_state_deltas_for_room(
self, room_id: str, from_token: RoomStreamToken, to_token: RoomStreamToken
) -> List[StateDelta]:

View File

@ -67,7 +67,7 @@ from synapse.api.constants import Direction, EventTypes, Membership
from synapse.api.filtering import Filter
from synapse.events import EventBase
from synapse.logging.context import make_deferred_yieldable, run_in_background
from synapse.logging.opentracing import trace
from synapse.logging.opentracing import tag_args, trace
from synapse.storage._base import SQLBaseStore
from synapse.storage.database import (
DatabasePool,
@ -812,6 +812,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
return ret, key
@trace
async def get_current_state_delta_membership_changes_for_user(
self,
user_id: str,
@ -1186,6 +1187,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
return None
@trace
async def get_last_event_pos_in_room_before_stream_ordering(
self,
room_id: str,
@ -1940,6 +1942,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
return rows, next_token
@trace
@tag_args
async def paginate_room_events(
self,
room_id: str,
@ -2105,6 +2108,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
return None
@trace
def get_rooms_that_might_have_updates(
self, room_ids: StrCollection, from_token: RoomStreamToken
) -> StrCollection: