From d64e85197af31f5642f64ae1d86f5a0c74050fec Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Fri, 16 Sep 2022 16:16:05 +0100 Subject: [PATCH] Remove error spam when users query the keys of departed remote users (#13826) The error message introduced in #13749 has turned out to be very spammy. Remove it for now. --- changelog.d/13826.bugfix | 1 + synapse/handlers/e2e_keys.py | 21 ++++++++++++--------- 2 files changed, 13 insertions(+), 9 deletions(-) create mode 100644 changelog.d/13826.bugfix diff --git a/changelog.d/13826.bugfix b/changelog.d/13826.bugfix new file mode 100644 index 000000000..8ffafec07 --- /dev/null +++ b/changelog.d/13826.bugfix @@ -0,0 +1 @@ +Fix a long standing bug where device lists would remain cached when remote users left and rejoined the last room shared with the local homeserver. diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 8eed63ccf..09a2492af 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -188,18 +188,21 @@ class E2eKeysHandler: ) invalid_cached_users = cached_users - valid_cached_users if invalid_cached_users: - # Fix up results. If we get here, there is either a bug in device - # list tracking, or we hit the race mentioned above. + # Fix up results. If we get here, it means there was either a bug in + # device list tracking, or we hit the race mentioned above. + # TODO: In practice, this path is hit fairly often in existing + # deployments when clients query the keys of departed remote + # users. A background update to mark the appropriate device + # lists as unsubscribed is needed. + # https://github.com/matrix-org/synapse/issues/13651 + # Note that this currently introduces a failure mode when clients + # are trying to decrypt old messages from a remote user whose + # homeserver is no longer available. We may want to consider falling + # back to the cached data when we fail to retrieve a device list + # over federation for such remote users. user_ids_not_in_cache.update(invalid_cached_users) for invalid_user_id in invalid_cached_users: remote_results.pop(invalid_user_id) - # This log message may be removed if it turns out it's almost - # entirely triggered by races. - logger.error( - "Devices for %s were cached, but the server no longer shares " - "any rooms with them. The cached device lists are stale.", - invalid_cached_users, - ) for user_id, devices in remote_results.items(): user_devices = results.setdefault(user_id, {})