From 05e129664931c114fcaae8bebe0a26685dcd9c6d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 16 Jan 2019 23:14:11 +0000 Subject: [PATCH] don't store more remote device lists if they have more than 1K devices (#4397) --- changelog.d/4397.bugfix | 1 + synapse/handlers/device.py | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 changelog.d/4397.bugfix diff --git a/changelog.d/4397.bugfix b/changelog.d/4397.bugfix new file mode 100644 index 000000000..e7526d445 --- /dev/null +++ b/changelog.d/4397.bugfix @@ -0,0 +1 @@ +Fix high CPU usage due to remote devicelist updates diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 9e017116a..8955cde4e 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -532,6 +532,25 @@ class DeviceListEduUpdater(object): stream_id = result["stream_id"] devices = result["devices"] + + # If the remote server has more than ~1000 devices for this user + # we assume that something is going horribly wrong (e.g. a bot + # that logs in and creates a new device every time it tries to + # send a message). Maintaining lots of devices per user in the + # cache can cause serious performance issues: if this request + # takes more than 60s to complete, internal replication from the + # inbound federation worker to the synapse master may time out + # causing the inbound federation to fail and causing the remote + # server to retry, causing a DoS. So in this scenario we give + # up on storing the total list of devices and only handle the + # delta instead. + if len(devices) > 1000: + logger.warn( + "Ignoring device list snapshot for %s as it has >1K devs (%d)", + user_id, len(devices) + ) + devices = [] + yield self.store.update_remote_device_list_cache( user_id, devices, stream_id, )