From 9ac417fa88906d70de6a7c6f94d40fe11fc6d2fa Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Tue, 22 Dec 2015 18:27:56 +0000 Subject: [PATCH 1/4] Add a cache for initialSync responses that expires after 5 minutes --- synapse/handlers/message.py | 24 ++++++++- synapse/util/caches/snapshot_cache.py | 71 +++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 synapse/util/caches/snapshot_cache.py diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index ccdd3d847..bef477b31 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -22,6 +22,7 @@ from synapse.events.utils import serialize_event from synapse.events.validator import EventValidator from synapse.util import unwrapFirstError from synapse.util.logcontext import PreserveLoggingContext +from synapse.util.caches.snapshot_cache import SnapshotCache from synapse.types import UserID, RoomStreamToken, StreamToken from ._base import BaseHandler @@ -45,6 +46,7 @@ class MessageHandler(BaseHandler): self.state = hs.get_state_handler() self.clock = hs.get_clock() self.validator = EventValidator() + self.snapshot_cache = SnapshotCache() @defer.inlineCallbacks def get_message(self, msg_id=None, room_id=None, sender_id=None, @@ -326,9 +328,29 @@ class MessageHandler(BaseHandler): [serialize_event(c, now) for c in room_state.values()] ) - @defer.inlineCallbacks def snapshot_all_rooms(self, user_id=None, pagin_config=None, as_client_event=True, include_archived=False): + key = ( + user_id, + pagin_config.from_token, + pagin_config.to_token, + pagin_config.direction, + pagin_config.limit, + as_client_event, + include_archived, + ) + now_ms = self.clock.time_msec() + result = self.snapshot_cache.get(now_ms, key) + if result is not None: + return result + + return self.snapshot_cache.set(now_ms, key, self._snapshot_all_rooms( + user_id, pagin_config, as_client_event, include_archived + )) + + @defer.inlineCallbacks + def _snapshot_all_rooms(self, 
user_id=None, pagin_config=None, + as_client_event=True, include_archived=False): """Retrieve a snapshot of all rooms the user is invited or has joined. This snapshot may include messages for all rooms where the user is diff --git a/synapse/util/caches/snapshot_cache.py b/synapse/util/caches/snapshot_cache.py new file mode 100644 index 000000000..b19aca05a --- /dev/null +++ b/synapse/util/caches/snapshot_cache.py @@ -0,0 +1,71 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.util.async import ObservableDeferred + + +class SnapshotCache(object): + + DURATION_MS = 5 * 60 * 1000 # Cache results for 2 minutes. + + def __init__(self): + self.pending_result_cache = {} # Request that haven't finished yet. + self.prev_result_cache = {} # The older requests that have finished. + self.next_result_cache = {} # The newer requests that have finished. + self.time_last_rotated_ms = 0 + + def rotate(self, time_now_ms): + # Rotate once if the cache duration has passed since the last rotation. + if time_now_ms - self.time_last_rotated_ms > self.DURATION_MS: + self.prev_result_cache = self.next_result_cache + self.next_result_cache = {} + self.time_last_rotated_ms += self.DURATION_MS + + # Rotate again if the cache duration has passed twice since the last + # rotation. 
+ if time_now_ms - self.time_last_rotated_ms > self.DURATION_MS: + self.prev_result_cache = self.next_result_cache + self.next_result_cache = {} + self.time_last_rotated_ms = time_now_ms + + def get(self, time_now_ms, key): + self.rotate(time_now_ms) + # This cache is intended to deduplicate requests, so we expect it to be + # missed most of the time. So we just lookup the key in all of the + # dictionaries rather than trying to short circuit the lookup if the + # key is found. + result = self.prev_result_cache.get(key) + result = self.next_result_cache.get(key, result) + result = self.pending_result_cache.get(key, result) + if result is not None: + return result.observe() + + def set(self, time_now_ms, key, deferred): + self.rotate(time_now_ms) + + result = ObservableDeferred(deferred) + + self.pending_result_cache[key] = result + + def shuffle_along(r): + # When the deferred completes we shuffle it along to the first + # generation of the result cache. So that it will eventually + # expire from the rotation of that cache. + self.next_result_cache[key] = result + self.pending_result_cache.pop(key, None) + + result.observe().addBoth(shuffle_along) + + return result.observe() From 517fb9a023733c064dfabdcfdf4ed75bcff3f7bd Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Tue, 22 Dec 2015 18:53:47 +0000 Subject: [PATCH 2/4] Move the doc string to the public facing method --- synapse/handlers/message.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index bef477b31..a1bed9b0d 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -330,6 +330,23 @@ class MessageHandler(BaseHandler): def snapshot_all_rooms(self, user_id=None, pagin_config=None, as_client_event=True, include_archived=False): + """Retrieve a snapshot of all rooms the user is invited or has joined. 
+ + This snapshot may include messages for all rooms where the user is + joined, depending on the pagination config. + + Args: + user_id (str): The ID of the user making the request. + pagin_config (synapse.api.streams.PaginationConfig): The pagination + config used to determine how many messages *PER ROOM* to return. + as_client_event (bool): True to get events in client-server format. + include_archived (bool): True to get rooms that the user has left + Returns: + A list of dicts with "room_id" and "membership" keys for all rooms + the user is currently invited or joined in on. Rooms where the user + is joined on, may return a "messages" key with messages, depending + on the specified PaginationConfig. + """ key = ( user_id, pagin_config.from_token, @@ -351,23 +368,7 @@ class MessageHandler(BaseHandler): @defer.inlineCallbacks def _snapshot_all_rooms(self, user_id=None, pagin_config=None, as_client_event=True, include_archived=False): - """Retrieve a snapshot of all rooms the user is invited or has joined. - This snapshot may include messages for all rooms where the user is - joined, depending on the pagination config. - - Args: - user_id (str): The ID of the user making the request. - pagin_config (synapse.api.streams.PaginationConfig): The pagination - config used to determine how many messages *PER ROOM* to return. - as_client_event (bool): True to get events in client-server format. - include_archived (bool): True to get rooms that the user has left - Returns: - A list of dicts with "room_id" and "membership" keys for all rooms - the user is currently invited or joined in on. Rooms where the user - is joined on, may return a "messages" key with messages, depending - on the specified PaginationConfig. 
- """ memberships = [Membership.INVITE, Membership.JOIN] if include_archived: memberships.append(Membership.LEAVE) From 7fa71e32670aa0ed2b49d04fd3c66a72e8fbc1cf Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 23 Dec 2015 11:48:03 +0000 Subject: [PATCH 3/4] Add a unit test for the snapshot cache --- synapse/util/caches/snapshot_cache.py | 4 +- tests/util/test_snapshot_cache.py | 60 +++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 tests/util/test_snapshot_cache.py diff --git a/synapse/util/caches/snapshot_cache.py b/synapse/util/caches/snapshot_cache.py index b19aca05a..8a7ca47a8 100644 --- a/synapse/util/caches/snapshot_cache.py +++ b/synapse/util/caches/snapshot_cache.py @@ -28,14 +28,14 @@ class SnapshotCache(object): def rotate(self, time_now_ms): # Rotate once if the cache duration has passed since the last rotation. - if time_now_ms - self.time_last_rotated_ms > self.DURATION_MS: + if time_now_ms - self.time_last_rotated_ms >= self.DURATION_MS: self.prev_result_cache = self.next_result_cache self.next_result_cache = {} self.time_last_rotated_ms += self.DURATION_MS # Rotate again if the cache duration has passed twice since the last # rotation. - if time_now_ms - self.time_last_rotated_ms > self.DURATION_MS: + if time_now_ms - self.time_last_rotated_ms >= self.DURATION_MS: self.prev_result_cache = self.next_result_cache self.next_result_cache = {} self.time_last_rotated_ms = time_now_ms diff --git a/tests/util/test_snapshot_cache.py b/tests/util/test_snapshot_cache.py new file mode 100644 index 000000000..f58576c94 --- /dev/null +++ b/tests/util/test_snapshot_cache.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .. import unittest + +from synapse.util.caches.snapshot_cache import SnapshotCache +from twisted.internet.defer import Deferred + +class SnapshotCacheTestCase(unittest.TestCase): + + def setUp(self): + self.cache = SnapshotCache() + self.cache.DURATION_MS = 1 + + def test_get_set(self): + # Check that getting a missing key returns None + self.assertEquals(self.cache.get(0, "key"), None) + + # Check that setting a key with a deferred returns + # a deferred that resolves when the initial deferred does + d = Deferred() + set_result = self.cache.set(0, "key", d) + self.assertIsNotNone(set_result) + self.assertFalse(set_result.called) + + # Check that getting the key before the deferred has resolved + # returns a deferred that resolves when the initial deferred does. + get_result_at_10 = self.cache.get(10, "key") + self.assertIsNotNone(get_result_at_10) + self.assertFalse(get_result_at_10.called) + + # Check that the returned deferreds resolve when the initial deferred + # does. + d.callback("v") + self.assertTrue(set_result.called) + self.assertTrue(get_result_at_10.called) + + # Check that getting the key after the deferred has resolved + # before the cache expires returns a resolved deferred. 
+ get_result_at_11 = self.cache.get(11, "key") + self.assertIsNotNone(get_result_at_11) + self.assertTrue(get_result_at_11.called) + + # Check that getting the key after the deferred has resolved + # after the cache expires returns None + get_result_at_12 = self.cache.get(12, "key") + self.assertIsNone(get_result_at_12) From d12c00bdc311bd0685aa7e7e70f1aa7787317164 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 23 Dec 2015 15:18:11 +0000 Subject: [PATCH 4/4] Add some docstring explaining what the snapshot cache does --- synapse/util/caches/snapshot_cache.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/synapse/util/caches/snapshot_cache.py b/synapse/util/caches/snapshot_cache.py index 8a7ca47a8..09f00afbc 100644 --- a/synapse/util/caches/snapshot_cache.py +++ b/synapse/util/caches/snapshot_cache.py @@ -17,8 +17,28 @@ from synapse.util.async import ObservableDeferred class SnapshotCache(object): + """Cache for snapshots like the response of /initialSync. + The response of initialSync only has to be a recent snapshot of the + server state. It shouldn't matter to clients if it is a few minutes out + of date. - DURATION_MS = 5 * 60 * 1000 # Cache results for 2 minutes. + This caches a deferred response. Until the deferred completes it will be + returned from the cache. This means that if the client retries the request + while the response is still being computed, that original response will be + used rather than trying to compute a new response. + + Once the deferred completes it will be removed from the cache after 5 minutes. + We delay removing it from the cache because a client retrying its request + could race with us finishing computing the response. + + Rather than tracking precisely how long something has been in the cache we + keep two generations of completed responses. Every 5 minutes discard the + old generation, move the new generation to the old generation, and set the + new generation to be empty. 
This means that a result will be in the cache + somewhere between 5 and 10 minutes. + """ + + DURATION_MS = 5 * 60 * 1000 # Cache results for 5 minutes. def __init__(self): self.pending_result_cache = {} # Request that haven't finished yet. @@ -51,6 +71,8 @@ class SnapshotCache(object): result = self.pending_result_cache.get(key, result) if result is not None: return result.observe() + else: + return None def set(self, time_now_ms, key, deferred): self.rotate(time_now_ms)