From 9ac417fa88906d70de6a7c6f94d40fe11fc6d2fa Mon Sep 17 00:00:00 2001
From: Mark Haines <mark.haines@matrix.org>
Date: Tue, 22 Dec 2015 18:27:56 +0000
Subject: [PATCH 1/4] Add a cache for initialSync responses that expires after
 5 minutes

---
 synapse/handlers/message.py           | 24 ++++++++-
 synapse/util/caches/snapshot_cache.py | 71 +++++++++++++++++++++++++++
 2 files changed, 94 insertions(+), 1 deletion(-)
 create mode 100644 synapse/util/caches/snapshot_cache.py

diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index ccdd3d847..bef477b31 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -22,6 +22,7 @@ from synapse.events.utils import serialize_event
 from synapse.events.validator import EventValidator
 from synapse.util import unwrapFirstError
 from synapse.util.logcontext import PreserveLoggingContext
+from synapse.util.caches.snapshot_cache import SnapshotCache
 from synapse.types import UserID, RoomStreamToken, StreamToken
 
 from ._base import BaseHandler
@@ -45,6 +46,7 @@ class MessageHandler(BaseHandler):
         self.state = hs.get_state_handler()
         self.clock = hs.get_clock()
         self.validator = EventValidator()
+        self.snapshot_cache = SnapshotCache()
 
     @defer.inlineCallbacks
     def get_message(self, msg_id=None, room_id=None, sender_id=None,
@@ -326,9 +328,29 @@ class MessageHandler(BaseHandler):
             [serialize_event(c, now) for c in room_state.values()]
         )
 
-    @defer.inlineCallbacks
     def snapshot_all_rooms(self, user_id=None, pagin_config=None,
                            as_client_event=True, include_archived=False):
+        key = (
+            user_id,
+            pagin_config.from_token,
+            pagin_config.to_token,
+            pagin_config.direction,
+            pagin_config.limit,
+            as_client_event,
+            include_archived,
+        )
+        now_ms = self.clock.time_msec()
+        result = self.snapshot_cache.get(now_ms, key)
+        if result is not None:
+            return result
+
+        return self.snapshot_cache.set(now_ms, key, self._snapshot_all_rooms(
+            user_id, pagin_config, as_client_event, include_archived
+        ))
+
+    @defer.inlineCallbacks
+    def _snapshot_all_rooms(self, user_id=None, pagin_config=None,
+                            as_client_event=True, include_archived=False):
         """Retrieve a snapshot of all rooms the user is invited or has joined.
 
         This snapshot may include messages for all rooms where the user is
diff --git a/synapse/util/caches/snapshot_cache.py b/synapse/util/caches/snapshot_cache.py
new file mode 100644
index 000000000..b19aca05a
--- /dev/null
+++ b/synapse/util/caches/snapshot_cache.py
@@ -0,0 +1,71 @@
+# -*- coding: utf-8 -*-
+# Copyright 2015 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from synapse.util.async import ObservableDeferred
+
+
+class SnapshotCache(object):
+
+    DURATION_MS = 5 * 60 * 1000  # Cache results for 2 minutes.
+
+    def __init__(self):
+        self.pending_result_cache = {}  # Request that haven't finished yet.
+        self.prev_result_cache = {}  # The older requests that have finished.
+        self.next_result_cache = {}  # The newer requests that have finished.
+        self.time_last_rotated_ms = 0
+
+    def rotate(self, time_now_ms):
+        # Rotate once if the cache duration has passed since the last rotation.
+        if time_now_ms - self.time_last_rotated_ms > self.DURATION_MS:
+            self.prev_result_cache = self.next_result_cache
+            self.next_result_cache = {}
+            self.time_last_rotated_ms += self.DURATION_MS
+
+        # Rotate again if the cache duration has passed twice since the last
+        # rotation.
+        if time_now_ms - self.time_last_rotated_ms > self.DURATION_MS:
+            self.prev_result_cache = self.next_result_cache
+            self.next_result_cache = {}
+            self.time_last_rotated_ms = time_now_ms
+
+    def get(self, time_now_ms, key):
+        self.rotate(time_now_ms)
+        # This cache is intended to deduplicate requests, so we expect it to be
+        # missed most of the time. So we just lookup the key in all of the
+        # dictionaries rather than trying to short circuit the lookup if the
+        # key is found.
+        result = self.prev_result_cache.get(key)
+        result = self.next_result_cache.get(key, result)
+        result = self.pending_result_cache.get(key, result)
+        if result is not None:
+            return result.observe()
+
+    def set(self, time_now_ms, key, deferred):
+        self.rotate(time_now_ms)
+
+        result = ObservableDeferred(deferred)
+
+        self.pending_result_cache[key] = result
+
+        def shuffle_along(r):
+            # When the deferred completes we shuffle it along to the first
+            # generation of the result cache, so that it will eventually
+            # expire from the rotation of that cache.
+            self.next_result_cache[key] = result
+            self.pending_result_cache.pop(key, None)
+
+        result.observe().addBoth(shuffle_along)
+
+        return result.observe()

From 517fb9a023733c064dfabdcfdf4ed75bcff3f7bd Mon Sep 17 00:00:00 2001
From: Mark Haines <mark.haines@matrix.org>
Date: Tue, 22 Dec 2015 18:53:47 +0000
Subject: [PATCH 2/4] Move the doc string to the public facing method

---
 synapse/handlers/message.py | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index bef477b31..a1bed9b0d 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -330,6 +330,23 @@ class MessageHandler(BaseHandler):
 
     def snapshot_all_rooms(self, user_id=None, pagin_config=None,
                            as_client_event=True, include_archived=False):
+        """Retrieve a snapshot of all rooms the user is invited or has joined.
+
+        This snapshot may include messages for all rooms where the user is
+        joined, depending on the pagination config.
+
+        Args:
+            user_id (str): The ID of the user making the request.
+            pagin_config (synapse.api.streams.PaginationConfig): The pagination
+            config used to determine how many messages *PER ROOM* to return.
+            as_client_event (bool): True to get events in client-server format.
+            include_archived (bool): True to get rooms that the user has left
+        Returns:
+            A list of dicts with "room_id" and "membership" keys for all rooms
+            the user is currently invited or joined in on. Rooms where the user
+            is joined on, may return a "messages" key with messages, depending
+            on the specified PaginationConfig.
+        """
         key = (
             user_id,
             pagin_config.from_token,
@@ -351,23 +368,7 @@ class MessageHandler(BaseHandler):
     @defer.inlineCallbacks
     def _snapshot_all_rooms(self, user_id=None, pagin_config=None,
                             as_client_event=True, include_archived=False):
-        """Retrieve a snapshot of all rooms the user is invited or has joined.
 
-        This snapshot may include messages for all rooms where the user is
-        joined, depending on the pagination config.
-
-        Args:
-            user_id (str): The ID of the user making the request.
-            pagin_config (synapse.api.streams.PaginationConfig): The pagination
-            config used to determine how many messages *PER ROOM* to return.
-            as_client_event (bool): True to get events in client-server format.
-            include_archived (bool): True to get rooms that the user has left
-        Returns:
-            A list of dicts with "room_id" and "membership" keys for all rooms
-            the user is currently invited or joined in on. Rooms where the user
-            is joined on, may return a "messages" key with messages, depending
-            on the specified PaginationConfig.
-        """
         memberships = [Membership.INVITE, Membership.JOIN]
         if include_archived:
             memberships.append(Membership.LEAVE)

From 7fa71e32670aa0ed2b49d04fd3c66a72e8fbc1cf Mon Sep 17 00:00:00 2001
From: Mark Haines <mark.haines@matrix.org>
Date: Wed, 23 Dec 2015 11:48:03 +0000
Subject: [PATCH 3/4] Add a unit test for the snapshot cache

---
 synapse/util/caches/snapshot_cache.py |  4 +-
 tests/util/test_snapshot_cache.py     | 60 +++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 2 deletions(-)
 create mode 100644 tests/util/test_snapshot_cache.py

diff --git a/synapse/util/caches/snapshot_cache.py b/synapse/util/caches/snapshot_cache.py
index b19aca05a..8a7ca47a8 100644
--- a/synapse/util/caches/snapshot_cache.py
+++ b/synapse/util/caches/snapshot_cache.py
@@ -28,14 +28,14 @@ class SnapshotCache(object):
 
     def rotate(self, time_now_ms):
         # Rotate once if the cache duration has passed since the last rotation.
-        if time_now_ms - self.time_last_rotated_ms > self.DURATION_MS:
+        if time_now_ms - self.time_last_rotated_ms >= self.DURATION_MS:
             self.prev_result_cache = self.next_result_cache
             self.next_result_cache = {}
             self.time_last_rotated_ms += self.DURATION_MS
 
         # Rotate again if the cache duration has passed twice since the last
         # rotation.
-        if time_now_ms - self.time_last_rotated_ms > self.DURATION_MS:
+        if time_now_ms - self.time_last_rotated_ms >= self.DURATION_MS:
             self.prev_result_cache = self.next_result_cache
             self.next_result_cache = {}
             self.time_last_rotated_ms = time_now_ms
diff --git a/tests/util/test_snapshot_cache.py b/tests/util/test_snapshot_cache.py
new file mode 100644
index 000000000..f58576c94
--- /dev/null
+++ b/tests/util/test_snapshot_cache.py
@@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+# Copyright 2015 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from .. import unittest
+
+from synapse.util.caches.snapshot_cache import SnapshotCache
+from twisted.internet.defer import Deferred
+
+class SnapshotCacheTestCase(unittest.TestCase):
+
+    def setUp(self):
+        self.cache = SnapshotCache()
+        self.cache.DURATION_MS = 1
+
+    def test_get_set(self):
+        # Check that getting a missing key returns None
+        self.assertEquals(self.cache.get(0, "key"), None)
+
+        # Check that setting a key with a deferred returns
+        # a deferred that resolves when the initial deferred does
+        d = Deferred()
+        set_result = self.cache.set(0, "key", d)
+        self.assertIsNotNone(set_result)
+        self.assertFalse(set_result.called)
+
+        # Check that getting the key before the deferred has resolved
+        # returns a deferred that resolves when the initial deferred does.
+        get_result_at_10 = self.cache.get(10, "key")
+        self.assertIsNotNone(get_result_at_10)
+        self.assertFalse(get_result_at_10.called)
+
+        # Check that the returned deferreds resolve when the initial deferred
+        # does.
+        d.callback("v")
+        self.assertTrue(set_result.called)
+        self.assertTrue(get_result_at_10.called)
+
+        # Check that getting the key after the deferred has resolved
+        # before the cache expires returns a resolved deferred.
+        get_result_at_11 = self.cache.get(11, "key")
+        self.assertIsNotNone(get_result_at_11)
+        self.assertTrue(get_result_at_11.called)
+
+        # Check that getting the key after the deferred has resolved
+        # after the cache expires returns None
+        get_result_at_12 = self.cache.get(12, "key")
+        self.assertIsNone(get_result_at_12)

From d12c00bdc311bd0685aa7e7e70f1aa7787317164 Mon Sep 17 00:00:00 2001
From: Mark Haines <mark.haines@matrix.org>
Date: Wed, 23 Dec 2015 15:18:11 +0000
Subject: [PATCH 4/4] Add a docstring explaining what the snapshot cache does

---
 synapse/util/caches/snapshot_cache.py | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/synapse/util/caches/snapshot_cache.py b/synapse/util/caches/snapshot_cache.py
index 8a7ca47a8..09f00afbc 100644
--- a/synapse/util/caches/snapshot_cache.py
+++ b/synapse/util/caches/snapshot_cache.py
@@ -17,8 +17,28 @@ from synapse.util.async import ObservableDeferred
 
 
 class SnapshotCache(object):
+    """Cache for snapshots like the response of /initialSync.
+    The response of initialSync only has to be a recent snapshot of the
+    server state. It shouldn't matter to clients if it is a few minutes out
+    of date.
 
-    DURATION_MS = 5 * 60 * 1000  # Cache results for 2 minutes.
+    This caches a deferred response. Until the deferred completes it will be
+    returned from the cache. This means that if the client retries the request
+    while the response is still being computed, that original response will be
+    used rather than trying to compute a new response.
+
+    Once the deferred completes it is removed from the cache after 5 minutes.
+    We delay removing it from the cache because a client retrying its request
+    could race with us finishing computing the response.
+
+    Rather than tracking precisely how long something has been in the cache we
+    keep two generations of completed responses. Every 5 minutes we discard the
+    old generation, move the new generation to the old generation, and set the
+    new generation to be empty. This means that a result will be in the cache
+    somewhere between 5 and 10 minutes.
+    """
+
+    DURATION_MS = 5 * 60 * 1000  # Cache results for 5 minutes.
 
     def __init__(self):
         self.pending_result_cache = {}  # Request that haven't finished yet.
@@ -51,6 +71,8 @@ class SnapshotCache(object):
         result = self.pending_result_cache.get(key, result)
         if result is not None:
             return result.observe()
+        else:
+            return None
 
     def set(self, time_now_ms, key, deferred):
         self.rotate(time_now_ms)