Improve opentracing support for ResponseCache (#11607)

This adds some opentracing annotations to ResponseCache, to make it easier to see what's going on; in particular, it adds a link back to the initial trace which is actually doing the work of generating the response.
This commit is contained in:
Richard van der Hoff 2021-12-20 18:12:08 +00:00 committed by GitHub
parent 60fa4935b5
commit c3e38b88f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 150 additions and 49 deletions

1
changelog.d/11607.misc Normal file
View File

@ -0,0 +1 @@
Improve opentracing support for requests which use a `ResponseCache`.

View File

@ -172,7 +172,7 @@ class RoomCreationHandler:
user_id = requester.user.to_string()
# Check if this room is already being upgraded by another person
for key in self._upgrade_response_cache.pending_result_cache:
for key in self._upgrade_response_cache.keys():
if key[0] == old_room_id and key[1] != user_id:
# Two different people are trying to upgrade the same room.
# Send the second an error.

View File

@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
import collections
import inspect
import itertools
@ -57,7 +58,26 @@ logger = logging.getLogger(__name__)
_T = TypeVar("_T")
class ObservableDeferred(Generic[_T]):
class AbstractObservableDeferred(Generic[_T], metaclass=abc.ABCMeta):
"""Abstract base class defining the consumer interface of ObservableDeferred"""
__slots__ = ()
@abc.abstractmethod
def observe(self) -> "defer.Deferred[_T]":
"""Add a new observer for this ObservableDeferred
This returns a brand new deferred that is resolved when the underlying
deferred is resolved. Interacting with the returned deferred does not
effect the underlying deferred.
Note that the returned Deferred doesn't follow the Synapse logcontext rules -
you will probably want to `make_deferred_yieldable` it.
"""
...
class ObservableDeferred(Generic[_T], AbstractObservableDeferred[_T]):
"""Wraps a deferred object so that we can add observer deferreds. These
observer deferreds do not affect the callback chain of the original
deferred.

View File

@ -12,19 +12,37 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from typing import Any, Awaitable, Callable, Dict, Generic, Optional, TypeVar
from typing import (
TYPE_CHECKING,
Any,
Awaitable,
Callable,
Dict,
Generic,
Iterable,
Optional,
TypeVar,
)
import attr
from twisted.internet import defer
from synapse.logging.context import make_deferred_yieldable, run_in_background
from synapse.logging.opentracing import (
active_span,
start_active_span,
start_active_span_follows_from,
)
from synapse.util import Clock
from synapse.util.async_helpers import ObservableDeferred
from synapse.util.async_helpers import AbstractObservableDeferred, ObservableDeferred
from synapse.util.caches import register_cache
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
import opentracing
# the type of the key in the cache
KV = TypeVar("KV")
@ -54,6 +72,20 @@ class ResponseCacheContext(Generic[KV]):
"""
@attr.s(auto_attribs=True)
class ResponseCacheEntry:
result: AbstractObservableDeferred
"""The (possibly incomplete) result of the operation.
Note that we continue to store an ObservableDeferred even after the operation
completes (rather than switching to an immediate value), since that makes it
easier to cache Failure results.
"""
opentracing_span_context: "Optional[opentracing.SpanContext]"
"""The opentracing span which generated/is generating the result"""
class ResponseCache(Generic[KV]):
"""
This caches a deferred response. Until the deferred completes it will be
@ -63,10 +95,7 @@ class ResponseCache(Generic[KV]):
"""
def __init__(self, clock: Clock, name: str, timeout_ms: float = 0):
# This is poorly-named: it includes both complete and incomplete results.
# We keep complete results rather than switching to absolute values because
# that makes it easier to cache Failure results.
self.pending_result_cache: Dict[KV, ObservableDeferred] = {}
self._result_cache: Dict[KV, ResponseCacheEntry] = {}
self.clock = clock
self.timeout_sec = timeout_ms / 1000.0
@ -75,56 +104,63 @@ class ResponseCache(Generic[KV]):
self._metrics = register_cache("response_cache", name, self, resizable=False)
def size(self) -> int:
return len(self.pending_result_cache)
return len(self._result_cache)
def __len__(self) -> int:
return self.size()
def get(self, key: KV) -> Optional[defer.Deferred]:
def keys(self) -> Iterable[KV]:
"""Get the keys currently in the result cache
Returns both incomplete entries, and (if the timeout on this cache is non-zero),
complete entries which are still in the cache.
Note that the returned iterator is not safe in the face of concurrent execution:
behaviour is undefined if `wrap` is called during iteration.
"""
return self._result_cache.keys()
def _get(self, key: KV) -> Optional[ResponseCacheEntry]:
"""Look up the given key.
Returns a new Deferred (which also doesn't follow the synapse
logcontext rules). You will probably want to make_deferred_yieldable the result.
If there is no entry for the key, returns None.
Args:
key: key to get/set in the cache
key: key to get in the cache
Returns:
None if there is no entry for this key; otherwise a deferred which
resolves to the result.
The entry for this key, if any; else None.
"""
result = self.pending_result_cache.get(key)
if result is not None:
entry = self._result_cache.get(key)
if entry is not None:
self._metrics.inc_hits()
return result.observe()
return entry
else:
self._metrics.inc_misses()
return None
def _set(
self, context: ResponseCacheContext[KV], deferred: "defer.Deferred[RV]"
) -> "defer.Deferred[RV]":
self,
context: ResponseCacheContext[KV],
deferred: "defer.Deferred[RV]",
opentracing_span_context: "Optional[opentracing.SpanContext]",
) -> ResponseCacheEntry:
"""Set the entry for the given key to the given deferred.
*deferred* should run its callbacks in the sentinel logcontext (ie,
you should wrap normal synapse deferreds with
synapse.logging.context.run_in_background).
Returns a new Deferred (which also doesn't follow the synapse logcontext rules).
You will probably want to make_deferred_yieldable the result.
Args:
context: Information about the cache miss
deferred: The deferred which resolves to the result.
opentracing_span_context: An opentracing span wrapping the calculation
Returns:
A new deferred which resolves to the actual result.
The cache entry object.
"""
result = ObservableDeferred(deferred, consumeErrors=True)
key = context.cache_key
self.pending_result_cache[key] = result
entry = ResponseCacheEntry(result, opentracing_span_context)
self._result_cache[key] = entry
def on_complete(r: RV) -> RV:
# if this cache has a non-zero timeout, and the callback has not cleared
@ -132,18 +168,18 @@ class ResponseCache(Generic[KV]):
# its removal later.
if self.timeout_sec and context.should_cache:
self.clock.call_later(
self.timeout_sec, self.pending_result_cache.pop, key, None
self.timeout_sec, self._result_cache.pop, key, None
)
else:
# otherwise, remove the result immediately.
self.pending_result_cache.pop(key, None)
self._result_cache.pop(key, None)
return r
# make sure we do this *after* adding the entry to pending_result_cache,
# make sure we do this *after* adding the entry to result_cache,
# in case the result is already complete (in which case flipping the order would
# leave us with a stuck entry in the cache).
result.addBoth(on_complete)
return result.observe()
return entry
async def wrap(
self,
@ -189,20 +225,41 @@ class ResponseCache(Generic[KV]):
Returns:
The result of the callback (from the cache, or otherwise)
"""
result = self.get(key)
if not result:
entry = self._get(key)
if not entry:
logger.debug(
"[%s]: no cached result for [%s], calculating new one", self._name, key
)
context = ResponseCacheContext(cache_key=key)
if cache_context:
kwargs["cache_context"] = context
d = run_in_background(callback, *args, **kwargs)
result = self._set(context, d)
elif not isinstance(result, defer.Deferred) or result.called:
span_context: Optional[opentracing.SpanContext] = None
async def cb() -> RV:
# NB it is important that we do not `await` before setting span_context!
nonlocal span_context
with start_active_span(f"ResponseCache[{self._name}].calculate"):
span = active_span()
if span:
span_context = span.context
return await callback(*args, **kwargs)
d = run_in_background(cb)
entry = self._set(context, d, span_context)
return await make_deferred_yieldable(entry.result.observe())
result = entry.result.observe()
if result.called:
logger.info("[%s]: using completed cached result for [%s]", self._name, key)
else:
logger.info(
"[%s]: using incomplete cached result for [%s]", self._name, key
)
return await make_deferred_yieldable(result)
span_context = entry.opentracing_span_context
with start_active_span_follows_from(
f"ResponseCache[{self._name}].wait",
contexts=(span_context,) if span_context else (),
):
return await make_deferred_yieldable(result)

View File

@ -11,6 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from unittest.mock import Mock
from parameterized import parameterized
from twisted.internet import defer
@ -60,10 +63,15 @@ class ResponseCacheTestCase(TestCase):
self.successResultOf(wrap_d),
"initial wrap result should be the same",
)
# a second call should return the result without a call to the wrapped function
unexpected = Mock(spec=())
wrap2_d = defer.ensureDeferred(cache.wrap(0, unexpected))
unexpected.assert_not_called()
self.assertEqual(
expected_result,
self.successResultOf(cache.get(0)),
"cache should have the result",
self.successResultOf(wrap2_d),
"cache should still have the result",
)
def test_cache_miss(self):
@ -80,7 +88,7 @@ class ResponseCacheTestCase(TestCase):
self.successResultOf(wrap_d),
"initial wrap result should be the same",
)
self.assertIsNone(cache.get(0), "cache should not have the result now")
self.assertCountEqual([], cache.keys(), "cache should not have the result now")
def test_cache_expire(self):
cache = self.with_cache("short_cache", ms=1000)
@ -92,16 +100,20 @@ class ResponseCacheTestCase(TestCase):
)
self.assertEqual(expected_result, self.successResultOf(wrap_d))
# a second call should return the result without a call to the wrapped function
unexpected = Mock(spec=())
wrap2_d = defer.ensureDeferred(cache.wrap(0, unexpected))
unexpected.assert_not_called()
self.assertEqual(
expected_result,
self.successResultOf(cache.get(0)),
self.successResultOf(wrap2_d),
"cache should still have the result",
)
# cache eviction timer is handled
self.reactor.pump((2,))
self.assertIsNone(cache.get(0), "cache should not have the result now")
self.assertCountEqual([], cache.keys(), "cache should not have the result now")
def test_cache_wait_hit(self):
cache = self.with_cache("neutral_cache")
@ -133,16 +145,21 @@ class ResponseCacheTestCase(TestCase):
self.reactor.pump((1, 1))
self.assertEqual(expected_result, self.successResultOf(wrap_d))
# a second call should immediately return the result without a call to the
# wrapped function
unexpected = Mock(spec=())
wrap2_d = defer.ensureDeferred(cache.wrap(0, unexpected))
unexpected.assert_not_called()
self.assertEqual(
expected_result,
self.successResultOf(cache.get(0)),
self.successResultOf(wrap2_d),
"cache should still have the result",
)
# (1 + 1 + 2) > 3.0, cache eviction timer is handled
self.reactor.pump((2,))
self.assertIsNone(cache.get(0), "cache should not have the result now")
self.assertCountEqual([], cache.keys(), "cache should not have the result now")
@parameterized.expand([(True,), (False,)])
def test_cache_context_nocache(self, should_cache: bool):
@ -183,10 +200,16 @@ class ResponseCacheTestCase(TestCase):
self.assertEqual(expected_result, self.successResultOf(wrap2_d))
if should_cache:
unexpected = Mock(spec=())
wrap3_d = defer.ensureDeferred(cache.wrap(0, unexpected))
unexpected.assert_not_called()
self.assertEqual(
expected_result,
self.successResultOf(cache.get(0)),
self.successResultOf(wrap3_d),
"cache should still have the result",
)
else:
self.assertIsNone(cache.get(0), "cache should not have the result")
self.assertCountEqual(
[], cache.keys(), "cache should not have the result now"
)