Make the media /upload tracing less ambiguous (#15888)

A lot of the functions have the same name in this space like `store_file`,
and we also do it multiple times for different reasons (main media repo,
other storage providers, thumbnails, etc) so it's good to differentiate
them so your head doesn't explode.

Follow-up to https://github.com/matrix-org/synapse/pull/15850

Tracing instrumentation to media `/upload` code paths to investigate https://github.com/matrix-org/synapse/issues/15841
This commit is contained in:
Eric Eastwood 2023-07-10 17:23:11 -05:00 committed by GitHub
parent 5e82b07d2c
commit 2328e90fbb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 58 additions and 38 deletions

1
changelog.d/15888.misc Normal file
View File

@ -0,0 +1 @@
Add tracing to media `/upload` code paths.

View File

@ -38,7 +38,7 @@ from twisted.protocols.basic import FileSender
from synapse.api.errors import NotFoundError from synapse.api.errors import NotFoundError
from synapse.logging.context import defer_to_thread, make_deferred_yieldable from synapse.logging.context import defer_to_thread, make_deferred_yieldable
from synapse.logging.opentracing import trace from synapse.logging.opentracing import start_active_span, trace, trace_with_opname
from synapse.util import Clock from synapse.util import Clock
from synapse.util.file_consumer import BackgroundFileConsumer from synapse.util.file_consumer import BackgroundFileConsumer
@ -77,7 +77,7 @@ class MediaStorage:
self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
self.clock = hs.get_clock() self.clock = hs.get_clock()
@trace @trace_with_opname("MediaStorage.store_file")
async def store_file(self, source: IO, file_info: FileInfo) -> str: async def store_file(self, source: IO, file_info: FileInfo) -> str:
"""Write `source` to the on disk media store, and also any other """Write `source` to the on disk media store, and also any other
configured storage providers configured storage providers
@ -91,18 +91,19 @@ class MediaStorage:
""" """
with self.store_into_file(file_info) as (f, fname, finish_cb): with self.store_into_file(file_info) as (f, fname, finish_cb):
# Write to the main repository # Write to the main media repository
await self.write_to_file(source, f) await self.write_to_file(source, f)
# Write to the other storage providers
await finish_cb() await finish_cb()
return fname return fname
@trace @trace_with_opname("MediaStorage.write_to_file")
async def write_to_file(self, source: IO, output: IO) -> None: async def write_to_file(self, source: IO, output: IO) -> None:
"""Asynchronously write the `source` to `output`.""" """Asynchronously write the `source` to `output`."""
await defer_to_thread(self.reactor, _write_file_synchronously, source, output) await defer_to_thread(self.reactor, _write_file_synchronously, source, output)
@trace @trace_with_opname("MediaStorage.store_into_file")
@contextlib.contextmanager @contextlib.contextmanager
def store_into_file( def store_into_file(
self, file_info: FileInfo self, file_info: FileInfo
@ -117,9 +118,9 @@ class MediaStorage:
fname can be used to read the contents from after upload, e.g. to fname can be used to read the contents from after upload, e.g. to
generate thumbnails. generate thumbnails.
finish_cb must be called and waited on after the file has been finish_cb must be called and waited on after the file has been successfully been
successfully been written to. Should not be called if there was an written to. Should not be called if there was an error. Checks for spam and
error. stores the file into the configured storage providers.
Args: Args:
file_info: Info about the file to store file_info: Info about the file to store
@ -139,10 +140,19 @@ class MediaStorage:
finished_called = [False] finished_called = [False]
main_media_repo_write_trace_scope = start_active_span(
"writing to main media repo"
)
main_media_repo_write_trace_scope.__enter__()
try: try:
with open(fname, "wb") as f: with open(fname, "wb") as f:
async def finish() -> None: async def finish() -> None:
# When someone calls finish, we assume they are done writing to the main media repo
main_media_repo_write_trace_scope.__exit__(None, None, None)
with start_active_span("writing to other storage providers"):
# Ensure that all writes have been flushed and close the # Ensure that all writes have been flushed and close the
# file. # file.
f.flush() f.flush()
@ -161,6 +171,7 @@ class MediaStorage:
raise SpamMediaException(errcode=spam_check[0]) raise SpamMediaException(errcode=spam_check[0])
for provider in self.storage_providers: for provider in self.storage_providers:
with start_active_span(str(provider)):
await provider.store_file(path, file_info) await provider.store_file(path, file_info)
finished_called[0] = True finished_called[0] = True
@ -168,6 +179,9 @@ class MediaStorage:
yield f, fname, finish yield f, fname, finish
except Exception as e: except Exception as e:
try: try:
main_media_repo_write_trace_scope.__exit__(
type(e), None, e.__traceback__
)
os.remove(fname) os.remove(fname)
except Exception: except Exception:
pass pass
@ -175,7 +189,11 @@ class MediaStorage:
raise e from None raise e from None
if not finished_called: if not finished_called:
raise Exception("Finished callback not called") exc = Exception("Finished callback not called")
main_media_repo_write_trace_scope.__exit__(
type(exc), None, exc.__traceback__
)
raise exc
async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]: async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]:
"""Attempts to fetch media described by file_info from the local cache """Attempts to fetch media described by file_info from the local cache

View File

@ -20,7 +20,7 @@ from typing import TYPE_CHECKING, Callable, Optional
from synapse.config._base import Config from synapse.config._base import Config
from synapse.logging.context import defer_to_thread, run_in_background from synapse.logging.context import defer_to_thread, run_in_background
from synapse.logging.opentracing import trace from synapse.logging.opentracing import start_active_span, trace_with_opname
from synapse.util.async_helpers import maybe_awaitable from synapse.util.async_helpers import maybe_awaitable
from ._base import FileInfo, Responder from ._base import FileInfo, Responder
@ -87,7 +87,7 @@ class StorageProviderWrapper(StorageProvider):
def __str__(self) -> str: def __str__(self) -> str:
return "StorageProviderWrapper[%s]" % (self.backend,) return "StorageProviderWrapper[%s]" % (self.backend,)
@trace @trace_with_opname("StorageProviderWrapper.store_file")
async def store_file(self, path: str, file_info: FileInfo) -> None: async def store_file(self, path: str, file_info: FileInfo) -> None:
if not file_info.server_name and not self.store_local: if not file_info.server_name and not self.store_local:
return None return None
@ -116,7 +116,7 @@ class StorageProviderWrapper(StorageProvider):
run_in_background(store) run_in_background(store)
@trace @trace_with_opname("StorageProviderWrapper.fetch")
async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]: async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
if file_info.url_cache: if file_info.url_cache:
# Files in the URL preview cache definitely aren't stored here, # Files in the URL preview cache definitely aren't stored here,
@ -144,7 +144,7 @@ class FileStorageProviderBackend(StorageProvider):
def __str__(self) -> str: def __str__(self) -> str:
return "FileStorageProviderBackend[%s]" % (self.base_directory,) return "FileStorageProviderBackend[%s]" % (self.base_directory,)
@trace @trace_with_opname("FileStorageProviderBackend.store_file")
async def store_file(self, path: str, file_info: FileInfo) -> None: async def store_file(self, path: str, file_info: FileInfo) -> None:
"""See StorageProvider.store_file""" """See StorageProvider.store_file"""
@ -156,6 +156,7 @@ class FileStorageProviderBackend(StorageProvider):
# mypy needs help inferring the type of the second parameter, which is generic # mypy needs help inferring the type of the second parameter, which is generic
shutil_copyfile: Callable[[str, str], str] = shutil.copyfile shutil_copyfile: Callable[[str, str], str] = shutil.copyfile
with start_active_span("shutil_copyfile"):
await defer_to_thread( await defer_to_thread(
self.hs.get_reactor(), self.hs.get_reactor(),
shutil_copyfile, shutil_copyfile,
@ -163,7 +164,7 @@ class FileStorageProviderBackend(StorageProvider):
backup_fname, backup_fname,
) )
@trace @trace_with_opname("FileStorageProviderBackend.fetch")
async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]: async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
"""See StorageProvider.fetch""" """See StorageProvider.fetch"""