Improve exception handling for concurrent execution (#12109)

* fix incorrect unwrapFirstError import

this was being imported from the wrong place

* Refactor `concurrently_execute` to use `yieldable_gather_results`

* Improve exception handling in `yieldable_gather_results`

Try to avoid swallowing so many stack traces (see the sketch after this list).

* mark unwrapFirstError deprecated

* changelog
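
For illustration only (not part of the commit), here is a minimal sketch of the caller-visible effect on `yieldable_gather_results`; the `fetch_event` coroutine and `event_ids` argument are hypothetical names, and this assumes the usual Synapse async environment (running reactor and logging context):

from typing import List

from synapse.util.async_helpers import yieldable_gather_results

async def fetch_event(event_id: str) -> dict:
    ...  # hypothetical per-item work that may raise

async def fetch_all(event_ids: List[str]) -> List[dict]:
    try:
        # Runs fetch_event concurrently over event_ids and returns the results.
        return await yieldable_gather_results(fetch_event, event_ids)
    except Exception:
        # With this change, the first underlying exception is re-raised
        # directly ("raise ... from None") instead of surfacing via a
        # defer.FirstError errback chain, so its type and traceback come
        # from the failing fetch_event call.
        raise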
Richard van der Hoff 2022-03-01 09:34:30 +00:00 committed by GitHub
parent 952efd0bca
commit 9d11fee8f2
5 changed files with 151 additions and 27 deletions

synapse/util/async_helpers.py

@@ -29,6 +29,7 @@ from typing import (
     Hashable,
     Iterable,
     Iterator,
+    List,
     Optional,
     Set,
     Tuple,
@@ -51,7 +52,7 @@ from synapse.logging.context import (
     make_deferred_yieldable,
     run_in_background,
 )
-from synapse.util import Clock, unwrapFirstError
+from synapse.util import Clock

 logger = logging.getLogger(__name__)
@@ -193,9 +194,9 @@ class ObservableDeferred(Generic[_T], AbstractObservableDeferred[_T]):
 T = TypeVar("T")


-def concurrently_execute(
+async def concurrently_execute(
     func: Callable[[T], Any], args: Iterable[T], limit: int
-) -> defer.Deferred:
+) -> None:
     """Executes the function with each argument concurrently while limiting
     the number of concurrent executions.
@@ -221,20 +222,14 @@ def concurrently_execute(
     # We use `itertools.islice` to handle the case where the number of args is
     # less than the limit, avoiding needlessly spawning unnecessary background
     # tasks.
-    return make_deferred_yieldable(
-        defer.gatherResults(
-            [
-                run_in_background(_concurrently_execute_inner, value)
-                for value in itertools.islice(it, limit)
-            ],
-            consumeErrors=True,
-        )
-    ).addErrback(unwrapFirstError)
+    await yieldable_gather_results(
+        _concurrently_execute_inner, (value for value in itertools.islice(it, limit))
+    )


-def yieldable_gather_results(
-    func: Callable, iter: Iterable, *args: Any, **kwargs: Any
-) -> defer.Deferred:
+async def yieldable_gather_results(
+    func: Callable[..., Awaitable[T]], iter: Iterable, *args: Any, **kwargs: Any
+) -> List[T]:
     """Executes the function with each argument concurrently.

     Args:
@@ -245,15 +240,30 @@ def yieldable_gather_results(
         **kwargs: Keyword arguments to be passed to each call to func

     Returns
-        Deferred[list]: Resolved when all functions have been invoked, or errors if
-            one of the function calls fails.
+        A list containing the results of the function
     """
-    return make_deferred_yieldable(
-        defer.gatherResults(
-            [run_in_background(func, item, *args, **kwargs) for item in iter],
-            consumeErrors=True,
+    try:
+        return await make_deferred_yieldable(
+            defer.gatherResults(
+                [run_in_background(func, item, *args, **kwargs) for item in iter],
+                consumeErrors=True,
+            )
         )
-    ).addErrback(unwrapFirstError)
+    except defer.FirstError as dfe:
+        # unwrap the error from defer.gatherResults.
+
+        # The raised exception's traceback only includes func() etc if
+        # the 'await' happens before the exception is thrown - ie if the failure
+        # happens *asynchronously* - otherwise Twisted throws away the traceback as it
+        # could be large.
+        #
+        # We could maybe reconstruct a fake traceback from Failure.frames. Or maybe
+        # we could throw Twisted into the fires of Mordor.
+
+        # suppress exception chaining, because the FirstError doesn't tell us anything
+        # very interesting.
+        assert isinstance(dfe.subFailure.value, BaseException)
+        raise dfe.subFailure.value from None


 T1 = TypeVar("T1")
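
For reference, a minimal usage sketch of the refactored `concurrently_execute` after this change; `_process_room` and `room_ids` are hypothetical names, and this assumes the usual Synapse async environment (running reactor and logging context):

from typing import List

from synapse.util.async_helpers import concurrently_execute

async def _process_room(room_id: str) -> None:
    ...  # hypothetical per-room work

async def process_all(room_ids: List[str]) -> None:
    # concurrently_execute is now a coroutine returning None: it runs
    # _process_room over room_ids with at most 10 invocations in flight,
    # and completes (or raises) directly rather than handing back a
    # Deferred that needs an unwrapFirstError errback.
    await concurrently_execute(_process_room, room_ids, 10)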