Improve exception handling for background processes

There were a bunch of places where we fire off a process to happen in the
background, but don't have any exception handling on it - instead relying on
the unhandled error being logged when the relevent deferred gets
garbage-collected.

This is unsatisfactory for a number of reasons:
 - logging on garbage collection is best-effort and may happen some time after
   the error, if at all
 - it can be hard to figure out where the error actually happened.
 - it is logged as a scary CRITICAL error which (a) I always forget to grep for
   and (b) it's not really CRITICAL if a background process we don't care about
   fails.

So this is an attempt to add exception handling to everything we fire off into
the background.
This commit is contained in:
Richard van der Hoff 2018-04-27 11:07:40 +01:00
parent 0ced8b5b47
commit 9255a6cb17
20 changed files with 331 additions and 233 deletions

View file

@ -857,15 +857,25 @@ class EventCreationHandler(object):
@defer.inlineCallbacks
def _notify():
yield run_on_reactor()
self.notifier.on_new_room_event(
event, event_stream_id, max_stream_id,
extra_users=extra_users
)
try:
self.notifier.on_new_room_event(
event, event_stream_id, max_stream_id,
extra_users=extra_users
)
except Exception:
logger.exception("Error notifying about new room event")
preserve_fn(_notify)()
if event.type == EventTypes.Message:
presence = self.hs.get_presence_handler()
# We don't want to block sending messages on any presence code. This
# matters as sometimes presence code can take a while.
preserve_fn(presence.bump_presence_active_time)(requester.user)
run_in_background(self._bump_active_time, requester.user)
@defer.inlineCallbacks
def _bump_active_time(self, user):
try:
presence = self.hs.get_presence_handler()
yield presence.bump_presence_active_time(user)
except Exception:
logger.exception("Error bumping presence active time")

View file

@ -31,7 +31,7 @@ from synapse.storage.presence import UserPresenceState
from synapse.util.caches.descriptors import cachedInlineCallbacks
from synapse.util.async import Linearizer
from synapse.util.logcontext import preserve_fn
from synapse.util.logcontext import run_in_background
from synapse.util.logutils import log_function
from synapse.util.metrics import Measure
from synapse.util.wheel_timer import WheelTimer
@ -254,6 +254,14 @@ class PresenceHandler(object):
logger.info("Finished _persist_unpersisted_changes")
@defer.inlineCallbacks
def _update_states_and_catch_exception(self, new_states):
try:
res = yield self._update_states(new_states)
defer.returnValue(res)
except Exception:
logger.exception("Error updating presence")
@defer.inlineCallbacks
def _update_states(self, new_states):
"""Updates presence of users. Sets the appropriate timeouts. Pokes
@ -364,7 +372,7 @@ class PresenceHandler(object):
now=now,
)
preserve_fn(self._update_states)(changes)
run_in_background(self._update_states_and_catch_exception, changes)
except Exception:
logger.exception("Exception in _handle_timeouts loop")
@ -422,20 +430,23 @@ class PresenceHandler(object):
@defer.inlineCallbacks
def _end():
if affect_presence:
try:
self.user_to_num_current_syncs[user_id] -= 1
prev_state = yield self.current_state_for_user(user_id)
yield self._update_states([prev_state.copy_and_replace(
last_user_sync_ts=self.clock.time_msec(),
)])
except Exception:
logger.exception("Error updating presence after sync")
@contextmanager
def _user_syncing():
try:
yield
finally:
preserve_fn(_end)()
if affect_presence:
run_in_background(_end)
defer.returnValue(_user_syncing())

View file

@ -135,37 +135,40 @@ class ReceiptsHandler(BaseHandler):
"""Given a list of receipts, works out which remote servers should be
poked and pokes them.
"""
# TODO: Some of this stuff should be coallesced.
for receipt in receipts:
room_id = receipt["room_id"]
receipt_type = receipt["receipt_type"]
user_id = receipt["user_id"]
event_ids = receipt["event_ids"]
data = receipt["data"]
try:
# TODO: Some of this stuff should be coallesced.
for receipt in receipts:
room_id = receipt["room_id"]
receipt_type = receipt["receipt_type"]
user_id = receipt["user_id"]
event_ids = receipt["event_ids"]
data = receipt["data"]
users = yield self.state.get_current_user_in_room(room_id)
remotedomains = set(get_domain_from_id(u) for u in users)
remotedomains = remotedomains.copy()
remotedomains.discard(self.server_name)
users = yield self.state.get_current_user_in_room(room_id)
remotedomains = set(get_domain_from_id(u) for u in users)
remotedomains = remotedomains.copy()
remotedomains.discard(self.server_name)
logger.debug("Sending receipt to: %r", remotedomains)
logger.debug("Sending receipt to: %r", remotedomains)
for domain in remotedomains:
self.federation.send_edu(
destination=domain,
edu_type="m.receipt",
content={
room_id: {
receipt_type: {
user_id: {
"event_ids": event_ids,
"data": data,
for domain in remotedomains:
self.federation.send_edu(
destination=domain,
edu_type="m.receipt",
content={
room_id: {
receipt_type: {
user_id: {
"event_ids": event_ids,
"data": data,
}
}
}
},
},
},
key=(room_id, receipt_type, user_id),
)
key=(room_id, receipt_type, user_id),
)
except Exception:
logger.exception("Error pushing receipts to remote servers")
@defer.inlineCallbacks
def get_receipts_for_room(self, room_id, to_key):

View file

@ -205,28 +205,31 @@ class TypingHandler(object):
@defer.inlineCallbacks
def _push_remote(self, member, typing):
users = yield self.state.get_current_user_in_room(member.room_id)
self._member_last_federation_poke[member] = self.clock.time_msec()
try:
users = yield self.state.get_current_user_in_room(member.room_id)
self._member_last_federation_poke[member] = self.clock.time_msec()
now = self.clock.time_msec()
self.wheel_timer.insert(
now=now,
obj=member,
then=now + FEDERATION_PING_INTERVAL,
)
now = self.clock.time_msec()
self.wheel_timer.insert(
now=now,
obj=member,
then=now + FEDERATION_PING_INTERVAL,
)
for domain in set(get_domain_from_id(u) for u in users):
if domain != self.server_name:
self.federation.send_edu(
destination=domain,
edu_type="m.typing",
content={
"room_id": member.room_id,
"user_id": member.user_id,
"typing": typing,
},
key=member,
)
for domain in set(get_domain_from_id(u) for u in users):
if domain != self.server_name:
self.federation.send_edu(
destination=domain,
edu_type="m.typing",
content={
"room_id": member.room_id,
"user_id": member.user_id,
"typing": typing,
},
key=member,
)
except Exception:
logger.exception("Error pushing typing notif to remotes")
@defer.inlineCallbacks
def _recv_edu(self, origin, content):