Wrap the polling/retry blocks in try/except so that an error raised while processing one application service (AS) cannot permanently break sending to the other ASes.

This commit is contained in:
Kegan Dougal 2015-03-16 10:38:02 +00:00
parent 835e01fc70
commit c9c444f562

View File

@@ -120,6 +120,7 @@ class _TransactionController(object):
@defer.inlineCallbacks @defer.inlineCallbacks
def start_polling(self): def start_polling(self):
try:
groups = self.event_grouper.drain_groups() groups = self.event_grouper.drain_groups()
for service in groups: for service in groups:
txn = yield self.store.create_appservice_txn( txn = yield self.store.create_appservice_txn(
@@ -133,6 +134,8 @@ class _TransactionController(object):
txn.complete(self.store) txn.complete(self.store)
else: else:
self._start_recoverer(service) self._start_recoverer(service)
except Exception as e:
logger.exception(e)
self.clock.call_later(1, self.start_polling) self.clock.call_later(1, self.start_polling)
@defer.inlineCallbacks @defer.inlineCallbacks
@@ -200,8 +203,15 @@ class _Recoverer(object):
def recover(self): def recover(self):
self.clock.call_later((2 ** self.backoff_counter), self.retry) self.clock.call_later((2 ** self.backoff_counter), self.retry)
def _backoff(self):
# cap the backoff to be around 18h => (2^16) = 65536 secs
if self.backoff_counter < 16:
self.backoff_counter += 1
self.recover()
@defer.inlineCallbacks @defer.inlineCallbacks
def retry(self): def retry(self):
try:
txn = yield self.store.get_oldest_unsent_txn(self.service) txn = yield self.store.get_oldest_unsent_txn(self.service)
if txn: if txn:
logger.info("Retrying transaction %s for AS ID %s", logger.info("Retrying transaction %s for AS ID %s",
@@ -213,12 +223,12 @@ class _Recoverer(object):
self.backoff_counter = 1 self.backoff_counter = 1
yield self.retry() yield self.retry()
else: else:
# cap the backoff to be around 18h => (2^16) = 65536 secs self._backoff()
if self.backoff_counter < 16:
self.backoff_counter += 1
self.recover()
else: else:
self._set_service_recovered() self._set_service_recovered()
except Exception as e:
logger.exception(e)
self._backoff()
def _set_service_recovered(self): def _set_service_recovered(self):
self.callback(self) self.callback(self)