Wrap the polling/retry blocks in try/except so that an unexpected error cannot permanently break sending to other ASes.

This commit is contained in:
Kegan Dougal 2015-03-16 10:38:02 +00:00
parent 835e01fc70
commit c9c444f562

View File

@ -120,19 +120,22 @@ class _TransactionController(object):
@defer.inlineCallbacks @defer.inlineCallbacks
def start_polling(self): def start_polling(self):
groups = self.event_grouper.drain_groups() try:
for service in groups: groups = self.event_grouper.drain_groups()
txn = yield self.store.create_appservice_txn( for service in groups:
service=service, txn = yield self.store.create_appservice_txn(
events=groups[service] service=service,
) events=groups[service]
service_is_up = yield self._is_service_up(service) )
if service_is_up: service_is_up = yield self._is_service_up(service)
sent = yield txn.send(self.as_api) if service_is_up:
if sent: sent = yield txn.send(self.as_api)
txn.complete(self.store) if sent:
else: txn.complete(self.store)
self._start_recoverer(service) else:
self._start_recoverer(service)
except Exception as e:
logger.exception(e)
self.clock.call_later(1, self.start_polling) self.clock.call_later(1, self.start_polling)
@defer.inlineCallbacks @defer.inlineCallbacks
@ -200,25 +203,32 @@ class _Recoverer(object):
def recover(self): def recover(self):
self.clock.call_later((2 ** self.backoff_counter), self.retry) self.clock.call_later((2 ** self.backoff_counter), self.retry)
def _backoff(self):
# cap the backoff to be around 18h => (2^16) = 65536 secs
if self.backoff_counter < 16:
self.backoff_counter += 1
self.recover()
@defer.inlineCallbacks @defer.inlineCallbacks
def retry(self): def retry(self):
txn = yield self.store.get_oldest_unsent_txn(self.service) try:
if txn: txn = yield self.store.get_oldest_unsent_txn(self.service)
logger.info("Retrying transaction %s for AS ID %s", if txn:
txn.id, txn.service.id) logger.info("Retrying transaction %s for AS ID %s",
sent = yield txn.send(self.as_api) txn.id, txn.service.id)
if sent: sent = yield txn.send(self.as_api)
yield txn.complete(self.store) if sent:
# reset the backoff counter and retry immediately yield txn.complete(self.store)
self.backoff_counter = 1 # reset the backoff counter and retry immediately
yield self.retry() self.backoff_counter = 1
yield self.retry()
else:
self._backoff()
else: else:
# cap the backoff to be around 18h => (2^16) = 65536 secs self._set_service_recovered()
if self.backoff_counter < 16: except Exception as e:
self.backoff_counter += 1 logger.exception(e)
self.recover() self._backoff()
else:
self._set_service_recovered()
def _set_service_recovered(self): def _set_service_recovered(self):
self.callback(self) self.callback(self)