Merge pull request #106 from matrix-org/application-services-txn-reliability

Application services transaction reliability (PR #106)
This commit is contained in:
Kegsay 2015-03-26 10:30:47 +00:00
commit 5b999e206e
11 changed files with 1171 additions and 42 deletions

View File

@ -20,6 +20,50 @@ import re
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class ApplicationServiceState(object):
DOWN = "down"
UP = "up"
class AppServiceTransaction(object):
"""Represents an application service transaction."""
def __init__(self, service, id, events):
self.service = service
self.id = id
self.events = events
def send(self, as_api):
"""Sends this transaction using the provided AS API interface.
Args:
as_api(ApplicationServiceApi): The API to use to send.
Returns:
A Deferred which resolves to True if the transaction was sent.
"""
return as_api.push_bulk(
service=self.service,
events=self.events,
txn_id=self.id
)
def complete(self, store):
"""Completes this transaction as successful.
Marks this transaction ID on the application service and removes the
transaction contents from the database.
Args:
store: The database store to operate on.
Returns:
A Deferred which resolves to True if the transaction was completed.
"""
return store.complete_appservice_txn(
service=self.service,
txn_id=self.id
)
class ApplicationService(object): class ApplicationService(object):
"""Defines an application service. This definition is mostly what is """Defines an application service. This definition is mostly what is
provided to the /register AS API. provided to the /register AS API.
@ -35,13 +79,13 @@ class ApplicationService(object):
NS_LIST = [NS_USERS, NS_ALIASES, NS_ROOMS] NS_LIST = [NS_USERS, NS_ALIASES, NS_ROOMS]
def __init__(self, token, url=None, namespaces=None, hs_token=None, def __init__(self, token, url=None, namespaces=None, hs_token=None,
sender=None, txn_id=None): sender=None, id=None):
self.token = token self.token = token
self.url = url self.url = url
self.hs_token = hs_token self.hs_token = hs_token
self.sender = sender self.sender = sender
self.namespaces = self._check_namespaces(namespaces) self.namespaces = self._check_namespaces(namespaces)
self.txn_id = txn_id self.id = id
def _check_namespaces(self, namespaces): def _check_namespaces(self, namespaces):
# Sanity check that it is of the form: # Sanity check that it is of the form:

View File

@ -72,14 +72,19 @@ class ApplicationServiceApi(SimpleHttpClient):
defer.returnValue(False) defer.returnValue(False)
@defer.inlineCallbacks @defer.inlineCallbacks
def push_bulk(self, service, events): def push_bulk(self, service, events, txn_id=None):
events = self._serialize(events) events = self._serialize(events)
if txn_id is None:
logger.warning("push_bulk: Missing txn ID sending events to %s",
service.url)
txn_id = str(0)
txn_id = str(txn_id)
uri = service.url + ("/transactions/%s" % uri = service.url + ("/transactions/%s" %
urllib.quote(str(0))) # TODO txn_ids urllib.quote(txn_id))
response = None
try: try:
response = yield self.put_json( yield self.put_json(
uri=uri, uri=uri,
json_body={ json_body={
"events": events "events": events
@ -87,9 +92,8 @@ class ApplicationServiceApi(SimpleHttpClient):
args={ args={
"access_token": service.hs_token "access_token": service.hs_token
}) })
if response: # just an empty json object
# TODO: Mark txn as sent successfully
defer.returnValue(True) defer.returnValue(True)
return
except CodeMessageException as e: except CodeMessageException as e:
logger.warning("push_bulk to %s received %s", uri, e.code) logger.warning("push_bulk to %s received %s", uri, e.code)
except Exception as ex: except Exception as ex:
@ -97,8 +101,8 @@ class ApplicationServiceApi(SimpleHttpClient):
defer.returnValue(False) defer.returnValue(False)
@defer.inlineCallbacks @defer.inlineCallbacks
def push(self, service, event): def push(self, service, event, txn_id=None):
response = yield self.push_bulk(service, [event]) response = yield self.push_bulk(service, [event], txn_id)
defer.returnValue(response) defer.returnValue(response)
def _serialize(self, events): def _serialize(self, events):

View File

@ -0,0 +1,254 @@
# -*- coding: utf-8 -*-
# Copyright 2015 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module controls the reliability for application service transactions.
The nominal flow through this module looks like:
__________
1---ASa[e]-->| Service |--> Queue ASa[f]
2----ASb[e]->| Queuer |
3--ASa[f]--->|__________|-----------+ ASa[e], ASb[e]
V
-````````- +------------+
|````````|<--StoreTxn-|Transaction |
|Database| | Controller |---> SEND TO AS
`--------` +------------+
What happens on SEND TO AS depends on the state of the Application Service:
- If the AS is marked as DOWN, do nothing.
- If the AS is marked as UP, send the transaction.
* SUCCESS : Increment where the AS is up to txn-wise and nuke the txn
contents from the db.
* FAILURE : Marked AS as DOWN and start Recoverer.
Recoverer attempts to recover ASes who have died. The flow for this looks like:
,--------------------- backoff++ --------------.
V |
START ---> Wait exp ------> Get oldest txn ID from ----> FAILURE
backoff DB and try to send it
^ |___________
Mark AS as | V
UP & quit +---------- YES SUCCESS
| | |
NO <--- Have more txns? <------ Mark txn success & nuke <-+
from db; incr AS pos.
Reset backoff.
This is all tied together by the AppServiceScheduler which DIs the required
components.
"""
from synapse.appservice import ApplicationServiceState
from twisted.internet import defer
import logging
logger = logging.getLogger(__name__)
class AppServiceScheduler(object):
""" Public facing API for this module. Does the required DI to tie the
components together. This also serves as the "event_pool", which in this
case is a simple array.
"""
def __init__(self, clock, store, as_api):
self.clock = clock
self.store = store
self.as_api = as_api
def create_recoverer(service, callback):
return _Recoverer(clock, store, as_api, service, callback)
self.txn_ctrl = _TransactionController(
clock, store, as_api, create_recoverer
)
self.queuer = _ServiceQueuer(self.txn_ctrl)
@defer.inlineCallbacks
def start(self):
logger.info("Starting appservice scheduler")
# check for any DOWN ASes and start recoverers for them.
recoverers = yield _Recoverer.start(
self.clock, self.store, self.as_api, self.txn_ctrl.on_recovered
)
self.txn_ctrl.add_recoverers(recoverers)
def submit_event_for_as(self, service, event):
self.queuer.enqueue(service, event)
class _ServiceQueuer(object):
"""Queues events for the same application service together, sending
transactions as soon as possible. Once a transaction is sent successfully,
this schedules any other events in the queue to run.
"""
def __init__(self, txn_ctrl):
self.queued_events = {} # dict of {service_id: [events]}
self.pending_requests = {} # dict of {service_id: Deferred}
self.txn_ctrl = txn_ctrl
def enqueue(self, service, event):
# if this service isn't being sent something
if not self.pending_requests.get(service.id):
self._send_request(service, [event])
else:
# add to queue for this service
if service.id not in self.queued_events:
self.queued_events[service.id] = []
self.queued_events[service.id].append(event)
def _send_request(self, service, events):
# send request and add callbacks
d = self.txn_ctrl.send(service, events)
d.addBoth(self._on_request_finish)
d.addErrback(self._on_request_fail)
self.pending_requests[service.id] = d
def _on_request_finish(self, service):
self.pending_requests[service.id] = None
# if there are queued events, then send them.
if (service.id in self.queued_events
and len(self.queued_events[service.id]) > 0):
self._send_request(service, self.queued_events[service.id])
self.queued_events[service.id] = []
def _on_request_fail(self, err):
logger.error("AS request failed: %s", err)
class _TransactionController(object):
def __init__(self, clock, store, as_api, recoverer_fn):
self.clock = clock
self.store = store
self.as_api = as_api
self.recoverer_fn = recoverer_fn
# keep track of how many recoverers there are
self.recoverers = []
@defer.inlineCallbacks
def send(self, service, events):
try:
txn = yield self.store.create_appservice_txn(
service=service,
events=events
)
service_is_up = yield self._is_service_up(service)
if service_is_up:
sent = yield txn.send(self.as_api)
if sent:
txn.complete(self.store)
else:
self._start_recoverer(service)
except Exception as e:
logger.exception(e)
self._start_recoverer(service)
# request has finished
defer.returnValue(service)
@defer.inlineCallbacks
def on_recovered(self, recoverer):
self.recoverers.remove(recoverer)
logger.info("Successfully recovered application service AS ID %s",
recoverer.service.id)
logger.info("Remaining active recoverers: %s", len(self.recoverers))
yield self.store.set_appservice_state(
recoverer.service,
ApplicationServiceState.UP
)
def add_recoverers(self, recoverers):
for r in recoverers:
self.recoverers.append(r)
if len(recoverers) > 0:
logger.info("New active recoverers: %s", len(self.recoverers))
@defer.inlineCallbacks
def _start_recoverer(self, service):
yield self.store.set_appservice_state(
service,
ApplicationServiceState.DOWN
)
logger.info(
"Application service falling behind. Starting recoverer. AS ID %s",
service.id
)
recoverer = self.recoverer_fn(service, self.on_recovered)
self.add_recoverers([recoverer])
recoverer.recover()
@defer.inlineCallbacks
def _is_service_up(self, service):
state = yield self.store.get_appservice_state(service)
defer.returnValue(state == ApplicationServiceState.UP or state is None)
class _Recoverer(object):
@staticmethod
@defer.inlineCallbacks
def start(clock, store, as_api, callback):
services = yield store.get_appservices_by_state(
ApplicationServiceState.DOWN
)
recoverers = [
_Recoverer(clock, store, as_api, s, callback) for s in services
]
for r in recoverers:
logger.info("Starting recoverer for AS ID %s which was marked as "
"DOWN", r.service.id)
r.recover()
defer.returnValue(recoverers)
def __init__(self, clock, store, as_api, service, callback):
self.clock = clock
self.store = store
self.as_api = as_api
self.service = service
self.callback = callback
self.backoff_counter = 1
def recover(self):
self.clock.call_later((2 ** self.backoff_counter), self.retry)
def _backoff(self):
# cap the backoff to be around 18h => (2^16) = 65536 secs
if self.backoff_counter < 16:
self.backoff_counter += 1
self.recover()
@defer.inlineCallbacks
def retry(self):
try:
txn = yield self.store.get_oldest_unsent_txn(self.service)
if txn:
logger.info("Retrying transaction %s for AS ID %s",
txn.id, txn.service.id)
sent = yield txn.send(self.as_api)
if sent:
yield txn.complete(self.store)
# reset the backoff counter and retry immediately
self.backoff_counter = 1
yield self.retry()
else:
self._backoff()
else:
self._set_service_recovered()
except Exception as e:
logger.exception(e)
self._backoff()
def _set_service_recovered(self):
self.callback(self)

View File

@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from synapse.appservice.scheduler import AppServiceScheduler
from synapse.appservice.api import ApplicationServiceApi from synapse.appservice.api import ApplicationServiceApi
from .register import RegistrationHandler from .register import RegistrationHandler
from .room import ( from .room import (
@ -54,7 +55,12 @@ class Handlers(object):
self.directory_handler = DirectoryHandler(hs) self.directory_handler = DirectoryHandler(hs)
self.typing_notification_handler = TypingNotificationHandler(hs) self.typing_notification_handler = TypingNotificationHandler(hs)
self.admin_handler = AdminHandler(hs) self.admin_handler = AdminHandler(hs)
asapi = ApplicationServiceApi(hs)
self.appservice_handler = ApplicationServicesHandler( self.appservice_handler = ApplicationServicesHandler(
hs, ApplicationServiceApi(hs) hs, asapi, AppServiceScheduler(
clock=hs.get_clock(),
store=hs.get_datastore(),
as_api=asapi
)
) )
self.sync_handler = SyncHandler(hs) self.sync_handler = SyncHandler(hs)

View File

@ -26,15 +26,28 @@ import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def log_failure(failure):
logger.error(
"Application Services Failure",
exc_info=(
failure.type,
failure.value,
failure.getTracebackObject()
)
)
# NB: Purposefully not inheriting BaseHandler since that contains way too much # NB: Purposefully not inheriting BaseHandler since that contains way too much
# setup code which this handler does not need or use. This makes testing a lot # setup code which this handler does not need or use. This makes testing a lot
# easier. # easier.
class ApplicationServicesHandler(object): class ApplicationServicesHandler(object):
def __init__(self, hs, appservice_api): def __init__(self, hs, appservice_api, appservice_scheduler):
self.store = hs.get_datastore() self.store = hs.get_datastore()
self.hs = hs self.hs = hs
self.appservice_api = appservice_api self.appservice_api = appservice_api
self.scheduler = appservice_scheduler
self.started_scheduler = False
@defer.inlineCallbacks @defer.inlineCallbacks
def register(self, app_service): def register(self, app_service):
@ -46,13 +59,13 @@ class ApplicationServicesHandler(object):
) )
if not stored_service: if not stored_service:
raise StoreError(404, "Application service not found") raise StoreError(404, "Application service not found")
app_service.id = stored_service.id
except StoreError: except StoreError:
raise SynapseError( raise SynapseError(
403, "Unrecognised application services token. " 403, "Unrecognised application services token. "
"Consult the home server admin.", "Consult the home server admin.",
errcode=Codes.FORBIDDEN errcode=Codes.FORBIDDEN
) )
app_service.hs_token = self._generate_hs_token() app_service.hs_token = self._generate_hs_token()
# create a sender for this application service which is used when # create a sender for this application service which is used when
@ -90,9 +103,13 @@ class ApplicationServicesHandler(object):
if event.type == EventTypes.Member: if event.type == EventTypes.Member:
yield self._check_user_exists(event.state_key) yield self._check_user_exists(event.state_key)
# Fork off pushes to these services - XXX First cut, best effort if not self.started_scheduler:
self.scheduler.start().addErrback(log_failure)
self.started_scheduler = True
# Fork off pushes to these services
for service in services: for service in services:
self.appservice_api.push(service, event) self.scheduler.submit_event_for_as(service, event)
@defer.inlineCallbacks @defer.inlineCallbacks
def query_user_exists(self, user_id): def query_user_exists(self, user_id):

View File

@ -14,9 +14,10 @@
# limitations under the License. # limitations under the License.
from twisted.internet import defer from twisted.internet import defer
from .appservice import (
ApplicationServiceStore, ApplicationServiceTransactionStore
)
from ._base import Cache from ._base import Cache
from .appservice import ApplicationServiceStore
from .directory import DirectoryStore from .directory import DirectoryStore
from .events import EventsStore from .events import EventsStore
from .presence import PresenceStore from .presence import PresenceStore
@ -50,7 +51,7 @@ logger = logging.getLogger(__name__)
# Remember to update this number every time a change is made to database # Remember to update this number every time a change is made to database
# schema files, so the users will be informed on server restarts. # schema files, so the users will be informed on server restarts.
SCHEMA_VERSION = 14 SCHEMA_VERSION = 15
dir_path = os.path.abspath(os.path.dirname(__file__)) dir_path = os.path.abspath(os.path.dirname(__file__))
@ -71,6 +72,7 @@ class DataStore(RoomMemberStore, RoomStore,
FilteringStore, FilteringStore,
PusherStore, PusherStore,
PushRuleStore, PushRuleStore,
ApplicationServiceTransactionStore,
EventsStore, EventsStore,
): ):

View File

@ -13,13 +13,13 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import logging import logging
import simplejson
from simplejson import JSONDecodeError from simplejson import JSONDecodeError
import simplejson as json
from twisted.internet import defer from twisted.internet import defer
from synapse.api.constants import Membership from synapse.api.constants import Membership
from synapse.api.errors import StoreError from synapse.api.errors import StoreError
from synapse.appservice import ApplicationService from synapse.appservice import ApplicationService, AppServiceTransaction
from synapse.storage.roommember import RoomsForUser from synapse.storage.roommember import RoomsForUser
from ._base import SQLBaseStore from ._base import SQLBaseStore
@ -37,7 +37,7 @@ class ApplicationServiceStore(SQLBaseStore):
def __init__(self, hs): def __init__(self, hs):
super(ApplicationServiceStore, self).__init__(hs) super(ApplicationServiceStore, self).__init__(hs)
self.services_cache = [] self.services_cache = []
self.cache_defer = self._populate_cache() self.cache_defer = self._populate_appservice_cache()
self.cache_defer.addErrback(log_failure) self.cache_defer.addErrback(log_failure)
@defer.inlineCallbacks @defer.inlineCallbacks
@ -101,11 +101,12 @@ class ApplicationServiceStore(SQLBaseStore):
if not service.hs_token: if not service.hs_token:
raise StoreError(500, "No HS token") raise StoreError(500, "No HS token")
yield self.runInteraction( as_id = yield self.runInteraction(
"update_app_service", "update_app_service",
self._update_app_service_txn, self._update_app_service_txn,
service service
) )
service.id = as_id
# update cache TODO: Should this be in the txn? # update cache TODO: Should this be in the txn?
for (index, cache_service) in enumerate(self.services_cache): for (index, cache_service) in enumerate(self.services_cache):
@ -124,7 +125,7 @@ class ApplicationServiceStore(SQLBaseStore):
"update_app_service_txn: Failed to find as_id for token=", "update_app_service_txn: Failed to find as_id for token=",
service.token service.token
) )
return False return
txn.execute( txn.execute(
"UPDATE application_services SET url=?, hs_token=?, sender=? " "UPDATE application_services SET url=?, hs_token=?, sender=? "
@ -142,9 +143,9 @@ class ApplicationServiceStore(SQLBaseStore):
txn.execute( txn.execute(
"INSERT INTO application_services_regex(" "INSERT INTO application_services_regex("
"as_id, namespace, regex) values(?,?,?)", "as_id, namespace, regex) values(?,?,?)",
(as_id, ns_int, simplejson.dumps(regex_obj)) (as_id, ns_int, json.dumps(regex_obj))
) )
return True return as_id
def _get_as_id_txn(self, txn, token): def _get_as_id_txn(self, txn, token):
cursor = txn.execute( cursor = txn.execute(
@ -277,12 +278,7 @@ class ApplicationServiceStore(SQLBaseStore):
return rooms_for_user_matching_user_id return rooms_for_user_matching_user_id
@defer.inlineCallbacks def _parse_services_dict(self, results):
def _populate_cache(self):
"""Populates the ApplicationServiceCache from the database."""
sql = ("SELECT * FROM application_services LEFT JOIN "
"application_services_regex ON application_services.id = "
"application_services_regex.as_id")
# SQL results in the form: # SQL results in the form:
# [ # [
# { # {
@ -296,12 +292,14 @@ class ApplicationServiceStore(SQLBaseStore):
# } # }
# ] # ]
services = {} services = {}
results = yield self._execute_and_decode("_populate_cache", sql)
for res in results: for res in results:
as_token = res["token"] as_token = res["token"]
if as_token is None:
continue
if as_token not in services: if as_token not in services:
# add the service # add the service
services[as_token] = { services[as_token] = {
"id": res["id"],
"url": res["url"], "url": res["url"],
"token": as_token, "token": as_token,
"hs_token": res["hs_token"], "hs_token": res["hs_token"],
@ -319,20 +317,232 @@ class ApplicationServiceStore(SQLBaseStore):
try: try:
services[as_token]["namespaces"][ services[as_token]["namespaces"][
ApplicationService.NS_LIST[ns_int]].append( ApplicationService.NS_LIST[ns_int]].append(
simplejson.loads(res["regex"]) json.loads(res["regex"])
) )
except IndexError: except IndexError:
logger.error("Bad namespace enum '%s'. %s", ns_int, res) logger.error("Bad namespace enum '%s'. %s", ns_int, res)
except JSONDecodeError: except JSONDecodeError:
logger.error("Bad regex object '%s'", res["regex"]) logger.error("Bad regex object '%s'", res["regex"])
# TODO get last successful txn id f.e. service service_list = []
for service in services.values(): for service in services.values():
logger.info("Found application service: %s", service) service_list.append(ApplicationService(
self.services_cache.append(ApplicationService(
token=service["token"], token=service["token"],
url=service["url"], url=service["url"],
namespaces=service["namespaces"], namespaces=service["namespaces"],
hs_token=service["hs_token"], hs_token=service["hs_token"],
sender=service["sender"] sender=service["sender"],
id=service["id"]
)) ))
return service_list
@defer.inlineCallbacks
def _populate_appservice_cache(self):
"""Populates the ApplicationServiceCache from the database."""
sql = ("SELECT r.*, a.* FROM application_services AS a LEFT JOIN "
"application_services_regex AS r ON a.id = r.as_id")
results = yield self._execute_and_decode("appservice_cache", sql)
services = self._parse_services_dict(results)
for service in services:
logger.info("Found application service: %s", service)
self.services_cache.append(service)
class ApplicationServiceTransactionStore(SQLBaseStore):
def __init__(self, hs):
super(ApplicationServiceTransactionStore, self).__init__(hs)
@defer.inlineCallbacks
def get_appservices_by_state(self, state):
"""Get a list of application services based on their state.
Args:
state(ApplicationServiceState): The state to filter on.
Returns:
A Deferred which resolves to a list of ApplicationServices, which
may be empty.
"""
sql = (
"SELECT r.*, a.* FROM application_services_state AS s LEFT JOIN"
" application_services AS a ON a.id=s.as_id LEFT JOIN"
" application_services_regex AS r ON r.as_id=a.id WHERE state = ?"
)
results = yield self._execute_and_decode(
"get_appservices_by_state", sql, state
)
# NB: This assumes this class is linked with ApplicationServiceStore
defer.returnValue(self._parse_services_dict(results))
@defer.inlineCallbacks
def get_appservice_state(self, service):
"""Get the application service state.
Args:
service(ApplicationService): The service whose state to set.
Returns:
A Deferred which resolves to ApplicationServiceState.
"""
result = yield self._simple_select_one(
"application_services_state",
dict(as_id=service.id),
["state"],
allow_none=True
)
if result:
defer.returnValue(result.get("state"))
return
defer.returnValue(None)
def set_appservice_state(self, service, state):
"""Set the application service state.
Args:
service(ApplicationService): The service whose state to set.
state(ApplicationServiceState): The connectivity state to apply.
Returns:
A Deferred which resolves when the state was set successfully.
"""
return self._simple_upsert(
"application_services_state",
dict(as_id=service.id),
dict(state=state)
)
def create_appservice_txn(self, service, events):
"""Atomically creates a new transaction for this application service
with the given list of events.
Args:
service(ApplicationService): The service who the transaction is for.
events(list<Event>): A list of events to put in the transaction.
Returns:
AppServiceTransaction: A new transaction.
"""
return self.runInteraction(
"create_appservice_txn",
self._create_appservice_txn,
service, events
)
def _create_appservice_txn(self, txn, service, events):
# work out new txn id (highest txn id for this service += 1)
# The highest id may be the last one sent (in which case it is last_txn)
# or it may be the highest in the txns list (which are waiting to be/are
# being sent)
last_txn_id = self._get_last_txn(txn, service.id)
result = txn.execute(
"SELECT MAX(txn_id) FROM application_services_txns WHERE as_id=?",
(service.id,)
)
highest_txn_id = result.fetchone()[0]
if highest_txn_id is None:
highest_txn_id = 0
new_txn_id = max(highest_txn_id, last_txn_id) + 1
# Insert new txn into txn table
event_ids = [e.event_id for e in events]
txn.execute(
"INSERT INTO application_services_txns(as_id, txn_id, event_ids) "
"VALUES(?,?,?)",
(service.id, new_txn_id, json.dumps(event_ids))
)
return AppServiceTransaction(
service=service, id=new_txn_id, events=events
)
def complete_appservice_txn(self, txn_id, service):
"""Completes an application service transaction.
Args:
txn_id(str): The transaction ID being completed.
service(ApplicationService): The application service which was sent
this transaction.
Returns:
A Deferred which resolves if this transaction was stored
successfully.
"""
return self.runInteraction(
"complete_appservice_txn",
self._complete_appservice_txn,
txn_id, service
)
def _complete_appservice_txn(self, txn, txn_id, service):
txn_id = int(txn_id)
# Debugging query: Make sure the txn being completed is EXACTLY +1 from
# what was there before. If it isn't, we've got problems (e.g. the AS
# has probably missed some events), so whine loudly but still continue,
# since it shouldn't fail completion of the transaction.
last_txn_id = self._get_last_txn(txn, service.id)
if (last_txn_id + 1) != txn_id:
logger.error(
"appservice: Completing a transaction which has an ID > 1 from "
"the last ID sent to this AS. We've either dropped events or "
"sent it to the AS out of order. FIX ME. last_txn=%s "
"completing_txn=%s service_id=%s", last_txn_id, txn_id,
service.id
)
# Set current txn_id for AS to 'txn_id'
self._simple_upsert_txn(
txn, "application_services_state", dict(as_id=service.id),
dict(last_txn=txn_id)
)
# Delete txn
self._simple_delete_txn(
txn, "application_services_txns",
dict(txn_id=txn_id, as_id=service.id)
)
def get_oldest_unsent_txn(self, service):
"""Get the oldest transaction which has not been sent for this
service.
Args:
service(ApplicationService): The app service to get the oldest txn.
Returns:
A Deferred which resolves to an AppServiceTransaction or
None.
"""
return self.runInteraction(
"get_oldest_unsent_appservice_txn",
self._get_oldest_unsent_txn,
service
)
def _get_oldest_unsent_txn(self, txn, service):
# Monotonically increasing txn ids, so just select the smallest
# one in the txns table (we delete them when they are sent)
result = txn.execute(
"SELECT MIN(txn_id), * FROM application_services_txns WHERE as_id=?",
(service.id,)
)
entry = self.cursor_to_dict(result)[0]
if not entry or entry["txn_id"] is None:
# the min(txn_id) part will force a row, so entry may not be None
return None
event_ids = json.loads(entry["event_ids"])
events = self._get_events_txn(txn, event_ids)
return AppServiceTransaction(
service=service, id=entry["txn_id"], events=events
)
def _get_last_txn(self, txn, service_id):
result = txn.execute(
"SELECT last_txn FROM application_services_state WHERE as_id=?",
(service_id,)
)
last_txn_id = result.fetchone()
if last_txn_id is None or last_txn_id[0] is None: # no row exists
return 0
else:
return int(last_txn_id[0]) # select 'last_txn' col

View File

@ -0,0 +1,31 @@
/* Copyright 2015 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CREATE TABLE IF NOT EXISTS application_services_state(
as_id INTEGER PRIMARY KEY,
state TEXT,
last_txn TEXT,
FOREIGN KEY(as_id) REFERENCES application_services(id)
);
CREATE TABLE IF NOT EXISTS application_services_txns(
as_id INTEGER NOT NULL,
txn_id INTEGER NOT NULL,
event_ids TEXT NOT NULL,
UNIQUE(as_id, txn_id) ON CONFLICT ROLLBACK
);

View File

@ -0,0 +1,252 @@
# -*- coding: utf-8 -*-
# Copyright 2015 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from synapse.appservice import ApplicationServiceState, AppServiceTransaction
from synapse.appservice.scheduler import (
_ServiceQueuer, _TransactionController, _Recoverer
)
from twisted.internet import defer
from ..utils import MockClock
from mock import Mock
from tests import unittest
class ApplicationServiceSchedulerTransactionCtrlTestCase(unittest.TestCase):
def setUp(self):
self.clock = MockClock()
self.store = Mock()
self.as_api = Mock()
self.recoverer = Mock()
self.recoverer_fn = Mock(return_value=self.recoverer)
self.txnctrl = _TransactionController(
clock=self.clock, store=self.store, as_api=self.as_api,
recoverer_fn=self.recoverer_fn
)
def test_single_service_up_txn_sent(self):
# Test: The AS is up and the txn is successfully sent.
service = Mock()
events = [Mock(), Mock()]
txn_id = "foobar"
txn = Mock(id=txn_id, service=service, events=events)
# mock methods
self.store.get_appservice_state = Mock(
return_value=defer.succeed(ApplicationServiceState.UP)
)
txn.send = Mock(return_value=defer.succeed(True))
self.store.create_appservice_txn = Mock(
return_value=defer.succeed(txn)
)
# actual call
self.txnctrl.send(service, events)
self.store.create_appservice_txn.assert_called_once_with(
service=service, events=events # txn made and saved
)
self.assertEquals(0, len(self.txnctrl.recoverers)) # no recoverer made
txn.complete.assert_called_once_with(self.store) # txn completed
def test_single_service_down(self):
# Test: The AS is down so it shouldn't push; Recoverers will do it.
# It should still make a transaction though.
service = Mock()
events = [Mock(), Mock()]
txn = Mock(id="idhere", service=service, events=events)
self.store.get_appservice_state = Mock(
return_value=defer.succeed(ApplicationServiceState.DOWN)
)
self.store.create_appservice_txn = Mock(
return_value=defer.succeed(txn)
)
# actual call
self.txnctrl.send(service, events)
self.store.create_appservice_txn.assert_called_once_with(
service=service, events=events # txn made and saved
)
self.assertEquals(0, txn.send.call_count) # txn not sent though
self.assertEquals(0, txn.complete.call_count) # or completed
def test_single_service_up_txn_not_sent(self):
# Test: The AS is up and the txn is not sent. A Recoverer is made and
# started.
service = Mock()
events = [Mock(), Mock()]
txn_id = "foobar"
txn = Mock(id=txn_id, service=service, events=events)
# mock methods
self.store.get_appservice_state = Mock(
return_value=defer.succeed(ApplicationServiceState.UP)
)
self.store.set_appservice_state = Mock(return_value=defer.succeed(True))
txn.send = Mock(return_value=defer.succeed(False)) # fails to send
self.store.create_appservice_txn = Mock(
return_value=defer.succeed(txn)
)
# actual call
self.txnctrl.send(service, events)
self.store.create_appservice_txn.assert_called_once_with(
service=service, events=events
)
self.assertEquals(1, self.recoverer_fn.call_count) # recoverer made
self.assertEquals(1, self.recoverer.recover.call_count) # and invoked
self.assertEquals(1, len(self.txnctrl.recoverers)) # and stored
self.assertEquals(0, txn.complete.call_count) # txn not completed
self.store.set_appservice_state.assert_called_once_with(
service, ApplicationServiceState.DOWN # service marked as down
)
class ApplicationServiceSchedulerRecovererTestCase(unittest.TestCase):
def setUp(self):
self.clock = MockClock()
self.as_api = Mock()
self.store = Mock()
self.service = Mock()
self.callback = Mock()
self.recoverer = _Recoverer(
clock=self.clock,
as_api=self.as_api,
store=self.store,
service=self.service,
callback=self.callback,
)
def test_recover_single_txn(self):
txn = Mock()
# return one txn to send, then no more old txns
txns = [txn, None]
def take_txn(*args, **kwargs):
return defer.succeed(txns.pop(0))
self.store.get_oldest_unsent_txn = Mock(side_effect=take_txn)
self.recoverer.recover()
# shouldn't have called anything prior to waiting for exp backoff
self.assertEquals(0, self.store.get_oldest_unsent_txn.call_count)
txn.send = Mock(return_value=True)
# wait for exp backoff
self.clock.advance_time(2)
self.assertEquals(1, txn.send.call_count)
self.assertEquals(1, txn.complete.call_count)
# 2 because it needs to get None to know there are no more txns
self.assertEquals(2, self.store.get_oldest_unsent_txn.call_count)
self.callback.assert_called_once_with(self.recoverer)
self.assertEquals(self.recoverer.service, self.service)
def test_recover_retry_txn(self):
txn = Mock()
txns = [txn, None]
pop_txn = False
def take_txn(*args, **kwargs):
if pop_txn:
return defer.succeed(txns.pop(0))
else:
return defer.succeed(txn)
self.store.get_oldest_unsent_txn = Mock(side_effect=take_txn)
self.recoverer.recover()
self.assertEquals(0, self.store.get_oldest_unsent_txn.call_count)
txn.send = Mock(return_value=False)
self.clock.advance_time(2)
self.assertEquals(1, txn.send.call_count)
self.assertEquals(0, txn.complete.call_count)
self.assertEquals(0, self.callback.call_count)
self.clock.advance_time(4)
self.assertEquals(2, txn.send.call_count)
self.assertEquals(0, txn.complete.call_count)
self.assertEquals(0, self.callback.call_count)
self.clock.advance_time(8)
self.assertEquals(3, txn.send.call_count)
self.assertEquals(0, txn.complete.call_count)
self.assertEquals(0, self.callback.call_count)
txn.send = Mock(return_value=True) # successfully send the txn
pop_txn = True # returns the txn the first time, then no more.
self.clock.advance_time(16)
self.assertEquals(1, txn.send.call_count) # new mock reset call count
self.assertEquals(1, txn.complete.call_count)
self.callback.assert_called_once_with(self.recoverer)
class ApplicationServiceSchedulerQueuerTestCase(unittest.TestCase):
def setUp(self):
self.txn_ctrl = Mock()
self.queuer = _ServiceQueuer(self.txn_ctrl)
def test_send_single_event_no_queue(self):
# Expect the event to be sent immediately.
service = Mock(id=4)
event = Mock()
self.queuer.enqueue(service, event)
self.txn_ctrl.send.assert_called_once_with(service, [event])
def test_send_single_event_with_queue(self):
d = defer.Deferred()
self.txn_ctrl.send = Mock(return_value=d)
service = Mock(id=4)
event = Mock(event_id="first")
event2 = Mock(event_id="second")
event3 = Mock(event_id="third")
# Send an event and don't resolve it just yet.
self.queuer.enqueue(service, event)
# Send more events: expect send() to NOT be called multiple times.
self.queuer.enqueue(service, event2)
self.queuer.enqueue(service, event3)
self.txn_ctrl.send.assert_called_with(service, [event])
self.assertEquals(1, self.txn_ctrl.send.call_count)
# Resolve the send event: expect the queued events to be sent
d.callback(service)
self.txn_ctrl.send.assert_called_with(service, [event2, event3])
self.assertEquals(2, self.txn_ctrl.send.call_count)
def test_multiple_service_queues(self):
# Tests that each service has its own queue, and that they don't block
# on each other.
srv1 = Mock(id=4)
srv_1_defer = defer.Deferred()
srv_1_event = Mock(event_id="srv1a")
srv_1_event2 = Mock(event_id="srv1b")
srv2 = Mock(id=6)
srv_2_defer = defer.Deferred()
srv_2_event = Mock(event_id="srv2a")
srv_2_event2 = Mock(event_id="srv2b")
send_return_list = [srv_1_defer, srv_2_defer]
self.txn_ctrl.send = Mock(side_effect=lambda x,y: send_return_list.pop(0))
# send events for different ASes and make sure they are sent
self.queuer.enqueue(srv1, srv_1_event)
self.queuer.enqueue(srv1, srv_1_event2)
self.txn_ctrl.send.assert_called_with(srv1, [srv_1_event])
self.queuer.enqueue(srv2, srv_2_event)
self.queuer.enqueue(srv2, srv_2_event2)
self.txn_ctrl.send.assert_called_with(srv2, [srv_2_event])
# make sure callbacks for a service only send queued events for THAT
# service
srv_2_defer.callback(srv2)
self.txn_ctrl.send.assert_called_with(srv2, [srv_2_event2])
self.assertEquals(3, self.txn_ctrl.send.call_count)

View File

@ -27,10 +27,11 @@ class AppServiceHandlerTestCase(unittest.TestCase):
def setUp(self): def setUp(self):
self.mock_store = Mock() self.mock_store = Mock()
self.mock_as_api = Mock() self.mock_as_api = Mock()
self.mock_scheduler = Mock()
hs = Mock() hs = Mock()
hs.get_datastore = Mock(return_value=self.mock_store) hs.get_datastore = Mock(return_value=self.mock_store)
self.handler = ApplicationServicesHandler( self.handler = ApplicationServicesHandler(
hs, self.mock_as_api hs, self.mock_as_api, self.mock_scheduler
) )
@defer.inlineCallbacks @defer.inlineCallbacks
@ -52,7 +53,9 @@ class AppServiceHandlerTestCase(unittest.TestCase):
) )
self.mock_as_api.push = Mock() self.mock_as_api.push = Mock()
yield self.handler.notify_interested_services(event) yield self.handler.notify_interested_services(event)
self.mock_as_api.push.assert_called_once_with(interested_service, event) self.mock_scheduler.submit_event_for_as.assert_called_once_with(
interested_service, event
)
@defer.inlineCallbacks @defer.inlineCallbacks
def test_query_room_alias_exists(self): def test_query_room_alias_exists(self):

View File

@ -15,10 +15,13 @@
from tests import unittest from tests import unittest
from twisted.internet import defer from twisted.internet import defer
from synapse.appservice import ApplicationService from synapse.appservice import ApplicationService, ApplicationServiceState
from synapse.server import HomeServer from synapse.server import HomeServer
from synapse.storage.appservice import ApplicationServiceStore from synapse.storage.appservice import (
ApplicationServiceStore, ApplicationServiceTransactionStore
)
import json
from mock import Mock from mock import Mock
from tests.utils import SQLiteMemoryDbPool, MockClock from tests.utils import SQLiteMemoryDbPool, MockClock
@ -114,3 +117,306 @@ class ApplicationServiceStoreTestCase(unittest.TestCase):
def test_retrieval_of_all_services(self): def test_retrieval_of_all_services(self):
services = yield self.store.get_app_services() services = yield self.store.get_app_services()
self.assertEquals(len(services), 3) self.assertEquals(len(services), 3)
class ApplicationServiceTransactionStoreTestCase(unittest.TestCase):
@defer.inlineCallbacks
def setUp(self):
self.db_pool = SQLiteMemoryDbPool()
yield self.db_pool.prepare()
hs = HomeServer(
"test", db_pool=self.db_pool, clock=MockClock(), config=Mock()
)
self.as_list = [
{
"token": "token1",
"url": "https://matrix-as.org",
"id": 3
},
{
"token": "alpha_tok",
"url": "https://alpha.com",
"id": 5
},
{
"token": "beta_tok",
"url": "https://beta.com",
"id": 6
},
{
"token": "delta_tok",
"url": "https://delta.com",
"id": 7
},
]
for s in self.as_list:
yield self._add_service(s["id"], s["url"], s["token"])
self.store = TestTransactionStore(hs)
def _add_service(self, as_id, url, token):
return self.db_pool.runQuery(
"INSERT INTO application_services(id, url, token) VALUES(?,?,?)",
(as_id, url, token)
)
def _set_state(self, id, state, txn=None):
return self.db_pool.runQuery(
"INSERT INTO application_services_state(as_id, state, last_txn) "
"VALUES(?,?,?)",
(id, state, txn)
)
def _insert_txn(self, as_id, txn_id, events):
return self.db_pool.runQuery(
"INSERT INTO application_services_txns(as_id, txn_id, event_ids) "
"VALUES(?,?,?)",
(as_id, txn_id, json.dumps([e.event_id for e in events]))
)
def _set_last_txn(self, as_id, txn_id):
return self.db_pool.runQuery(
"INSERT INTO application_services_state(as_id, last_txn, state) "
"VALUES(?,?,?)",
(as_id, txn_id, ApplicationServiceState.UP)
)
@defer.inlineCallbacks
def test_get_appservice_state_none(self):
service = Mock(id=999)
state = yield self.store.get_appservice_state(service)
self.assertEquals(None, state)
@defer.inlineCallbacks
def test_get_appservice_state_up(self):
yield self._set_state(
self.as_list[0]["id"], ApplicationServiceState.UP
)
service = Mock(id=self.as_list[0]["id"])
state = yield self.store.get_appservice_state(service)
self.assertEquals(ApplicationServiceState.UP, state)
@defer.inlineCallbacks
def test_get_appservice_state_down(self):
yield self._set_state(
self.as_list[0]["id"], ApplicationServiceState.UP
)
yield self._set_state(
self.as_list[1]["id"], ApplicationServiceState.DOWN
)
yield self._set_state(
self.as_list[2]["id"], ApplicationServiceState.DOWN
)
service = Mock(id=self.as_list[1]["id"])
state = yield self.store.get_appservice_state(service)
self.assertEquals(ApplicationServiceState.DOWN, state)
@defer.inlineCallbacks
def test_get_appservices_by_state_none(self):
services = yield self.store.get_appservices_by_state(
ApplicationServiceState.DOWN
)
self.assertEquals(0, len(services))
@defer.inlineCallbacks
def test_set_appservices_state_down(self):
service = Mock(id=self.as_list[1]["id"])
yield self.store.set_appservice_state(
service,
ApplicationServiceState.DOWN
)
rows = yield self.db_pool.runQuery(
"SELECT as_id FROM application_services_state WHERE state=?",
(ApplicationServiceState.DOWN,)
)
self.assertEquals(service.id, rows[0][0])
@defer.inlineCallbacks
def test_set_appservices_state_multiple_up(self):
service = Mock(id=self.as_list[1]["id"])
yield self.store.set_appservice_state(
service,
ApplicationServiceState.UP
)
yield self.store.set_appservice_state(
service,
ApplicationServiceState.DOWN
)
yield self.store.set_appservice_state(
service,
ApplicationServiceState.UP
)
rows = yield self.db_pool.runQuery(
"SELECT as_id FROM application_services_state WHERE state=?",
(ApplicationServiceState.UP,)
)
self.assertEquals(service.id, rows[0][0])
@defer.inlineCallbacks
def test_create_appservice_txn_first(self):
service = Mock(id=self.as_list[0]["id"])
events = [Mock(event_id="e1"), Mock(event_id="e2")]
txn = yield self.store.create_appservice_txn(service, events)
self.assertEquals(txn.id, 1)
self.assertEquals(txn.events, events)
self.assertEquals(txn.service, service)
@defer.inlineCallbacks
def test_create_appservice_txn_older_last_txn(self):
service = Mock(id=self.as_list[0]["id"])
events = [Mock(event_id="e1"), Mock(event_id="e2")]
yield self._set_last_txn(service.id, 9643) # AS is falling behind
yield self._insert_txn(service.id, 9644, events)
yield self._insert_txn(service.id, 9645, events)
txn = yield self.store.create_appservice_txn(service, events)
self.assertEquals(txn.id, 9646)
self.assertEquals(txn.events, events)
self.assertEquals(txn.service, service)
@defer.inlineCallbacks
def test_create_appservice_txn_up_to_date_last_txn(self):
service = Mock(id=self.as_list[0]["id"])
events = [Mock(event_id="e1"), Mock(event_id="e2")]
yield self._set_last_txn(service.id, 9643)
txn = yield self.store.create_appservice_txn(service, events)
self.assertEquals(txn.id, 9644)
self.assertEquals(txn.events, events)
self.assertEquals(txn.service, service)
@defer.inlineCallbacks
def test_create_appservice_txn_up_fuzzing(self):
service = Mock(id=self.as_list[0]["id"])
events = [Mock(event_id="e1"), Mock(event_id="e2")]
yield self._set_last_txn(service.id, 9643)
# dump in rows with higher IDs to make sure the queries aren't wrong.
yield self._set_last_txn(self.as_list[1]["id"], 119643)
yield self._set_last_txn(self.as_list[2]["id"], 9)
yield self._set_last_txn(self.as_list[3]["id"], 9643)
yield self._insert_txn(self.as_list[1]["id"], 119644, events)
yield self._insert_txn(self.as_list[1]["id"], 119645, events)
yield self._insert_txn(self.as_list[1]["id"], 119646, events)
yield self._insert_txn(self.as_list[2]["id"], 10, events)
yield self._insert_txn(self.as_list[3]["id"], 9643, events)
txn = yield self.store.create_appservice_txn(service, events)
self.assertEquals(txn.id, 9644)
self.assertEquals(txn.events, events)
self.assertEquals(txn.service, service)
@defer.inlineCallbacks
def test_complete_appservice_txn_first_txn(self):
service = Mock(id=self.as_list[0]["id"])
events = [Mock(event_id="e1"), Mock(event_id="e2")]
txn_id = 1
yield self._insert_txn(service.id, txn_id, events)
yield self.store.complete_appservice_txn(txn_id=txn_id, service=service)
res = yield self.db_pool.runQuery(
"SELECT last_txn FROM application_services_state WHERE as_id=?",
(service.id,)
)
self.assertEquals(1, len(res))
self.assertEquals(str(txn_id), res[0][0])
res = yield self.db_pool.runQuery(
"SELECT * FROM application_services_txns WHERE txn_id=?",
(txn_id,)
)
self.assertEquals(0, len(res))
@defer.inlineCallbacks
def test_complete_appservice_txn_existing_in_state_table(self):
service = Mock(id=self.as_list[0]["id"])
events = [Mock(event_id="e1"), Mock(event_id="e2")]
txn_id = 5
yield self._set_last_txn(service.id, 4)
yield self._insert_txn(service.id, txn_id, events)
yield self.store.complete_appservice_txn(txn_id=txn_id, service=service)
res = yield self.db_pool.runQuery(
"SELECT last_txn, state FROM application_services_state WHERE "
"as_id=?",
(service.id,)
)
self.assertEquals(1, len(res))
self.assertEquals(str(txn_id), res[0][0])
self.assertEquals(ApplicationServiceState.UP, res[0][1])
res = yield self.db_pool.runQuery(
"SELECT * FROM application_services_txns WHERE txn_id=?",
(txn_id,)
)
self.assertEquals(0, len(res))
@defer.inlineCallbacks
def test_get_oldest_unsent_txn_none(self):
service = Mock(id=self.as_list[0]["id"])
txn = yield self.store.get_oldest_unsent_txn(service)
self.assertEquals(None, txn)
@defer.inlineCallbacks
def test_get_oldest_unsent_txn(self):
service = Mock(id=self.as_list[0]["id"])
events = [Mock(event_id="e1"), Mock(event_id="e2")]
other_events = [Mock(event_id="e5"), Mock(event_id="e6")]
# we aren't testing store._base stuff here, so mock this out
self.store._get_events_txn = Mock(return_value=events)
yield self._insert_txn(self.as_list[1]["id"], 9, other_events)
yield self._insert_txn(service.id, 10, events)
yield self._insert_txn(service.id, 11, other_events)
yield self._insert_txn(service.id, 12, other_events)
txn = yield self.store.get_oldest_unsent_txn(service)
self.assertEquals(service, txn.service)
self.assertEquals(10, txn.id)
self.assertEquals(events, txn.events)
@defer.inlineCallbacks
def test_get_appservices_by_state_single(self):
yield self._set_state(
self.as_list[0]["id"], ApplicationServiceState.DOWN
)
yield self._set_state(
self.as_list[1]["id"], ApplicationServiceState.UP
)
services = yield self.store.get_appservices_by_state(
ApplicationServiceState.DOWN
)
self.assertEquals(1, len(services))
self.assertEquals(self.as_list[0]["id"], services[0].id)
@defer.inlineCallbacks
def test_get_appservices_by_state_multiple(self):
yield self._set_state(
self.as_list[0]["id"], ApplicationServiceState.DOWN
)
yield self._set_state(
self.as_list[1]["id"], ApplicationServiceState.UP
)
yield self._set_state(
self.as_list[2]["id"], ApplicationServiceState.DOWN
)
yield self._set_state(
self.as_list[3]["id"], ApplicationServiceState.UP
)
services = yield self.store.get_appservices_by_state(
ApplicationServiceState.DOWN
)
self.assertEquals(2, len(services))
self.assertEquals(self.as_list[2]["id"], services[0].id)
self.assertEquals(self.as_list[0]["id"], services[1].id)
# required for ApplicationServiceTransactionStoreTestCase tests
class TestTransactionStore(ApplicationServiceTransactionStore,
ApplicationServiceStore):
def __init__(self, hs):
super(TestTransactionStore, self).__init__(hs)