Merge branch 'federation_client_retries' of github.com:matrix-org/synapse into develop

This commit is contained in:
Erik Johnston 2015-02-05 14:06:13 +00:00
commit 4996398858
8 changed files with 190 additions and 110 deletions

View File

@ -39,7 +39,7 @@ class Codes(object):
TOO_LARGE = "M_TOO_LARGE" TOO_LARGE = "M_TOO_LARGE"
class CodeMessageException(Exception): class CodeMessageException(RuntimeError):
"""An exception with integer code and message string attributes.""" """An exception with integer code and message string attributes."""
def __init__(self, code, msg): def __init__(self, code, msg):
@ -227,3 +227,9 @@ class FederationError(RuntimeError):
"affected": self.affected, "affected": self.affected,
"source": self.source if self.source else self.affected, "source": self.source if self.source else self.affected,
} }
class HttpResponseException(CodeMessageException):
def __init__(self, code, msg, response):
self.response = response
super(HttpResponseException, self).__init__(code, msg)

View File

@ -19,6 +19,7 @@ from twisted.internet import defer
from .federation_base import FederationBase from .federation_base import FederationBase
from .units import Edu from .units import Edu
from synapse.api.errors import CodeMessageException
from synapse.util.logutils import log_function from synapse.util.logutils import log_function
from synapse.events import FrozenEvent from synapse.events import FrozenEvent
@ -180,7 +181,8 @@ class FederationClient(FederationBase):
pdu = yield self._check_sigs_and_hash(pdu) pdu = yield self._check_sigs_and_hash(pdu)
break break
except CodeMessageException:
raise
except Exception as e: except Exception as e:
logger.info( logger.info(
"Failed to get PDU %s from %s because %s", "Failed to get PDU %s from %s because %s",
@ -251,7 +253,9 @@ class FederationClient(FederationBase):
defer.returnValue(signed_auth) defer.returnValue(signed_auth)
@defer.inlineCallbacks @defer.inlineCallbacks
def make_join(self, destination, room_id, user_id): def make_join(self, destinations, room_id, user_id):
for destination in destinations:
try:
ret = yield self.transport_layer.make_join( ret = yield self.transport_layer.make_join(
destination, room_id, user_id destination, room_id, user_id
) )
@ -260,10 +264,24 @@ class FederationClient(FederationBase):
logger.debug("Got response to make_join: %s", pdu_dict) logger.debug("Got response to make_join: %s", pdu_dict)
defer.returnValue(self.event_from_pdu_json(pdu_dict)) defer.returnValue(
(destination, self.event_from_pdu_json(pdu_dict))
)
break
except CodeMessageException:
raise
except Exception as e:
logger.warn(
"Failed to make_join via %s: %s",
destination, e.message
)
raise RuntimeError("Failed to send to any server.")
@defer.inlineCallbacks @defer.inlineCallbacks
def send_join(self, destination, pdu): def send_join(self, destinations, pdu):
for destination in destinations:
try:
time_now = self._clock.time_msec() time_now = self._clock.time_msec()
_, content = yield self.transport_layer.send_join( _, content = yield self.transport_layer.send_join(
destination=destination, destination=destination,
@ -297,7 +315,17 @@ class FederationClient(FederationBase):
defer.returnValue({ defer.returnValue({
"state": signed_state, "state": signed_state,
"auth_chain": signed_auth, "auth_chain": signed_auth,
"origin": destination,
}) })
except CodeMessageException:
raise
except Exception as e:
logger.warn(
"Failed to send_join via %s: %s",
destination, e.message
)
raise RuntimeError("Failed to send to any server.")
@defer.inlineCallbacks @defer.inlineCallbacks
def send_invite(self, destination, room_id, event_id, pdu): def send_invite(self, destination, room_id, event_id, pdu):

View File

@ -19,6 +19,7 @@ from twisted.internet import defer
from .persistence import TransactionActions from .persistence import TransactionActions
from .units import Transaction from .units import Transaction
from synapse.api.errors import HttpResponseException
from synapse.util.logutils import log_function from synapse.util.logutils import log_function
from synapse.util.logcontext import PreserveLoggingContext from synapse.util.logcontext import PreserveLoggingContext
@ -238,9 +239,14 @@ class TransactionQueue(object):
del p["age_ts"] del p["age_ts"]
return data return data
code, response = yield self.transport_layer.send_transaction( try:
response = yield self.transport_layer.send_transaction(
transaction, json_data_cb transaction, json_data_cb
) )
code = 200
except HttpResponseException as e:
code = e.code
response = e.response
logger.info("TX [%s] got %d response", destination, code) logger.info("TX [%s] got %d response", destination, code)
@ -274,8 +280,7 @@ class TransactionQueue(object):
pass pass
logger.debug("TX [%s] Yielded to callbacks", destination) logger.debug("TX [%s] Yielded to callbacks", destination)
except RuntimeError as e:
except Exception as e:
# We capture this here as there as nothing actually listens # We capture this here as there as nothing actually listens
# for this finishing functions deferred. # for this finishing functions deferred.
logger.warn( logger.warn(
@ -283,6 +288,14 @@ class TransactionQueue(object):
destination, destination,
e, e,
) )
except Exception as e:
# We capture this here as there as nothing actually listens
# for this finishing functions deferred.
logger.exception(
"TX [%s] Problem in _attempt_transaction: %s",
destination,
e,
)
self.set_retrying(destination, retry_interval) self.set_retrying(destination, retry_interval)

View File

@ -19,7 +19,6 @@ from synapse.api.urls import FEDERATION_PREFIX as PREFIX
from synapse.util.logutils import log_function from synapse.util.logutils import log_function
import logging import logging
import json
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -129,7 +128,7 @@ class TransportLayerClient(object):
# generated by the json_data_callback. # generated by the json_data_callback.
json_data = transaction.get_dict() json_data = transaction.get_dict()
code, response = yield self.client.put_json( response = yield self.client.put_json(
transaction.destination, transaction.destination,
path=PREFIX + "/send/%s/" % transaction.transaction_id, path=PREFIX + "/send/%s/" % transaction.transaction_id,
data=json_data, data=json_data,
@ -137,95 +136,86 @@ class TransportLayerClient(object):
) )
logger.debug( logger.debug(
"send_data dest=%s, txid=%s, got response: %d", "send_data dest=%s, txid=%s, got response: 200",
transaction.destination, transaction.transaction_id, code transaction.destination, transaction.transaction_id,
) )
defer.returnValue((code, response)) defer.returnValue(response)
@defer.inlineCallbacks @defer.inlineCallbacks
@log_function @log_function
def make_query(self, destination, query_type, args, retry_on_dns_fail): def make_query(self, destination, query_type, args, retry_on_dns_fail):
path = PREFIX + "/query/%s" % query_type path = PREFIX + "/query/%s" % query_type
response = yield self.client.get_json( content = yield self.client.get_json(
destination=destination, destination=destination,
path=path, path=path,
args=args, args=args,
retry_on_dns_fail=retry_on_dns_fail, retry_on_dns_fail=retry_on_dns_fail,
) )
defer.returnValue(response) defer.returnValue(content)
@defer.inlineCallbacks @defer.inlineCallbacks
@log_function @log_function
def make_join(self, destination, room_id, user_id, retry_on_dns_fail=True): def make_join(self, destination, room_id, user_id, retry_on_dns_fail=True):
path = PREFIX + "/make_join/%s/%s" % (room_id, user_id) path = PREFIX + "/make_join/%s/%s" % (room_id, user_id)
response = yield self.client.get_json( content = yield self.client.get_json(
destination=destination, destination=destination,
path=path, path=path,
retry_on_dns_fail=retry_on_dns_fail, retry_on_dns_fail=retry_on_dns_fail,
) )
defer.returnValue(response) defer.returnValue(content)
@defer.inlineCallbacks @defer.inlineCallbacks
@log_function @log_function
def send_join(self, destination, room_id, event_id, content): def send_join(self, destination, room_id, event_id, content):
path = PREFIX + "/send_join/%s/%s" % (room_id, event_id) path = PREFIX + "/send_join/%s/%s" % (room_id, event_id)
code, content = yield self.client.put_json( response = yield self.client.put_json(
destination=destination, destination=destination,
path=path, path=path,
data=content, data=content,
) )
if not 200 <= code < 300:
raise RuntimeError("Got %d from send_join", code)
defer.returnValue(json.loads(content))
@defer.inlineCallbacks
@log_function
def send_invite(self, destination, room_id, event_id, content):
path = PREFIX + "/invite/%s/%s" % (room_id, event_id)
code, content = yield self.client.put_json(
destination=destination,
path=path,
data=content,
)
if not 200 <= code < 300:
raise RuntimeError("Got %d from send_invite", code)
defer.returnValue(json.loads(content))
@defer.inlineCallbacks
@log_function
def get_event_auth(self, destination, room_id, event_id):
path = PREFIX + "/event_auth/%s/%s" % (room_id, event_id)
response = yield self.client.get_json(
destination=destination,
path=path,
)
defer.returnValue(response) defer.returnValue(response)
@defer.inlineCallbacks @defer.inlineCallbacks
@log_function @log_function
def send_query_auth(self, destination, room_id, event_id, content): def send_invite(self, destination, room_id, event_id, content):
path = PREFIX + "/query_auth/%s/%s" % (room_id, event_id) path = PREFIX + "/invite/%s/%s" % (room_id, event_id)
code, content = yield self.client.post_json( response = yield self.client.put_json(
destination=destination, destination=destination,
path=path, path=path,
data=content, data=content,
) )
if not 200 <= code < 300: defer.returnValue(response)
raise RuntimeError("Got %d from send_invite", code)
defer.returnValue(json.loads(content)) @defer.inlineCallbacks
@log_function
def get_event_auth(self, destination, room_id, event_id):
path = PREFIX + "/event_auth/%s/%s" % (room_id, event_id)
content = yield self.client.get_json(
destination=destination,
path=path,
)
defer.returnValue(content)
@defer.inlineCallbacks
@log_function
def send_query_auth(self, destination, room_id, event_id, content):
path = PREFIX + "/query_auth/%s/%s" % (room_id, event_id)
content = yield self.client.post_json(
destination=destination,
path=path,
data=content,
)
defer.returnValue(content)

View File

@ -113,7 +113,16 @@ class DirectoryHandler(BaseHandler):
) )
extra_servers = yield self.store.get_joined_hosts_for_room(room_id) extra_servers = yield self.store.get_joined_hosts_for_room(room_id)
servers = list(set(extra_servers) | set(servers)) servers = set(extra_servers) | set(servers)
# If this server is in the list of servers, return it first.
if self.server_name in servers:
servers = (
[self.server_name]
+ [s for s in servers if s != self.server_name]
)
else:
servers = list(servers)
defer.returnValue({ defer.returnValue({
"room_id": room_id, "room_id": room_id,

View File

@ -273,7 +273,7 @@ class FederationHandler(BaseHandler):
@log_function @log_function
@defer.inlineCallbacks @defer.inlineCallbacks
def do_invite_join(self, target_host, room_id, joinee, content, snapshot): def do_invite_join(self, target_hosts, room_id, joinee, content, snapshot):
""" Attempts to join the `joinee` to the room `room_id` via the """ Attempts to join the `joinee` to the room `room_id` via the
server `target_host`. server `target_host`.
@ -287,8 +287,8 @@ class FederationHandler(BaseHandler):
""" """
logger.debug("Joining %s to %s", joinee, room_id) logger.debug("Joining %s to %s", joinee, room_id)
pdu = yield self.replication_layer.make_join( origin, pdu = yield self.replication_layer.make_join(
target_host, target_hosts,
room_id, room_id,
joinee joinee
) )
@ -330,11 +330,17 @@ class FederationHandler(BaseHandler):
new_event = builder.build() new_event = builder.build()
# Try the host we successfully got a response to /make_join/
# request first.
target_hosts.remove(origin)
target_hosts.insert(0, origin)
ret = yield self.replication_layer.send_join( ret = yield self.replication_layer.send_join(
target_host, target_hosts,
new_event new_event
) )
origin = ret["origin"]
state = ret["state"] state = ret["state"]
auth_chain = ret["auth_chain"] auth_chain = ret["auth_chain"]
auth_chain.sort(key=lambda e: e.depth) auth_chain.sort(key=lambda e: e.depth)
@ -371,7 +377,7 @@ class FederationHandler(BaseHandler):
if e.event_id in auth_ids if e.event_id in auth_ids
} }
yield self._handle_new_event( yield self._handle_new_event(
target_host, e, auth_events=auth origin, e, auth_events=auth
) )
except: except:
logger.exception( logger.exception(
@ -391,7 +397,7 @@ class FederationHandler(BaseHandler):
if e.event_id in auth_ids if e.event_id in auth_ids
} }
yield self._handle_new_event( yield self._handle_new_event(
target_host, e, auth_events=auth origin, e, auth_events=auth
) )
except: except:
logger.exception( logger.exception(
@ -406,7 +412,7 @@ class FederationHandler(BaseHandler):
} }
yield self._handle_new_event( yield self._handle_new_event(
target_host, origin,
new_event, new_event,
state=state, state=state,
current_state=state, current_state=state,

View File

@ -389,8 +389,6 @@ class RoomMemberHandler(BaseHandler):
if not hosts: if not hosts:
raise SynapseError(404, "No known servers") raise SynapseError(404, "No known servers")
host = hosts[0]
# If event doesn't include a display name, add one. # If event doesn't include a display name, add one.
yield self.distributor.fire( yield self.distributor.fire(
"collect_presencelike_data", joinee, content "collect_presencelike_data", joinee, content
@ -407,12 +405,12 @@ class RoomMemberHandler(BaseHandler):
}) })
event, context = yield self._create_new_client_event(builder) event, context = yield self._create_new_client_event(builder)
yield self._do_join(event, context, room_host=host, do_auth=True) yield self._do_join(event, context, room_hosts=hosts, do_auth=True)
defer.returnValue({"room_id": room_id}) defer.returnValue({"room_id": room_id})
@defer.inlineCallbacks @defer.inlineCallbacks
def _do_join(self, event, context, room_host=None, do_auth=True): def _do_join(self, event, context, room_hosts=None, do_auth=True):
joinee = UserID.from_string(event.state_key) joinee = UserID.from_string(event.state_key)
# room_id = RoomID.from_string(event.room_id, self.hs) # room_id = RoomID.from_string(event.room_id, self.hs)
room_id = event.room_id room_id = event.room_id
@ -441,7 +439,7 @@ class RoomMemberHandler(BaseHandler):
if is_host_in_room: if is_host_in_room:
should_do_dance = False should_do_dance = False
elif room_host: # TODO: Shouldn't this be remote_room_host? elif room_hosts: # TODO: Shouldn't this be remote_room_host?
should_do_dance = True should_do_dance = True
else: else:
# TODO(markjh): get prev_state from snapshot # TODO(markjh): get prev_state from snapshot
@ -453,7 +451,7 @@ class RoomMemberHandler(BaseHandler):
inviter = UserID.from_string(prev_state.user_id) inviter = UserID.from_string(prev_state.user_id)
should_do_dance = not self.hs.is_mine(inviter) should_do_dance = not self.hs.is_mine(inviter)
room_host = inviter.domain room_hosts = [inviter.domain]
else: else:
# return the same error as join_room_alias does # return the same error as join_room_alias does
raise SynapseError(404, "No known servers") raise SynapseError(404, "No known servers")
@ -461,7 +459,7 @@ class RoomMemberHandler(BaseHandler):
if should_do_dance: if should_do_dance:
handler = self.hs.get_handlers().federation_handler handler = self.hs.get_handlers().federation_handler
yield handler.do_invite_join( yield handler.do_invite_join(
room_host, room_hosts,
room_id, room_id,
event.user_id, event.user_id,
event.get_dict()["content"], # FIXME To get a non-frozen dict event.get_dict()["content"], # FIXME To get a non-frozen dict

View File

@ -27,7 +27,9 @@ from synapse.util.logcontext import PreserveLoggingContext
from syutil.jsonutil import encode_canonical_json from syutil.jsonutil import encode_canonical_json
from synapse.api.errors import CodeMessageException, SynapseError, Codes from synapse.api.errors import (
SynapseError, Codes, HttpResponseException,
)
from syutil.crypto.jsonsign import sign_json from syutil.crypto.jsonsign import sign_json
@ -163,13 +165,12 @@ class MatrixFederationHttpClient(object):
) )
if 200 <= response.code < 300: if 200 <= response.code < 300:
# We need to update the transactions table to say it was sent?
pass pass
else: else:
# :'( # :'(
# Update transactions table? # Update transactions table?
raise CodeMessageException( raise HttpResponseException(
response.code, response.phrase response.code, response.phrase, response
) )
defer.returnValue(response) defer.returnValue(response)
@ -238,11 +239,20 @@ class MatrixFederationHttpClient(object):
headers_dict={"Content-Type": ["application/json"]}, headers_dict={"Content-Type": ["application/json"]},
) )
if 200 <= response.code < 300:
# We need to update the transactions table to say it was sent?
c_type = response.headers.getRawHeaders("Content-Type")
if "application/json" not in c_type:
raise RuntimeError(
"Content-Type not application/json"
)
logger.debug("Getting resp body") logger.debug("Getting resp body")
body = yield readBody(response) body = yield readBody(response)
logger.debug("Got resp body") logger.debug("Got resp body")
defer.returnValue((response.code, body)) defer.returnValue(json.loads(body))
@defer.inlineCallbacks @defer.inlineCallbacks
def post_json(self, destination, path, data={}): def post_json(self, destination, path, data={}):
@ -275,11 +285,20 @@ class MatrixFederationHttpClient(object):
headers_dict={"Content-Type": ["application/json"]}, headers_dict={"Content-Type": ["application/json"]},
) )
if 200 <= response.code < 300:
# We need to update the transactions table to say it was sent?
c_type = response.headers.getRawHeaders("Content-Type")
if "application/json" not in c_type:
raise RuntimeError(
"Content-Type not application/json"
)
logger.debug("Getting resp body") logger.debug("Getting resp body")
body = yield readBody(response) body = yield readBody(response)
logger.debug("Got resp body") logger.debug("Got resp body")
defer.returnValue((response.code, body)) defer.returnValue(json.loads(body))
@defer.inlineCallbacks @defer.inlineCallbacks
def get_json(self, destination, path, args={}, retry_on_dns_fail=True): def get_json(self, destination, path, args={}, retry_on_dns_fail=True):
@ -321,7 +340,18 @@ class MatrixFederationHttpClient(object):
retry_on_dns_fail=retry_on_dns_fail retry_on_dns_fail=retry_on_dns_fail
) )
if 200 <= response.code < 300:
# We need to update the transactions table to say it was sent?
c_type = response.headers.getRawHeaders("Content-Type")
if "application/json" not in c_type:
raise RuntimeError(
"Content-Type not application/json"
)
logger.debug("Getting resp body")
body = yield readBody(response) body = yield readBody(response)
logger.debug("Got resp body")
defer.returnValue(json.loads(body)) defer.returnValue(json.loads(body))