diff --git a/CHANGES.rst b/CHANGES.rst index 068ed9f2f..5bea8e82d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,9 @@ +Changes in synapse v0.11.0-r1 (2015-11-18) +========================================== + +* Retry and fail federation requests more aggressively for requests that block + client side requests (PR #384) + Changes in synapse v0.11.0 (2015-11-17) ======================================= diff --git a/synapse/__init__.py b/synapse/__init__.py index f0eac97ba..5a83f5f3e 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.11.0" +__version__ = "0.11.0-r1" diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 3d59e1c65..0e0cb7ebc 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -136,6 +136,7 @@ class TransportLayerClient(object): path=PREFIX + "/send/%s/" % transaction.transaction_id, data=json_data, json_data_callback=json_data_callback, + long_retries=True, ) logger.debug( diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index ca9591556..614c06a6d 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -56,7 +56,8 @@ incoming_responses_counter = metrics.register_counter( ) -MAX_RETRIES = 10 +MAX_LONG_RETRIES = 10 +MAX_SHORT_RETRIES = 3 class MatrixFederationEndpointFactory(object): @@ -103,7 +104,7 @@ class MatrixFederationHttpClient(object): def _create_request(self, destination, method, path_bytes, body_callback, headers_dict={}, param_bytes=b"", query_bytes=b"", retry_on_dns_fail=True, - timeout=None): + timeout=None, long_retries=False): """ Creates and sends a request to the given url """ headers_dict[b"User-Agent"] = [self.version_string] @@ -123,7 +124,10 @@ class MatrixFederationHttpClient(object): # XXX: Would be much nicer to retry only at the transaction-layer # (once we have reliable transactions in place) - retries_left = MAX_RETRIES + if long_retries: + retries_left = MAX_LONG_RETRIES + else: + retries_left = MAX_SHORT_RETRIES http_url_bytes = urlparse.urlunparse( ("", "", path_bytes, param_bytes, query_bytes, "") @@ -184,9 +188,15 @@ class MatrixFederationHttpClient(object): ) if retries_left and not timeout: - delay = 4 ** (MAX_RETRIES + 1 - retries_left) - delay = max(delay, 60) - delay *= random.uniform(0.8, 1.4) + if long_retries: + delay = 4 ** (MAX_LONG_RETRIES + 1 - retries_left) + delay = max(delay, 60) + delay *= random.uniform(0.8, 1.4) + else: + delay = 0.5 * 2 ** (MAX_SHORT_RETRIES - retries_left) + delay = max(delay, 2) + delay *= random.uniform(0.8, 1.4) + yield sleep(delay) retries_left -= 1 else: @@ -237,7 +247,8 @@ class MatrixFederationHttpClient(object): headers_dict[b"Authorization"] = auth_headers @defer.inlineCallbacks - def put_json(self, destination, path, data={}, json_data_callback=None): + def put_json(self, destination, path, data={}, json_data_callback=None, + long_retries=False): """ Sends the specifed json data using PUT Args: @@ -248,6 +259,8 @@ class MatrixFederationHttpClient(object): the request body. This will be encoded as JSON. json_data_callback (callable): A callable returning the dict to use as the request body. + long_retries (bool): A boolean that indicates whether we should + retry for a short or long time. Returns: Deferred: Succeeds when we get a 2xx HTTP response. The result @@ -273,6 +286,7 @@ class MatrixFederationHttpClient(object): path.encode("ascii"), body_callback=body_callback, headers_dict={"Content-Type": ["application/json"]}, + long_retries=long_retries, ) if 200 <= response.code < 300: @@ -491,6 +505,9 @@ class _JsonProducer(object): def stopProducing(self): pass + def resumeProducing(self): + pass + def _flatten_response_never_received(e): if hasattr(e, "reasons"): diff --git a/tests/federation/test_federation.py b/tests/federation/test_federation.py index a4ef60b91..96570f907 100644 --- a/tests/federation/test_federation.py +++ b/tests/federation/test_federation.py @@ -197,6 +197,7 @@ class FederationTestCase(unittest.TestCase): 'pdu_failures': [], }, json_data_callback=ANY, + long_retries=True, ) @defer.inlineCallbacks @@ -228,6 +229,7 @@ class FederationTestCase(unittest.TestCase): 'pdu_failures': [], }, json_data_callback=ANY, + long_retries=True, ) @defer.inlineCallbacks diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py index 10d4482cc..1172ceae8 100644 --- a/tests/handlers/test_presence.py +++ b/tests/handlers/test_presence.py @@ -409,6 +409,7 @@ class PresenceInvitesTestCase(PresenceTestCase): } ), json_data_callback=ANY, + long_retries=True, ), defer.succeed((200, "OK")) ) @@ -443,6 +444,7 @@ class PresenceInvitesTestCase(PresenceTestCase): } ), json_data_callback=ANY, + long_retries=True, ), defer.succeed((200, "OK")) ) @@ -483,6 +485,7 @@ class PresenceInvitesTestCase(PresenceTestCase): } ), json_data_callback=ANY, + long_retries=True, ), defer.succeed((200, "OK")) ) @@ -827,6 +830,7 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase): } ), json_data_callback=ANY, + long_retries=True, ), defer.succeed((200, "OK")) ) @@ -843,6 +847,7 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase): } ), json_data_callback=ANY, + long_retries=True, ), defer.succeed((200, "OK")) ) @@ -1033,6 +1038,7 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase): } ), json_data_callback=ANY, + long_retries=True, ), defer.succeed((200, "OK")) ) @@ -1048,6 +1054,7 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase): } ), json_data_callback=ANY, + long_retries=True, ), defer.succeed((200, "OK")) ) @@ -1078,6 +1085,7 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase): } ), json_data_callback=ANY, + long_retries=True, ), defer.succeed((200, "OK")) ) @@ -1184,6 +1192,7 @@ class PresencePollingTestCase(MockedDatastorePresenceTestCase): }, ), json_data_callback=ANY, + long_retries=True, ), defer.succeed((200, "OK")) ) @@ -1200,6 +1209,7 @@ class PresencePollingTestCase(MockedDatastorePresenceTestCase): }, ), json_data_callback=ANY, + long_retries=True, ), defer.succeed((200, "OK")) ) @@ -1232,6 +1242,7 @@ class PresencePollingTestCase(MockedDatastorePresenceTestCase): }, ), json_data_callback=ANY, + long_retries=True, ), defer.succeed((200, "OK")) ) @@ -1265,6 +1276,7 @@ class PresencePollingTestCase(MockedDatastorePresenceTestCase): }, ), json_data_callback=ANY, + long_retries=True, ), defer.succeed((200, "OK")) ) @@ -1297,6 +1309,7 @@ class PresencePollingTestCase(MockedDatastorePresenceTestCase): }, ), json_data_callback=ANY, + long_retries=True, ), defer.succeed((200, "OK")) ) diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index 2d7ba4356..5b1feeb45 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -218,6 +218,7 @@ class TypingNotificationsTestCase(unittest.TestCase): } ), json_data_callback=ANY, + long_retries=True, ), defer.succeed((200, "OK")) ) @@ -284,6 +285,7 @@ class TypingNotificationsTestCase(unittest.TestCase): } ), json_data_callback=ANY, + long_retries=True, ), defer.succeed((200, "OK")) )