Merge pull request #3853 from matrix-org/erikj/log_outbound_each_time

Log outbound requests when we retry
This commit is contained in:
Erik Johnston 2018-09-12 16:55:40 +01:00 committed by GitHub
commit fa0d464fa4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 73 additions and 73 deletions

1
changelog.d/3853.misc Normal file
View File

@ -0,0 +1 @@
Log when we retry outbound requests

View File

@ -177,11 +177,6 @@ class MatrixFederationHttpClient(object):
txn_id = "%s-O-%s" % (method, self._next_id) txn_id = "%s-O-%s" % (method, self._next_id)
self._next_id = (self._next_id + 1) % (MAXINT - 1) self._next_id = (self._next_id + 1) % (MAXINT - 1)
outbound_logger.info(
"{%s} [%s] Sending request: %s %s",
txn_id, destination, method, url
)
# XXX: Would be much nicer to retry only at the transaction-layer # XXX: Would be much nicer to retry only at the transaction-layer
# (once we have reliable transactions in place) # (once we have reliable transactions in place)
if long_retries: if long_retries:
@ -194,85 +189,89 @@ class MatrixFederationHttpClient(object):
).decode('ascii') ).decode('ascii')
log_result = None log_result = None
try: while True:
while True: try:
try: if json_callback:
if json_callback: json = json_callback()
json = json_callback()
if json: if json:
data = encode_canonical_json(json) data = encode_canonical_json(json)
headers_dict["Content-Type"] = ["application/json"] headers_dict["Content-Type"] = ["application/json"]
self.sign_request( self.sign_request(
destination, method, http_url, headers_dict, json destination, method, http_url, headers_dict, json
)
else:
data = None
self.sign_request(destination, method, http_url, headers_dict)
request_deferred = treq.request(
method,
url,
headers=Headers(headers_dict),
data=data,
agent=self.agent,
)
add_timeout_to_deferred(
request_deferred,
timeout / 1000. if timeout else 60,
self.hs.get_reactor(),
cancelled_to_request_timed_out_error,
)
response = yield make_deferred_yieldable(
request_deferred,
) )
else:
data = None
self.sign_request(destination, method, http_url, headers_dict)
log_result = "%d %s" % (response.code, response.phrase,) outbound_logger.info(
break "{%s} [%s] Sending request: %s %s",
except Exception as e: txn_id, destination, method, url
if not retry_on_dns_fail and isinstance(e, DNSLookupError): )
logger.warn(
"DNS Lookup failed to %s with %s",
destination,
e
)
log_result = "DNS Lookup failed to %s with %s" % (
destination, e
)
raise
request_deferred = treq.request(
method,
url,
headers=Headers(headers_dict),
data=data,
agent=self.agent,
)
add_timeout_to_deferred(
request_deferred,
timeout / 1000. if timeout else 60,
self.hs.get_reactor(),
cancelled_to_request_timed_out_error,
)
response = yield make_deferred_yieldable(
request_deferred,
)
log_result = "%d %s" % (response.code, response.phrase,)
break
except Exception as e:
if not retry_on_dns_fail and isinstance(e, DNSLookupError):
logger.warn( logger.warn(
"{%s} Sending request failed to %s: %s %s: %s", "DNS Lookup failed to %s with %s",
txn_id,
destination, destination,
method, e
url,
_flatten_response_never_received(e),
) )
log_result = "DNS Lookup failed to %s with %s" % (
destination, e
)
raise
log_result = _flatten_response_never_received(e) logger.warn(
"{%s} Sending request failed to %s: %s %s: %s",
txn_id,
destination,
method,
url,
_flatten_response_never_received(e),
)
if retries_left and not timeout: log_result = _flatten_response_never_received(e)
if long_retries:
delay = 4 ** (MAX_LONG_RETRIES + 1 - retries_left)
delay = min(delay, 60)
delay *= random.uniform(0.8, 1.4)
else:
delay = 0.5 * 2 ** (MAX_SHORT_RETRIES - retries_left)
delay = min(delay, 2)
delay *= random.uniform(0.8, 1.4)
yield self.clock.sleep(delay) if retries_left and not timeout:
retries_left -= 1 if long_retries:
delay = 4 ** (MAX_LONG_RETRIES + 1 - retries_left)
delay = min(delay, 60)
delay *= random.uniform(0.8, 1.4)
else: else:
raise delay = 0.5 * 2 ** (MAX_SHORT_RETRIES - retries_left)
finally: delay = min(delay, 2)
outbound_logger.info( delay *= random.uniform(0.8, 1.4)
"{%s} [%s] Result: %s",
txn_id, yield self.clock.sleep(delay)
destination, retries_left -= 1
log_result, else:
) raise
finally:
outbound_logger.info(
"{%s} [%s] Result: %s",
txn_id,
destination,
log_result,
)
if 200 <= response.code < 300: if 200 <= response.code < 300:
pass pass