Add 'failure_ts' column to 'destinations' table (#6016)

Track the time that a server started failing at, for general analysis purposes.
This commit is contained in:
Richard van der Hoff 2019-09-17 11:41:54 +01:00 committed by GitHub
parent 850dcfd2d3
commit 1e19ce00bf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 195 additions and 12 deletions

View file

@ -0,0 +1,25 @@
/* Copyright 2019 The Matrix.org Foundation C.I.C
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Record the timestamp when a given server started failing
*/
ALTER TABLE destinations ADD failure_ts BIGINT;
/* as a rough approximation, we assume that the server started failing at
* retry_interval before the last retry
*/
UPDATE destinations SET failure_ts = retry_last_ts - retry_interval
WHERE retry_last_ts > 0;

View file

@ -165,7 +165,7 @@ class TransactionStore(SQLBaseStore):
txn,
table="destinations",
keyvalues={"destination": destination},
retcols=("destination", "retry_last_ts", "retry_interval"),
retcols=("destination", "failure_ts", "retry_last_ts", "retry_interval"),
allow_none=True,
)
@ -174,12 +174,15 @@ class TransactionStore(SQLBaseStore):
else:
return None
def set_destination_retry_timings(self, destination, retry_last_ts, retry_interval):
def set_destination_retry_timings(
self, destination, failure_ts, retry_last_ts, retry_interval
):
"""Sets the current retry timings for a given destination.
Both timings should be zero if retrying is no longer occuring.
Args:
destination (str)
failure_ts (int|None) - when the server started failing (ms since epoch)
retry_last_ts (int) - time of last retry attempt in unix epoch ms
retry_interval (int) - how long until next retry in ms
"""
@ -189,12 +192,13 @@ class TransactionStore(SQLBaseStore):
"set_destination_retry_timings",
self._set_destination_retry_timings,
destination,
failure_ts,
retry_last_ts,
retry_interval,
)
def _set_destination_retry_timings(
self, txn, destination, retry_last_ts, retry_interval
self, txn, destination, failure_ts, retry_last_ts, retry_interval
):
if self.database_engine.can_native_upsert:
@ -202,9 +206,12 @@ class TransactionStore(SQLBaseStore):
# resetting it) or greater than the existing retry interval.
sql = """
INSERT INTO destinations (destination, retry_last_ts, retry_interval)
VALUES (?, ?, ?)
INSERT INTO destinations (
destination, failure_ts, retry_last_ts, retry_interval
)
VALUES (?, ?, ?, ?)
ON CONFLICT (destination) DO UPDATE SET
failure_ts = EXCLUDED.failure_ts,
retry_last_ts = EXCLUDED.retry_last_ts,
retry_interval = EXCLUDED.retry_interval
WHERE
@ -212,7 +219,7 @@ class TransactionStore(SQLBaseStore):
OR destinations.retry_interval < EXCLUDED.retry_interval
"""
txn.execute(sql, (destination, retry_last_ts, retry_interval))
txn.execute(sql, (destination, failure_ts, retry_last_ts, retry_interval))
return
@ -225,7 +232,7 @@ class TransactionStore(SQLBaseStore):
txn,
table="destinations",
keyvalues={"destination": destination},
retcols=("retry_last_ts", "retry_interval"),
retcols=("failure_ts", "retry_last_ts", "retry_interval"),
allow_none=True,
)
@ -235,6 +242,7 @@ class TransactionStore(SQLBaseStore):
table="destinations",
values={
"destination": destination,
"failure_ts": failure_ts,
"retry_last_ts": retry_last_ts,
"retry_interval": retry_interval,
},
@ -245,6 +253,7 @@ class TransactionStore(SQLBaseStore):
"destinations",
keyvalues={"destination": destination},
updatevalues={
"failure_ts": failure_ts,
"retry_last_ts": retry_last_ts,
"retry_interval": retry_interval,
},