Fix "could not serialize access" errors for claim_e2e_one_time_keys (#10504)

This commit is contained in:
Erik Johnston 2021-08-04 13:09:04 +01:00 committed by GitHub
parent c2000ab35b
commit 11540be55e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 128 additions and 63 deletions

1
changelog.d/10504.misc Normal file
View File

@ -0,0 +1 @@
Reduce errors in PostgreSQL logs due to concurrent serialization errors.

View File

@ -755,81 +755,145 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore):
""" """
@trace @trace
def _claim_e2e_one_time_keys(txn): def _claim_e2e_one_time_key_simple(
sql = ( txn, user_id: str, device_id: str, algorithm: str
"SELECT key_id, key_json FROM e2e_one_time_keys_json" ) -> Optional[Tuple[str, str]]:
" WHERE user_id = ? AND device_id = ? AND algorithm = ?" """Claim OTK for device for DBs that don't support RETURNING.
" LIMIT 1"
) Returns:
fallback_sql = ( A tuple of key name (algorithm + key ID) and key JSON, if an
"SELECT key_id, key_json, used FROM e2e_fallback_keys_json" OTK was found.
" WHERE user_id = ? AND device_id = ? AND algorithm = ?" """
" LIMIT 1"
) sql = """
result = {} SELECT key_id, key_json FROM e2e_one_time_keys_json
delete = [] WHERE user_id = ? AND device_id = ? AND algorithm = ?
used_fallbacks = [] LIMIT 1
for user_id, device_id, algorithm in query_list: """
user_result = result.setdefault(user_id, {})
device_result = user_result.setdefault(device_id, {})
txn.execute(sql, (user_id, device_id, algorithm)) txn.execute(sql, (user_id, device_id, algorithm))
otk_row = txn.fetchone() otk_row = txn.fetchone()
if otk_row is not None: if otk_row is None:
key_id, key_json = otk_row return None
device_result[algorithm + ":" + key_id] = key_json
delete.append((user_id, device_id, algorithm, key_id))
else:
# no one-time key available, so see if there's a fallback
# key
txn.execute(fallback_sql, (user_id, device_id, algorithm))
fallback_row = txn.fetchone()
if fallback_row is not None:
key_id, key_json, used = fallback_row
device_result[algorithm + ":" + key_id] = key_json
if not used:
used_fallbacks.append(
(user_id, device_id, algorithm, key_id)
)
# drop any one-time keys that were claimed key_id, key_json = otk_row
sql = (
"DELETE FROM e2e_one_time_keys_json" self.db_pool.simple_delete_one_txn(
" WHERE user_id = ? AND device_id = ? AND algorithm = ?"
" AND key_id = ?"
)
for user_id, device_id, algorithm, key_id in delete:
log_kv(
{
"message": "Executing claim e2e_one_time_keys transaction on database."
}
)
txn.execute(sql, (user_id, device_id, algorithm, key_id))
log_kv({"message": "finished executing and invalidating cache"})
self._invalidate_cache_and_stream(
txn, self.count_e2e_one_time_keys, (user_id, device_id)
)
# mark fallback keys as used
for user_id, device_id, algorithm, key_id in used_fallbacks:
self.db_pool.simple_update_txn(
txn, txn,
"e2e_fallback_keys_json", table="e2e_one_time_keys_json",
{ keyvalues={
"user_id": user_id, "user_id": user_id,
"device_id": device_id, "device_id": device_id,
"algorithm": algorithm, "algorithm": algorithm,
"key_id": key_id, "key_id": key_id,
}, },
{"used": True},
) )
self._invalidate_cache_and_stream( self._invalidate_cache_and_stream(
txn, self.get_e2e_unused_fallback_key_types, (user_id, device_id) txn, self.count_e2e_one_time_keys, (user_id, device_id)
) )
return result return f"{algorithm}:{key_id}", key_json
return await self.db_pool.runInteraction( @trace
"claim_e2e_one_time_keys", _claim_e2e_one_time_keys def _claim_e2e_one_time_key_returning(
txn, user_id: str, device_id: str, algorithm: str
) -> Optional[Tuple[str, str]]:
"""Claim OTK for device for DBs that support RETURNING.
Returns:
A tuple of key name (algorithm + key ID) and key JSON, if an
OTK was found.
"""
# We can use RETURNING to do the fetch and DELETE in one step.
sql = """
DELETE FROM e2e_one_time_keys_json
WHERE user_id = ? AND device_id = ? AND algorithm = ?
AND key_id IN (
SELECT key_id FROM e2e_one_time_keys_json
WHERE user_id = ? AND device_id = ? AND algorithm = ?
LIMIT 1
) )
RETURNING key_id, key_json
"""
txn.execute(
sql, (user_id, device_id, algorithm, user_id, device_id, algorithm)
)
otk_row = txn.fetchone()
if otk_row is None:
return None
key_id, key_json = otk_row
return f"{algorithm}:{key_id}", key_json
results = {}
for user_id, device_id, algorithm in query_list:
if self.database_engine.supports_returning:
# If we support RETURNING clause we can use a single query that
# allows us to use autocommit mode.
_claim_e2e_one_time_key = _claim_e2e_one_time_key_returning
db_autocommit = True
else:
_claim_e2e_one_time_key = _claim_e2e_one_time_key_simple
db_autocommit = False
row = await self.db_pool.runInteraction(
"claim_e2e_one_time_keys",
_claim_e2e_one_time_key,
user_id,
device_id,
algorithm,
db_autocommit=db_autocommit,
)
if row:
device_results = results.setdefault(user_id, {}).setdefault(
device_id, {}
)
device_results[row[0]] = row[1]
continue
# No one-time key available, so see if there's a fallback
# key
row = await self.db_pool.simple_select_one(
table="e2e_fallback_keys_json",
keyvalues={
"user_id": user_id,
"device_id": device_id,
"algorithm": algorithm,
},
retcols=("key_id", "key_json", "used"),
desc="_get_fallback_key",
allow_none=True,
)
if row is None:
continue
key_id = row["key_id"]
key_json = row["key_json"]
used = row["used"]
# Mark fallback key as used if not already.
if not used:
await self.db_pool.simple_update_one(
table="e2e_fallback_keys_json",
keyvalues={
"user_id": user_id,
"device_id": device_id,
"algorithm": algorithm,
"key_id": key_id,
},
updatevalues={"used": True},
desc="_get_fallback_key_set_used",
)
await self.invalidate_cache_and_stream(
"get_e2e_unused_fallback_key_types", (user_id, device_id)
)
device_results = results.setdefault(user_id, {}).setdefault(device_id, {})
device_results[f"{algorithm}:{key_id}"] = key_json
return results
class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore):