Merge branch 'erikj/fixup_devices_last_seen_query' of github.com:matrix-org/synapse into develop

This commit is contained in:
Erik Johnston 2019-10-01 10:17:24 +01:00
commit 1d349fb159
8 changed files with 60 additions and 11 deletions

View File

@ -2,7 +2,7 @@ Synapse 1.4.0rc1 (2019-09-26)
=============================
Note that this release includes significant changes around 3pid
verification. Administrators are reminded to review the [upgrade notes](UPGRADE.rst##upgrading-to-v140).
verification. Administrators are reminded to review the [upgrade notes](UPGRADE.rst#upgrading-to-v140).
Features
--------
@ -48,7 +48,7 @@ Features
- Let synctl accept a directory of config files. ([\#5904](https://github.com/matrix-org/synapse/issues/5904))
- Increase max display name size to 256. ([\#5906](https://github.com/matrix-org/synapse/issues/5906))
- Add admin API endpoint for getting whether or not a user is a server administrator. ([\#5914](https://github.com/matrix-org/synapse/issues/5914))
- Redact events in the database that have been redacted for a month. ([\#5934](https://github.com/matrix-org/synapse/issues/5934))
- Redact events in the database that have been redacted for a week. ([\#5934](https://github.com/matrix-org/synapse/issues/5934))
- New prometheus metrics:
- `synapse_federation_known_servers`: represents the total number of servers your server knows about (i.e. is in rooms with), including itself. Enable by setting `metrics_flags.known_servers` to True in the configuration.([\#5981](https://github.com/matrix-org/synapse/issues/5981))
- `synapse_build_info`: exposes the Python version, OS version, and Synapse version of the running server. ([\#6005](https://github.com/matrix-org/synapse/issues/6005))

1
changelog.d/6117.misc Normal file
View File

@ -0,0 +1 @@
Fix up sample config entry for `redaction_retention_period` option.

1
changelog.d/6135.bugfix Normal file
View File

@ -0,0 +1 @@
Fix bug in background update that adds last seen information to the `devices` table, and improve its performance on Postgres.

View File

@ -314,7 +314,7 @@ listeners:
#
# Defaults to `7d`. Set to `null` to disable.
#
redaction_retention_period: 7d
#redaction_retention_period: 28d
# How long to track users' last seen time and IPs in the database.
#

View File

@ -742,7 +742,7 @@ class ServerConfig(Config):
#
# Defaults to `7d`. Set to `null` to disable.
#
redaction_retention_period: 7d
#redaction_retention_period: 28d
# How long to track users' last seen time and IPs in the database.
#

View File

@ -463,14 +463,46 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
last_device_id = progress.get("last_device_id", "")
def _devices_last_seen_update_txn(txn):
# This consists of two queries:
#
# 1. The sub-query searches for the next N devices and joins
# against user_ips to find the max last_seen associated with
# that device.
# 2. The outer query then joins again against user_ips on
# user/device/last_seen. This *should* hopefully only
# return one row, but if it does return more than one then
# we'll just end up updating the same device row multiple
# times, which is fine.
if self.database_engine.supports_tuple_comparison:
where_clause = "(user_id, device_id) > (?, ?)"
where_args = [last_user_id, last_device_id]
else:
# We explicitly do a `user_id >= ? AND (...)` here to ensure
# that an index is used, as doing `user_id > ? OR (user_id = ? AND ...)`
# makes it hard for query optimiser to tell that it can use the
# index on user_id
where_clause = "user_id >= ? AND (user_id > ? OR device_id > ?)"
where_args = [last_user_id, last_user_id, last_device_id]
sql = """
SELECT u.last_seen, u.ip, u.user_agent, user_id, device_id FROM devices
INNER JOIN user_ips AS u USING (user_id, device_id)
WHERE user_id > ? OR (user_id = ? AND device_id > ?)
ORDER BY user_id ASC, device_id ASC
LIMIT ?
"""
txn.execute(sql, (last_user_id, last_user_id, last_device_id, batch_size))
SELECT
last_seen, ip, user_agent, user_id, device_id
FROM (
SELECT
user_id, device_id, MAX(u.last_seen) AS last_seen
FROM devices
INNER JOIN user_ips AS u USING (user_id, device_id)
WHERE %(where_clause)s
GROUP BY user_id, device_id
ORDER BY user_id ASC, device_id ASC
LIMIT ?
) c
INNER JOIN user_ips AS u USING (user_id, device_id, last_seen)
""" % {
"where_clause": where_clause
}
txn.execute(sql, where_args + [batch_size])
rows = txn.fetchall()
if not rows:

View File

@ -72,6 +72,13 @@ class PostgresEngine(object):
"""
return True
@property
def supports_tuple_comparison(self):
"""
Do we support comparing tuples, i.e. `(a, b) > (c, d)`?
"""
return True
def is_deadlock(self, error):
if isinstance(error, self.module.DatabaseError):
# https://www.postgresql.org/docs/current/static/errcodes-appendix.html

View File

@ -38,6 +38,14 @@ class Sqlite3Engine(object):
"""
return self.module.sqlite_version_info >= (3, 24, 0)
@property
def supports_tuple_comparison(self):
"""
Do we support comparing tuples, i.e. `(a, b) > (c, d)`? This requires
SQLite 3.15+.
"""
return self.module.sqlite_version_info >= (3, 15, 0)
def check_database(self, txn):
pass