From 367158a609d18b6dbd143f8bee0529e743d5b5a4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Sep 2019 14:16:16 +0100 Subject: [PATCH 1/7] Add wrap_as_background_process decorator. This does the same thing as `run_as_background_process` but means we don't need to create superfluous functions. --- synapse/metrics/background_process_metrics.py | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index edd6b42db..b24e2fab4 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -15,6 +15,8 @@ import logging import threading +from asyncio import iscoroutine +from functools import wraps import six @@ -197,7 +199,15 @@ def run_as_background_process(desc, func, *args, **kwargs): _background_processes.setdefault(desc, set()).add(proc) try: - yield func(*args, **kwargs) + # We ensureDeferred here to handle coroutines + result = func(*args, **kwargs) + + # We need this check because ensureDeferred doesn't like when + # func doesn't return a Deferred or coroutine. + if iscoroutine(result): + result = defer.ensureDeferred(result) + + return (yield result) except Exception: logger.exception("Background process '%s' threw an exception", desc) finally: @@ -208,3 +218,20 @@ def run_as_background_process(desc, func, *args, **kwargs): with PreserveLoggingContext(): return run() + + +def wrap_as_background_process(desc): + """Decorator that wraps a function that gets called as a background + process. + + Equivalent of calling the function with `run_as_background_process` + """ + + def wrap_as_background_process_inner(func): + @wraps(func) + def wrap_as_background_process_inner_2(*args, **kwargs): + return run_as_background_process(desc, func, *args, **kwargs) + + return wrap_as_background_process_inner_2 + + return wrap_as_background_process_inner From 2135c198d17b41297511a2fc3b39551d160069b2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Sep 2019 14:18:31 +0100 Subject: [PATCH 2/7] Add has_completed_background_update This allows checking if a specific background update has completed. --- synapse/storage/background_updates.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index e5f0668f0..3fc25cd82 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -140,7 +140,7 @@ class BackgroundUpdateStore(SQLBaseStore): "background_updates", keyvalues=None, retcol="1", - desc="check_background_updates", + desc="has_completed_background_updates", ) if not updates: self._all_done = True @@ -148,6 +148,29 @@ class BackgroundUpdateStore(SQLBaseStore): return False + async def has_completed_background_update(self, update_name): + """Check if the given background update has finished running. + + Returns: + Deferred[bool] + """ + + if self._all_done: + return True + + if update_name in self._background_update_queue: + return False + + update_exists = await self._simple_select_one_onecol( + "background_updates", + keyvalues={"update_name": update_name}, + retcol="1", + desc="has_completed_background_update", + allow_none=True, + ) + + return not update_exists + @defer.inlineCallbacks def do_next_background_update(self, desired_duration_ms): """Does some amount of work on the next queued background update From 242017db8b7b57be28a019ecbba1619d75d54889 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Sep 2019 15:20:40 +0100 Subject: [PATCH 3/7] Prune rows in user_ips older than configured period Defaults to pruning everything older than 28d. --- docs/sample_config.yaml | 6 ++++ synapse/config/server.py | 13 ++++++++ synapse/storage/client_ips.py | 62 ++++++++++++++++++++++++++++++----- 3 files changed, 73 insertions(+), 8 deletions(-) diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 61d9f09a9..cc6035c83 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -313,6 +313,12 @@ listeners: # redaction_retention_period: 7d +# How long to track users' last seen time and IPs in the database. +# +# Defaults to `28d`. Set to `null` to disable. +# +#user_ips_max_age: 14d + ## TLS ## diff --git a/synapse/config/server.py b/synapse/config/server.py index 7f8d31595..655e7487a 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -172,6 +172,13 @@ class ServerConfig(Config): else: self.redaction_retention_period = None + # How long to keep entries in the `users_ips` table. + user_ips_max_age = config.get("user_ips_max_age", "28d") + if user_ips_max_age is not None: + self.user_ips_max_age = self.parse_duration(user_ips_max_age) + else: + self.user_ips_max_age = None + # Options to disable HS self.hs_disabled = config.get("hs_disabled", False) self.hs_disabled_message = config.get("hs_disabled_message", "") @@ -735,6 +742,12 @@ class ServerConfig(Config): # Defaults to `7d`. Set to `null` to disable. # redaction_retention_period: 7d + + # How long to track users' last seen time and IPs in the database. + # + # Defaults to `28d`. Set to `null` to disable. + # + #user_ips_max_age: 14d """ % locals() ) diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index a4e6d9dbe..176c812b1 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -19,7 +19,7 @@ from six import iteritems from twisted.internet import defer -from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.util.caches import CACHE_SIZE_FACTOR from . import background_updates @@ -42,6 +42,8 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): super(ClientIpStore, self).__init__(db_conn, hs) + self.user_ips_max_age = hs.config.user_ips_max_age + self.register_background_index_update( "user_ips_device_index", index_name="user_ips_device_id", @@ -100,6 +102,9 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): "before", "shutdown", self._update_client_ips_batch ) + if self.user_ips_max_age: + self._clock.looping_call(self._prune_old_user_ips, 5 * 1000) + @defer.inlineCallbacks def _remove_user_ip_nonunique(self, progress, batch_size): def f(conn): @@ -319,20 +324,19 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): self._batch_row_update[key] = (user_agent, device_id, now) + @wrap_as_background_process("update_client_ips") def _update_client_ips_batch(self): # If the DB pool has already terminated, don't try updating if not self.hs.get_db_pool().running: return - def update(): - to_update = self._batch_row_update - self._batch_row_update = {} - return self.runInteraction( - "_update_client_ips_batch", self._update_client_ips_batch_txn, to_update - ) + to_update = self._batch_row_update + self._batch_row_update = {} - return run_as_background_process("update_client_ips", update) + return self.runInteraction( + "_update_client_ips_batch", self._update_client_ips_batch_txn, to_update + ) def _update_client_ips_batch_txn(self, txn, to_update): if "user_ips" in self._unsafe_to_upsert_tables or ( @@ -496,3 +500,45 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): yield self._end_background_update("devices_last_seen") return updated + + @wrap_as_background_process("prune_old_user_ips") + async def _prune_old_user_ips(self): + """Removes entries in user IPs older than the configured period. + """ + + if not self.user_ips_max_age: + # Nothing to do + return + + if not await self.has_completed_background_update("devices_last_seen"): + # Only start pruning if we have finished populating the devices + # last seen info. + return + + # We do a slightly funky SQL delete to ensure we don't try and delete + # too much at once (as the table may be very large from before we + # started pruning). + # + # This works by finding the max last_seen that is less than the given + # time, but has no more than N rows before it, deleting all rows with + # a lesser last_seen time. (We COALESCE so that the sub-SELECT always + # returns exactly one row). + sql = """ + DELETE FROM user_ips + WHERE last_seen <= ( + SELECT COALESCE(MAX(last_seen), -1) + FROM ( + SELECT last_seen FROM user_ips + WHERE last_seen <= ? + ORDER BY last_seen ASC + LIMIT 5000 + ) AS u + ) + """ + + timestamp = self.clock.time_msec() - self.user_ips_max_age + + def _prune_old_user_ips_txn(txn): + txn.execute(sql, (timestamp,)) + + await self.runInteraction("_prune_old_user_ips", _prune_old_user_ips_txn) From faac453f08046ddf00b39b90ba255f774b75c253 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Sep 2019 15:51:42 +0100 Subject: [PATCH 4/7] Test that pruning of old user IPs works --- tests/storage/test_client_ips.py | 71 ++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index 76fe65b59..afac5dec7 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -279,6 +279,77 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): r, ) + def test_old_user_ips_pruned(self): + # First make sure we have completed all updates. + while not self.get_success(self.store.has_completed_background_updates()): + self.get_success(self.store.do_next_background_update(100), by=0.1) + + # Insert a user IP + user_id = "@user:id" + self.get_success( + self.store.insert_client_ip( + user_id, "access_token", "ip", "user_agent", "device_id" + ) + ) + + # Force persisting to disk + self.reactor.advance(200) + + # We should see that in the DB + result = self.get_success( + self.store._simple_select_list( + table="user_ips", + keyvalues={"user_id": user_id}, + retcols=["access_token", "ip", "user_agent", "device_id", "last_seen"], + desc="get_user_ip_and_agents", + ) + ) + + self.assertEqual( + result, + [ + { + "access_token": "access_token", + "ip": "ip", + "user_agent": "user_agent", + "device_id": "device_id", + "last_seen": 0, + } + ], + ) + + # Now advance by a couple of months + self.reactor.advance(60 * 24 * 60 * 60) + + # We should get no results. + result = self.get_success( + self.store._simple_select_list( + table="user_ips", + keyvalues={"user_id": user_id}, + retcols=["access_token", "ip", "user_agent", "device_id", "last_seen"], + desc="get_user_ip_and_agents", + ) + ) + + self.assertEqual(result, []) + + # But we should still get the correct values for the device + result = self.get_success( + self.store.get_last_client_ip_by_device(user_id, "device_id") + ) + + r = result[(user_id, "device_id")] + self.assertDictContainsSubset( + { + "user_id": user_id, + "device_id": "device_id", + "ip": "ip", + "user_agent": "user_agent", + "last_seen": 0, + }, + r, + ) + class ClientIpAuthTestCase(unittest.HomeserverTestCase): From f8b02c54207e5e99fcd95cb3e19a11423768e696 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Sep 2019 15:59:43 +0100 Subject: [PATCH 5/7] Newsfile --- changelog.d/6098.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6098.feature diff --git a/changelog.d/6098.feature b/changelog.d/6098.feature new file mode 100644 index 000000000..f3c693c06 --- /dev/null +++ b/changelog.d/6098.feature @@ -0,0 +1 @@ +Add support for pruning old rows in `user_ips` table. From 39b50ad42a8cf784e627959e9652589338121ccd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Sep 2019 17:22:33 +0100 Subject: [PATCH 6/7] Review comments --- docs/sample_config.yaml | 2 +- synapse/config/server.py | 2 +- synapse/storage/background_updates.py | 5 +---- synapse/storage/client_ips.py | 2 +- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index cc6035c83..7902d9ed6 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -315,7 +315,7 @@ redaction_retention_period: 7d # How long to track users' last seen time and IPs in the database. # -# Defaults to `28d`. Set to `null` to disable. +# Defaults to `28d`. Set to `null` to disable clearing out of old rows. # #user_ips_max_age: 14d diff --git a/synapse/config/server.py b/synapse/config/server.py index 655e7487a..f8b7b4bef 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -745,7 +745,7 @@ class ServerConfig(Config): # How long to track users' last seen time and IPs in the database. # - # Defaults to `28d`. Set to `null` to disable. + # Defaults to `28d`. Set to `null` to disable clearing out of old rows. # #user_ips_max_age: 14d """ diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 3fc25cd82..30788137a 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -148,11 +148,8 @@ class BackgroundUpdateStore(SQLBaseStore): return False - async def has_completed_background_update(self, update_name): + async def has_completed_background_update(self, update_name) -> bool: """Check if the given background update has finished running. - - Returns: - Deferred[bool] """ if self._all_done: diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 176c812b1..a4d40dfa1 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -506,7 +506,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): """Removes entries in user IPs older than the configured period. """ - if not self.user_ips_max_age: + if self.user_ips_max_age is None: # Nothing to do return From a96318127dc17ee102bcf90821d90b7e6079a85d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 25 Sep 2019 18:17:39 +0100 Subject: [PATCH 7/7] Update comments and docstring --- synapse/metrics/background_process_metrics.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index b24e2fab4..c53d2a0d4 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -175,7 +175,7 @@ def run_as_background_process(desc, func, *args, **kwargs): Args: desc (str): a description for this background process type - func: a function, which may return a Deferred + func: a function, which may return a Deferred or a coroutine args: positional args for func kwargs: keyword args for func @@ -199,11 +199,13 @@ def run_as_background_process(desc, func, *args, **kwargs): _background_processes.setdefault(desc, set()).add(proc) try: - # We ensureDeferred here to handle coroutines result = func(*args, **kwargs) - # We need this check because ensureDeferred doesn't like when - # func doesn't return a Deferred or coroutine. + # We probably don't have an ensureDeferred in our call stack to handle + # coroutine results, so we need to ensureDeferred here. + # + # But we need this check because ensureDeferred doesn't like being + # called on immediate values (as opposed to Deferreds or coroutines). if iscoroutine(result): result = defer.ensureDeferred(result)