From 9187e0762f0b4f028d15fac4502e458f513d6642 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Mar 2018 10:02:32 +0100 Subject: [PATCH 01/12] count_daily_users failed if db was sqlite due to type failure - presumably this prevcented all sqlite homeservers reporting home --- synapse/storage/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index de00cae44..b97e5e5ff 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -260,7 +260,7 @@ class DataStore(RoomMemberStore, RoomStore, ) u """ - txn.execute(sql, (yesterday,)) + txn.execute(sql, (str(yesterday),)) count, = txn.fetchone() return count From a32d2548d986f7075e8310184ce0b70c69513a02 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Mar 2018 10:39:13 +0100 Subject: [PATCH 02/12] query and call for r30 stats --- synapse/app/homeserver.py | 2 ++ synapse/storage/__init__.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index c00afbba2..8bce9f1ac 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -425,6 +425,8 @@ def run(hs): stats["daily_active_rooms"] = yield hs.get_datastore().count_daily_active_rooms() stats["daily_messages"] = yield hs.get_datastore().count_daily_messages() + stats["r30_users"] = yield hs.get_datastore().count_r30_users() + daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() stats["daily_sent_messages"] = daily_sent_messages diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index b97e5e5ff..10f99c3cd 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -267,6 +267,42 @@ class DataStore(RoomMemberStore, RoomStore, ret = yield self.runInteraction("count_users", _count_users) defer.returnValue(ret) + @defer.inlineCallbacks + def count_r30_users(self): + """ + Counts the number of 30 day retained users, defined as:- + * Users who have created their accounts more than 30 days + * Where last seen at most 30 days ago + * Where account creation and last_seen are > 30 days + """ + def _count_r30_users(txn): + thirty_days_in_secs = 86400 * 30 + now = int(self._clock.time_msec()) + thirty_days_ago_in_secs = now - thirty_days_in_secs + + sql = """ + SELECT COALESCE(count(*), 0) FROM ( + SELECT users.name, users.creation_ts * 1000, MAX(user_ips.last_seen) + FROM users, user_ips + WHERE users.name = user_ips.user_id + AND appservice_id is NULL + AND users.creation_ts < ? + AND user_ips.last_seen/1000 > ? + AND (user_ips.last_seen/1000) - users.creation_ts > ? + GROUP BY users.name, users.creation_ts + ) u + """ + + txn.execute(sql, (thirty_days_ago_in_secs, + thirty_days_ago_in_secs, + thirty_days_in_secs)) + + count, = txn.fetchone() + return count + + ret = yield self.runInteraction("count_r30_users", _count_r30_users) + defer.returnValue(ret) + def get_users(self): """Function to reterive a list of users in users table. From 0f890f477eb2ed03b8fd48710d1960210f44a334 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Mar 2018 11:49:57 +0100 Subject: [PATCH 03/12] No need to cast in count_daily_users --- synapse/storage/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 10f99c3cd..ba43b2d8e 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -250,7 +250,7 @@ class DataStore(RoomMemberStore, RoomStore, Counts the number of users who used this homeserver in the last 24 hours. """ def _count_users(txn): - yesterday = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24), + yesterday = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24) sql = """ SELECT COALESCE(count(*), 0) FROM ( @@ -260,7 +260,7 @@ class DataStore(RoomMemberStore, RoomStore, ) u """ - txn.execute(sql, (str(yesterday),)) + txn.execute(sql, (yesterday,)) count, = txn.fetchone() return count From 788e69098c93f2433ef907015666c624bb39318f Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Mar 2018 12:03:13 +0100 Subject: [PATCH 04/12] Add user_ips last seen index --- synapse/storage/client_ips.py | 7 +++++++ .../delta/48/add_user_ips_last_seen_index.sql | 17 +++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 synapse/storage/schema/delta/48/add_user_ips_last_seen_index.sql diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index a03d1d610..7b44dae0f 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -48,6 +48,13 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): columns=["user_id", "device_id", "last_seen"], ) + self.register_background_index_update( + "user_ips_last_seen_index", + index_name="user_ips_last_seen", + table="user_ips", + columns=["user_id", "last_seen"], + ) + # (user_id, access_token, ip) -> (user_agent, device_id, last_seen) self._batch_row_update = {} diff --git a/synapse/storage/schema/delta/48/add_user_ips_last_seen_index.sql b/synapse/storage/schema/delta/48/add_user_ips_last_seen_index.sql new file mode 100644 index 000000000..9248b0b24 --- /dev/null +++ b/synapse/storage/schema/delta/48/add_user_ips_last_seen_index.sql @@ -0,0 +1,17 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT into background_updates (update_name, progress_json) + VALUES ('user_ips_last_seen_index', '{}'); From 792d340572026becf48fe73421f0b73cf575fe46 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Mar 2018 12:25:02 +0100 Subject: [PATCH 05/12] rename stat to future proof --- synapse/app/homeserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 8bce9f1ac..286f4dcf7 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -425,7 +425,7 @@ def run(hs): stats["daily_active_rooms"] = yield hs.get_datastore().count_daily_active_rooms() stats["daily_messages"] = yield hs.get_datastore().count_daily_messages() - stats["r30_users"] = yield hs.get_datastore().count_r30_users() + stats["r30_users_all"] = yield hs.get_datastore().count_r30_users() daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() stats["daily_sent_messages"] = daily_sent_messages From 86932be2cb1837688d154ff78fb6418f78483133 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Mar 2018 14:36:53 +0100 Subject: [PATCH 06/12] Support multi client R30 for psql --- synapse/app/homeserver.py | 4 +++- synapse/storage/__init__.py | 34 +++++++++++++++++++++++++++------- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 286f4dcf7..35e2b00f1 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -425,7 +425,9 @@ def run(hs): stats["daily_active_rooms"] = yield hs.get_datastore().count_daily_active_rooms() stats["daily_messages"] = yield hs.get_datastore().count_daily_messages() - stats["r30_users_all"] = yield hs.get_datastore().count_r30_users() + r30_results = yield hs.get_datastore().count_r30_users() + for name, count in r30_results.items(): + stats["r30_users_" + name] = count daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() stats["daily_sent_messages"] = daily_sent_messages diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index ba43b2d8e..b651973c7 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -280,6 +280,15 @@ class DataStore(RoomMemberStore, RoomStore, now = int(self._clock.time_msec()) thirty_days_ago_in_secs = now - thirty_days_in_secs + # Are these filters sufficiently robust? + filters = { + "ALL": "", + "IOS": "^(Vector|Riot|Riot\.im)\/.* iOS", + "ANDROID": "^(Dalvik|Riot|Riot\.im)\/.* Android", + "ELECTRON": "Electron", + "WEB": "(Gecko|Mozilla)", + } + sql = """ SELECT COALESCE(count(*), 0) FROM ( SELECT users.name, users.creation_ts * 1000, MAX(user_ips.last_seen) @@ -289,16 +298,27 @@ class DataStore(RoomMemberStore, RoomStore, AND users.creation_ts < ? AND user_ips.last_seen/1000 > ? AND (user_ips.last_seen/1000) - users.creation_ts > ? - GROUP BY users.name, users.creation_ts - ) u """ - txn.execute(sql, (thirty_days_ago_in_secs, - thirty_days_ago_in_secs, - thirty_days_in_secs)) + if isinstance(self.database_engine, PostgresEngine): + sql = sql + "AND user_ips.user_agent ~ ? " + sql = sql + "GROUP BY users.name, users.creation_ts ) u" - count, = txn.fetchone() - return count + results = {} + if isinstance(self.database_engine, PostgresEngine): + for filter_name, user_agent_filter in filters.items(): + txn.execute(sql, (thirty_days_ago_in_secs, + thirty_days_ago_in_secs, + thirty_days_in_secs, + user_agent_filter)) + results[filter_name], = txn.fetchone() + + else: + txn.execute(sql, (thirty_days_ago_in_secs, + thirty_days_ago_in_secs, + thirty_days_in_secs)) + results["ALL"], = txn.fetchone() + return results ret = yield self.runInteraction("count_r30_users", _count_r30_users) defer.returnValue(ret) From 4262aba17b643bc82c5cce92298dac0a27b2727c Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Mar 2018 14:40:03 +0100 Subject: [PATCH 07/12] bump schema version --- synapse/storage/prepare_database.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index c845a0cec..68675e15d 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 47 +SCHEMA_VERSION = 48 dir_path = os.path.abspath(os.path.dirname(__file__)) From 241e4e86873d5880f564791e3768247fa55c3fa8 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Mar 2018 16:25:53 +0100 Subject: [PATCH 08/12] remove twisted deferral cruft --- synapse/storage/__init__.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index b651973c7..b2b85e266 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -244,7 +244,6 @@ class DataStore(RoomMemberStore, RoomStore, return [UserPresenceState(**row) for row in rows] - @defer.inlineCallbacks def count_daily_users(self): """ Counts the number of users who used this homeserver in the last 24 hours. @@ -264,10 +263,9 @@ class DataStore(RoomMemberStore, RoomStore, count, = txn.fetchone() return count - ret = yield self.runInteraction("count_users", _count_users) - defer.returnValue(ret) + return self.runInteraction("count_users", _count_users) + - @defer.inlineCallbacks def count_r30_users(self): """ Counts the number of 30 day retained users, defined as:- @@ -320,8 +318,7 @@ class DataStore(RoomMemberStore, RoomStore, results["ALL"], = txn.fetchone() return results - ret = yield self.runInteraction("count_r30_users", _count_r30_users) - defer.returnValue(ret) + return self.runInteraction("count_r30_users", _count_r30_users) def get_users(self): """Function to reterive a list of users in users table. From dc7c020b33dc9606089fa66fdec2dacb7f807f6d Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Mar 2018 17:25:15 +0100 Subject: [PATCH 09/12] fix pep8 errors --- synapse/storage/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index b2b85e266..70c617140 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -14,8 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from twisted.internet import defer - from synapse.storage.devices import DeviceStore from .appservice import ( ApplicationServiceStore, ApplicationServiceTransactionStore @@ -265,7 +263,6 @@ class DataStore(RoomMemberStore, RoomStore, return self.runInteraction("count_users", _count_users) - def count_r30_users(self): """ Counts the number of 30 day retained users, defined as:- From 9ee44a372d4fcf6a461b610230a285610613e8ac Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 29 Mar 2018 16:45:34 +0100 Subject: [PATCH 10/12] Remove need for sqlite specific query --- synapse/storage/__init__.py | 85 ++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 29 deletions(-) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 70c617140..0b4693041 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -269,50 +269,77 @@ class DataStore(RoomMemberStore, RoomStore, * Users who have created their accounts more than 30 days * Where last seen at most 30 days ago * Where account creation and last_seen are > 30 days + + Returns counts globaly for a given user as well as breaking + by platform """ def _count_r30_users(txn): thirty_days_in_secs = 86400 * 30 now = int(self._clock.time_msec()) thirty_days_ago_in_secs = now - thirty_days_in_secs - # Are these filters sufficiently robust? - filters = { - "ALL": "", - "IOS": "^(Vector|Riot|Riot\.im)\/.* iOS", - "ANDROID": "^(Dalvik|Riot|Riot\.im)\/.* Android", - "ELECTRON": "Electron", - "WEB": "(Gecko|Mozilla)", - } + sql = """ + SELECT platform, COALESCE(count(*), 0) FROM ( + SELECT users.name, platform, users.creation_ts * 1000, MAX(uip.last_seen) + FROM users + INNER JOIN ( + SELECT + user_id, + last_seen, + CASE + WHEN user_agent LIKE '%Android%' THEN 'android' + WHEN user_agent LIKE '%iOS%' THEN 'ios' + WHEN user_agent LIKE '%Electron%' THEN 'electron' + WHEN user_agent LIKE '%Mozilla%' THEN 'web' + WHEN user_agent LIKE '%Gecko%' THEN 'web' + ELSE 'unknown' + END + AS platform + FROM user_ips + ) uip + ON users.name = uip.user_id + AND users.appservice_id is NULL + AND users.creation_ts < ? + AND uip.last_seen/1000 > ? + AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30 + GROUP BY users.name, platform, users.creation_ts + ) u GROUP BY platform + """ + + results = {} + txn.execute(sql, (thirty_days_ago_in_secs, + thirty_days_ago_in_secs)) + rows = txn.fetchall() + for row in rows: + if row[0] is 'unknown': + pass + results[row[0]] = row[1] sql = """ SELECT COALESCE(count(*), 0) FROM ( - SELECT users.name, users.creation_ts * 1000, MAX(user_ips.last_seen) - FROM users, user_ips - WHERE users.name = user_ips.user_id + SELECT users.name, users.creation_ts * 1000, MAX(uip.last_seen) + FROM users + INNER JOIN ( + SELECT + user_id, + last_seen + FROM user_ips + ) uip + ON users.name = uip.user_id AND appservice_id is NULL AND users.creation_ts < ? - AND user_ips.last_seen/1000 > ? - AND (user_ips.last_seen/1000) - users.creation_ts > ? + AND uip.last_seen/1000 > ? + AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30 + GROUP BY users.name, users.creation_ts + ) u """ - if isinstance(self.database_engine, PostgresEngine): - sql = sql + "AND user_ips.user_agent ~ ? " - sql = sql + "GROUP BY users.name, users.creation_ts ) u" + txn.execute(sql, (thirty_days_ago_in_secs, + thirty_days_ago_in_secs)) - results = {} - if isinstance(self.database_engine, PostgresEngine): - for filter_name, user_agent_filter in filters.items(): - txn.execute(sql, (thirty_days_ago_in_secs, - thirty_days_ago_in_secs, - thirty_days_in_secs, - user_agent_filter)) - results[filter_name], = txn.fetchone() + count, = txn.fetchone() + results['all'] = count - else: - txn.execute(sql, (thirty_days_ago_in_secs, - thirty_days_ago_in_secs, - thirty_days_in_secs)) - results["ALL"], = txn.fetchone() return results return self.runInteraction("count_r30_users", _count_r30_users) From b4e37c6f50b91dd0ea90c773185884659e3a738a Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 29 Mar 2018 17:27:39 +0100 Subject: [PATCH 11/12] pep8 --- synapse/storage/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 0b4693041..f68e436df 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -280,7 +280,8 @@ class DataStore(RoomMemberStore, RoomStore, sql = """ SELECT platform, COALESCE(count(*), 0) FROM ( - SELECT users.name, platform, users.creation_ts * 1000, MAX(uip.last_seen) + SELECT users.name, platform, users.creation_ts * 1000, + MAX(uip.last_seen) FROM users INNER JOIN ( SELECT @@ -317,7 +318,8 @@ class DataStore(RoomMemberStore, RoomStore, sql = """ SELECT COALESCE(count(*), 0) FROM ( - SELECT users.name, users.creation_ts * 1000, MAX(uip.last_seen) + SELECT users.name, users.creation_ts * 1000, + MAX(uip.last_seen) FROM users INNER JOIN ( SELECT From 0e5f479fc05ef9257c1bfce033c8fb91e6244ffe Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 5 Apr 2018 12:16:46 +0100 Subject: [PATCH 12/12] Review comments Use iteritems over item to loop over dict formatting --- synapse/app/homeserver.py | 2 +- synapse/storage/__init__.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 35e2b00f1..777e9c529 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -426,7 +426,7 @@ def run(hs): stats["daily_messages"] = yield hs.get_datastore().count_daily_messages() r30_results = yield hs.get_datastore().count_r30_users() - for name, count in r30_results.items(): + for name, count in r30_results.iteritems(): stats["r30_users_" + name] = count daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index f68e436df..4800584b5 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -280,8 +280,9 @@ class DataStore(RoomMemberStore, RoomStore, sql = """ SELECT platform, COALESCE(count(*), 0) FROM ( - SELECT users.name, platform, users.creation_ts * 1000, - MAX(uip.last_seen) + SELECT + users.name, platform, users.creation_ts * 1000, + MAX(uip.last_seen) FROM users INNER JOIN ( SELECT @@ -310,8 +311,8 @@ class DataStore(RoomMemberStore, RoomStore, results = {} txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs)) - rows = txn.fetchall() - for row in rows: + + for row in txn: if row[0] is 'unknown': pass results[row[0]] = row[1]