Add a new version of the R30 phone-home metric, which removes a false impression of retention given by the old R30 metric (#10332)

Signed-off-by: Olivier Wilkinson (reivilibre) <olivier@librepush.net>
This commit is contained in:
reivilibre 2021-07-19 16:11:34 +01:00 committed by GitHub
parent 95e47b2e78
commit 4e340412c0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 416 additions and 5 deletions

View file

@ -316,6 +316,135 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore):
return await self.db_pool.runInteraction("count_r30_users", _count_r30_users)
async def count_r30v2_users(self) -> Dict[str, int]:
"""
Counts the number of 30 day retained users, defined as users that:
- Appear more than once in the past 60 days
- Have more than 30 days between the most and least recent appearances that
occurred in the past 60 days.
(This is the second version of this metric, hence R30'v2')
Returns:
A mapping from client type to the number of 30-day retained users for that client.
The dict keys are:
- "all" (a combined number of users across any and all clients)
- "android" (Element Android)
- "ios" (Element iOS)
- "electron" (Element Desktop)
- "web" (any web application -- it's not possible to distinguish Element Web here)
"""
def _count_r30v2_users(txn):
thirty_days_in_secs = 86400 * 30
now = int(self._clock.time())
sixty_days_ago_in_secs = now - 2 * thirty_days_in_secs
one_day_from_now_in_secs = now + 86400
# This is the 'per-platform' count.
sql = """
SELECT
client_type,
count(client_type)
FROM
(
SELECT
user_id,
CASE
WHEN
LOWER(user_agent) LIKE '%%riot%%' OR
LOWER(user_agent) LIKE '%%element%%'
THEN CASE
WHEN
LOWER(user_agent) LIKE '%%electron%%'
THEN 'electron'
WHEN
LOWER(user_agent) LIKE '%%android%%'
THEN 'android'
WHEN
LOWER(user_agent) LIKE '%%ios%%'
THEN 'ios'
ELSE 'unknown'
END
WHEN
LOWER(user_agent) LIKE '%%mozilla%%' OR
LOWER(user_agent) LIKE '%%gecko%%'
THEN 'web'
ELSE 'unknown'
END as client_type
FROM
user_daily_visits
WHERE
timestamp > ?
AND
timestamp < ?
GROUP BY
user_id,
client_type
HAVING
max(timestamp) - min(timestamp) > ?
) AS temp
GROUP BY
client_type
;
"""
# We initialise all the client types to zero, so we get an explicit
# zero if they don't appear in the query results
results = {"ios": 0, "android": 0, "web": 0, "electron": 0}
txn.execute(
sql,
(
sixty_days_ago_in_secs * 1000,
one_day_from_now_in_secs * 1000,
thirty_days_in_secs * 1000,
),
)
for row in txn:
if row[0] == "unknown":
continue
results[row[0]] = row[1]
# This is the 'all users' count.
sql = """
SELECT COUNT(*) FROM (
SELECT
1
FROM
user_daily_visits
WHERE
timestamp > ?
AND
timestamp < ?
GROUP BY
user_id
HAVING
max(timestamp) - min(timestamp) > ?
) AS r30_users
"""
txn.execute(
sql,
(
sixty_days_ago_in_secs * 1000,
one_day_from_now_in_secs * 1000,
thirty_days_in_secs * 1000,
),
)
row = txn.fetchone()
if row is None:
results["all"] = 0
else:
results["all"] = row[0]
return results
return await self.db_pool.runInteraction(
"count_r30v2_users", _count_r30v2_users
)
def _get_start_of_day(self):
"""
Returns millisecond unixtime for start of UTC day.