Allow background tasks to be run on a separate worker. (#8369)
This commit is contained in:
parent 462e681c79
commit 62894673e6
19 changed files with 537 additions and 449 deletions
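As background for the hunks below: the point of this change is that periodic database jobs should only be scheduled on whichever process is designated to run background tasks. A minimal sketch of that gating pattern follows, assuming a `hs.config.run_background_tasks` flag and an `ExampleWorkerStore` class that are illustrative names, not taken from this diff:

# Sketch only: `run_background_tasks` and `ExampleWorkerStore` are assumed
# names for illustration; they are not part of this diff.
from synapse.metrics.background_process_metrics import run_as_background_process


class ExampleWorkerStore:
    def __init__(self, database, db_conn, hs):
        self.hs = hs

        # Schedule the hourly job only on the process that owns background
        # tasks; every other worker skips the looping call entirely.
        if hs.config.run_background_tasks:
            hs.get_clock().looping_call(self._kick_off_cleanup, 60 * 60 * 1000)

    def _kick_off_cleanup(self):
        # Track the job in the background-process metrics.
        return run_as_background_process("example_cleanup", self._cleanup)

    async def _cleanup(self):
        ...

Only the designated worker schedules the looping call; all other processes construct the store without it.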
@@ -12,6 +12,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import calendar
import logging
import time
from typing import Dict

from synapse.metrics import GaugeBucketCollector
from synapse.metrics.background_process_metrics import run_as_background_process
@@ -21,6 +25,8 @@ from synapse.storage.databases.main.event_push_actions import (
    EventPushActionsWorkerStore,
)

logger = logging.getLogger(__name__)

# Collect metrics on the number of forward extremities that exist.
_extremities_collecter = GaugeBucketCollector(
    "synapse_forward_extremities",
@@ -60,6 +66,9 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore):

        hs.get_clock().looping_call(read_forward_extremities, 60 * 60 * 1000)

        # Used in _generate_user_daily_visits to keep track of progress
        self._last_user_visit_update = self._get_start_of_day()

    async def _read_forward_extremities(self):
        def fetch(txn):
            txn.execute(
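The hunk above schedules a callable named read_forward_extremities whose definition falls outside the visible context. Given the run_as_background_process import added in the first hunk, a plausible reading is a thin wrapper along these lines (a sketch, not the code from the file itself):

# Sketch of how the visible pieces likely fit together; the wrapper itself is
# not shown in the hunk above.
from synapse.metrics.background_process_metrics import run_as_background_process


class _ReadExtremitiesSketch:
    def __init__(self, hs):
        def read_forward_extremities():
            # Run the coroutine as a tracked background process.
            return run_as_background_process(
                "read_forward_extremities", self._read_forward_extremities
            )

        # Refresh forward-extremity metrics once an hour (60 * 60 * 1000 ms).
        hs.get_clock().looping_call(read_forward_extremities, 60 * 60 * 1000)

    async def _read_forward_extremities(self):
        ...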
@@ -137,3 +146,189 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore):
            return count

        return await self.db_pool.runInteraction("count_daily_active_rooms", _count)

    async def count_daily_users(self) -> int:
        """
        Counts the number of users who used this homeserver in the last 24 hours.
        """
        yesterday = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24)
        return await self.db_pool.runInteraction(
            "count_daily_users", self._count_users, yesterday
        )

    async def count_monthly_users(self) -> int:
        """
        Counts the number of users who used this homeserver in the last 30 days.
        Note this method is intended for phonehome metrics only and is different
        from the mau figure in synapse.storage.monthly_active_users which,
        amongst other things, includes a 3 day grace period before a user counts.
        """
        thirty_days_ago = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30)
        return await self.db_pool.runInteraction(
            "count_monthly_users", self._count_users, thirty_days_ago
        )

    def _count_users(self, txn, time_from):
        """
        Returns number of users seen in the past time_from period
        """
        sql = """
            SELECT COALESCE(count(*), 0) FROM (
                SELECT user_id FROM user_ips
                WHERE last_seen > ?
                GROUP BY user_id
            ) u
        """
        txn.execute(sql, (time_from,))
        (count,) = txn.fetchone()
        return count

    async def count_r30_users(self) -> Dict[str, int]:
        """
        Counts the number of 30 day retained users, defined as:-
         * Users who have created their accounts more than 30 days ago
         * Where last seen at most 30 days ago
         * Where account creation and last_seen are > 30 days apart

        Returns:
             A mapping of counts globally as well as broken out by platform.
        """

        def _count_r30_users(txn):
            thirty_days_in_secs = 86400 * 30
            now = int(self._clock.time())
            thirty_days_ago_in_secs = now - thirty_days_in_secs

            sql = """
                SELECT platform, COALESCE(count(*), 0) FROM (
                     SELECT
                        users.name, platform, users.creation_ts * 1000,
                        MAX(uip.last_seen)
                     FROM users
                     INNER JOIN (
                         SELECT
                         user_id,
                         last_seen,
                         CASE
                             WHEN user_agent LIKE '%%Android%%' THEN 'android'
                             WHEN user_agent LIKE '%%iOS%%' THEN 'ios'
                             WHEN user_agent LIKE '%%Electron%%' THEN 'electron'
                             WHEN user_agent LIKE '%%Mozilla%%' THEN 'web'
                             WHEN user_agent LIKE '%%Gecko%%' THEN 'web'
                             ELSE 'unknown'
                         END
                         AS platform
                         FROM user_ips
                     ) uip
                     ON users.name = uip.user_id
                     AND users.appservice_id is NULL
                     AND users.creation_ts < ?
                     AND uip.last_seen/1000 > ?
                     AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30
                     GROUP BY users.name, platform, users.creation_ts
                ) u GROUP BY platform
            """

            results = {}
            txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs))

            for row in txn:
                if row[0] == "unknown":
                    pass
                results[row[0]] = row[1]

            sql = """
                SELECT COALESCE(count(*), 0) FROM (
                    SELECT users.name, users.creation_ts * 1000,
                        MAX(uip.last_seen)
                    FROM users
                    INNER JOIN (
                        SELECT
                        user_id,
                        last_seen
                        FROM user_ips
                    ) uip
                    ON users.name = uip.user_id
                    AND appservice_id is NULL
                    AND users.creation_ts < ?
                    AND uip.last_seen/1000 > ?
                    AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30
                    GROUP BY users.name, users.creation_ts
                ) u
            """

            txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs))

            (count,) = txn.fetchone()
            results["all"] = count

            return results

        return await self.db_pool.runInteraction("count_r30_users", _count_r30_users)

    def _get_start_of_day(self):
        """
        Returns millisecond unixtime for start of UTC day.
        """
        now = time.gmtime()
        today_start = calendar.timegm((now.tm_year, now.tm_mon, now.tm_mday, 0, 0, 0))
        return today_start * 1000

    async def generate_user_daily_visits(self) -> None:
        """
        Generates daily visit data for use in cohort/ retention analysis
        """

        def _generate_user_daily_visits(txn):
            logger.info("Calling _generate_user_daily_visits")
            today_start = self._get_start_of_day()
            a_day_in_milliseconds = 24 * 60 * 60 * 1000
            now = self._clock.time_msec()

            sql = """
                INSERT INTO user_daily_visits (user_id, device_id, timestamp)
                    SELECT u.user_id, u.device_id, ?
                    FROM user_ips AS u
                    LEFT JOIN (
                        SELECT user_id, device_id, timestamp FROM user_daily_visits
                        WHERE timestamp = ?
                    ) udv
                    ON u.user_id = udv.user_id AND u.device_id=udv.device_id
                    INNER JOIN users ON users.name=u.user_id
                    WHERE last_seen > ? AND last_seen <= ?
                    AND udv.timestamp IS NULL AND users.is_guest=0
                    AND users.appservice_id IS NULL
                    GROUP BY u.user_id, u.device_id
            """

            # This means that the day has rolled over but there could still
            # be entries from the previous day. There is an edge case
            # where if the user logs in at 23:59 and overwrites their
            # last_seen at 00:01 then they will not be counted in the
            # previous day's stats - it is important that the query is run
            # often to minimise this case.
            if today_start > self._last_user_visit_update:
                yesterday_start = today_start - a_day_in_milliseconds
                txn.execute(
                    sql,
                    (
                        yesterday_start,
                        yesterday_start,
                        self._last_user_visit_update,
                        today_start,
                    ),
                )
                self._last_user_visit_update = today_start

            txn.execute(
                sql, (today_start, today_start, self._last_user_visit_update, now)
            )
            # Update _last_user_visit_update to now. The reason to do this
            # rather just clamping to the beginning of the day is to limit
            # the size of the join - meaning that the query can be run more
            # frequently
            self._last_user_visit_update = now

        await self.db_pool.runInteraction(
            "generate_user_daily_visits", _generate_user_daily_visits
        )
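To make the intent of the methods added above concrete, here is a hypothetical caller (not part of this commit) assembling the phone-home figures from them:

# Hypothetical illustration only: `collect_phone_home_stats` is not part of
# this commit; the store methods it awaits are the ones added above.
async def collect_phone_home_stats(store) -> dict:
    stats = {
        "daily_active_users": await store.count_daily_users(),
        "monthly_active_users": await store.count_monthly_users(),
    }

    # count_r30_users() returns per-platform counts plus an "all" total.
    r30 = await store.count_r30_users()
    stats["r30_users_all"] = r30.get("all", 0)
    for platform, count in r30.items():
        if platform != "all":
            stats["r30_users_" + platform] = count

    # Record today's visits for cohort / retention analysis.
    await store.generate_user_daily_visits()

    return stats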