mirror of
https://git.anonymousland.org/anonymousland/synapse.git
synced 2024-12-26 11:29:21 -05:00
Limit query load of generate_user_daily_visits
The aim is to keep track of when it was last called and only query from that point in time
This commit is contained in:
parent
f077e97914
commit
05ac15ae82
@ -476,23 +476,16 @@ def run(hs):
|
|||||||
" changes across releases."
|
" changes across releases."
|
||||||
)
|
)
|
||||||
|
|
||||||
# def recurring_user_daily_visit_stats():
|
|
||||||
|
|
||||||
def generate_user_daily_visit_stats():
|
def generate_user_daily_visit_stats():
|
||||||
hs.get_datastore().generate_user_daily_visits()
|
hs.get_datastore().generate_user_daily_visits()
|
||||||
|
|
||||||
# Since user daily stats are bucketed at midnight UTC,
|
def recurring_user_daily_visit_stats():
|
||||||
# and user_ips.last_seen can be updated at any time, it is important to call
|
clock.looping_call(generate_user_daily_visit_stats, 60 * 60 * 1000)
|
||||||
# generate_user_daily_visit_stats immediately prior to the day end. Assuming
|
|
||||||
# an hourly cadence, the simplist way is to allign all calls to the hour
|
|
||||||
# end
|
|
||||||
end_of_hour = datetime.datetime.now().replace(microsecond=0, second=0, minute=0) \
|
|
||||||
+ datetime.timedelta(hours=1) \
|
|
||||||
- datetime.timedelta(seconds=10) # Ensure method fires before day transistion
|
|
||||||
|
|
||||||
time_to_next_hour = end_of_hour - datetime.datetime.now()
|
# Rather than update on per session basis, batch up the requests.
|
||||||
clock.call_later(time_to_next_hour.seconds,
|
# If you increase the loop period, the accuracy of user_daily_visits
|
||||||
clock.looping_call(generate_user_daily_visit_stats, 60 * 60 * 1000))
|
# table will decrease
|
||||||
|
clock.looping_call(generate_user_daily_visit_stats, 5 * 60 * 1000)
|
||||||
|
|
||||||
if hs.config.report_stats:
|
if hs.config.report_stats:
|
||||||
logger.info("Scheduling stats reporting for 3 hour intervals")
|
logger.info("Scheduling stats reporting for 3 hour intervals")
|
||||||
|
@ -214,6 +214,9 @@ class DataStore(RoomMemberStore, RoomStore,
|
|||||||
self._stream_order_on_start = self.get_room_max_stream_ordering()
|
self._stream_order_on_start = self.get_room_max_stream_ordering()
|
||||||
self._min_stream_order_on_start = self.get_room_min_stream_ordering()
|
self._min_stream_order_on_start = self.get_room_min_stream_ordering()
|
||||||
|
|
||||||
|
# Used in _generate_user_daily_visits to keep track of progress
|
||||||
|
self._last_user_visit_update = self._get_start_of_day()
|
||||||
|
|
||||||
super(DataStore, self).__init__(db_conn, hs)
|
super(DataStore, self).__init__(db_conn, hs)
|
||||||
|
|
||||||
def take_presence_startup_info(self):
|
def take_presence_startup_info(self):
|
||||||
@ -348,27 +351,58 @@ class DataStore(RoomMemberStore, RoomStore,
|
|||||||
|
|
||||||
return self.runInteraction("count_r30_users", _count_r30_users)
|
return self.runInteraction("count_r30_users", _count_r30_users)
|
||||||
|
|
||||||
|
def _get_start_of_day(self):
|
||||||
|
"""
|
||||||
|
Returns millisecond unixtime for start of UTC day.
|
||||||
|
"""
|
||||||
|
now = datetime.datetime.utcnow()
|
||||||
|
today_start = datetime.datetime(now.year, now.month,
|
||||||
|
now.day, tzinfo=tz.tzutc())
|
||||||
|
return int(time.mktime(today_start.timetuple())) * 1000
|
||||||
|
|
||||||
def generate_user_daily_visits(self):
|
def generate_user_daily_visits(self):
|
||||||
"""
|
"""
|
||||||
Generates daily visit data for use in cohort/ retention analysis
|
Generates daily visit data for use in cohort/ retention analysis
|
||||||
"""
|
"""
|
||||||
def _generate_user_daily_visits(txn):
|
def _generate_user_daily_visits(txn):
|
||||||
|
logger.info("Calling _generate_user_daily_visits")
|
||||||
|
today_start = self._get_start_of_day()
|
||||||
|
a_day_in_milliseconds = 24 * 60 * 60 * 1000
|
||||||
|
|
||||||
# determine timestamp of the day start
|
|
||||||
now = datetime.datetime.utcnow()
|
|
||||||
today_start = datetime.datetime(now.year, now.month,
|
|
||||||
now.day, tzinfo=tz.tzutc())
|
|
||||||
today_start_time = int(time.mktime(today_start.timetuple())) * 1000
|
|
||||||
logger.info(today_start_time)
|
|
||||||
sql = """
|
sql = """
|
||||||
INSERT INTO user_daily_visits (user_id, device_id, timestamp)
|
INSERT INTO user_daily_visits (user_id, device_id, timestamp)
|
||||||
SELECT user_id, device_id, ?
|
SELECT u.user_id, u.device_id, ?
|
||||||
FROM user_ips AS u
|
FROM user_ips AS u
|
||||||
LEFT JOIN user_daily_visits USING (user_id, device_id)
|
LEFT JOIN (
|
||||||
WHERE last_seen > ? AND timestamp IS NULL
|
SELECT user_id, device_id, timestamp FROM user_daily_visits
|
||||||
GROUP BY user_id, device_id;
|
WHERE timestamp IS ?
|
||||||
"""
|
) udv
|
||||||
txn.execute(sql, (today_start_time, today_start_time))
|
ON u.user_id = udv.user_id AND u.device_id=udv.device_id
|
||||||
|
WHERE last_seen > ? AND last_seen <= ? AND udv.timestamp IS NULL
|
||||||
|
"""
|
||||||
|
|
||||||
|
# This means that the day has rolled over but there could still
|
||||||
|
# be entries from the previous day. There is an edge case
|
||||||
|
# where if the user logs in at 23:59 and overwrites their
|
||||||
|
# last_seen at 00:01 then they will not be counted in the
|
||||||
|
# previous day's stats - it is important that the query is run
|
||||||
|
# to minimise this case.
|
||||||
|
if today_start > self._last_user_visit_update:
|
||||||
|
yesterday_start = today_start - a_day_in_milliseconds
|
||||||
|
txn.execute(sql, (yesterday_start, yesterday_start,
|
||||||
|
self._last_user_visit_update, today_start))
|
||||||
|
self._last_user_visit_update = today_start
|
||||||
|
|
||||||
|
txn.execute(sql, (today_start, today_start,
|
||||||
|
self._last_user_visit_update,
|
||||||
|
today_start + a_day_in_milliseconds))
|
||||||
|
# Update _last_user_visit_update to now. The reason to do this
|
||||||
|
# rather just clamping to the beginning of the day is to limit
|
||||||
|
# the size of the join - meaning that the query can be run more
|
||||||
|
# frequently
|
||||||
|
|
||||||
|
now = datetime.datetime.utcnow()
|
||||||
|
self._last_user_visit_update = int(time.mktime(now.timetuple())) * 1000
|
||||||
|
|
||||||
return self.runInteraction("generate_user_daily_visits",
|
return self.runInteraction("generate_user_daily_visits",
|
||||||
_generate_user_daily_visits)
|
_generate_user_daily_visits)
|
||||||
|
Loading…
Reference in New Issue
Block a user