Generate user daily stats

This commit is contained in:
Neil Johnson 2018-04-25 17:37:29 +01:00
parent 48c01ae851
commit 617bf40924
6 changed files with 112 additions and 5 deletions

View file

@ -14,6 +14,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import time
import logging
from synapse.storage.devices import DeviceStore
from .appservice import (
ApplicationServiceStore, ApplicationServiceTransactionStore
@ -55,10 +59,6 @@ from .engines import PostgresEngine
from synapse.api.constants import PresenceState
from synapse.util.caches.stream_change_cache import StreamChangeCache
import logging
logger = logging.getLogger(__name__)
@ -347,6 +347,58 @@ class DataStore(RoomMemberStore, RoomStore,
return self.runInteraction("count_r30_users", _count_r30_users)
def generate_user_daily_visits(self):
    """
    Generates daily visit data for use in cohort/ retention analysis

    Copies one row into ``user_daily_visits`` for every ``user_ips`` entry
    whose ``last_seen`` is later than the start of yesterday (local time).
    Each copied row is stamped with yesterday's midnight, expressed in
    milliseconds. Nothing is written if the most recent
    ``user_daily_visits`` row already carries that timestamp.

    Returns:
        Whatever ``self.runInteraction`` returns for this interaction
        (NOTE(review): presumably a Deferred - confirm against callers).
    """
    def _generate_user_daily_visits(txn):
        logger.info("Calling _generate_user_daily_visits")

        # Midnight (local time) at the start of the previous day, in ms
        yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
        yesterday_start = datetime.datetime(
            yesterday.year, yesterday.month, yesterday.day, 0, 0, 0, 0
        )
        yesterday_start_time = int(time.mktime(yesterday_start.timetuple())) * 1000

        # Check that this job has not already been completed
        sql = """
            SELECT timestamp
            FROM user_daily_visits
            ORDER by timestamp desc limit 1
        """
        txn.execute(sql)
        latest = txn.fetchone()

        # Bail if the most recent time is yesterday
        if latest and latest[0] == yesterday_start_time:
            logger.info("Bailing from _generate_user_daily_visits, already completed")
            return

        logger.info("inserting into user_daily_visits")

        # Not specifying an upper bound means that if the update is run at
        # 10 mins past midnight and the user is active during a 30 min session
        # that the user is still included in the previous days stats
        # This does mean that if the update is run hours late, then it is
        # possible to overstate the cohort, but this seems a reasonable trade
        # off. The alternative is to insert on every request - but prefer to
        # avoid for performance reasons
        sql = """
            SELECT user_id, user_agent, device_id
            FROM user_ips
            WHERE last_seen > ?
        """
        txn.execute(sql, (yesterday_start_time,))

        # Read the whole result set *before* issuing any INSERT: both
        # statements run on the same cursor, and executing a new statement
        # throws away the rows of the SELECT that have not been read yet.
        visits = list(txn)

        sql = """
            INSERT INTO user_daily_visits (user_id, user_agent, device_id, timestamp)
            VALUES (?, ?, ?, ?)
        """
        for visit in visits:
            txn.execute(sql, tuple(visit) + (yesterday_start_time,))

    return self.runInteraction(
        "generate_user_daily_visits", _generate_user_daily_visits
    )
def get_users(self):
"""Function to reterive a list of users in users table.