synapse-product/synapse/storage/__init__.py

503 lines
19 KiB
Python
Raw Normal View History

2014-08-12 10:10:52 -04:00
# -*- coding: utf-8 -*-
2016-01-06 23:26:29 -05:00
# Copyright 2014-2016 OpenMarket Ltd
2018-02-23 05:39:19 -05:00
# Copyright 2018 New Vector Ltd
2014-08-12 10:10:52 -04:00
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
2018-04-25 12:37:29 -04:00
import datetime
import logging
2018-07-09 02:09:20 -04:00
import time
2018-04-25 12:37:29 -04:00
2018-07-09 02:09:20 -04:00
from dateutil import tz
from synapse.api.constants import PresenceState
from synapse.storage.devices import DeviceStore
from synapse.storage.user_erasure_store import UserErasureStore
2018-07-09 02:09:20 -04:00
from synapse.util.caches.stream_change_cache import StreamChangeCache
from .account_data import AccountDataStore
from .appservice import ApplicationServiceStore, ApplicationServiceTransactionStore
from .client_ips import ClientIpStore
from .deviceinbox import DeviceInboxStore
2014-08-12 10:10:52 -04:00
from .directory import DirectoryStore
2018-07-09 02:09:20 -04:00
from .end_to_end_keys import EndToEndKeyStore
from .engines import PostgresEngine
from .event_federation import EventFederationStore
from .event_push_actions import EventPushActionsStore
2015-03-20 09:52:56 -04:00
from .events import EventsStore
2018-07-09 02:09:20 -04:00
from .filtering import FilteringStore
from .group_server import GroupServerStore
from .keys import KeyStore
from .media_repository import MediaRepositoryStore
from .openid import OpenIdStore
2016-02-15 12:10:40 -05:00
from .presence import PresenceStore, UserPresenceState
2014-08-12 10:10:52 -04:00
from .profile import ProfileStore
2018-07-09 02:09:20 -04:00
from .push_rule import PushRuleStore
from .pusher import PusherStore
from .receipts import ReceiptsStore
2014-08-12 10:10:52 -04:00
from .registration import RegistrationStore
2018-07-09 02:09:20 -04:00
from .rejections import RejectionsStore
2014-08-12 10:10:52 -04:00
from .room import RoomStore
from .roommember import RoomMemberStore
2015-10-09 10:48:31 -04:00
from .search import SearchStore
2018-07-09 02:09:20 -04:00
from .signatures import SignatureStore
from .state import StateStore
from .stream import StreamStore
from .tags import TagsStore
2018-07-09 02:09:20 -04:00
from .transactions import TransactionStore
2017-05-31 06:51:01 -04:00
from .user_directory import UserDirectoryStore
2018-07-09 02:09:20 -04:00
from .util.id_generators import ChainedIdGenerator, IdGenerator, StreamIdGenerator
2016-01-29 09:37:59 -05:00
2014-08-19 09:20:03 -04:00
logger = logging.getLogger(__name__)
class DataStore(
    RoomMemberStore,
    RoomStore,
    RegistrationStore,
    StreamStore,
    ProfileStore,
    PresenceStore,
    TransactionStore,
    DirectoryStore,
    KeyStore,
    StateStore,
    SignatureStore,
    ApplicationServiceStore,
    EventsStore,
    EventFederationStore,
    MediaRepositoryStore,
    RejectionsStore,
    FilteringStore,
    PusherStore,
    PushRuleStore,
    ApplicationServiceTransactionStore,
    ReceiptsStore,
    EndToEndKeyStore,
    SearchStore,
    TagsStore,
    AccountDataStore,
    EventPushActionsStore,
    OpenIdStore,
    ClientIpStore,
    DeviceStore,
    DeviceInboxStore,
    UserDirectoryStore,
    GroupServerStore,
    UserErasureStore,
):
    """Single store class combining all of the storage mixins listed above.

    The constructor creates the id generators and stream change caches on
    the instance before delegating to the mixins' own ``__init__`` chain.
    """

    def __init__(self, db_conn, hs):
        """Set up id generators and stream change caches.

        Args:
            db_conn: database connection handed to every id generator and
                used for the start-up prefill queries.
            hs: homeserver object; ``get_clock()`` and ``database_engine``
                are read from it.
        """
        self.hs = hs
        self._clock = hs.get_clock()
        self.database_engine = hs.database_engine

        def _prefilled_stream_cache(cache_name, table, entity_column,
                                    max_value, **extra):
            # Load the most recent changes for `table` and wrap them in a
            # StreamChangeCache. Every prefilled stream in this constructor
            # keys on a "stream_id" column.
            prefill, min_stream_id = self._get_cache_dict(
                db_conn, table,
                entity_column=entity_column,
                stream_column="stream_id",
                max_value=max_value,
                **extra
            )
            return StreamChangeCache(
                cache_name, min_stream_id, prefilled_cache=prefill
            )

        # --- stream id generators -------------------------------------
        self._stream_id_gen = StreamIdGenerator(
            db_conn, "events", "stream_ordering",
            extra_tables=[("local_invites", "stream_id")],
        )
        # Same column as above, but stepping downwards.
        self._backfill_id_gen = StreamIdGenerator(
            db_conn, "events", "stream_ordering", step=-1,
            extra_tables=[("ex_outlier_stream", "event_stream_ordering")],
        )
        self._presence_id_gen = StreamIdGenerator(
            db_conn, "presence_stream", "stream_id"
        )
        self._device_inbox_id_gen = StreamIdGenerator(
            db_conn, "device_max_stream_id", "stream_id"
        )
        self._public_room_id_gen = StreamIdGenerator(
            db_conn, "public_room_list_stream", "stream_id"
        )
        self._device_list_id_gen = StreamIdGenerator(
            db_conn, "device_lists_stream", "stream_id"
        )

        # --- plain id generators --------------------------------------
        self._transaction_id_gen = IdGenerator(
            db_conn, "sent_transactions", "id"
        )
        self._access_tokens_id_gen = IdGenerator(
            db_conn, "access_tokens", "id"
        )
        self._event_reports_id_gen = IdGenerator(
            db_conn, "event_reports", "id"
        )
        self._push_rule_id_gen = IdGenerator(db_conn, "push_rules", "id")
        self._push_rules_enable_id_gen = IdGenerator(
            db_conn, "push_rules_enable", "id"
        )
        self._push_rules_stream_id_gen = ChainedIdGenerator(
            self._stream_id_gen, db_conn, "push_rules_stream", "stream_id"
        )
        self._pushers_id_gen = StreamIdGenerator(
            db_conn, "pushers", "id",
            extra_tables=[("deleted_pushers", "stream_id")],
        )
        self._group_updates_id_gen = StreamIdGenerator(
            db_conn, "local_group_updates", "stream_id"
        )

        # A cache invalidation stream generator is only created when the
        # configured engine is Postgres; otherwise the attribute is None.
        self._cache_id_gen = (
            StreamIdGenerator(
                db_conn, "cache_invalidation_stream", "stream_id"
            )
            if isinstance(self.database_engine, PostgresEngine)
            else None
        )

        # --- presence --------------------------------------------------
        self._presence_on_startup = self._get_active_presence(db_conn)
        self.presence_stream_cache = _prefilled_stream_cache(
            "PresenceStreamChangeCache", "presence_stream", "user_id",
            self._presence_id_gen.get_current_token(),
        )

        # --- device inbox / federation outbox --------------------------
        max_device_inbox_id = self._device_inbox_id_gen.get_current_token()
        self._device_inbox_stream_cache = _prefilled_stream_cache(
            "DeviceInboxStreamChangeCache", "device_inbox", "user_id",
            max_device_inbox_id,
            limit=1000,
        )
        # The federation outbox and the local device inbox use the same
        # stream_id generator.
        self._device_federation_outbox_stream_cache = _prefilled_stream_cache(
            "DeviceFederationOutboxStreamChangeCache",
            "device_federation_outbox", "destination",
            max_device_inbox_id,
            limit=1000,
        )

        # --- device lists (not prefilled) ------------------------------
        device_list_max = self._device_list_id_gen.get_current_token()
        self._device_list_stream_cache = StreamChangeCache(
            "DeviceListStreamChangeCache", device_list_max
        )
        self._device_list_federation_stream_cache = StreamChangeCache(
            "DeviceListFederationStreamChangeCache", device_list_max
        )

        # --- current state deltas --------------------------------------
        # The delta stream shares its stream id with the events token.
        events_max = self._stream_id_gen.get_current_token()
        self._curr_state_delta_stream_cache = _prefilled_stream_cache(
            "_curr_state_delta_stream_cache", "current_state_delta_stream",
            "room_id",
            events_max,
            limit=1000,
        )

        # --- group updates ---------------------------------------------
        self._group_updates_stream_cache = _prefilled_stream_cache(
            "_group_updates_stream_cache", "local_group_updates", "user_id",
            self._group_updates_id_gen.get_current_token(),
            limit=1000,
        )

        self._stream_order_on_start = self.get_room_max_stream_ordering()
        self._min_stream_order_on_start = self.get_room_min_stream_ordering()

        # Used in _generate_user_daily_visits to keep track of progress
        self._last_user_visit_update = self._get_start_of_day()

        super(DataStore, self).__init__(db_conn, hs)
2016-02-15 12:10:40 -05:00
def take_presence_startup_info(self):
    """Hand over the presence states captured at start-up, exactly once.

    The stored value is returned and the attribute is reset to ``None``,
    so any later call returns ``None``.
    """
    startup_states, self._presence_on_startup = (
        self._presence_on_startup, None
    )
    return startup_states
def _get_active_presence(self, db_conn):
    """Fetch non-offline presence from the database so that we can register
    the appropriate time outs.

    Args:
        db_conn: database connection; a cursor is opened on it for the
            duration of the query and always closed afterwards.

    Returns:
        list of UserPresenceState, one per row of presence_stream whose
        state is not PresenceState.OFFLINE.
    """
    sql = self.database_engine.convert_param_style(
        "SELECT user_id, state, last_active_ts, last_federation_update_ts,"
        " last_user_sync_ts, status_msg, currently_active FROM presence_stream"
        " WHERE state != ?"
    )

    txn = db_conn.cursor()
    try:
        txn.execute(sql, (PresenceState.OFFLINE,))
        rows = self.cursor_to_dict(txn)
    finally:
        # Release the cursor even when the query or row conversion fails.
        txn.close()

    # Coerce the stored value to a real bool before building the states.
    for row in rows:
        row["currently_active"] = bool(row["currently_active"])

    return [UserPresenceState(**row) for row in rows]
def count_daily_users(self):
    """
    Counts the number of users who used this homeserver in the last 24 hours.

    Returns:
        whatever ``runInteraction`` returns for the "count_users"
        interaction; the interaction itself yields the number of distinct
        user_ids in user_ips with a last_seen newer than 24 hours ago.
    """
    day_in_msec = 1000 * 60 * 60 * 24

    def _count_users(txn):
        cutoff = int(self._clock.time_msec()) - day_in_msec
        txn.execute(
            """
                SELECT COALESCE(count(*), 0) FROM (
                    SELECT user_id FROM user_ips
                    WHERE last_seen > ?
                    GROUP BY user_id
                ) u
            """,
            (cutoff,),
        )
        (count,) = txn.fetchone()
        return count

    return self.runInteraction("count_users", _count_users)
2018-03-28 05:39:13 -04:00
def count_r30_users(self):
    """
    Counts the number of 30 day retained users, defined as:-
     * Users who have created their accounts more than 30 days ago
     * Where last seen at most 30 days ago
     * Where account creation and last_seen are > 30 days apart

    Returns counts globally as well as broken down by platform.

    Returns:
        whatever ``runInteraction`` returns for the "count_r30_users"
        interaction; the interaction itself yields a dict mapping each
        platform label found ('android', 'ios', 'electron', 'web',
        'unknown') to its count, plus the key 'all' for the overall count.
    """
    def _count_r30_users(txn):
        thirty_days_in_secs = 86400 * 30
        now = int(self._clock.time())
        thirty_days_ago_in_secs = now - thirty_days_in_secs

        # Per-platform counts. The SQL compares users.creation_ts directly
        # with seconds and divides user_ips.last_seen by 1000 first.
        sql = """
            SELECT platform, COALESCE(count(*), 0) FROM (
                SELECT
                    users.name, platform, users.creation_ts * 1000,
                    MAX(uip.last_seen)
                FROM users
                INNER JOIN (
                    SELECT
                        user_id,
                        last_seen,
                        CASE
                            WHEN user_agent LIKE '%%Android%%' THEN 'android'
                            WHEN user_agent LIKE '%%iOS%%' THEN 'ios'
                            WHEN user_agent LIKE '%%Electron%%' THEN 'electron'
                            WHEN user_agent LIKE '%%Mozilla%%' THEN 'web'
                            WHEN user_agent LIKE '%%Gecko%%' THEN 'web'
                            ELSE 'unknown'
                        END
                        AS platform
                    FROM user_ips
                ) uip
                ON users.name = uip.user_id
                AND users.appservice_id is NULL
                AND users.creation_ts < ?
                AND uip.last_seen/1000 > ?
                AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30
                GROUP BY users.name, platform, users.creation_ts
            ) u GROUP BY platform
        """

        results = {}
        txn.execute(sql, (thirty_days_ago_in_secs,
                          thirty_days_ago_in_secs))

        # Every platform row is reported, including 'unknown'. The previous
        # `if row[0] is 'unknown': pass` was an identity test against a
        # string literal guarding a no-op, so it never changed the result
        # and has been removed.
        for platform, count in txn:
            results[platform] = count

        # Overall count, irrespective of platform.
        sql = """
            SELECT COALESCE(count(*), 0) FROM (
                SELECT users.name, users.creation_ts * 1000,
                                                    MAX(uip.last_seen)
                FROM users
                INNER JOIN (
                    SELECT
                        user_id,
                        last_seen
                    FROM user_ips
                ) uip
                ON users.name = uip.user_id
                AND appservice_id is NULL
                AND users.creation_ts < ?
                AND uip.last_seen/1000 > ?
                AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30
                GROUP BY users.name, users.creation_ts
            ) u
        """

        txn.execute(sql, (thirty_days_ago_in_secs,
                          thirty_days_ago_in_secs))

        count, = txn.fetchone()
        results['all'] = count

        return results

    return self.runInteraction("count_r30_users", _count_r30_users)
2018-03-28 05:39:13 -04:00
def _get_start_of_day(self):
    """
    Returns millisecond unixtime for start of UTC day.

    The value is computed with ``calendar.timegm``, which treats the time
    tuple as UTC. ``time.mktime`` treats it as *local* time, which shifts
    the result by the host's UTC offset whenever the process does not run
    in UTC.
    """
    import calendar

    now = time.gmtime()
    midnight_utc = (now.tm_year, now.tm_mon, now.tm_mday, 0, 0, 0)
    return calendar.timegm(midnight_utc) * 1000
2018-04-25 12:37:29 -04:00
def generate_user_daily_visits(self):
    """
    Generates daily visit data for use in cohort/ retention analysis

    For every (user_id, device_id) pair in user_ips seen since the last
    run, a row stamped with the start of the relevant day is inserted into
    user_daily_visits unless one already exists for that day. Guests and
    appservice users are excluded by the query.

    Returns:
        whatever ``runInteraction`` returns for the
        "generate_user_daily_visits" interaction.
    """
    def _generate_user_daily_visits(txn):
        logger.info("Calling _generate_user_daily_visits")
        today_start = self._get_start_of_day()
        a_day_in_milliseconds = 24 * 60 * 60 * 1000
        # Read the clock through self._clock, the attribute assigned in
        # __init__ and used by the other statistics queries of this class.
        now = self._clock.time_msec()

        # Parameters, in order: timestamp to insert, day to check for an
        # existing row, exclusive lower and inclusive upper last_seen bound.
        sql = """
            INSERT INTO user_daily_visits (user_id, device_id, timestamp)
                SELECT u.user_id, u.device_id, ?
                FROM user_ips AS u
                LEFT JOIN (
                  SELECT user_id, device_id, timestamp FROM user_daily_visits
                  WHERE timestamp = ?
                ) udv
                ON u.user_id = udv.user_id AND u.device_id=udv.device_id
                INNER JOIN users ON users.name=u.user_id
                WHERE last_seen > ? AND last_seen <= ?
                AND udv.timestamp IS NULL AND users.is_guest=0
                AND users.appservice_id IS NULL
                GROUP BY u.user_id, u.device_id
        """

        # This means that the day has rolled over but there could still
        # be entries from the previous day. There is an edge case
        # where if the user logs in at 23:59 and overwrites their
        # last_seen at 00:01 then they will not be counted in the
        # previous day's stats - it is important that the query is run
        # often to minimise this case.
        if today_start > self._last_user_visit_update:
            yesterday_start = today_start - a_day_in_milliseconds
            txn.execute(sql, (
                yesterday_start, yesterday_start,
                self._last_user_visit_update, today_start
            ))
            self._last_user_visit_update = today_start

        txn.execute(sql, (
            today_start, today_start,
            self._last_user_visit_update,
            now
        ))
        # Update _last_user_visit_update to now. The reason to do this
        # rather just clamping to the beginning of the day is to limit
        # the size of the join - meaning that the query can be run more
        # frequently
        self._last_user_visit_update = now

    return self.runInteraction("generate_user_daily_visits",
                               _generate_user_daily_visits)
2018-04-25 12:37:29 -04:00
def get_users(self):
    """Retrieve a list of the users in the users table.

    No filter is applied; the columns name, password_hash, is_guest and
    admin are requested for each row.

    Returns:
        the value produced by ``_simple_select_list`` (documented here
        previously as a defer.Deferred resolving to list[dict[str, Any]])
    """
    wanted_columns = ["name", "password_hash", "is_guest", "admin"]
    no_filter = {}
    return self._simple_select_list(
        table="users",
        keyvalues=no_filter,
        retcols=wanted_columns,
        desc="get_users",
    )
def get_users_paginate(self, order, start, limit):
    """Retrieve one page of non-guest users from the users table.

    Args:
        order (str): column name to order the select by
        start (int): start number to begin the query from; coerced
            with ``int()``
        limit (int): number of rows to retrieve; coerced with ``int()``
    Returns:
        the value produced by ``get_user_list_paginate`` (documented here
        previously as a defer.Deferred resolving to a json object
        {list[dict[str, Any]], count})
    """
    # Coerce start before limit, as the page values are built from both.
    offset = int(start)
    page_size = int(limit)

    only_non_guests = {"is_guest": 0}
    wanted_columns = ["name", "password_hash", "is_guest", "admin"]

    return self.get_user_list_paginate(
        table="users",
        keyvalues=only_non_guests,
        pagevalues=[order, page_size, offset],
        retcols=wanted_columns,
        desc="get_users_paginate",
    )
def search_users(self, term):
    """Search the users table for users whose name matches a term.

    The term is always matched against the "name" column.

    Args:
        term (str): search term
    Returns:
        the value produced by ``_simple_search_list`` (documented here
        previously as a defer.Deferred resolving to list[dict[str, Any]])
    """
    wanted_columns = ["name", "password_hash", "is_guest", "admin"]
    return self._simple_search_list(
        table="users",
        term=term,
        col="name",
        retcols=wanted_columns,
        desc="search_users",
    )
def are_all_users_on_domain(txn, database_engine, domain):
    """Report whether every name in the users table ends with ":<domain>".

    Args:
        txn: cursor-like object providing ``execute`` and ``fetchall``
        database_engine: object whose ``convert_param_style`` adapts the
            ``?`` placeholder for the underlying driver
        domain (str): domain that is appended to "%:" to build the LIKE
            pattern
    Returns:
        bool: True when no user name fails to match the pattern
    """
    query = database_engine.convert_param_style(
        "SELECT COUNT(*) FROM users WHERE name NOT LIKE ?"
    )
    txn.execute(query, ("%:" + domain,))
    mismatches = txn.fetchall()[0][0]
    return mismatches == 0