mirror of
https://git.anonymousland.org/anonymousland/synapse.git
synced 2025-05-02 08:46:04 -04:00
Merge branch 'develop' of github.com:matrix-org/synapse into erikj/split_profiles
This commit is contained in:
commit
484a0ebdfc
67 changed files with 1156 additions and 408 deletions
|
@ -39,6 +39,7 @@ from .filtering import FilteringStore
|
|||
from .group_server import GroupServerStore
|
||||
from .keys import KeyStore
|
||||
from .media_repository import MediaRepositoryStore
|
||||
from .monthly_active_users import MonthlyActiveUsersStore
|
||||
from .openid import OpenIdStore
|
||||
from .presence import PresenceStore, UserPresenceState
|
||||
from .profile import ProfileStore
|
||||
|
@ -87,6 +88,7 @@ class DataStore(RoomMemberStore, RoomStore,
|
|||
UserDirectoryStore,
|
||||
GroupServerStore,
|
||||
UserErasureStore,
|
||||
MonthlyActiveUsersStore,
|
||||
):
|
||||
|
||||
def __init__(self, db_conn, hs):
|
||||
|
@ -94,7 +96,6 @@ class DataStore(RoomMemberStore, RoomStore,
|
|||
self._clock = hs.get_clock()
|
||||
self.database_engine = hs.database_engine
|
||||
|
||||
self.db_conn = db_conn
|
||||
self._stream_id_gen = StreamIdGenerator(
|
||||
db_conn, "events", "stream_ordering",
|
||||
extra_tables=[("local_invites", "stream_id")]
|
||||
|
@ -267,31 +268,6 @@ class DataStore(RoomMemberStore, RoomStore,
|
|||
|
||||
return self.runInteraction("count_users", _count_users)
|
||||
|
||||
def count_monthly_users(self):
|
||||
"""Counts the number of users who used this homeserver in the last 30 days
|
||||
|
||||
This method should be refactored with count_daily_users - the only
|
||||
reason not to is waiting on definition of mau
|
||||
|
||||
Returns:
|
||||
Defered[int]
|
||||
"""
|
||||
def _count_monthly_users(txn):
|
||||
thirty_days_ago = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30)
|
||||
sql = """
|
||||
SELECT COALESCE(count(*), 0) FROM (
|
||||
SELECT user_id FROM user_ips
|
||||
WHERE last_seen > ?
|
||||
GROUP BY user_id
|
||||
) u
|
||||
"""
|
||||
|
||||
txn.execute(sql, (thirty_days_ago,))
|
||||
count, = txn.fetchone()
|
||||
return count
|
||||
|
||||
return self.runInteraction("count_monthly_users", _count_monthly_users)
|
||||
|
||||
def count_r30_users(self):
|
||||
"""
|
||||
Counts the number of 30 day retained users, defined as:-
|
||||
|
|
|
@ -35,6 +35,7 @@ LAST_SEEN_GRANULARITY = 120 * 1000
|
|||
|
||||
class ClientIpStore(background_updates.BackgroundUpdateStore):
|
||||
def __init__(self, db_conn, hs):
|
||||
|
||||
self.client_ip_last_seen = Cache(
|
||||
name="client_ip_last_seen",
|
||||
keylen=4,
|
||||
|
@ -74,6 +75,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
|
|||
"before", "shutdown", self._update_client_ips_batch
|
||||
)
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def insert_client_ip(self, user_id, access_token, ip, user_agent, device_id,
|
||||
now=None):
|
||||
if not now:
|
||||
|
@ -84,7 +86,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
|
|||
last_seen = self.client_ip_last_seen.get(key)
|
||||
except KeyError:
|
||||
last_seen = None
|
||||
|
||||
yield self.populate_monthly_active_users(user_id)
|
||||
# Rate-limited inserts
|
||||
if last_seen is not None and (now - last_seen) < LAST_SEEN_GRANULARITY:
|
||||
return
|
||||
|
|
|
@ -485,9 +485,14 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
|
|||
new_forward_extremeties=new_forward_extremeties,
|
||||
)
|
||||
persist_event_counter.inc(len(chunk))
|
||||
synapse.metrics.event_persisted_position.set(
|
||||
chunk[-1][0].internal_metadata.stream_ordering,
|
||||
)
|
||||
|
||||
if not backfilled:
|
||||
# backfilled events have negative stream orderings, so we don't
|
||||
# want to set the event_persisted_position to that.
|
||||
synapse.metrics.event_persisted_position.set(
|
||||
chunk[-1][0].internal_metadata.stream_ordering,
|
||||
)
|
||||
|
||||
for event, context in chunk:
|
||||
if context.app_service:
|
||||
origin_type = "local"
|
||||
|
|
201
synapse/storage/monthly_active_users.py
Normal file
201
synapse/storage/monthly_active_users.py
Normal file
|
@ -0,0 +1,201 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright 2018 New Vector
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import logging
|
||||
|
||||
from twisted.internet import defer
|
||||
|
||||
from synapse.util.caches.descriptors import cached
|
||||
|
||||
from ._base import SQLBaseStore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Number of msec of granularity to store the monthly_active_user timestamp
|
||||
# This means it is not necessary to update the table on every request
|
||||
LAST_SEEN_GRANULARITY = 60 * 60 * 1000
|
||||
|
||||
|
||||
class MonthlyActiveUsersStore(SQLBaseStore):
|
||||
def __init__(self, dbconn, hs):
|
||||
super(MonthlyActiveUsersStore, self).__init__(None, hs)
|
||||
self._clock = hs.get_clock()
|
||||
self.hs = hs
|
||||
self.reserved_users = ()
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def initialise_reserved_users(self, threepids):
|
||||
# TODO Why can't I do this in init?
|
||||
store = self.hs.get_datastore()
|
||||
reserved_user_list = []
|
||||
|
||||
# Do not add more reserved users than the total allowable number
|
||||
for tp in threepids[:self.hs.config.max_mau_value]:
|
||||
user_id = yield store.get_user_id_by_threepid(
|
||||
tp["medium"], tp["address"]
|
||||
)
|
||||
if user_id:
|
||||
self.upsert_monthly_active_user(user_id)
|
||||
reserved_user_list.append(user_id)
|
||||
else:
|
||||
logger.warning(
|
||||
"mau limit reserved threepid %s not found in db" % tp
|
||||
)
|
||||
self.reserved_users = tuple(reserved_user_list)
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def reap_monthly_active_users(self):
|
||||
"""
|
||||
Cleans out monthly active user table to ensure that no stale
|
||||
entries exist.
|
||||
|
||||
Returns:
|
||||
Deferred[]
|
||||
"""
|
||||
def _reap_users(txn):
|
||||
|
||||
thirty_days_ago = (
|
||||
int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30)
|
||||
)
|
||||
# Purge stale users
|
||||
|
||||
# questionmarks is a hack to overcome sqlite not supporting
|
||||
# tuples in 'WHERE IN %s'
|
||||
questionmarks = '?' * len(self.reserved_users)
|
||||
query_args = [thirty_days_ago]
|
||||
query_args.extend(self.reserved_users)
|
||||
|
||||
sql = """
|
||||
DELETE FROM monthly_active_users
|
||||
WHERE timestamp < ?
|
||||
AND user_id NOT IN ({})
|
||||
""".format(','.join(questionmarks))
|
||||
|
||||
txn.execute(sql, query_args)
|
||||
|
||||
# If MAU user count still exceeds the MAU threshold, then delete on
|
||||
# a least recently active basis.
|
||||
# Note it is not possible to write this query using OFFSET due to
|
||||
# incompatibilities in how sqlite and postgres support the feature.
|
||||
# sqlite requires 'LIMIT -1 OFFSET ?', the LIMIT must be present
|
||||
# While Postgres does not require 'LIMIT', but also does not support
|
||||
# negative LIMIT values. So there is no way to write it that both can
|
||||
# support
|
||||
query_args = [self.hs.config.max_mau_value]
|
||||
query_args.extend(self.reserved_users)
|
||||
sql = """
|
||||
DELETE FROM monthly_active_users
|
||||
WHERE user_id NOT IN (
|
||||
SELECT user_id FROM monthly_active_users
|
||||
ORDER BY timestamp DESC
|
||||
LIMIT ?
|
||||
)
|
||||
AND user_id NOT IN ({})
|
||||
""".format(','.join(questionmarks))
|
||||
txn.execute(sql, query_args)
|
||||
|
||||
yield self.runInteraction("reap_monthly_active_users", _reap_users)
|
||||
# It seems poor to invalidate the whole cache, Postgres supports
|
||||
# 'Returning' which would allow me to invalidate only the
|
||||
# specific users, but sqlite has no way to do this and instead
|
||||
# I would need to SELECT and the DELETE which without locking
|
||||
# is racy.
|
||||
# Have resolved to invalidate the whole cache for now and do
|
||||
# something about it if and when the perf becomes significant
|
||||
self._user_last_seen_monthly_active.invalidate_all()
|
||||
self.get_monthly_active_count.invalidate_all()
|
||||
|
||||
@cached(num_args=0)
|
||||
def get_monthly_active_count(self):
|
||||
"""Generates current count of monthly active users
|
||||
|
||||
Returns:
|
||||
Defered[int]: Number of current monthly active users
|
||||
"""
|
||||
|
||||
def _count_users(txn):
|
||||
sql = "SELECT COALESCE(count(*), 0) FROM monthly_active_users"
|
||||
|
||||
txn.execute(sql)
|
||||
count, = txn.fetchone()
|
||||
return count
|
||||
return self.runInteraction("count_users", _count_users)
|
||||
|
||||
def upsert_monthly_active_user(self, user_id):
|
||||
"""
|
||||
Updates or inserts monthly active user member
|
||||
Arguments:
|
||||
user_id (str): user to add/update
|
||||
Deferred[bool]: True if a new entry was created, False if an
|
||||
existing one was updated.
|
||||
"""
|
||||
is_insert = self._simple_upsert(
|
||||
desc="upsert_monthly_active_user",
|
||||
table="monthly_active_users",
|
||||
keyvalues={
|
||||
"user_id": user_id,
|
||||
},
|
||||
values={
|
||||
"timestamp": int(self._clock.time_msec()),
|
||||
},
|
||||
lock=False,
|
||||
)
|
||||
if is_insert:
|
||||
self._user_last_seen_monthly_active.invalidate((user_id,))
|
||||
self.get_monthly_active_count.invalidate(())
|
||||
|
||||
@cached(num_args=1)
|
||||
def _user_last_seen_monthly_active(self, user_id):
|
||||
"""
|
||||
Checks if a given user is part of the monthly active user group
|
||||
Arguments:
|
||||
user_id (str): user to add/update
|
||||
Return:
|
||||
Deferred[int] : timestamp since last seen, None if never seen
|
||||
|
||||
"""
|
||||
|
||||
return(self._simple_select_one_onecol(
|
||||
table="monthly_active_users",
|
||||
keyvalues={
|
||||
"user_id": user_id,
|
||||
},
|
||||
retcol="timestamp",
|
||||
allow_none=True,
|
||||
desc="_user_last_seen_monthly_active",
|
||||
))
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def populate_monthly_active_users(self, user_id):
|
||||
"""Checks on the state of monthly active user limits and optionally
|
||||
add the user to the monthly active tables
|
||||
|
||||
Args:
|
||||
user_id(str): the user_id to query
|
||||
"""
|
||||
if self.hs.config.limit_usage_by_mau:
|
||||
last_seen_timestamp = yield self._user_last_seen_monthly_active(user_id)
|
||||
now = self.hs.get_clock().time_msec()
|
||||
|
||||
# We want to reduce to the total number of db writes, and are happy
|
||||
# to trade accuracy of timestamp in order to lighten load. This means
|
||||
# We always insert new users (where MAU threshold has not been reached),
|
||||
# but only update if we have not previously seen the user for
|
||||
# LAST_SEEN_GRANULARITY ms
|
||||
if last_seen_timestamp is None:
|
||||
count = yield self.get_monthly_active_count()
|
||||
if count < self.hs.config.max_mau_value:
|
||||
yield self.upsert_monthly_active_user(user_id)
|
||||
elif now - last_seen_timestamp > LAST_SEEN_GRANULARITY:
|
||||
yield self.upsert_monthly_active_user(user_id)
|
|
@ -25,7 +25,7 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
# Remember to update this number every time a change is made to database
|
||||
# schema files, so the users will be informed on server restarts.
|
||||
SCHEMA_VERSION = 50
|
||||
SCHEMA_VERSION = 51
|
||||
|
||||
dir_path = os.path.abspath(os.path.dirname(__file__))
|
||||
|
||||
|
|
27
synapse/storage/schema/delta/51/monthly_active_users.sql
Normal file
27
synapse/storage/schema/delta/51/monthly_active_users.sql
Normal file
|
@ -0,0 +1,27 @@
|
|||
/* Copyright 2018 New Vector Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
-- a table of monthly active users, for use where blocking based on mau limits
|
||||
CREATE TABLE monthly_active_users (
|
||||
user_id TEXT NOT NULL,
|
||||
-- Last time we saw the user. Not guaranteed to be accurate due to rate limiting
|
||||
-- on updates, Granularity of updates governed by
|
||||
-- synapse.storage.monthly_active_users.LAST_SEEN_GRANULARITY
|
||||
-- Measured in ms since epoch.
|
||||
timestamp BIGINT NOT NULL
|
||||
);
|
||||
|
||||
CREATE UNIQUE INDEX monthly_active_users_users ON monthly_active_users(user_id);
|
||||
CREATE INDEX monthly_active_users_time_stamp ON monthly_active_users(timestamp);
|
|
@ -21,15 +21,17 @@ from six.moves import range
|
|||
|
||||
from twisted.internet import defer
|
||||
|
||||
from synapse.api.constants import EventTypes
|
||||
from synapse.api.errors import NotFoundError
|
||||
from synapse.storage._base import SQLBaseStore
|
||||
from synapse.storage.background_updates import BackgroundUpdateStore
|
||||
from synapse.storage.engines import PostgresEngine
|
||||
from synapse.storage.events_worker import EventsWorkerStore
|
||||
from synapse.util.caches import get_cache_factor_for, intern_string
|
||||
from synapse.util.caches.descriptors import cached, cachedList
|
||||
from synapse.util.caches.dictionary_cache import DictionaryCache
|
||||
from synapse.util.stringutils import to_ascii
|
||||
|
||||
from ._base import SQLBaseStore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
@ -46,7 +48,8 @@ class _GetStateGroupDelta(namedtuple("_GetStateGroupDelta", ("prev_group", "delt
|
|||
return len(self.delta_ids) if self.delta_ids else 0
|
||||
|
||||
|
||||
class StateGroupWorkerStore(SQLBaseStore):
|
||||
# this inherits from EventsWorkerStore because it calls self.get_events
|
||||
class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
|
||||
"""The parts of StateGroupStore that can be called from workers.
|
||||
"""
|
||||
|
||||
|
@ -61,6 +64,30 @@ class StateGroupWorkerStore(SQLBaseStore):
|
|||
"*stateGroupCache*", 500000 * get_cache_factor_for("stateGroupCache")
|
||||
)
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def get_room_version(self, room_id):
|
||||
"""Get the room_version of a given room
|
||||
|
||||
Args:
|
||||
room_id (str)
|
||||
|
||||
Returns:
|
||||
Deferred[str]
|
||||
|
||||
Raises:
|
||||
NotFoundError if the room is unknown
|
||||
"""
|
||||
# for now we do this by looking at the create event. We may want to cache this
|
||||
# more intelligently in future.
|
||||
state_ids = yield self.get_current_state_ids(room_id)
|
||||
create_id = state_ids.get((EventTypes.Create, ""))
|
||||
|
||||
if not create_id:
|
||||
raise NotFoundError("Unknown room")
|
||||
|
||||
create_event = yield self.get_event(create_id)
|
||||
defer.returnValue(create_event.content.get("room_version", "1"))
|
||||
|
||||
@cached(max_entries=100000, iterable=True)
|
||||
def get_current_state_ids(self, room_id):
|
||||
"""Get the current state event ids for a room based on the
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue