2016-01-06 23:26:29 -05:00
|
|
|
# Copyright 2014-2016 OpenMarket Ltd
|
2014-09-29 09:59:52 -04:00
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
|
2020-12-29 17:42:10 -05:00
|
|
|
import abc
|
2018-07-09 02:09:20 -04:00
|
|
|
import logging
|
2020-12-29 17:42:10 -05:00
|
|
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set
|
2018-07-09 02:09:20 -04:00
|
|
|
|
2019-07-01 12:55:11 -04:00
|
|
|
from synapse.api.constants import Membership
|
2020-12-29 17:42:10 -05:00
|
|
|
from synapse.events import EventBase
|
|
|
|
from synapse.types import JsonDict, RoomStreamToken, StateMap, UserID
|
2019-07-01 12:55:11 -04:00
|
|
|
from synapse.visibility import filter_events_for_client
|
|
|
|
|
2014-09-29 09:59:52 -04:00
|
|
|
from ._base import BaseHandler
|
|
|
|
|
2020-12-29 17:42:10 -05:00
|
|
|
if TYPE_CHECKING:
|
2021-03-23 07:12:48 -04:00
|
|
|
from synapse.server import HomeServer
|
2020-12-29 17:42:10 -05:00
|
|
|
|
2014-09-29 09:59:52 -04:00
|
|
|
# Module-level logger, named after this module via `__name__`.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
class AdminHandler(BaseHandler):
    """Handler for server-admin operations: whois lookups, user detail lookups
    and full export of a user's data.

    NOTE(review): `self.store` is used throughout but is not assigned in this
    class; it is presumably set up by `BaseHandler.__init__` -- confirm.
    """

    def __init__(self, hs: "HomeServer"):
        super().__init__(hs)

        self.storage = hs.get_storage()
        self.state_store = self.storage.state

    async def get_whois(self, user: UserID) -> JsonDict:
        """Build the "whois" description of a user's known connections.

        Args:
            user: The user to look up.

        Returns:
            A dict with the user's ID under "user_id" and, under "devices", a
            single entry keyed by the empty string whose one session lists
            every connection (ip, last_seen, user_agent) returned by the store.
        """
        sessions = await self.store.get_user_ip_and_agents(user)
        connections = [
            {
                "ip": session["ip"],
                "last_seen": session["last_seen"],
                "user_agent": session["user_agent"],
            }
            for session in sessions
        ]

        return {
            "user_id": user.to_string(),
            "devices": {"": {"sessions": [{"connections": connections}]}},
        }

    async def get_user(self, user: UserID) -> Optional[JsonDict]:
        """Fetch the stored details of a user, plus profile and threepids.

        Args:
            user: The user to look up.

        Returns:
            The record returned by the store for this user ID with
            "displayname", "avatar_url" and "threepids" added. If the store
            returns a falsy value (presumably None for an unknown user), that
            value is returned unchanged.
        """
        ret = await self.store.get_user_by_id(user.to_string())
        if ret:
            profile = await self.store.get_profileinfo(user.localpart)
            threepids = await self.store.user_get_threepids(user.to_string())
            ret["displayname"] = profile.display_name
            ret["avatar_url"] = profile.avatar_url
            ret["threepids"] = threepids
        return ret

    async def export_user_data(self, user_id: str, writer: "ExfiltrationWriter") -> Any:
        """Write all data we have on the user to the given writer.

        Args:
            user_id: The user ID to fetch data of.
            writer: The writer to write to.

        Returns:
            Resolves when all data for a user has been written.
            The returned value is that returned by `writer.finished()`.
        """
        # Get all rooms the user is in or has been in
        rooms = await self.store.get_rooms_for_local_user_where_membership_is(
            user_id,
            membership_list=(
                Membership.JOIN,
                Membership.LEAVE,
                Membership.BAN,
                Membership.INVITE,
            ),
        )

        # We only try and fetch events for rooms the user has been in. If
        # they've been e.g. invited to a room without joining then we handle
        # those separately.
        rooms_user_has_been_in = await self.store.get_rooms_user_has_been_in(user_id)

        for index, room in enumerate(rooms):
            room_id = room.room_id

            logger.info(
                "[%s] Handling room %s, %d/%d", user_id, room_id, index + 1, len(rooms)
            )

            forgotten = await self.store.did_forget(user_id, room_id)
            if forgotten:
                # Room IDs are strings, so they must be formatted with %s;
                # %d makes the logging module report a formatting error
                # instead of emitting this message.
                logger.info("[%s] User forgot room %s, ignoring", user_id, room_id)
                continue

            if room_id not in rooms_user_has_been_in:
                # If we haven't been in the rooms then the filtering code below
                # won't return anything, so we need to handle these cases
                # explicitly.

                if room.membership == Membership.INVITE:
                    event_id = room.event_id
                    invite = await self.store.get_event(event_id, allow_none=True)
                    if invite:
                        invited_state = invite.unsigned["invite_room_state"]
                        writer.write_invite(room_id, invite, invited_state)

                continue

            # We only want to bother fetching events up to the last time they
            # were joined. We estimate that point by looking at the
            # stream_ordering of the last membership if it wasn't a join.
            if room.membership == Membership.JOIN:
                stream_ordering = self.store.get_room_max_stream_ordering()
            else:
                stream_ordering = room.stream_ordering

            from_key = RoomStreamToken(0, 0)
            to_key = RoomStreamToken(None, stream_ordering)

            # Events that we've processed in this room
            written_events = set()  # type: Set[str]

            # We need to track gaps in the events stream so that we can then
            # write out the state at those events. We do this by keeping track
            # of events whose prev events we haven't seen.

            # Map from event ID to prev events that haven't been processed.
            event_to_unseen_prevs = {}  # type: Dict[str, Set[str]]

            # The reverse mapping to above, i.e. map from unseen event to events
            # that have the unseen event in their prev_events, i.e. the unseen
            # events "children".
            unseen_to_child_events = {}  # type: Dict[str, Set[str]]

            # We fetch events in the room the user could see by fetching *all*
            # events that we have and then filtering, this isn't the most
            # efficient method perhaps but it does guarantee we get everything.
            while True:
                events, _ = await self.store.paginate_room_events(
                    room_id, from_key, to_key, limit=100, direction="f"
                )
                if not events:
                    break

                # Advance the pagination token before filtering, so that we
                # keep making progress even if every event in this batch is
                # filtered out.
                from_key = events[-1].internal_metadata.after

                events = await filter_events_for_client(self.storage, user_id, events)

                writer.write_events(room_id, events)

                # Update the extremity tracking dicts
                for event in events:
                    # Check if we have any prev events that haven't been
                    # processed yet, and add those to the appropriate dicts.
                    unseen_events = set(event.prev_event_ids()) - written_events
                    if unseen_events:
                        event_to_unseen_prevs[event.event_id] = unseen_events
                        for unseen in unseen_events:
                            unseen_to_child_events.setdefault(unseen, set()).add(
                                event.event_id
                            )

                    # Now check if this event is an unseen prev event, if so
                    # then we remove this event from the appropriate dicts.
                    for child_id in unseen_to_child_events.pop(event.event_id, []):
                        event_to_unseen_prevs[child_id].discard(event.event_id)

                    written_events.add(event.event_id)

                logger.info(
                    "Written %d events in room %s", len(written_events), room_id
                )

            # Extremities are the events who have at least one unseen prev
            # event. Entries whose set has been emptied had all their prev
            # events written later on, so they are skipped.
            for event_id, unseen_prevs in event_to_unseen_prevs.items():
                if not unseen_prevs:
                    continue
                state = await self.state_store.get_state_for_event(event_id)
                writer.write_state(room_id, event_id, state)

        return writer.finished()
|
2019-07-01 12:55:11 -04:00
|
|
|
|
|
|
|
|
2020-12-29 17:42:10 -05:00
|
|
|
class ExfiltrationWriter(metaclass=abc.ABCMeta):
    """Abstract sink describing how exported user data gets written out."""

    @abc.abstractmethod
    def write_events(self, room_id: str, events: List[EventBase]) -> None:
        """Write one batch of events belonging to the given room."""
        raise NotImplementedError

    @abc.abstractmethod
    def write_state(
        self, room_id: str, event_id: str, state: StateMap[EventBase]
    ) -> None:
        """Write the room state as it was at the given event.

        Called only for backward extremities, not for every event.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def write_invite(
        self, room_id: str, event: EventBase, state: StateMap[dict]
    ) -> None:
        """Write an invite to a room together with its invite state.

        Args:
            room_id: The room ID the invite is for.
            event: The invite event.
            state: A subset of the state at the invite, with a subset of the
                event keys (type, state_key, content and sender).
        """
        raise NotImplementedError

    @abc.abstractmethod
    def finished(self) -> Any:
        """Called once all data has been successfully exported and written.

        This function's return value is handed back to the caller of
        `export_user_data`.
        """
        raise NotImplementedError
|