2019-07-01 12:55:26 -04:00
|
|
|
#
|
2023-11-21 15:29:58 -05:00
|
|
|
# This file is licensed under the Affero General Public License (AGPL) version 3.
|
|
|
|
#
|
|
|
|
# Copyright (C) 2023 New Vector, Ltd
|
|
|
|
#
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU Affero General Public License as
|
|
|
|
# published by the Free Software Foundation, either version 3 of the
|
|
|
|
# License, or (at your option) any later version.
|
|
|
|
#
|
|
|
|
# See the GNU Affero General Public License for more details:
|
|
|
|
# <https://www.gnu.org/licenses/agpl-3.0.html>.
|
|
|
|
#
|
|
|
|
# Originally licensed under the Apache License, Version 2.0:
|
|
|
|
# <http://www.apache.org/licenses/LICENSE-2.0>.
|
|
|
|
#
|
|
|
|
# [This file includes modifications made by New Vector Limited]
|
2019-07-01 12:55:26 -04:00
|
|
|
#
|
|
|
|
#
|
2019-07-15 08:43:25 -04:00
|
|
|
import argparse
|
2020-09-08 07:33:48 -04:00
|
|
|
import json
|
2019-07-01 12:55:26 -04:00
|
|
|
import logging
|
2019-07-15 09:15:22 -04:00
|
|
|
import os
|
2019-07-01 12:55:26 -04:00
|
|
|
import sys
|
2019-07-15 09:15:22 -04:00
|
|
|
import tempfile
|
2023-09-18 09:55:04 -04:00
|
|
|
from typing import List, Mapping, Optional, Sequence
|
2019-07-15 09:15:22 -04:00
|
|
|
|
2019-07-01 12:55:26 -04:00
|
|
|
from twisted.internet import defer, task
|
|
|
|
|
|
|
|
import synapse
|
|
|
|
from synapse.app import _base
|
|
|
|
from synapse.config._base import ConfigError
|
|
|
|
from synapse.config.homeserver import HomeServerConfig
|
|
|
|
from synapse.config.logger import setup_logging
|
2021-11-10 15:06:54 -05:00
|
|
|
from synapse.events import EventBase
|
2019-07-15 09:15:22 -04:00
|
|
|
from synapse.handlers.admin import ExfiltrationWriter
|
2019-07-01 12:55:26 -04:00
|
|
|
from synapse.server import HomeServer
|
2022-07-11 09:14:09 -04:00
|
|
|
from synapse.storage.database import DatabasePool, LoggingDatabaseConnection
|
2022-07-21 13:56:45 -04:00
|
|
|
from synapse.storage.databases.main.account_data import AccountDataWorkerStore
|
|
|
|
from synapse.storage.databases.main.appservice import (
|
|
|
|
ApplicationServiceTransactionWorkerStore,
|
|
|
|
ApplicationServiceWorkerStore,
|
|
|
|
)
|
2023-02-01 10:45:19 -05:00
|
|
|
from synapse.storage.databases.main.client_ips import ClientIpWorkerStore
|
2022-07-21 13:56:45 -04:00
|
|
|
from synapse.storage.databases.main.deviceinbox import DeviceInboxWorkerStore
|
2022-11-11 05:51:49 -05:00
|
|
|
from synapse.storage.databases.main.devices import DeviceWorkerStore
|
|
|
|
from synapse.storage.databases.main.event_federation import EventFederationWorkerStore
|
|
|
|
from synapse.storage.databases.main.event_push_actions import (
|
|
|
|
EventPushActionsWorkerStore,
|
|
|
|
)
|
|
|
|
from synapse.storage.databases.main.events_worker import EventsWorkerStore
|
|
|
|
from synapse.storage.databases.main.filtering import FilteringWorkerStore
|
2023-02-23 13:14:17 -05:00
|
|
|
from synapse.storage.databases.main.media_repository import MediaRepositoryStore
|
2023-02-01 10:45:19 -05:00
|
|
|
from synapse.storage.databases.main.profile import ProfileWorkerStore
|
2022-11-11 05:51:49 -05:00
|
|
|
from synapse.storage.databases.main.push_rule import PushRulesWorkerStore
|
2022-07-21 13:56:45 -04:00
|
|
|
from synapse.storage.databases.main.receipts import ReceiptsWorkerStore
|
|
|
|
from synapse.storage.databases.main.registration import RegistrationWorkerStore
|
2022-11-11 05:51:49 -05:00
|
|
|
from synapse.storage.databases.main.relations import RelationsWorkerStore
|
2021-10-18 11:14:12 -04:00
|
|
|
from synapse.storage.databases.main.room import RoomWorkerStore
|
2022-11-11 05:51:49 -05:00
|
|
|
from synapse.storage.databases.main.roommember import RoomMemberWorkerStore
|
|
|
|
from synapse.storage.databases.main.signatures import SignatureWorkerStore
|
|
|
|
from synapse.storage.databases.main.state import StateGroupWorkerStore
|
|
|
|
from synapse.storage.databases.main.stream import StreamWorkerStore
|
2022-07-21 13:56:45 -04:00
|
|
|
from synapse.storage.databases.main.tags import TagsWorkerStore
|
2022-11-11 05:51:49 -05:00
|
|
|
from synapse.storage.databases.main.user_erasure_store import UserErasureWorkerStore
|
2023-09-18 09:55:04 -04:00
|
|
|
from synapse.types import JsonMapping, StateMap
|
2022-06-07 10:24:11 -04:00
|
|
|
from synapse.util import SYNAPSE_VERSION
|
2019-07-01 12:55:26 -04:00
|
|
|
from synapse.util.logcontext import LoggingContext
|
|
|
|
|
|
|
|
logger = logging.getLogger("synapse.app.admin_cmd")
|
|
|
|
|
|
|
|
|
2023-05-16 15:56:38 -04:00
|
|
|
class AdminCmdStore(
    FilteringWorkerStore,
    ClientIpWorkerStore,
    DeviceWorkerStore,
    TagsWorkerStore,
    DeviceInboxWorkerStore,
    AccountDataWorkerStore,
    PushRulesWorkerStore,
    ApplicationServiceTransactionWorkerStore,
    ApplicationServiceWorkerStore,
    RoomMemberWorkerStore,
    RelationsWorkerStore,
    EventFederationWorkerStore,
    EventPushActionsWorkerStore,
    StateGroupWorkerStore,
    SignatureWorkerStore,
    UserErasureWorkerStore,
    ReceiptsWorkerStore,
    StreamWorkerStore,
    EventsWorkerStore,
    RegistrationWorkerStore,
    RoomWorkerStore,
    ProfileWorkerStore,
    MediaRepositoryStore,
):
    """Composite datastore built from the worker stores listed as bases.

    The order of the base classes determines the method resolution order and
    must not be changed casually.

    Args:
        database: The database pool handed on to the parent stores.
        db_conn: The database connection handed on to the parent stores.
        hs: The homeserver; also the source of the clock stored on `self`.
    """

    def __init__(
        self,
        database: DatabasePool,
        db_conn: LoggingDatabaseConnection,
        hs: "HomeServer",
    ):
        super().__init__(database, db_conn, hs)

        # `filter_events_for_client` expects to find a `clock` attribute on
        # the store, so provide one here. Ideally that function would be
        # refactored to accept a `Clock` directly instead.
        self.clock = hs.get_clock()
|
2019-07-01 12:55:26 -04:00
|
|
|
|
|
|
|
|
|
|
|
class AdminCmdServer(HomeServer):
    """A `HomeServer` whose datastore class is `AdminCmdStore`."""

    DATASTORE_CLASS = AdminCmdStore  # type: ignore
|
2019-07-01 12:55:26 -04:00
|
|
|
|
|
|
|
|
2021-11-10 15:06:54 -05:00
|
|
|
async def export_data_command(hs: HomeServer, args: argparse.Namespace) -> None:
    """Export a user's data to disk and print the result of the export.

    Args:
        hs: The homeserver providing the admin handler.
        args: Parsed command-line arguments; `user_id` and
            `output_directory` are read from it.
    """
    target_user = args.user_id
    output_directory = args.output_directory

    admin_handler = hs.get_admin_handler()
    writer = FileExfiltrationWriter(target_user, directory=output_directory)

    # The handler returns whatever the writer's `finished()` produced.
    export_result = await admin_handler.export_user_data(target_user, writer)
    print(export_result)
|
|
|
|
|
|
|
|
|
2019-07-15 09:15:22 -04:00
|
|
|
class FileExfiltrationWriter(ExfiltrationWriter):
    """An ExfiltrationWriter that writes the user's data to a directory.

    Returns the directory location on completion.

    Note: This writes to disk on the main reactor thread.

    Every record is serialised with `json.dump` and (except for media
    metadata, which overwrites) appended to its file. A file that receives
    several records therefore contains the JSON documents back-to-back with
    no separator between them.

    Args:
        user_id: The user whose data is being exfiltrated.
        directory: The directory to write the data to, if None then will write
            to a temporary directory.

    Raises:
        Exception: If the target directory already contains entries.
    """

    def __init__(self, user_id: str, directory: Optional[str] = None):
        self.user_id = user_id

        if directory:
            self.base_directory = directory
        else:
            self.base_directory = tempfile.mkdtemp(
                prefix="synapse-exfiltrate__%s__" % (user_id,)
            )

        os.makedirs(self.base_directory, exist_ok=True)
        # `os.listdir` already returns a list, so its truthiness is enough to
        # tell whether the directory is empty.
        if os.listdir(self.base_directory):
            raise Exception("Directory must be empty")

    def _room_directory(self, room_id: str) -> str:
        """Return the per-room output directory, creating it if necessary."""
        room_directory = os.path.join(self.base_directory, "rooms", room_id)
        os.makedirs(room_directory, exist_ok=True)
        return room_directory

    def _user_directory(self) -> str:
        """Return the `user_data` output directory, creating it if necessary."""
        user_directory = os.path.join(self.base_directory, "user_data")
        os.makedirs(user_directory, exist_ok=True)
        return user_directory

    def _append_stripped_state(
        self, room_id: str, file_name: str, state: StateMap[EventBase]
    ) -> None:
        """Append each value of `state` as JSON to `file_name` in the room directory."""
        state_file = os.path.join(self._room_directory(room_id), file_name)

        with open(state_file, "a") as f:
            for event in state.values():
                json.dump(event, fp=f)

    @staticmethod
    def _append_records(path: str, records: Sequence[JsonMapping]) -> None:
        """Append each record as JSON to `path`, opening the file only once.

        When `records` is empty the file is neither created nor touched.
        """
        if not records:
            return

        with open(path, "a") as f:
            for record in records:
                json.dump(record, fp=f)

    def write_events(self, room_id: str, events: List[EventBase]) -> None:
        """Append the PDU JSON of each event to `rooms/<room_id>/events`.

        Args:
            room_id: The room the events belong to.
            events: The events to write.
        """
        events_file = os.path.join(self._room_directory(room_id), "events")

        with open(events_file, "a") as f:
            for event in events:
                json.dump(event.get_pdu_json(), fp=f)

    def write_state(
        self, room_id: str, event_id: str, state: StateMap[EventBase]
    ) -> None:
        """Append the PDU JSON of each state event to `rooms/<room_id>/state/<event_id>`.

        Args:
            room_id: The room the state belongs to.
            event_id: Used as the name of the file the state is written to.
            state: The state events to write; only the values are used.
        """
        room_directory = os.path.join(self.base_directory, "rooms", room_id)
        state_directory = os.path.join(room_directory, "state")
        os.makedirs(state_directory, exist_ok=True)

        event_file = os.path.join(state_directory, event_id)

        with open(event_file, "a") as f:
            for event in state.values():
                json.dump(event.get_pdu_json(), fp=f)

    def write_invite(
        self, room_id: str, event: EventBase, state: StateMap[EventBase]
    ) -> None:
        """Write an invite event and its accompanying state.

        Args:
            room_id: The room the invite is for.
            event: The invite event, written alongside the room's other events.
            state: The invite state, written to `rooms/<room_id>/invite_state`.
        """
        self.write_events(room_id, [event])

        # We write the invite state somewhere else as they aren't full events
        # and are only a subset of the state at the event.
        self._append_stripped_state(room_id, "invite_state", state)

    def write_knock(
        self, room_id: str, event: EventBase, state: StateMap[EventBase]
    ) -> None:
        """Write a knock event and its accompanying state.

        Args:
            room_id: The room the knock is for.
            event: The knock event, written alongside the room's other events.
            state: The knock state, written to `rooms/<room_id>/knock_state`.
        """
        self.write_events(room_id, [event])

        # We write the knock state somewhere else as they aren't full events
        # and are only a subset of the state at the event.
        self._append_stripped_state(room_id, "knock_state", state)

    def write_profile(self, profile: JsonMapping) -> None:
        """Append the profile as JSON to `user_data/profile`.

        Args:
            profile: The profile data to write.
        """
        profile_file = os.path.join(self._user_directory(), "profile")

        with open(profile_file, "a") as f:
            json.dump(profile, fp=f)

    def write_devices(self, devices: Sequence[JsonMapping]) -> None:
        """Append each device as JSON to `user_data/devices`.

        Args:
            devices: The devices to write. If empty, no file is created.
        """
        device_file = os.path.join(self._user_directory(), "devices")
        self._append_records(device_file, devices)

    def write_connections(self, connections: Sequence[JsonMapping]) -> None:
        """Append each connection as JSON to `user_data/connections`.

        Args:
            connections: The connections to write. If empty, no file is
                created.
        """
        connection_file = os.path.join(self._user_directory(), "connections")
        self._append_records(connection_file, connections)

    def write_account_data(
        self, file_name: str, account_data: Mapping[str, JsonMapping]
    ) -> None:
        """Append the account data as JSON to `user_data/account_data/<file_name>`.

        Args:
            file_name: Name of the file inside the account data directory.
            account_data: The account data to write, as a single JSON object.
        """
        account_data_directory = os.path.join(
            self.base_directory, "user_data", "account_data"
        )
        os.makedirs(account_data_directory, exist_ok=True)

        account_data_file = os.path.join(account_data_directory, file_name)

        with open(account_data_file, "a") as f:
            json.dump(account_data, fp=f)

    def write_media_id(self, media_id: str, media_metadata: JsonMapping) -> None:
        """Write the media metadata as JSON to `media_ids/<media_id>`.

        Unlike the other writers this opens the file in "w" mode, so an
        existing file for the same media ID is overwritten rather than
        appended to.

        Args:
            media_id: Used as the name of the file the metadata is written to.
            media_metadata: The metadata to write.
        """
        file_directory = os.path.join(self.base_directory, "media_ids")
        os.makedirs(file_directory, exist_ok=True)
        media_id_file = os.path.join(file_directory, media_id)

        with open(media_id_file, "w") as f:
            json.dump(media_metadata, fp=f)

    def finished(self) -> str:
        """Return the directory that the data was written to."""
        return self.base_directory
|
|
|
|
|
|
|
|
|
2021-11-10 15:06:54 -05:00
|
|
|
def start(config_options: List[str]) -> None:
    """Parse the command line, set up an `AdminCmdServer` and run the command.

    On a `ConfigError` while loading the config, the error is written to
    stderr and the process exits with status 1.

    Args:
        config_options: The command-line arguments, without the program name.
    """
    parser = argparse.ArgumentParser(description="Synapse Admin Command")
    HomeServerConfig.add_arguments_to_parser(parser)

    subparser = parser.add_subparsers(
        title="Admin Commands",
        required=True,
        dest="command",
        metavar="<admin_command>",
        help="The admin command to perform.",
    )
    export_data_parser = subparser.add_parser(
        "export-data", help="Export all data for a user"
    )
    export_data_parser.add_argument("user_id", help="User to extract data from")
    export_data_parser.add_argument(
        "--output-directory",
        action="store",
        metavar="DIRECTORY",
        required=False,
        help="The directory to store the exported data in. Must be empty. Defaults"
        " to creating a temp directory.",
    )
    # Each sub-command stores its coroutine function under `func`; it is
    # awaited from `run()` below.
    export_data_parser.set_defaults(func=export_data_command)

    try:
        config, args = HomeServerConfig.load_config_with_parser(parser, config_options)
    except ConfigError as e:
        sys.stderr.write("\n" + str(e) + "\n")
        sys.exit(1)

    if config.worker.worker_app is not None:
        assert config.worker.worker_app == "synapse.app.admin_cmd"

    # Update the config with some basic overrides so that we don't have to
    # specify a full worker config.
    config.worker.worker_app = "synapse.app.admin_cmd"

    if not config.worker.worker_daemonize and not config.worker.worker_log_config:
        # Since we're meant to be run as a "command" let's not redirect stdio
        # unless we've actually set log config.
        config.logging.no_redirect_stdio = True

    # Explicitly disable background processes
    config.worker.should_update_user_directory = False
    config.worker.run_background_tasks = False
    config.worker.start_pushers = False
    config.worker.pusher_shard_config.instances = []
    config.worker.send_federation = False
    config.worker.federation_shard_config.instances = []

    synapse.events.USE_FROZEN_DICTS = config.server.use_frozen_dicts

    ss = AdminCmdServer(
        config.server.server_name,
        config=config,
        version_string=f"Synapse/{SYNAPSE_VERSION}",
    )

    setup_logging(ss, config, use_worker_options=True)

    ss.setup()

    # We use task.react as the basic run command as it correctly handles tearing
    # down the reactor when the deferreds resolve and setting the return value.
    # We also make sure that `_base.start` gets run before we actually run the
    # command.

    async def run() -> None:
        """Start the server, then await the selected admin command."""
        with LoggingContext("command"):
            await _base.start(ss)
            await args.func(ss, args)

    _base.start_worker_reactor(
        "synapse-admin-cmd",
        config,
        run_command=lambda: task.react(lambda _reactor: defer.ensureDeferred(run())),
    )
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Hand everything after the program name to `start`, running it inside a
    # "main" logging context.
    cli_arguments = sys.argv[1:]
    with LoggingContext("main"):
        start(cli_arguments)
|