// mjolnir/src/utils.ts
/*
Copyright 2019-2021 The Matrix.org Foundation C.I.C.
2019-09-27 20:26:57 +00:00
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
import {
extractRequestError,
LogLevel,
LogService,
MatrixClient,
MatrixGlob,
MessageType,
Permalinks,
TextualMessageEventContent,
UserID
} from "matrix-bot-sdk";
import { logMessage } from "./LogProxy";
import config from "./config";
import * as htmlEscape from "escape-html";
import { ClientRequest, IncomingMessage } from "http";
2019-10-09 14:53:37 +00:00
2019-09-27 20:26:57 +00:00
/**
 * Copies the members of a set into a newly allocated array.
 * @param set The set to read from. It is not modified.
 * @returns A new array containing every member of `set`, in the set's iteration order.
 */
export function setToArray<T>(set: Set<T>): T[] {
    return Array.from(set);
}
2019-10-09 14:53:37 +00:00
2019-12-10 02:43:41 +00:00
/**
 * Tells whether a membership event is a genuine join, i.e. a transition *into* `join`
 * from some other membership, rather than a `join` -> `join` update such as a profile change.
 * @param event The event to inspect. Its `content` must be present; `unsigned.prev_content` is optional.
 * @returns `true` if the event's membership is `join` and the previous membership was not `join`.
 */
export function isTrueJoinEvent(event: any): boolean {
    // An event without an explicit membership is treated as a join.
    const currentMembership = event['content']['membership'] || 'join';
    if (currentMembership !== 'join') {
        return false;
    }
    // Without a previous state to compare against, the user is assumed to have been out of the room.
    const unsigned = event['unsigned'];
    const previousContent = unsigned ? unsigned['prev_content'] : undefined;
    const previousMembership = (previousContent && previousContent['membership']) || 'leave';
    return previousMembership !== 'join';
}
2020-04-14 22:44:31 +00:00
/**
 * Redacts the messages found for a user (or for every sender matching a glob) in each of the given rooms.
 * Rooms are processed one after another and the events of a room are redacted sequentially.
 * When `config.noop` is set nothing is redacted; a warning is logged for each event instead.
 * @param client The client used to send the redactions.
 * @param userIdOrGlob The sender to look for, handed to `getMessagesByUserIn` unchanged.
 * @param targetRoomIds The rooms to search.
 * @param limit Handed to `getMessagesByUserIn` for every room. Defaults to 1000.
 */
export async function redactUserMessagesIn(client: MatrixClient, userIdOrGlob: string, targetRoomIds: string[], limit = 1000) {
    const logSource = "utils#redactUserMessagesIn";
    for (const roomId of targetRoomIds) {
        // Handles one batch of matching events as delivered by getMessagesByUserIn.
        const redactBatch = async (batch: any[]) => {
            for (const matchedEvent of batch) {
                const eventId = matchedEvent['event_id'];
                await logMessage(LogLevel.DEBUG, logSource, `Redacting ${eventId} in ${roomId}`, roomId);
                if (config.noop) {
                    await logMessage(LogLevel.WARN, logSource, `Tried to redact ${eventId} in ${roomId} but Mjolnir is running in no-op mode`, roomId);
                    continue;
                }
                await client.redactEvent(roomId, eventId);
            }
        };

        await logMessage(LogLevel.DEBUG, logSource, `Fetching sent messages for ${userIdOrGlob} in ${roomId} to redact...`, roomId);
        await getMessagesByUserIn(client, userIdOrGlob, roomId, limit, redactBatch);
    }
}
2019-12-10 02:43:41 +00:00
/**
 * Gets the events sent by a user (or users if using wildcards) in a given room ID, paginating
 * backwards through the room history starting from the most recent events.
 * @param {MatrixClient} client The client to use.
 * @param {string} sender The sender. A matrix user id or a wildcard to match multiple senders e.g. *.example.com.
 * Can also be used to generically search the sender field e.g. *bob* for all events from senders with "bob" in them.
 * See `MatrixGlob` in matrix-bot-sdk.
 * @param {string} roomId The room ID to search in.
 * @param {number} limit The maximum number of events to examine. This parameter has no default here; callers
 * must supply it. When a wildcard is used this can be greater than the number of events provided to the
 * callback, as not all events paginated will match the glob. The reason the limit is calculated this way
 * is so that a caller cannot accidentally traverse the entire room history.
 * @param {function} cb Callback function to handle the events as they are received, called at most once per
 * page of history and only if that page contains relevant events. The callback may be asynchronous: its
 * result is awaited before the next page is requested.
 * @returns {Promise<void>} Resolves when either: the limit has been reached, the homeserver returned the same
 * pagination token twice in a row, or there is no more timeline to paginate.
 * @throws {Error} If the response of rooms/initialSync has no `messages` pagination chunk.
 */
export async function getMessagesByUserIn(client: MatrixClient, sender: string, roomId: string, limit: number, cb: (events: any[]) => void | Promise<void>): Promise<void> {
    const isGlob = sender.includes("*");
    // The `senders` filter only accepts exact user IDs, so for globs every event of the
    // room is requested and the matching is done locally by `testUser`.
    const roomEventFilter = {
        rooms: [roomId],
        ...(isGlob ? {} : { senders: [sender] }),
    };

    const matcher = new MatrixGlob(sender);

    function testUser(userId: string): boolean {
        if (isGlob) {
            return matcher.test(userId);
        } else {
            return userId === sender;
        }
    }

    /**
     * Note: `rooms/initialSync` is deprecated. However, there is no replacement for this API for the time being.
     * While previous versions of this function used `/sync`, experience shows that it can grow extremely
     * slow (4-5 minutes long) when we need to sync many large rooms, which leads to timeouts and
     * breakage in Mjolnir, see https://github.com/matrix-org/synapse/issues/10842.
     */
    function roomInitialSync() {
        return client.doRequest("GET", `/_matrix/client/r0/rooms/${encodeURIComponent(roomId)}/initialSync`);
    }

    // Requests one page of history, going backwards from the pagination token `from`.
    function backfill(from: string) {
        const qs = {
            filter: JSON.stringify(roomEventFilter),
            from: from,
            dir: "b",
        };
        LogService.info("utils", "Backfilling with token: " + from);
        return client.doRequest("GET", `/_matrix/client/r0/rooms/${encodeURIComponent(roomId)}/messages`, qs);
    }

    // Do an initial sync first to get the batch token
    const response = await roomInitialSync();

    // Number of events examined so far, whether or not they matched the sender.
    let processed = 0;

    /**
     * Filter events from the timeline to events that are from a matching sender and under the limit that can be processed by the callback.
     * @param events Events from the room timeline.
     * @returns Events that can safely be processed by the callback.
     */
    function filterEvents(events: any[]) {
        const messages: any[] = [];
        for (const event of events) {
            if (processed >= limit) return messages; // we have provided enough events.
            processed++;

            if (testUser(event['sender'])) messages.push(event);
        }
        return messages;
    }

    // The recommended APIs for fetching events from a room is to use both rooms/initialSync then /messages.
    // Unfortunately, this results in code that is rather hard to read, as these two APIs employ very different data structures.
    // We prefer discarding the results from rooms/initialSync and reading only from /messages,
    // even if it's a little slower, for the sake of code maintenance.
    const timeline = response['messages'];
    if (!timeline) {
        throw new Error(`Internal Error: rooms/initialSync did not return a pagination chunk for ${roomId}, this is not normal and if it is we need to stop using it. See roomInitialSync() for why we are using it.`);
    }

    // The end of the PaginationChunk has the most recent events from rooms/initialSync.
    // This token is required to be present in the PaginationChunk from rooms/initialSync.
    let token = timeline['end'];
    // We check that we have the token because rooms/messages is not required to provide one
    // and will not provide one when there is no more history to paginate.
    while (token && processed < limit) {
        const bfMessages = await backfill(token);
        const lastToken = token;
        token = bfMessages['end'];
        if (lastToken === token) {
            // No progress was made; bail out rather than requesting the same page forever.
            LogService.debug("utils", "Backfill returned same end token - returning early.");
            return;
        }
        const events = filterEvents(bfMessages['chunk'] || []);
        // If we are using a glob, there may be no relevant events in this chunk.
        if (events.length > 0) {
            await cb(events);
        }
    }
}
/**
 * Builds a textual message in which every occurrence of the given room IDs is replaced by the
 * room's published alias (plain-text body) and by a link to the room (HTML body).
 * If no alias is published, or the lookup fails, the room ID itself is used as the label.
 * @param client The client used to look up the bot's own user ID and the published aliases.
 * @param text The plain-text message. It is HTML-escaped to produce the formatted body.
 * @param roomIds The room ID, or list of room IDs, to look for in `text`.
 * @param msgtype The `msgtype` of the resulting content. Defaults to "m.text".
 * @returns The message content with `body`, `formatted_body`, `msgtype` and `format` set.
 */
export async function replaceRoomIdsWithPills(client: MatrixClient, text: string, roomIds: string[] | string, msgtype: MessageType = "m.text"): Promise<TextualMessageEventContent> {
    const ids = Array.isArray(roomIds) ? roomIds : [roomIds];

    const content: TextualMessageEventContent = {
        body: text,
        formatted_body: htmlEscape(text),
        msgtype: msgtype,
        format: "org.matrix.custom.html",
    };

    const escapeRegex = (v: string): string => {
        return v.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
    };

    // The bot's own homeserver is offered as the routing hint of every generated permalink.
    const viaServers = [(new UserID(await client.getUserId())).domain];
    for (const roomId of ids) {
        let alias = roomId;
        try {
            alias = (await client.getPublishedAlias(roomId)) || roomId;
        } catch (e) {
            // This is a recursive call, so tell the function not to try and call us
            await logMessage(LogLevel.WARN, "utils", `Failed to resolve room alias for ${roomId} - see console for details`, null, true);
            LogService.warn("utils", extractRequestError(e));
        }

        // Replacer functions are used instead of replacement strings so that sequences such
        // as `$&` or `$1` inside an alias are inserted literally rather than being expanded.
        content.body = content.body.replace(new RegExp(escapeRegex(roomId), "g"), () => alias);
        if (content.formatted_body) {
            // The alias comes from the homeserver, so it is escaped before being embedded in HTML.
            const pill = `<a href="${htmlEscape(Permalinks.forRoom(alias, viaServers))}">${htmlEscape(alias)}</a>`;
            // formatted_body holds escaped text, so the escaped form of the room ID is what appears in it.
            const escapedRoomId = new RegExp(escapeRegex(htmlEscape(roomId)), "g");
            content.formatted_body = content.formatted_body.replace(escapedRoomId, () => pill);
        }
    }

    return content;
}
/**
 * Wraps a client in a `Proxy` that rewrites the rejections of its asynchronous methods.
 *
 * Non-function properties, and the results of methods that do not return a `Promise`, are passed
 * through untouched. When a method's promise rejects with an `IncomingMessage`, the rejection is
 * replaced by an `Error` whose message summarises the request and response; any other rejection
 * reason is rethrown as-is.
 * @param client The client to wrap. Methods are always invoked with this object as `this`.
 * @returns The proxy standing in for `client`.
 */
export function makeClientWithSanerExceptions(client: MatrixClient): MatrixClient {
    // MatrixClient has a tendency of throwing instances of `IncomingMessage` instead of
    // instances of `Error`. The former take ~800 lines of log and provide no stack trace,
    // which makes them typically useless, so they are condensed into a one-line `Error`.
    const toReadableError = (response: IncomingMessage): Error => {
        const hasRequest = "req" in response && (response as any).req instanceof ClientRequest;
        let method: string | null = response.method || null;
        let path: string = response.url || '';
        if (hasRequest) {
            // Fall back on the originating request for anything the response does not carry.
            method = method || (response as any).req.method;
            path = path || (response as any).req.path;
        }
        const body: string | null = "body" in response ? JSON.stringify((response as any).body) : null;
        return new Error(`Error during MatrixClient request ${method} ${path}: ${response.statusCode} ${response.statusMessage} -- ${body}`);
    };

    return new Proxy(client, {
        get(target, property) {
            const member = target[property];
            if (typeof member !== "function") {
                return member;
            }
            return function (...callArgs) {
                const outcome = member.apply(client, callArgs);
                if (outcome instanceof Promise) {
                    return outcome.catch(failure => {
                        if (failure instanceof IncomingMessage) {
                            throw toReadableError(failure);
                        }
                        // In most cases, we're happy with the rejection as it is.
                        throw failure;
                    });
                }
                // We're only interested in watching async code.
                return outcome;
            };
        }
    });
}