From fefe29e98bd9d2dba15367c7587dec3d88727666 Mon Sep 17 00:00:00 2001 From: gnuxie Date: Thu, 14 Oct 2021 10:35:53 +0100 Subject: [PATCH] Improve readability of getMessagesByUser --- src/utils.ts | 63 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 22 deletions(-) diff --git a/src/utils.ts b/src/utils.ts index 9713332..32601f9 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -69,13 +69,19 @@ export async function redactUserMessagesIn(client: MatrixClient, userIdOrGlob: s * Gets all the events sent by a user (or users if using wildcards) in a given room ID, since * the time they joined. * @param {MatrixClient} client The client to use. - * @param {string} sender The sender. Can include wildcards to match multiple people. + * @param {string} sender The sender. A matrix user id or a wildcard to match multiple senders e.g. *.example.com. + * Can also be used to generically search the sender field e.g. *bob* for all events from senders with "bob" in them. + * See `MatrixGlob` in matrix-bot-sdk. * @param {string} roomId The room ID to search in. - * @param {number} limit The maximum number of messages to search. Defaults to 1000. + * @param {number} limit The maximum number of messages to search. Defaults to 1000. This will be greater than or equal to the + * number of events that are provided to the callback if a wildcard is used, as not all events paginated + * will match the glob. The reason the limit is calculated this way is so that a caller cannot accidentally + * traverse the entire room history. * @param {function} cb Callback function to handle the events as they are received. - * @returns {Promise} Resolves when complete. + * The callback will only be called if there are any relevant events. + * @returns {Promise} Resolves when either: the limit has been reached, no relevant events could be found or there is no more timeline to paginate. 
*/ -export async function getMessagesByUserIn(client: MatrixClient, sender: string, roomId: string, limit: number, cb: (events: any[]) => void): Promise { +export async function getMessagesByUserIn(client: MatrixClient, sender: string, roomId: string, limit: number, cb: (events: any[]) => void): Promise { const isGlob = sender.includes("*"); const roomEventFilter = { rooms: [roomId], @@ -108,43 +114,56 @@ export async function getMessagesByUserIn(client: MatrixClient, sender: string, from: from, dir: "b", }; - LogService.info("utils", "Backfilling with token: " + token); + LogService.info("utils", "Backfilling with token: " + from); return client.doRequest("GET", `/_matrix/client/r0/rooms/${encodeURIComponent(roomId)}/messages`, qs); } // Do an initial sync first to get the batch token const response = await roomInitialSync(); - if (!response) return []; let processed = 0; - - const timeline = (response['messages'] || {}) - const syncedMessages = timeline['chunk'] || []; - // The start of the chunk has the oldest events. - let token = timeline['start']; - let bfMessages = {chunk: syncedMessages, end: token}; - do { + /** + * Filter events from the timeline to events that are from a matching sender and under the limit that can be processed by the callback. + * @param events Events from the room timeline. + * @returns Events that can safely be processed by the callback. + */ + function filterEvents(events: any[]) { const messages: any[] = []; - for (const event of (bfMessages['chunk'] || [])) { - if (processed >= limit) return; // we're done even if we don't want to be + for (const event of events) { + if (processed >= limit) return messages; // we have provided enough events. processed++; if (testUser(event['sender'])) messages.push(event); } + return messages; + } - if (token) { - bfMessages = await backfill(token); + // The recommended APIs for fetching events from a room is to use both rooms/initialSync then /messages. 
+ // Unfortunately, this results in code that is rather hard to read, as these two APIs employ very different data structures. + // We prefer discarding the results from rooms/initialSync and reading only from /messages, + // even if it's a little slower, for the sake of code maintenance. + const timeline = response['messages'] + if (timeline) { + // The end of the PaginationChunk has the most recent events from rooms/initialSync. + // This token is required to be present in the PaginationChunk from rooms/initialSync. + let token = timeline['end']!; + // We check that we have the token because rooms/messages is not required to provide one + // and will not provide one when there is no more history to paginate. + while (token && processed < limit) { + const bfMessages = await backfill(token); let lastToken = token; token = bfMessages['end']; if (lastToken === token) { - LogService.warn("utils", "Backfill returned same end token - returning"); - cb(messages); + LogService.debug("utils", "Backfill returned same end token - returning early."); return; } + const events = filterEvents(bfMessages['chunk'] || []); + // If we are using a glob, there may be no relevant events in this chunk. + if (events.length > 0) cb(events); } - - cb(messages); - } while (token); + } else { + throw new Error(`Internal Error: rooms/initialSync did not return a pagination chunk for ${roomId}, this is not normal and if it is we need to stop using it. See roomInitialSync() for why we are using it.`); + } } export async function replaceRoomIdsWithPills(client: MatrixClient, text: string, roomIds: string[] | string, msgtype: MessageType = "m.text"): Promise {