Merge pull request #138 from matrix-org/gnuxie/timeline-weirdness

Fix various issues with timeline pagination (getMessagesByUserIn)

- Ensures the callback that is provided to getMessagesByUserIn is only called when there are relevant events (i.e. it is never called with an empty array).
- Changes getMessagesByUserIn to await on the callback it has been given. (This stops async callbacks from failing outside of the context in which they were provided, as long as the caller awaits on getMessagesByUserIn.)
- Changes getMessagesByUserIn to return a promise that resolves when either: limit has been reached, no relevant events could be found or there is no more timeline to paginate.
- Fixes an issue where a sync filter was being given to `rooms/messages` which led to Mjolnir paginating the entire timeline in situations where it was not necessary (e.g. exact sender was given)
This commit is contained in:
Gnuxie 2021-10-20 12:22:22 +01:00 committed by GitHub
commit e7195678d4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 336 additions and 71 deletions

View File

@ -12,7 +12,7 @@
"lint": "tslint --project ./tsconfig.json -t stylish", "lint": "tslint --project ./tsconfig.json -t stylish",
"start:dev": "yarn build && node lib/index.js", "start:dev": "yarn build && node lib/index.js",
"test": "ts-mocha --project ./tsconfig.json test/commands/**/*.ts", "test": "ts-mocha --project ./tsconfig.json test/commands/**/*.ts",
"test:integration": "NODE_ENV=harness ts-mocha --require test/integration/fixtures.ts --project ./tsconfig.json test/integration/**/*Test.ts", "test:integration": "NODE_ENV=harness ts-mocha --require test/integration/fixtures.ts --project ./tsconfig.json \"test/integration/**/*Test.ts\"",
"test:manual": "NODE_ENV=harness ts-node test/integration/manualLaunchScript.ts" "test:manual": "NODE_ENV=harness ts-node test/integration/manualLaunchScript.ts"
}, },
"devDependencies": { "devDependencies": {

View File

@ -69,49 +69,25 @@ export async function redactUserMessagesIn(client: MatrixClient, userIdOrGlob: s
* Gets all the events sent by a user (or users if using wildcards) in a given room ID, since * Gets all the events sent by a user (or users if using wildcards) in a given room ID, since
* the time they joined. * the time they joined.
* @param {MatrixClient} client The client to use. * @param {MatrixClient} client The client to use.
* @param {string} sender The sender. Can include wildcards to match multiple people. * @param {string} sender The sender. A matrix user id or a wildcard to match multiple senders e.g. *.example.com.
* Can also be used to generically search the sender field e.g. *bob* for all events from senders with "bob" in them.
* See `MatrixGlob` in matrix-bot-sdk.
* @param {string} roomId The room ID to search in. * @param {string} roomId The room ID to search in.
* @param {number} limit The maximum number of messages to search. Defaults to 1000. * @param {number} limit The maximum number of messages to search. Defaults to 1000. This will be a greater or equal
* number of events that are provided to the callback if a wildcard is used, as not all events paginated
* will match the glob. The reason the limit is calculated this way is so that a caller cannot accidentally
* traverse the entire room history.
* @param {function} cb Callback function to handle the events as they are received. * @param {function} cb Callback function to handle the events as they are received.
* @returns {Promise<any>} Resolves when complete. * The callback will only be called if there are any relevant events.
* @returns {Promise<void>} Resolves when either: the limit has been reached, no relevant events could be found or there is no more timeline to paginate.
*/ */
export async function getMessagesByUserIn(client: MatrixClient, sender: string, roomId: string, limit: number, cb: (events: any[]) => void): Promise<any> { export async function getMessagesByUserIn(client: MatrixClient, sender: string, roomId: string, limit: number, cb: (events: any[]) => void): Promise<void> {
const filter = { const isGlob = sender.includes("*");
room: { const roomEventFilter = {
rooms: [roomId], rooms: [roomId],
state: { ... isGlob ? {} : {senders: [sender]}
// types: ["m.room.member"], // We'll redact all types of events
rooms: [roomId],
},
timeline: {
rooms: [roomId],
// types: ["m.room.message"], // We'll redact all types of events
},
ephemeral: {
limit: 0,
types: [],
},
account_data: {
limit: 0,
types: [],
},
},
presence: {
limit: 0,
types: [],
},
account_data: {
limit: 0,
types: [],
},
}; };
let isGlob = true;
if (!sender.includes("*")) {
isGlob = false;
filter.room.timeline['senders'] = [sender];
}
const matcher = new MatrixGlob(sender); const matcher = new MatrixGlob(sender);
function testUser(userId: string): boolean { function testUser(userId: string): boolean {
@ -134,47 +110,62 @@ export async function getMessagesByUserIn(client: MatrixClient, sender: string,
function backfill(from: string) { function backfill(from: string) {
const qs = { const qs = {
filter: JSON.stringify(filter), filter: JSON.stringify(roomEventFilter),
from: from, from: from,
dir: "b", dir: "b",
}; };
LogService.info("utils", "Backfilling with token: " + token); LogService.info("utils", "Backfilling with token: " + from);
return client.doRequest("GET", `/_matrix/client/r0/rooms/${encodeURIComponent(roomId)}/messages`, qs); return client.doRequest("GET", `/_matrix/client/r0/rooms/${encodeURIComponent(roomId)}/messages`, qs);
} }
// Do an initial sync first to get the batch token // Do an initial sync first to get the batch token
const response = await roomInitialSync(); const response = await roomInitialSync();
if (!response) return [];
let processed = 0; let processed = 0;
/**
const timeline = (response['messages'] || {}) * Filter events from the timeline to events that are from a matching sender and under the limit that can be processed by the callback.
const syncedMessages = timeline['chunk'] || []; * @param events Events from the room timeline.
// The start of the chunk has the oldest events. * @returns Events that can safely be processed by the callback.
let token = timeline['start']; */
let bfMessages = {chunk: syncedMessages, end: token}; function filterEvents(events: any[]) {
do {
const messages: any[] = []; const messages: any[] = [];
for (const event of (bfMessages['chunk'] || [])) { for (const event of events) {
if (processed >= limit) return; // we're done even if we don't want to be if (processed >= limit) return messages; // we have provided enough events.
processed++; processed++;
if (testUser(event['sender'])) messages.push(event); if (testUser(event['sender'])) messages.push(event);
} }
return messages;
}
if (token) { // The recommended way to fetch events from a room is to use both rooms/initialSync and then /messages.
bfMessages = await backfill(token); // Unfortunately, this results in code that is rather hard to read, as these two APIs employ very different data structures.
// We prefer discarding the results from rooms/initialSync and reading only from /messages,
// even if it's a little slower, for the sake of code maintenance.
const timeline = response['messages']
if (timeline) {
// The end of the PaginationChunk has the most recent events from rooms/initialSync.
// This token is required to be present in the PaginationChunk from rooms/initialSync.
let token = timeline['end']!;
// We check that we have the token because rooms/messages is not required to provide one
// and will not provide one when there is no more history to paginate.
while (token && processed < limit) {
const bfMessages = await backfill(token);
let lastToken = token; let lastToken = token;
token = bfMessages['end']; token = bfMessages['end'];
if (lastToken === token) { if (lastToken === token) {
LogService.warn("utils", "Backfill returned same end token - returning"); LogService.debug("utils", "Backfill returned same end token - returning early.");
cb(messages);
return; return;
} }
const events = filterEvents(bfMessages['chunk'] || []);
// If we are using a glob, there may be no relevant events in this chunk.
if (events.length > 0) {
await cb(events);
}
} }
} else {
cb(messages); throw new Error(`Internal Error: rooms/initialSync did not return a pagination chunk for ${roomId}, this is not normal and if it is we need to stop using it. See roomInitialSync() for why we are using it.`);
} while (token); }
} }
export async function replaceRoomIdsWithPills(client: MatrixClient, text: string, roomIds: string[] | string, msgtype: MessageType = "m.text"): Promise<TextualMessageEventContent> { export async function replaceRoomIdsWithPills(client: MatrixClient, text: string, roomIds: string[] | string, msgtype: MessageType = "m.text"): Promise<TextualMessageEventContent> {

View File

@ -0,0 +1,46 @@
import { MatrixClient } from "matrix-bot-sdk";
/**
 * Returns a promise that resolves to an event that is reacting to the event produced by targetEventThunk.
 *
 * A listener is attached before targetEventThunk is called, so reactions that arrive while the thunk
 * is still pending are buffered and checked as soon as the target event ID is known.
 * Reaction events without an `m.relates_to` in their content are ignored rather than causing a TypeError.
 * All listeners added by this function are removed before it settles.
 * @param client A MatrixClient that is already in the targetRoom that can be started to listen for the event produced by targetEventThunk.
 * This function assumes that the start() has already been called on the client.
 * @param targetRoom The room to listen for the reaction in.
 * @param reactionKey The reaction key to wait for.
 * @param targetEventThunk A function that produces an event ID when called. This event ID is then used to listen for a reaction.
 * @returns The reaction event. The promise rejects if targetEventThunk rejects, and stays pending until a matching reaction is seen.
 */
export async function onReactionTo(client: MatrixClient, targetRoom: string, reactionKey: string, targetEventThunk: () => Promise<string>): Promise<any> {
    // Reactions seen in targetRoom before the target event ID is known.
    const reactionEvents: any[] = [];
    const addEvent = function (roomId: string, event: any) {
        if (roomId !== targetRoom) return;
        if (event.type !== 'm.reaction') return;
        reactionEvents.push(event);
    };
    let targetCb: ((roomId: string, event: any) => void) | undefined;
    try {
        client.on('room.event', addEvent);
        const targetEventId = await targetEventThunk();
        // True when the event is a reaction with reactionKey to the target event.
        // The relation is checked for presence first because it can be missing from the content.
        const isTargetReaction = function (event: any): boolean {
            const relatesTo = event.content ? event.content['m.relates_to'] : undefined;
            if (!relatesTo) return false;
            return relatesTo.event_id === targetEventId && relatesTo.key === reactionKey;
        };
        for (const event of reactionEvents) {
            if (isTargetReaction(event)) {
                return event;
            }
        }
        // Nothing buffered matched. The listener below is attached synchronously (no await in between),
        // so no event can slip through between the buffer check and the registration.
        return await new Promise<any>((resolve) => {
            targetCb = function (roomId: string, event: any) {
                if (roomId !== targetRoom) return;
                if (event.type !== 'm.reaction') return;
                if (isTargetReaction(event)) {
                    resolve(event);
                }
            };
            client.on('room.event', targetCb);
        });
    } finally {
        client.removeListener('room.event', addEvent);
        if (targetCb) {
            client.removeListener('room.event', targetCb);
        }
    }
}

View File

@ -0,0 +1,130 @@
import { strict as assert } from "assert";
import config from "../../../src/config";
import { newTestUser } from "../clientHelper";
import { getMessagesByUserIn } from "../../../src/utils";
import { LogService } from "matrix-bot-sdk";
import { onReactionTo } from "./commandUtils";
describe("Test: The redaction command", function () {
it('Mjölnir redacts all of the events sent by a spammer when instructed to by giving their id and a room id.', async function() {
    this.timeout(20000);
    // Create a few users and a room.
    let badUser = await newTestUser(false, "spammer-needs-redacting");
    let badUserId = await badUser.getUserId();
    const mjolnir = config.RUNTIME.client!
    let mjolnirUserId = await mjolnir.getUserId();
    let moderator = await newTestUser(false, "moderator");
    this.moderator = moderator;
    await moderator.joinRoom(config.managementRoom);
    let targetRoom = await moderator.createRoom({ invite: [badUserId, mjolnirUserId]});
    await moderator.setUserPowerLevel(mjolnirUserId, targetRoom, 100);
    await badUser.joinRoom(targetRoom);
    // Awaited so that a failed send fails the test instead of becoming an unhandled rejection.
    // NOTE(review): the msgtype used to be 'm.text.' (trailing dot), which is not a valid msgtype;
    // presumably Mjolnir only treats 'm.text' messages as commands - confirm against the command handler.
    await moderator.sendMessage(config.managementRoom, {msgtype: 'm.text', body: `!mjolnir rooms add ${targetRoom}`});

    LogService.debug("redactionTest", `targetRoom: ${targetRoom}, managementRoom: ${config.managementRoom}`);
    // Sandwich irrelevant messages in bad messages.
    await badUser.sendMessage(targetRoom, {msgtype: 'm.text', body: "Very Bad Stuff"});
    await Promise.all([...Array(50).keys()].map((i) => moderator.sendMessage(targetRoom, {msgtype: 'm.text.', body: `Irrelevant Message #${i}`})));
    for (let i = 0; i < 5; i++) {
        await badUser.sendMessage(targetRoom, {msgtype: 'm.text', body: "Very Bad Stuff"});
    }
    await Promise.all([...Array(50).keys()].map((i) => moderator.sendMessage(targetRoom, {msgtype: 'm.text.', body: `Irrelevant Message #${i}`})));
    await badUser.sendMessage(targetRoom, {msgtype: 'm.text', body: "Very Bad Stuff"});

    try {
        // onReactionTo assumes the client has already been started, so wait for start() to settle.
        await moderator.start();
        // Wait for the ✅ reaction to the redact command sent below.
        await onReactionTo(moderator, config.managementRoom, '✅', async () => {
            return await moderator.sendMessage(config.managementRoom, { msgtype: 'm.text', body: `!mjolnir redact ${badUserId} ${targetRoom}` });
        });
    } finally {
        moderator.stop();
    }

    // Every event from the bad user must now be redacted: member events keep a single content key,
    // every other event must have empty content.
    await getMessagesByUserIn(moderator, badUserId, targetRoom, 1000, function(events) {
        for (const e of events) {
            if (e.type === 'm.room.member') {
                assert.equal(Object.keys(e.content).length, 1, "Only membership should be left on the membership even when it has been redacted.")
            } else if (Object.keys(e.content).length !== 0) {
                throw new Error(`This event should have been redacted: ${JSON.stringify(e, null, 2)}`)
            }
        }
    });
})
it('Mjölnir redacts all of the events sent by a spammer when instructed to by giving their id in multiple rooms.', async function() {
    this.timeout(20000);
    // Create a few users and a room.
    let badUser = await newTestUser(false, "spammer-needs-redacting");
    let badUserId = await badUser.getUserId();
    const mjolnir = config.RUNTIME.client!
    let mjolnirUserId = await mjolnir.getUserId();
    let moderator = await newTestUser(false, "moderator");
    this.moderator = moderator;
    await moderator.joinRoom(config.managementRoom);
    let targetRooms: string[] = [];
    for (let i = 0; i < 5; i++) {
        let targetRoom = await moderator.createRoom({ invite: [badUserId, mjolnirUserId]});
        await moderator.setUserPowerLevel(mjolnirUserId, targetRoom, 100);
        await badUser.joinRoom(targetRoom);
        // NOTE(review): the msgtype used to be 'm.text.' (trailing dot), which is not a valid msgtype;
        // presumably Mjolnir only treats 'm.text' messages as commands - confirm against the command handler.
        await moderator.sendMessage(config.managementRoom, {msgtype: 'm.text', body: `!mjolnir rooms add ${targetRoom}`});
        targetRooms.push(targetRoom);

        // Sandwich irrelevant messages in bad messages.
        await badUser.sendMessage(targetRoom, {msgtype: 'm.text', body: "Very Bad Stuff"});
        await Promise.all([...Array(50).keys()].map((j) => moderator.sendMessage(targetRoom, {msgtype: 'm.text.', body: `Irrelevant Message #${j}`})));
        for (let j = 0; j < 5; j++) {
            await badUser.sendMessage(targetRoom, {msgtype: 'm.text', body: "Very Bad Stuff"});
        }
        await Promise.all([...Array(50).keys()].map((j) => moderator.sendMessage(targetRoom, {msgtype: 'm.text.', body: `Irrelevant Message #${j}`})));
        await badUser.sendMessage(targetRoom, {msgtype: 'm.text', body: "Very Bad Stuff"});
    }

    try {
        // onReactionTo assumes the client has already been started, so wait for start() to settle.
        await moderator.start();
        // No room is given to the command; wait for the ✅ reaction to it.
        await onReactionTo(moderator, config.managementRoom, '✅', async () => {
            return await moderator.sendMessage(config.managementRoom, { msgtype: 'm.text', body: `!mjolnir redact ${badUserId}` });
        });
    } finally {
        moderator.stop();
    }

    // The per-room checks must be awaited: mapping with an async callback and dropping the promises
    // would let the test finish (and pass) before any assertion has run.
    await Promise.all(targetRooms.map(targetRoom =>
        getMessagesByUserIn(moderator, badUserId, targetRoom, 1000, function(events) {
            for (const e of events) {
                if (e.type === 'm.room.member') {
                    assert.equal(Object.keys(e.content).length, 1, "Only membership should be left on the membership even when it has been redacted.")
                } else if (Object.keys(e.content).length !== 0) {
                    throw new Error(`This event should have been redacted: ${JSON.stringify(e, null, 2)}`)
                }
            }
        })
    ));
});
it("Redacts a single event when instructed to.", async function () {
    this.timeout(20000);
    // Create a few users and a room.
    let badUser = await newTestUser(false, "spammer-needs-redacting");
    const mjolnir = config.RUNTIME.client!
    let mjolnirUserId = await mjolnir.getUserId();
    let moderator = await newTestUser(false, "moderator");
    this.moderator = moderator;
    await moderator.joinRoom(config.managementRoom);
    let targetRoom = await moderator.createRoom({ invite: [await badUser.getUserId(), mjolnirUserId]});
    await moderator.setUserPowerLevel(mjolnirUserId, targetRoom, 100);
    await badUser.joinRoom(targetRoom);
    // Awaited so that a failed send fails the test instead of becoming an unhandled rejection.
    // NOTE(review): the msgtype used to be 'm.text.' (trailing dot), which is not a valid msgtype;
    // presumably Mjolnir only treats 'm.text' messages as commands - confirm against the command handler.
    await moderator.sendMessage(config.managementRoom, {msgtype: 'm.text', body: `!mjolnir rooms add ${targetRoom}`});
    // sendMessage resolves to the ID of the sent event; it is used to build the permalink below.
    let eventToRedact = await badUser.sendMessage(targetRoom, {msgtype: 'm.text', body: "Very Bad Stuff"});

    try {
        // onReactionTo assumes the client has already been started, so wait for start() to settle.
        await moderator.start();
        // Ask for the redaction with a matrix.to permalink and wait for the ✅ reaction to the command.
        await onReactionTo(moderator, config.managementRoom, '✅', async () => {
            return await moderator.sendMessage(config.managementRoom, {msgtype: 'm.text', body: `!mjolnir redact https://matrix.to/#/${encodeURIComponent(targetRoom)}/${encodeURIComponent(eventToRedact)}`});
        });
    } finally {
        moderator.stop();
    }

    let redactedEvent = await moderator.getEvent(targetRoom, eventToRedact);
    assert.equal(Object.keys(redactedEvent.content).length, 0, "This event should have been redacted");
})
});

View File

@ -8,13 +8,14 @@ import { makeMjolnir, teardownManagementRoom } from "./mjolnirSetupUtils";
// So there is some code in here to "undo" the mutation after we stop Mjolnir syncing. // So there is some code in here to "undo" the mutation after we stop Mjolnir syncing.
export const mochaHooks = { export const mochaHooks = {
beforeEach: [ beforeEach: [
async function() { async function() {
console.log("mochaHooks.beforeEach"); console.log("mochaHooks.beforeEach");
this.managementRoomAlias = config.managementRoom; this.managementRoomAlias = config.managementRoom;
this.mjolnir = await makeMjolnir(); this.mjolnir = await makeMjolnir();
this.mjolnir.start(); config.RUNTIME.client = this.mjolnir.client;
console.log("mochaHooks.beforeEach DONE"); this.mjolnir.start();
} console.log("mochaHooks.beforeEach DONE");
}
], ],
afterEach: [ afterEach: [
async function() { async function() {
@ -27,4 +28,4 @@ export const mochaHooks = {
await teardownManagementRoom(this.mjolnir.client, managementRoomId, this.managementRoomAlias); await teardownManagementRoom(this.mjolnir.client, managementRoomId, this.managementRoomAlias);
} }
] ]
}; };

View File

@ -1,12 +1,15 @@
import config from "../../src/config"; import config from "../../src/config";
import { newTestUser, noticeListener } from "./clientHelper" import { newTestUser, noticeListener } from "./clientHelper"
describe("Test: !help command", () => { describe("Test: !help command", function() {
let client; let client;
before(async function () { this.beforeEach(async function () {
client = await newTestUser(true); client = await newTestUser(true);
await client.start(); await client.start();
}) })
this.afterEach(async function () {
await client.stop();
})
it('Mjolnir responded to !mjolnir help', async function() { it('Mjolnir responded to !mjolnir help', async function() {
this.timeout(30000); this.timeout(30000);
console.log(`management room ${config.managementRoom}`); console.log(`management room ${config.managementRoom}`);
@ -24,7 +27,4 @@ describe("Test: !help command", () => {
await client.sendMessage(config.managementRoom, {msgtype: "m.text", body: "!mjolnir help"}) await client.sendMessage(config.managementRoom, {msgtype: "m.text", body: "!mjolnir help"})
await reply await reply
}) })
after(async function () {
await client.stop();
})
}) })

View File

@ -0,0 +1,97 @@
import { strict as assert } from "assert";
import { newTestUser } from "./clientHelper";
import { getMessagesByUserIn } from "../../src/utils";
/**
* Ensure that Mjolnir paginates only the necessary segment of the room timeline when backfilling.
*/
describe("Test: timeline pagination", function () {
it('does not paginate across the entire room history while backfilling.', async function() {
    this.timeout(20000);
    // Set up a spammer, a moderator and a room they share.
    const spammer = await newTestUser(false, "spammer");
    const spammerId = await spammer.getUserId();
    const mod = await newTestUser(false, "moderator");
    const roomId = await mod.createRoom({ invite: [await spammer.getUserId()]});
    await spammer.joinRoom(roomId);

    // Oldest part of the timeline: 200 irrelevant messages, sent concurrently.
    await Promise.all(Array.from({ length: 200 }, (_, n) => {
        return mod.sendMessage(roomId, {msgtype: 'm.text.', body: `Irrelevant Message #${n}`});
    }));
    // Then 5 messages from the spammer, sent one after another.
    let remaining = 5;
    while (remaining > 0) {
        await spammer.sendMessage(roomId, {msgtype: 'm.text', body: "Very Bad Stuff"});
        remaining--;
    }
    // Then 50 more irrelevant messages.
    await Promise.all(Array.from({ length: 50 }, (_, n) => {
        return mod.sendMessage(roomId, {msgtype: 'm.text.', body: `Irrelevant Message #${n}`});
    }));
    // And one last spammer message at the most recent edge of the timeline.
    await spammer.sendMessage(roomId, {msgtype: 'm.text', body: "Very Bad Stuff"});

    // Run the paginator with the exact sender and record how the callback gets used.
    let invocations = 0;
    let seenEvents = 0;
    await getMessagesByUserIn(mod, spammerId, roomId, 1000, function(batch) {
        invocations++;
        seenEvents = seenEvents + batch.length;
        batch.forEach((ev) => {
            assert.equal(ev.sender, spammerId, "All the events should be from the same sender");
        });
    });
    assert.equal(invocations, 1, "The callback only needs to be called once with all the messages because the events should be filtered.");
    assert.equal(seenEvents, 7, "There shouldn't be any more events (1 member event and 6 messages), and they should all be from the same account.");
})
it('does not call the callback with an empty array when there are no relevant events', async function() {
    this.timeout(20000);
    // The spammer account exists but never joins the room or sends anything to it.
    const spammer = await newTestUser(false, "spammer");
    const spammerId = await spammer.getUserId();
    const mod = await newTestUser(false, "moderator");
    const roomId = await mod.createRoom();
    // Fill the room with messages that are all from the moderator.
    await Promise.all(Array.from({ length: 200 }, (_, n) => {
        return mod.sendMessage(roomId, {msgtype: 'm.text.', body: `Irrelevant Message #${n}`});
    }));
    // Count the callback invocations; none are expected.
    let invocations = 0;
    await getMessagesByUserIn(mod, spammerId, roomId, 1000, function(_batch) {
        invocations++;
    });
    assert.equal(invocations, 0, "The callback should never get called");
})
it("The limit provided is respected", async function() {
    this.timeout(20000);
    const spammer = await newTestUser(false, "spammer");
    const spammerId = await spammer.getUserId();
    const mod = await newTestUser(false, "moderator");
    const roomId = await mod.createRoom({ invite: [await spammer.getUserId()]});
    await spammer.joinRoom(roomId);
    // The spammer's 5 messages go in first, so they sit at the old end of the timeline.
    let remaining = 5;
    while (remaining > 0) {
        await spammer.sendMessage(roomId, {msgtype: 'm.text', body: "Very Bad Stuff"});
        remaining--;
    }
    // 200 irrelevant messages are then stacked on top of them.
    await Promise.all(Array.from({ length: 200 }, (_, n) => {
        return mod.sendMessage(roomId, {msgtype: 'm.text.', body: `Irrelevant Message #${n}`});
    }));

    let invocations = 0;
    await getMessagesByUserIn(mod, "*spammer*", roomId, 200, function(_batch) {
        invocations++;
    });
    // The limit counts the events that getMessagesByUserIn has checked against the glob,
    // not the events that end up being handed to the callback.
    // Otherwise a glob with fewer than 200 matches would drag pagination all the way back to the
    // start of the room history, which is likely for a user who has only just started sending messages.
    assert.equal(invocations, 0, "The callback should never be called as the limit should be reached beforehand.");

    // A slightly larger limit reaches back far enough to include the spammer's messages.
    await getMessagesByUserIn(mod, "*spammer*", roomId, 205, function(batch) {
        invocations++;
        batch.forEach((ev) => {
            assert.equal(ev.sender, spammerId, "All the events should be from the same sender");
        });
    });
    assert.equal(invocations, 1, "The callback should be called once with events matching the glob.");
});
it("Gives the events to the callback ordered by youngest first (even more important when the limit is reached halfway through a chunk).", async function() {
    this.timeout(20000);
    let moderator = await newTestUser(false, "moderator");
    let moderatorId = await moderator.getUserId();
    let targetRoom = await moderator.createRoom();
    // Messages are sent one at a time so that the body of each message is its position in the timeline.
    for (let i = 0; i < 20; i++) {
        await moderator.sendMessage(targetRoom, {msgtype: 'm.text.', body: `${i}`})
    }
    // Track invocations: without this the test would pass even if the callback were never called.
    let cbCount = 0;
    await getMessagesByUserIn(moderator, moderatorId, targetRoom, 5, (events) => {
        cbCount += 1;
        const messageNumbers = events.map(event => parseInt(event.content.body, 10));
        // With a limit of 5, only the 5 most recent messages (15 to 19) may be provided.
        for (const n of messageNumbers) {
            assert.equal(n >= 15, true, "The youngest events should be given to the callback first.");
        }
        // Within the batch the events must go from youngest to oldest.
        for (let i = 1; i < messageNumbers.length; i++) {
            assert.equal(messageNumbers[i] < messageNumbers[i - 1], true, "The events should be ordered by youngest first.");
        }
    });
    assert.equal(cbCount > 0, true, "The callback should be called with the youngest events.");
})
});