mirror of
https://github.com/matrix-org/mjolnir.git
synced 2024-10-01 01:36:06 -04:00
267 lines
14 KiB
TypeScript
267 lines
14 KiB
TypeScript
import { strict as assert } from "assert";
|
|
|
|
import { UserID } from "@vector-im/matrix-bot-sdk";
|
|
import { Suite } from "mocha";
|
|
import { Mjolnir } from "../../src/Mjolnir";
|
|
import { DetectFederationLag, LAG_STATE_EVENT } from "../../src/protections/DetectFederationLag";
|
|
import { getFirstReply } from "./commands/commandUtils";
|
|
import { newTestUser } from "./clientHelper";
|
|
|
|
const LOCAL_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS = 180_000;
|
|
const LOCAL_HOMESERVER_LAG_EXIT_WARNING_ZONE_MS = 100_000;
|
|
const FEDERATED_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS = 300_000;
|
|
const FEDERATED_HOMESERVER_LAG_EXIT_WARNING_ZONE_MS = 200_000;
|
|
const BUCKET_DURATION_MS = 100;
|
|
const SAMPLE_SIZE = 100;
|
|
const NUMBER_OF_LAGGING_FEDERATED_HOMESERVERS_ENTER_WARNING_ZONE = 2;
|
|
|
|
const RE_STATS = /(\{(:?.|\n)*\})[^}]*$/m;
|
|
|
|
describe("Test: DetectFederationLag protection", function() {
|
|
// In this entire test, we call `handleEvent` directly, injecting
|
|
// - events that simulate lag;
|
|
// - a progression through time, to make sure that histograms get processed.
|
|
beforeEach(async function() {
|
|
// Setup an instance of DetectFederationLag
|
|
this.detector = new DetectFederationLag();
|
|
await this.mjolnir.protectionManager.registerProtection(this.detector);
|
|
await this.mjolnir.protectionManager.enableProtection("DetectFederationLag");
|
|
|
|
// Setup a moderator.
|
|
this.moderator = await newTestUser(this.config.homeserverUrl, { name: { contains: "moderator" } });
|
|
await this.moderator.joinRoom(this.mjolnir.managementRoomId);
|
|
|
|
const SETTINGS = {
|
|
// The protection should kick in immediately.
|
|
initialDelayGrace: 0,
|
|
// Make histograms progress quickly.
|
|
bucketDuration: BUCKET_DURATION_MS,
|
|
// Three homeservers should be sufficient to raise an alert.
|
|
numberOfLaggingFederatedHomeserversEnterWarningZone: NUMBER_OF_LAGGING_FEDERATED_HOMESERVERS_ENTER_WARNING_ZONE,
|
|
|
|
localHomeserverLagEnterWarningZone: LOCAL_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS,
|
|
localHomeserverLagExitWarningZone: LOCAL_HOMESERVER_LAG_EXIT_WARNING_ZONE_MS,
|
|
|
|
federatedHomeserverLagEnterWarningZone: FEDERATED_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS,
|
|
federatedHomeserverLagExitWarningZone: FEDERATED_HOMESERVER_LAG_EXIT_WARNING_ZONE_MS,
|
|
};
|
|
for (let key of Object.keys(SETTINGS)) {
|
|
this.detector.settings[key].setValue(SETTINGS[key]);
|
|
}
|
|
this.localDomain = new UserID(await this.mjolnir.client.getUserId()).domain;
|
|
this.protectedRoomId = `!room1:${this.localDomain}`;
|
|
this.mjolnir.addProtectedRoom(this.protectedRoomId);
|
|
|
|
this.simulateLag = async (senders: string[], lag: number, start: Date) => {
|
|
const content = {};
|
|
const origin_server_ts = start.getTime() - lag;
|
|
for (let i = 0; i < SAMPLE_SIZE; ++i) {
|
|
// We call directly `this.detector.handleEvent` to be able to forge old values of `origin_server_ts`.
|
|
await this.detector.handleEvent(this.mjolnir, this.protectedRoomId, {
|
|
sender: senders[i % senders.length],
|
|
origin_server_ts,
|
|
content,
|
|
},
|
|
// Make sure that time progresses through histogram buckets.
|
|
simulateDate(start, i)
|
|
);
|
|
}
|
|
};
|
|
|
|
this.getAlertEvent = async () => {
|
|
try {
|
|
let event = await this.mjolnir.client.getRoomStateEvent(this.mjolnir.managementRoomId, LAG_STATE_EVENT, this.protectedRoomId);
|
|
if (Object.keys(event).length == 0) {
|
|
// Event was redacted.
|
|
return null;
|
|
}
|
|
return event;
|
|
} catch (ex) {
|
|
// No such event.
|
|
return null;
|
|
}
|
|
};
|
|
|
|
this.getCommandStatus = async () => {
|
|
const protectedRoomReply = await getFirstReply(this.mjolnir.client, this.mjolnir.managementRoomId, () => {
|
|
const command = `!mjolnir status protection DetectFederationLag ${this.protectedRoomId}`;
|
|
return this.moderator.sendMessage(this.mjolnir.managementRoomId, { msgtype: 'm.text', body: command });
|
|
});
|
|
const globalReply = await getFirstReply(this.mjolnir.client, this.mjolnir.managementRoomId, () => {
|
|
const command = `!mjolnir status protection DetectFederationLag *`;
|
|
return this.moderator.sendMessage(this.mjolnir.managementRoomId, { msgtype: 'm.text', body: command });
|
|
});
|
|
const protectedRoomStatsStr = protectedRoomReply.content.body.match(RE_STATS)[0];
|
|
const globalStatsStr = globalReply.content.body.match(RE_STATS)[0];
|
|
return {
|
|
protectedRoomStats: protectedRoomStatsStr ? JSON.parse(protectedRoomStatsStr) : null,
|
|
globalStats: globalStatsStr ? JSON.parse(globalStatsStr) : null,
|
|
}
|
|
}
|
|
});
|
|
|
|
afterEach(async function() {
|
|
await this.detector.cleanup();
|
|
this.detector.dispose();
|
|
await this.moderator?.stop();
|
|
});
|
|
|
|
let simulateDate = (start: Date, progress: number = SAMPLE_SIZE) =>
|
|
new Date(start.getTime() + 2 * progress * BUCKET_DURATION_MS / SAMPLE_SIZE);
|
|
|
|
it('DetectFederationLag doesn\'t detect lag when there isn\'t any', async function() {
|
|
this.timeout(60000);
|
|
const MULTIPLIERS = [0, 0.5, 0.9];
|
|
|
|
// In this test, all the events we send have a lag < {local, federated}HomeserverLagEnterWarningZoneMS.
|
|
const start = new Date();
|
|
|
|
// Ensure that no alert has been emitted yet.
|
|
assert.equal(await this.getAlertEvent(), null, "Initially, there should be no alert");
|
|
|
|
// First, let's send events from the local homeserver.
|
|
const LOCAL_SENDERS = [`@local_user:${this.localDomain}`];
|
|
for (let multiplier of MULTIPLIERS) {
|
|
const LAG = multiplier * LOCAL_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS;
|
|
await this.simulateLag(LOCAL_SENDERS, LAG, start);
|
|
assert.equal(await this.getAlertEvent(), null, `We have sent lots of local pseudo-events with a small lag of ${LAG}, there should be NO alert`);
|
|
}
|
|
|
|
// Three distinct remote servers should be sufficient to trigger an alert, if they all lag.
|
|
const REMOTE_SENDERS = [
|
|
"@user2:left.example.com",
|
|
"@user3:right.example.com",
|
|
"@user4:middle.example.com",
|
|
];
|
|
for (let multiplier of MULTIPLIERS) {
|
|
const LAG = multiplier * FEDERATED_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS;
|
|
await this.simulateLag(REMOTE_SENDERS, LAG, start);
|
|
assert.equal(await this.getAlertEvent(), null, `We have sent lots of remote pseudo-events with a small lag of ${LAG}, there should be NO alert`);
|
|
}
|
|
|
|
const {protectedRoomStats, globalStats} = await this.getCommandStatus();
|
|
assert.ok(protectedRoomStats, "We should see stats for our room");
|
|
assert.ok(protectedRoomStats.min >= 0, `min ${protectedRoomStats.min} >= 0`);
|
|
assert.ok(protectedRoomStats.min < protectedRoomStats.max);
|
|
assert.ok(protectedRoomStats.mean > 0);
|
|
assert.ok(protectedRoomStats.mean < protectedRoomStats.max);
|
|
assert.ok(protectedRoomStats.median < protectedRoomStats.max);
|
|
assert.ok(protectedRoomStats.median > 0);
|
|
assert.ok(protectedRoomStats.max >= MULTIPLIERS[MULTIPLIERS.length - 1] * FEDERATED_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS);
|
|
assert.ok(protectedRoomStats.max < FEDERATED_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS);
|
|
assert.deepEqual(globalStats, { [this.protectedRoomId]: protectedRoomStats });
|
|
});
|
|
|
|
it('DetectFederationLag detects lag on local homeserver', async function() {
|
|
this.timeout(60000);
|
|
// In this test, all the events we send have a lag > localHomeserverLagEnterWarningZoneMS.
|
|
const start = new Date();
|
|
const stop = simulateDate(start);
|
|
|
|
// Ensure that no alert has been emitted yet.
|
|
assert.equal(await this.getAlertEvent(), null, "Initially, there should be no alert");
|
|
|
|
// Simulate lagging events from the local homeserver. This should trigger an alarm.
|
|
const SENDERS = [`@local_user_1:${this.localDomain}`];
|
|
await this.simulateLag(SENDERS, 1.5 * LOCAL_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS, start);
|
|
|
|
let lagEvent = await this.getAlertEvent();
|
|
console.debug(lagEvent);
|
|
|
|
assert(lagEvent, "Local lag should be reported");
|
|
assert.equal(JSON.stringify(lagEvent.domains), JSON.stringify([this.localDomain]), "Lag report should mention only the local domain");
|
|
assert.equal(lagEvent.roomId, this.protectedRoomId, "Lag report should mention the right room");
|
|
assert(new Date(lagEvent.since) >= start, "Lag report should have happened since `now`");
|
|
assert(new Date(lagEvent.since) < stop, "Lag should have been detected before the end of the bombardment");
|
|
|
|
{
|
|
const {protectedRoomStats, globalStats} = await this.getCommandStatus();
|
|
assert.ok(protectedRoomStats, "We should see stats for our room");
|
|
assert.ok(protectedRoomStats.min >= 0, `min ${protectedRoomStats.min} >= 0`);
|
|
assert.ok(protectedRoomStats.min < protectedRoomStats.max);
|
|
assert.ok(protectedRoomStats.mean > 0);
|
|
assert.ok(protectedRoomStats.mean < protectedRoomStats.max);
|
|
assert.ok(protectedRoomStats.median < protectedRoomStats.max);
|
|
assert.ok(protectedRoomStats.median > 0);
|
|
assert.ok(protectedRoomStats.max >= 1.5 * LOCAL_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS);
|
|
assert.deepEqual(globalStats, { [this.protectedRoomId]: protectedRoomStats })
|
|
}
|
|
|
|
// Simulate non-lagging events from the local homeserver. After a while, this should rescind the alarm.
|
|
// We switch to a new (pseudo-)user to simplify reading logs.
|
|
const SENDERS_2 = [`@local_user_2:${this.localDomain}`];
|
|
const start2 = new Date(stop.getTime() + 1_000);
|
|
await this.simulateLag(SENDERS_2, 0.75 * LOCAL_HOMESERVER_LAG_EXIT_WARNING_ZONE_MS, start2);
|
|
|
|
assert.equal(await this.getAlertEvent(), null, "The alert should now be rescinded");
|
|
|
|
{
|
|
const {protectedRoomStats, globalStats} = await this.getCommandStatus();
|
|
assert.ok(protectedRoomStats, "We should see stats for our room");
|
|
assert.ok(protectedRoomStats.min >= 0, `min ${protectedRoomStats.min} >= 0`);
|
|
assert.ok(protectedRoomStats.min < protectedRoomStats.max);
|
|
assert.ok(protectedRoomStats.mean > 0);
|
|
assert.ok(protectedRoomStats.mean < protectedRoomStats.max);
|
|
assert.ok(protectedRoomStats.median < protectedRoomStats.max);
|
|
assert.ok(protectedRoomStats.median > 0);
|
|
assert.ok(protectedRoomStats.max >= 0.75 * LOCAL_HOMESERVER_LAG_EXIT_WARNING_ZONE_MS);
|
|
assert.ok(protectedRoomStats.max < FEDERATED_HOMESERVER_LAG_EXIT_WARNING_ZONE_MS);
|
|
assert.deepEqual(globalStats, { [this.protectedRoomId]: protectedRoomStats })
|
|
}
|
|
});
|
|
|
|
it('DetectFederationLag doesn\'t report lag when only one federated homeserver lags', async function() {
|
|
this.timeout(60000);
|
|
// In this test, all the events we send have a lag > federatedHomeserverLagEnterWarningZoneMS.
|
|
const start = new Date();
|
|
|
|
// Ensure that no alert has been emitted yet.
|
|
assert.equal(await this.getAlertEvent(), null, "Initially, there should be no alert");
|
|
|
|
// First, let's send events from the local homeserver.
|
|
const SENDERS = ["@left:left.example.com"];
|
|
await this.simulateLag(SENDERS, 1.5 * FEDERATED_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS, start);
|
|
|
|
let lagEvent = await this.getAlertEvent();
|
|
assert.equal(lagEvent, null, "With only one federated homeserver lagging, we shouldn't report any lag");
|
|
});
|
|
|
|
it('DetectFederationLag reports lag when three federated homeservers lag', async function() {
|
|
this.timeout(60000);
|
|
// In this test, all the events we send have a lag > federatedHomeserverLagEnterWarningZoneMS.
|
|
const start = new Date();
|
|
const stop = simulateDate(start);
|
|
|
|
// Ensure that no alert has been emitted yet.
|
|
assert.equal(await this.getAlertEvent(), null, "Initially, there should be no alert");
|
|
|
|
// Simulate lagging events from remote homeservers. This should trigger an alarm.
|
|
const SENDERS = [
|
|
"@left:left.example.com",
|
|
"@middle:middle.example.com",
|
|
"@right:right.example.com",
|
|
];
|
|
await this.simulateLag(SENDERS, 1.5 * FEDERATED_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS, start);
|
|
|
|
let lagEvent = await this.getAlertEvent();
|
|
console.debug(lagEvent);
|
|
assert(lagEvent, "Local lag should be reported");
|
|
assert.equal(JSON.stringify(lagEvent.domains.sort()), JSON.stringify(["left.example.com", "middle.example.com", "right.example.com"]), "Lag report should mention only the local domain");
|
|
assert.equal(lagEvent.roomId, this.protectedRoomId, "Lag report should mention the right room");
|
|
assert(new Date(lagEvent.since) >= start, "Lag report should have happened since `now`");
|
|
assert(new Date(lagEvent.since) < stop, "Lag should have been detected before the end of the bombardment");
|
|
|
|
// Simulate non-lagging events from remote homeservers. After a while, this should rescind the alarm.
|
|
// We switch to new (pseudo-)users to simplify reading logs.
|
|
const SENDERS_2 = [
|
|
"@left_2:left.example.com",
|
|
"@middle_2:middle.example.com",
|
|
"@right_2:right.example.com",
|
|
];
|
|
const start2 = new Date(stop.getTime() + 1_000);
|
|
await this.simulateLag(SENDERS_2, 0.75 * FEDERATED_HOMESERVER_LAG_EXIT_WARNING_ZONE_MS, start2);
|
|
|
|
assert.equal(await this.getAlertEvent(), null, "The alert should now be rescinded");
|
|
});
|
|
});
|