mirror of
https://github.com/matrix-org/mjolnir.git
synced 2024-09-29 20:56:23 +00:00
A room Protection designed to measure lag in a room (#217)
This commit is contained in:
parent
a3f77e1b39
commit
82a2e63d23
@ -37,7 +37,8 @@
|
||||
"html-to-text": "^8.0.0",
|
||||
"js-yaml": "^4.1.0",
|
||||
"jsdom": "^16.6.0",
|
||||
"matrix-bot-sdk": "^0.5.19"
|
||||
"matrix-bot-sdk": "^0.5.19",
|
||||
"parse-duration": "^1.0.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14.0.0"
|
||||
|
@ -34,7 +34,7 @@ import { COMMAND_PREFIX, handleCommand } from "./commands/CommandHandler";
|
||||
import { applyUserBans } from "./actions/ApplyBan";
|
||||
import config from "./config";
|
||||
import ErrorCache, { ERROR_KIND_FATAL, ERROR_KIND_PERMISSION } from "./ErrorCache";
|
||||
import { IProtection } from "./protections/IProtection";
|
||||
import { Protection } from "./protections/IProtection";
|
||||
import { PROTECTIONS } from "./protections/protections";
|
||||
import { ProtectionSettingValidationError } from "./protections/ProtectionSettings";
|
||||
import { UnlistedUserRedactionQueue } from "./queues/UnlistedUserRedactionQueue";
|
||||
@ -67,7 +67,7 @@ export class Mjolnir {
|
||||
private displayName: string;
|
||||
private localpart: string;
|
||||
private currentState: string = STATE_NOT_STARTED;
|
||||
public protections = new Map<string /* protection name */, IProtection>();
|
||||
public protections = new Map<string /* protection name */, Protection>();
|
||||
/**
|
||||
* This is for users who are not listed on a watchlist,
|
||||
* but have been flagged by the automatic spam detection as suispicous
|
||||
@ -246,7 +246,7 @@ export class Mjolnir {
|
||||
return this.currentState;
|
||||
}
|
||||
|
||||
public get enabledProtections(): IProtection[] {
|
||||
public get enabledProtections(): Protection[] {
|
||||
return [...this.protections.values()].filter(p => p.enabled);
|
||||
}
|
||||
|
||||
@ -464,7 +464,7 @@ export class Mjolnir {
|
||||
/*
|
||||
* Read org.matrix.mjolnir.setting state event, find any saved settings for
|
||||
* the requested protectionName, then iterate and validate against their parser
|
||||
* counterparts in IProtection.settings and return those which validate
|
||||
* counterparts in Protection.settings and return those which validate
|
||||
*
|
||||
* @param protectionName The name of the protection whose settings we're reading
|
||||
* @returns Every saved setting for this protectionName that has a valid value
|
||||
@ -543,7 +543,7 @@ export class Mjolnir {
|
||||
*
|
||||
* @param protection The protection object we want to register
|
||||
*/
|
||||
public async registerProtection(protection: IProtection): Promise<any> {
|
||||
public async registerProtection(protection: Protection) {
|
||||
this.protections.set(protection.name, protection)
|
||||
|
||||
let enabledProtections: { enabled: string[] } | null = null;
|
||||
@ -587,6 +587,10 @@ export class Mjolnir {
|
||||
return list;
|
||||
}
|
||||
|
||||
public getProtection(protectionName: string): Protection | null {
|
||||
return this.protections.get(protectionName) ?? null;
|
||||
}
|
||||
|
||||
public async watchList(roomRef: string): Promise<BanList | null> {
|
||||
const joinedRooms = await this.client.getJoinedRooms();
|
||||
const permalink = Permalinks.parseUrl(roomRef);
|
||||
@ -854,7 +858,7 @@ export class Mjolnir {
|
||||
}
|
||||
}
|
||||
|
||||
if (Object.keys(this.protectedRooms).includes(roomId)) {
|
||||
if (roomId in this.protectedRooms) {
|
||||
if (event['sender'] === await this.client.getUserId()) return; // Ignore ourselves
|
||||
|
||||
// Iterate all the enabled protections
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright 2019-2021 The Matrix.org Foundation C.I.C.
|
||||
Copyright 2019-2022 The Matrix.org Foundation C.I.C.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@ -38,6 +38,7 @@ import { execShutdownRoomCommand } from "./ShutdownRoomCommand";
|
||||
import { execAddAliasCommand, execMoveAliasCommand, execRemoveAliasCommand, execResolveCommand } from "./AliasCommands";
|
||||
import { execKickCommand } from "./KickCommand";
|
||||
|
||||
|
||||
export const COMMAND_PREFIX = "!mjolnir";
|
||||
|
||||
export async function handleCommand(roomId: string, event: { content: { body: string } }, mjolnir: Mjolnir) {
|
||||
@ -46,7 +47,7 @@ export async function handleCommand(roomId: string, event: { content: { body: st
|
||||
|
||||
try {
|
||||
if (parts.length === 1 || parts[1] === 'status') {
|
||||
return await execStatusCommand(roomId, event, mjolnir);
|
||||
return await execStatusCommand(roomId, event, mjolnir, parts.slice(2));
|
||||
} else if (parts[1] === 'ban' && parts.length > 2) {
|
||||
return await execBanCommand(roomId, event, mjolnir, parts);
|
||||
} else if (parts[1] === 'unban' && parts.length > 2) {
|
||||
@ -114,6 +115,7 @@ export async function handleCommand(roomId: string, event: { content: { body: st
|
||||
const menu = "" +
|
||||
"!mjolnir - Print status information\n" +
|
||||
"!mjolnir status - Print status information\n" +
|
||||
"!mjolnir status protection <protection> [subcommand] - Print status information for a protection\n" +
|
||||
"!mjolnir ban <list shortcode> <user|room|server> <glob> [reason] - Adds an entity to the ban list\n" +
|
||||
"!mjolnir unban <list shortcode> <user|room|server> <glob> [apply] - Removes an entity from the ban list. If apply is 'true', the users matching the glob will actually be unbanned\n" +
|
||||
"!mjolnir redact <user ID> [room alias/ID] [limit] - Redacts messages by the sender in the target room (or all rooms), up to a maximum number of events in the backlog (default 1000)\n" +
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright 2019, 2020 The Matrix.org Foundation C.I.C.
|
||||
Copyright 2019-2022 The Matrix.org Foundation C.I.C.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@ -16,9 +16,23 @@ limitations under the License.
|
||||
|
||||
import { Mjolnir, STATE_CHECKING_PERMISSIONS, STATE_NOT_STARTED, STATE_RUNNING, STATE_SYNCING } from "../Mjolnir";
|
||||
import { RichReply } from "matrix-bot-sdk";
|
||||
import { htmlEscape } from "../utils";
|
||||
|
||||
// !mjolnir
|
||||
export async function execStatusCommand(roomId: string, event: any, mjolnir: Mjolnir) {
|
||||
export async function execStatusCommand(roomId: string, event: any, mjolnir: Mjolnir, parts: string[]) {
|
||||
switch (parts[0]) {
|
||||
case undefined:
|
||||
case 'mjolnir':
|
||||
return showMjolnirStatus(roomId, event, mjolnir);
|
||||
case 'protection':
|
||||
return showProtectionStatus(roomId, event, mjolnir, parts.slice(/* ["protection"] */ 1));
|
||||
default:
|
||||
throw new Error(`Invalid status command: ${htmlEscape(parts[0])}`);
|
||||
}
|
||||
}
|
||||
|
||||
async function showMjolnirStatus(roomId: string, event: any, mjolnir: Mjolnir) {
|
||||
// Display the status of Mjölnir.
|
||||
let html = "";
|
||||
let text = "";
|
||||
|
||||
@ -68,3 +82,25 @@ export async function execStatusCommand(roomId: string, event: any, mjolnir: Mjo
|
||||
reply["msgtype"] = "m.notice";
|
||||
return mjolnir.client.sendMessage(roomId, reply);
|
||||
}
|
||||
|
||||
async function showProtectionStatus(roomId: string, event: any, mjolnir: Mjolnir, parts: string[]) {
|
||||
const protectionName = parts[0];
|
||||
const protection = mjolnir.getProtection(protectionName);
|
||||
let text;
|
||||
let html;
|
||||
if (!protection) {
|
||||
text = html = "Unknown protection";
|
||||
} else {
|
||||
const status = await protection.statusCommand(mjolnir, parts.slice(1));
|
||||
if (status) {
|
||||
text = status.text;
|
||||
html = status.html;
|
||||
} else {
|
||||
text = "<no status>";
|
||||
html = "<no status>";
|
||||
}
|
||||
}
|
||||
const reply = RichReply.createFor(roomId, event, text, html);
|
||||
reply["msgtype"] = "m.notice";
|
||||
await mjolnir.client.sendMessage(roomId, reply);
|
||||
}
|
||||
|
742
src/protections/DetectFederationLag.ts
Normal file
742
src/protections/DetectFederationLag.ts
Normal file
@ -0,0 +1,742 @@
|
||||
/*
|
||||
Copyright 2022 The Matrix.org Foundation C.I.C.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
import { Protection } from "./IProtection";
|
||||
import { DurationMSProtectionSetting, NumberProtectionSetting, StringSetProtectionSetting } from "./ProtectionSettings";
|
||||
import { Mjolnir } from "../Mjolnir";
|
||||
import { LogLevel, UserID } from "matrix-bot-sdk";
|
||||
|
||||
const DEFAULT_BUCKET_DURATION_MS = 10_000;
|
||||
const DEFAULT_BUCKET_NUMBER = 6;
|
||||
const DEFAULT_CLEANUP_PERIOD_MS = 3_600 * 1_000;
|
||||
const DEFAULT_INITIAL_DELAY_GRACE_MS = 180_000;
|
||||
const DEFAULT_LOCAL_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS = 120_000;
|
||||
const DEFAULT_LOCAL_HOMESERVER_LAG_EXIT_WARNING_ZONE_MS = 100_000;
|
||||
const DEFAULT_FEDERATED_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS = 180_000;
|
||||
const DEFAULT_FEDERATED_HOMESERVER_LAG_EXIT_WARNING_ZONE_MS = 150_000;
|
||||
const DEFAULT_NUMBER_OF_LAGGING_FEDERATED_SERVERS_ENTER_WARNING_ZONE = 20;
|
||||
const DEFAULT_NUMBER_OF_LAGGING_FEDERATED_SERVERS_EXIT_WARNING_ZONE = 10;
|
||||
const DEFAULT_REWARN_AFTER_MS = 60_000;
|
||||
|
||||
/**
|
||||
* A state event emitted in the moderation room when there is lag,
|
||||
* redacted when lag has disappeared.
|
||||
*
|
||||
* The state key is the id of the room in which lag was detected.
|
||||
*/
|
||||
export const LAG_STATE_EVENT = "org.mjolnir.monitoring.lag";
|
||||
|
||||
/**
|
||||
* Settings for a timed histogram.
|
||||
*/
|
||||
type HistogramSettings = {
|
||||
// The width of a bucket, in ms.
|
||||
bucketDurationMS: number,
|
||||
// The number of buckets.
|
||||
bucketNumber: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* A histogram with time as x and some arbitrary value T as y.
|
||||
*/
|
||||
class TimedHistogram<T> {
|
||||
/**
|
||||
* An array of at most `this.settings.bucketNumber` buckets of events.
|
||||
*
|
||||
* Each bucket gathers all events that were pushed during an interval of
|
||||
* `this.settings.bucketDurationMS` starting at `bucket.timeStamp`.
|
||||
*
|
||||
* `0` is the oldest bucket.
|
||||
* ..
|
||||
* `length - 1` is the most recent bucket.
|
||||
*
|
||||
* Notes:
|
||||
* - this is a sparse array, buckets are not necessarily adjacent;
|
||||
* - if `this.updateSettings()` is called, we do not redistribute events
|
||||
* between buckets, so it may take some time before statistics fully
|
||||
* respect the new settings.
|
||||
*/
|
||||
protected buckets: {
|
||||
start: Date;
|
||||
events: T[]
|
||||
}[];
|
||||
|
||||
/**
|
||||
* Construct an empty TimedHistogram
|
||||
*/
|
||||
constructor(private settings: HistogramSettings) {
|
||||
this.buckets = []
|
||||
}
|
||||
|
||||
/**
|
||||
* Push a new event into the histogram.
|
||||
*
|
||||
* New events are always considered most recent, without checking `new`.
|
||||
* If pushing a new event causes the histogram to overflow, oldest buckets
|
||||
* are removed.
|
||||
*
|
||||
* @param event The event to push.
|
||||
* @param now The current date, used to create a new bucket to the event if
|
||||
* necessary and to determine whether some buckets are too old.
|
||||
*/
|
||||
push(event: T, now: Date) {
|
||||
let timeStamp = now.getTime();
|
||||
let latestBucket = this.buckets[this.buckets.length - 1];
|
||||
if (latestBucket && latestBucket.start.getTime() + this.settings.bucketDurationMS >= timeStamp) {
|
||||
// We're still within `durationPerColumnMS` of latest entry, we can reuse that entry.
|
||||
latestBucket.events.push(event);
|
||||
return;
|
||||
}
|
||||
// Otherwise, initialize an entry, then prune columns that are too old.
|
||||
this.buckets.push({
|
||||
start: now,
|
||||
events: [event]
|
||||
});
|
||||
this.trimBuckets(this.settings, now);
|
||||
}
|
||||
|
||||
/**
|
||||
* If any buckets are too old, remove them. If there are (still) too
|
||||
* many buckets, remove the oldest ones.
|
||||
*/
|
||||
private trimBuckets(settings: HistogramSettings, now: Date) {
|
||||
if (this.buckets.length > settings.bucketNumber) {
|
||||
this.buckets.splice(0, this.buckets.length - settings.bucketNumber);
|
||||
}
|
||||
const oldestAcceptableTimestamp = now.getTime() - settings.bucketDurationMS * settings.bucketNumber;
|
||||
for (let i = this.buckets.length - 2; i >= 0; --i) {
|
||||
// Find the most recent bucket that is too old.
|
||||
if (this.buckets[i].start.getTime() < oldestAcceptableTimestamp) {
|
||||
// ...and remove that bucket and every bucket before it.
|
||||
this.buckets.splice(0, i + 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the settings of a histogram.
|
||||
*/
|
||||
public updateSettings(settings: HistogramSettings, now: Date) {
|
||||
this.trimBuckets(settings, now);
|
||||
this.settings = settings;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* General-purpose statistics on a sample.
|
||||
*/
|
||||
class Stats {
|
||||
// Minimum.
|
||||
public readonly min: number;
|
||||
// Maximum.
|
||||
public readonly max: number;
|
||||
// Mean.
|
||||
public readonly mean: number;
|
||||
// Median.
|
||||
public readonly median: number;
|
||||
// Standard deviation.
|
||||
public readonly stddev: number;
|
||||
// Length of the sample.
|
||||
public readonly length: number;
|
||||
|
||||
constructor(values: number[]) {
|
||||
this.length = values.length;
|
||||
if (this.length === 0) {
|
||||
throw new TypeError("Attempting to compute stats on an empty sample");
|
||||
}
|
||||
if (this.length === 1) {
|
||||
// `values[Math.ceil(this.length / 2)]` below fails when `this.length == 1`.
|
||||
this.min =
|
||||
this.max =
|
||||
this.mean =
|
||||
this.median = values[0];
|
||||
this.stddev = 0;
|
||||
return;
|
||||
}
|
||||
values.sort((a, b) => a - b); // Don't forget to force sorting by value, not by stringified value!
|
||||
this.min = values[0];
|
||||
this.max = values[this.length - 1];
|
||||
let total = 0;
|
||||
for (let num of values) {
|
||||
total += num;
|
||||
}
|
||||
this.mean = total / this.length;
|
||||
|
||||
let totalVariance = 0;
|
||||
for (let num of values) {
|
||||
const deviation = num - this.mean;
|
||||
totalVariance += deviation * deviation;
|
||||
}
|
||||
this.stddev = Math.sqrt(totalVariance / this.length);
|
||||
|
||||
if (this.length % 2 === 0) {
|
||||
this.median = values[this.length / 2];
|
||||
} else {
|
||||
this.median = (values[Math.floor(this.length / 2)] + values[Math.ceil(this.length / 2)]) / 2;
|
||||
}
|
||||
}
|
||||
|
||||
public round(): { min: number, max: number, mean: number, median: number, stddev: number, length: number } {
|
||||
return {
|
||||
min: Math.round(this.min),
|
||||
max: Math.round(this.max),
|
||||
mean: Math.round(this.mean),
|
||||
median: Math.round(this.median),
|
||||
stddev: Math.round(this.stddev),
|
||||
length: this.length
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A subclass of TimedHistogram that supports only numbers
|
||||
* and can compute statistics.
|
||||
*/
|
||||
class NumbersTimedHistogram extends TimedHistogram<number> {
|
||||
constructor(settings: HistogramSettings) {
|
||||
super(settings);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute stats.
|
||||
*
|
||||
* @returns `null` if the histogram is empty, otherwise `Stats`.
|
||||
*/
|
||||
public stats(): Stats | null {
|
||||
if (this.buckets.length === 0) {
|
||||
return null;
|
||||
}
|
||||
let numbers = [];
|
||||
for (let bucket of this.buckets) {
|
||||
numbers.push(...bucket.events);
|
||||
}
|
||||
if (numbers.length === 0) {
|
||||
return null;
|
||||
}
|
||||
return new Stats(numbers);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Lag information on a server for a specific room.
|
||||
*
|
||||
* The same server may be represented by distinct instances of `ServerInfo` in
|
||||
* distinct rooms.
|
||||
*/
|
||||
class ServerInfo {
|
||||
/**
|
||||
* The histogram collecting lag, in ms.
|
||||
*/
|
||||
private histogram: NumbersTimedHistogram;
|
||||
|
||||
/**
|
||||
* Date of the latest message received from this server.
|
||||
*
|
||||
* May be used to clean up data structures.
|
||||
*/
|
||||
public latestMessage: Date = new Date(0);
|
||||
public latestStatsUpdate: Date;
|
||||
|
||||
constructor(settings: HistogramSettings, now: Date) {
|
||||
this.histogram = new NumbersTimedHistogram(settings);
|
||||
this.latestStatsUpdate = now;
|
||||
}
|
||||
|
||||
/**
|
||||
* Record lag information on this server.
|
||||
*
|
||||
* @param lag The duration of lag, in ms.
|
||||
*/
|
||||
pushLag(lag: number, now: Date) {
|
||||
this.latestMessage = now;
|
||||
this.histogram.push(lag, now);
|
||||
}
|
||||
|
||||
updateSettings(settings: HistogramSettings, now: Date) {
|
||||
this.histogram.updateSettings(settings, now);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute stats.
|
||||
*
|
||||
* @returns `null` if the histogram is empty, otherwise `Stats`.
|
||||
*/
|
||||
stats(now?: Date) {
|
||||
if (now) {
|
||||
this.latestStatsUpdate = now;
|
||||
}
|
||||
return this.histogram.stats();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Thresholds to start/stop warning of an issue.
|
||||
*
|
||||
* Once we have hit a value higher that `enterWarningZone`, the alert
|
||||
* will remain active until the value decreases below `exitWarningZone`.
|
||||
*/
|
||||
type WarningThresholds = {
|
||||
enterWarningZone: number,
|
||||
exitWarningZone: number
|
||||
}
|
||||
|
||||
enum AlertDiff {
|
||||
Start,
|
||||
Stop,
|
||||
NoChange
|
||||
}
|
||||
|
||||
/**
|
||||
* Statistics to help determine whether we should raise the alarm on lag in a room.
|
||||
*
|
||||
* Each individual server may have lag.
|
||||
*/
|
||||
class RoomInfo {
|
||||
/**
|
||||
* A map of domain => lag information.
|
||||
*/
|
||||
private serverLags: Map<string /* domain */, ServerInfo> = new Map();
|
||||
/**
|
||||
* The set of servers currently on alert.
|
||||
*/
|
||||
private serverAlerts: Set<string /* domain */> = new Set();
|
||||
|
||||
/**
|
||||
* Global lag information for this room.
|
||||
*/
|
||||
public totalLag: ServerInfo;
|
||||
|
||||
/**
|
||||
* If non-`null`, the date at which this room started being on alert.
|
||||
* Otherwise, the room is not an alert.
|
||||
*/
|
||||
public latestAlertStart: Date | null;
|
||||
|
||||
/**
|
||||
* The date at which we last issued a warning on this room.
|
||||
*
|
||||
* Used to avoid spamming the monitoring room with too many warnings per room.
|
||||
*/
|
||||
public latestWarning: Date = new Date(0);
|
||||
|
||||
/**
|
||||
* If non-`null`, we have issued a structured warning as a state event.
|
||||
* This needs to be redacted once the alert has passed.
|
||||
*/
|
||||
public warnStateEventId: string | null = null;
|
||||
|
||||
/**
|
||||
* The date at which we last received a message in this room.
|
||||
*/
|
||||
public latestMessage: Date = new Date(0);
|
||||
|
||||
constructor(now: Date) {
|
||||
this.serverLags = new Map();
|
||||
this.totalLag = new ServerInfo({
|
||||
bucketDurationMS: DEFAULT_BUCKET_DURATION_MS,
|
||||
bucketNumber: DEFAULT_BUCKET_NUMBER
|
||||
}, now);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a lag annotation.
|
||||
*
|
||||
* @param serverId The server from which the message was sent. Could be the local server.
|
||||
* @param lag How many ms of lag was measured. Hopefully ~0.
|
||||
* @param settings Settings used in case we need to create or update the histogram.
|
||||
* @param thresholds The thresholds to use to determine whether an origin server is currently lagging.
|
||||
* @param now Instant at which all of this was measured.
|
||||
*/
|
||||
pushLag(serverId: string, lag: number, settings: HistogramSettings, thresholds: WarningThresholds, now: Date = new Date()): AlertDiff {
|
||||
this.latestMessage = now;
|
||||
|
||||
// Update per-server lag.
|
||||
let serverInfo = this.serverLags.get(serverId);
|
||||
if (!serverInfo) {
|
||||
serverInfo = new ServerInfo(settings, now);
|
||||
this.serverLags.set(serverId, serverInfo);
|
||||
} else {
|
||||
serverInfo.updateSettings(settings, now);
|
||||
}
|
||||
serverInfo.pushLag(lag, now);
|
||||
|
||||
// Update global lag.
|
||||
this.totalLag.updateSettings(settings, now);
|
||||
this.totalLag.pushLag(lag, now);
|
||||
|
||||
// Check for alerts, if necessary.
|
||||
if (serverInfo.latestStatsUpdate.getTime() + settings.bucketDurationMS > now.getTime()) {
|
||||
// Too early to recompute stats.
|
||||
return AlertDiff.NoChange;
|
||||
}
|
||||
|
||||
let stats = serverInfo.stats(now)!;
|
||||
if (stats.median > thresholds.enterWarningZone) {
|
||||
// Oops, we're now on alert for this server.
|
||||
let previous = this.serverAlerts.has(serverId);
|
||||
if (!previous) {
|
||||
this.serverAlerts.add(serverId);
|
||||
return AlertDiff.Start;
|
||||
}
|
||||
} else if (stats.median < thresholds.exitWarningZone) {
|
||||
// Ah, we left the alert zone.
|
||||
let previous = this.serverAlerts.has(serverId);
|
||||
if (previous) {
|
||||
this.serverAlerts.delete(serverId);
|
||||
return AlertDiff.Stop;
|
||||
}
|
||||
}
|
||||
return AlertDiff.NoChange;
|
||||
}
|
||||
|
||||
/**
|
||||
* The number of servers currently on alert.
|
||||
*/
|
||||
public get alerts(): number {
|
||||
return this.serverAlerts.size;
|
||||
}
|
||||
|
||||
/**
|
||||
* The current global stats.
|
||||
*
|
||||
* These stats are not separated by remote server.
|
||||
*
|
||||
* @returns null if we have no recent data at all,
|
||||
* some stats otherwise.
|
||||
*/
|
||||
public globalStats(): Stats | null {
|
||||
return this.totalLag.stats();
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a server is currently marked as lagging.
|
||||
*
|
||||
* A server is marked as lagging if its mean lag has exceeded
|
||||
* `threshold.enterWarningZone` and has not decreased below
|
||||
* `threshold.exitWarningZone`.
|
||||
*
|
||||
* @returns `true` is that server is currently on alert.
|
||||
*/
|
||||
public isServerOnAlert(serverId: string): boolean {
|
||||
return this.serverAlerts.has(serverId);
|
||||
}
|
||||
|
||||
/**
|
||||
* The list of servers currently on alert.
|
||||
*/
|
||||
public serversOnAlert(): IterableIterator<string> {
|
||||
return this.serverAlerts.keys();
|
||||
}
|
||||
|
||||
public cleanup(settings: HistogramSettings, now: Date, oldest: Date) {
|
||||
// Cleanup global histogram.
|
||||
//
|
||||
// If `oldest == now - settings.duration * settings.number`, this
|
||||
// should correspond exactly to the cleanup that takes place within
|
||||
// `this.serverLags`. There is a risk of inconsistency between data
|
||||
// if this is not the case.
|
||||
//
|
||||
// We assume that this is an acceptable risk: as we regularly
|
||||
// erase oldest data from both `this.totalLag` and individual
|
||||
// entries of `this.serverLags`, both sets of data will eventually
|
||||
// catch up with each other.
|
||||
this.totalLag.updateSettings(settings, now);
|
||||
let serverLagsDeleteIds = [];
|
||||
for (let [serverId, serverStats] of this.serverLags) {
|
||||
if (serverStats.latestMessage < oldest) {
|
||||
// Remove entire histogram.
|
||||
serverLagsDeleteIds.push(serverId);
|
||||
continue;
|
||||
}
|
||||
// Cleanup histogram.
|
||||
serverStats.updateSettings(settings, now);
|
||||
}
|
||||
for (let key of serverLagsDeleteIds) {
|
||||
this.serverLags.delete(key);
|
||||
this.serverAlerts.delete(key);
|
||||
// Note that we remove the alert to save memory (it's not really useful
|
||||
// to keep monitoring a server for too long after receiving a message)
|
||||
// but this does NOT guaranteed that server lag is over. It may be that
|
||||
// the server is down or that the server is lagging by more than ~1h
|
||||
// (by default).
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export class DetectFederationLag extends Protection {
|
||||
/**
|
||||
* For each room we're monitoring, lag information.
|
||||
*/
|
||||
lagPerRoom: Map<string /* roomId */, RoomInfo> = new Map();
|
||||
public settings = {
|
||||
// Rooms to ignore.
|
||||
ignoreRooms: new StringSetProtectionSetting(),
|
||||
// Servers to ignore, typically because they're known to be slow.
|
||||
ignoreServers: new StringSetProtectionSetting(),
|
||||
// How often we should recompute lag.
|
||||
bucketDuration: new DurationMSProtectionSetting(DEFAULT_BUCKET_DURATION_MS, 100),
|
||||
// How long we should remember lag in a room (`bucketDuration * bucketNumber` ms).
|
||||
bucketNumber: new NumberProtectionSetting(DEFAULT_BUCKET_NUMBER, 1),
|
||||
// How much lag before the local homeserver is considered lagging.
|
||||
localHomeserverLagEnterWarningZone: new DurationMSProtectionSetting(DEFAULT_LOCAL_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS, 1),
|
||||
// How much lag before the local homeserver is considered not lagging anymore.
|
||||
localHomeserverLagExitWarningZone: new DurationMSProtectionSetting(DEFAULT_LOCAL_HOMESERVER_LAG_EXIT_WARNING_ZONE_MS, 1),
|
||||
// How much lag before a federated homeserver is considered lagging.
|
||||
federatedHomeserverLagEnterWarningZone: new DurationMSProtectionSetting(DEFAULT_FEDERATED_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS, 1),
|
||||
// How much lag before a federated homeserver is considered not lagging anymore.
|
||||
federatedHomeserverLagExitWarningZone: new DurationMSProtectionSetting(DEFAULT_FEDERATED_HOMESERVER_LAG_EXIT_WARNING_ZONE_MS, 1),
|
||||
// How much time we should wait before printing a new warning.
|
||||
warnAgainAfter: new DurationMSProtectionSetting(DEFAULT_REWARN_AFTER_MS, 1),
|
||||
// How many federated homeservers it takes to trigger an alert.
|
||||
// You probably want to update this if you're monitoring a room that
|
||||
// has many underpowered homeservers.
|
||||
numberOfLaggingFederatedHomeserversEnterWarningZone: new NumberProtectionSetting(DEFAULT_NUMBER_OF_LAGGING_FEDERATED_SERVERS_ENTER_WARNING_ZONE, 1),
|
||||
// How many federated homeservers it takes before we're considered not on alert anymore.
|
||||
// You probably want to update this if you're monitoring a room that
|
||||
// has many underpowered homeservers.
|
||||
numberOfLaggingFederatedHomeserversExitWarningZone: new NumberProtectionSetting(DEFAULT_NUMBER_OF_LAGGING_FEDERATED_SERVERS_EXIT_WARNING_ZONE, 1),
|
||||
// How long to wait before actually collecting statistics.
|
||||
// Used to avoid being misled by Mjölnir catching up with old messages on first sync.
|
||||
initialDelayGrace: new DurationMSProtectionSetting(DEFAULT_INITIAL_DELAY_GRACE_MS, 0),
|
||||
cleanupPeriod: new DurationMSProtectionSetting(DEFAULT_CLEANUP_PERIOD_MS, 1),
|
||||
};
|
||||
// The instant at which the first message was received.
|
||||
private firstMessage: Date | null = null;
|
||||
// The latest instant at which we have started cleaning up old data.
|
||||
private latestCleanup: Date = new Date(0);
|
||||
private latestHistogramSettings: HistogramSettings;
|
||||
constructor() {
|
||||
super();
|
||||
// Initialize and watch `this.latestHistogramSettings`.
|
||||
this.updateLatestHistogramSettings();
|
||||
this.settings.bucketDuration.on("set", () => this.updateLatestHistogramSettings());
|
||||
this.settings.bucketNumber.on("set", () => this.updateLatestHistogramSettings());
|
||||
}
|
||||
dispose() {
|
||||
this.settings.bucketDuration.removeAllListeners();
|
||||
this.settings.bucketNumber.removeAllListeners();
|
||||
}
|
||||
public get name(): string {
|
||||
return 'DetectFederationLag';
|
||||
}
|
||||
public get description(): string {
|
||||
return `Warn moderators if either the local homeserver starts lagging by ${this.settings.localHomeserverLagEnterWarningZone.value}ms or at least ${this.settings.numberOfLaggingFederatedHomeserversEnterWarningZone.value} start lagging by at least ${this.settings.federatedHomeserverLagEnterWarningZone.value}ms.`;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param now An argument used only by tests, to simulate events taking place at a specific date.
|
||||
*/
|
||||
public async handleEvent(mjolnir: Mjolnir, roomId: string, event: any, now: Date = new Date()) {
|
||||
// First, handle all cases in which we should ignore the event.
|
||||
if (!this.firstMessage) {
|
||||
this.firstMessage = now;
|
||||
}
|
||||
if (this.firstMessage.getTime() + this.settings.initialDelayGrace.value > now.getTime()) {
|
||||
// We're still in the initial grace period, ignore.
|
||||
return;
|
||||
}
|
||||
if (this.latestCleanup.getTime() + this.settings.cleanupPeriod.value > now.getTime()) {
|
||||
// We should run some cleanup.
|
||||
this.latestCleanup = now;
|
||||
this.cleanup(now);
|
||||
}
|
||||
if (this.settings.ignoreRooms.value.has(roomId)) {
|
||||
// Room is ignored.
|
||||
return;
|
||||
}
|
||||
const sender = event['sender'] as string;
|
||||
if (typeof sender !== "string") {
|
||||
// Ill-formed event.
|
||||
return;
|
||||
}
|
||||
if (sender === await mjolnir.client.getUserId()) {
|
||||
// Let's not create loops.
|
||||
return;
|
||||
}
|
||||
const domain = new UserID(sender).domain;
|
||||
if (!domain) {
|
||||
// Ill-formed event.
|
||||
return;
|
||||
}
|
||||
|
||||
const origin = event['origin_server_ts'] as number;
|
||||
if (typeof origin !== "number" || isNaN(origin)) {
|
||||
// Ill-formed event.
|
||||
return;
|
||||
}
|
||||
const delay = now.getTime() - origin;
|
||||
if (delay < 0) {
|
||||
// Could be an ill-formed event.
|
||||
// Could be non-motonic clocks or other time shennanigans.
|
||||
return;
|
||||
}
|
||||
|
||||
let roomInfo = this.lagPerRoom.get(roomId);
|
||||
if (!roomInfo) {
|
||||
roomInfo = new RoomInfo(now);
|
||||
this.lagPerRoom.set(roomId, roomInfo);
|
||||
}
|
||||
|
||||
const localDomain = new UserID(await mjolnir.client.getUserId()).domain
|
||||
const isLocalDomain = domain === localDomain;
|
||||
const thresholds =
|
||||
isLocalDomain
|
||||
? {
|
||||
enterWarningZone: this.settings.localHomeserverLagEnterWarningZone.value,
|
||||
exitWarningZone: this.settings.localHomeserverLagExitWarningZone.value,
|
||||
}
|
||||
: {
|
||||
enterWarningZone: this.settings.federatedHomeserverLagEnterWarningZone.value,
|
||||
exitWarningZone: this.settings.federatedHomeserverLagExitWarningZone.value,
|
||||
};
|
||||
|
||||
const diff = roomInfo.pushLag(domain, delay, this.latestHistogramSettings, thresholds, now);
|
||||
if (diff === AlertDiff.NoChange) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (roomInfo.latestWarning.getTime() + this.settings.warnAgainAfter.value > now.getTime()) {
|
||||
if (!isLocalDomain || diff !== AlertDiff.Start) {
|
||||
// No need to check for alarms, we have raised an alarm recently.
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Check whether an alarm needs to be raised!
|
||||
const isLocalDomainOnAlert = roomInfo.isServerOnAlert(localDomain);
|
||||
if (roomInfo.alerts > this.settings.numberOfLaggingFederatedHomeserversEnterWarningZone.value
|
||||
|| isLocalDomainOnAlert) {
|
||||
// Raise the alarm!
|
||||
if (!roomInfo.latestAlertStart) {
|
||||
roomInfo.latestAlertStart = now;
|
||||
}
|
||||
roomInfo.latestAlertStart = now;
|
||||
// Background-send message.
|
||||
const stats = roomInfo.globalStats();
|
||||
/* do not await */ mjolnir.logMessage(LogLevel.WARN, "FederationLag",
|
||||
`Room ${roomId} is experiencing ${isLocalDomainOnAlert ? "LOCAL" : "federated"} lag since ${roomInfo.latestAlertStart}.\n${roomInfo.alerts} homeservers are lagging: ${[...roomInfo.serversOnAlert()].sort()} .\nRoom lag statistics: ${JSON.stringify(stats, null, 2)}.`);
|
||||
// Drop a state event, for the use of potential other bots.
|
||||
const warnStateEventId = await mjolnir.client.sendStateEvent(mjolnir.managementRoomId, LAG_STATE_EVENT, roomId, {
|
||||
domains: [...roomInfo.serversOnAlert()],
|
||||
roomId,
|
||||
// We need to round the stats, as Matrix doesn't support floating-point
|
||||
// numbers in messages.
|
||||
stats: stats?.round(),
|
||||
since: roomInfo.latestAlertStart,
|
||||
});
|
||||
roomInfo.warnStateEventId = warnStateEventId;
|
||||
} else if (roomInfo.alerts < this.settings.numberOfLaggingFederatedHomeserversExitWarningZone.value
|
||||
|| !isLocalDomainOnAlert) {
|
||||
// Stop the alarm!
|
||||
/* do not await */ mjolnir.logMessage(LogLevel.INFO, "FederationLag",
|
||||
`Room ${roomId} lag has decreased to an acceptable level. Currently, ${roomInfo.alerts} homeservers are still lagging`
|
||||
);
|
||||
if (roomInfo.warnStateEventId) {
|
||||
const warnStateEventId = roomInfo.warnStateEventId;
|
||||
roomInfo.warnStateEventId = null;
|
||||
await mjolnir.client.redactEvent(mjolnir.managementRoomId, warnStateEventId, "Alert over");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Run cleanup on data structures, to save memory.
|
||||
*
|
||||
* @param now Now.
|
||||
* @param oldest Prune any data older than `oldest`.
|
||||
*/
|
||||
public async cleanup(now: Date = new Date()) {
|
||||
const oldest: Date = this.getOldestAcceptableData(now);
|
||||
const lagPerRoomDeleteIds = [];
|
||||
for (const [roomId, roomInfo] of this.lagPerRoom) {
|
||||
if (roomInfo.latestMessage < oldest) {
|
||||
// We need to remove the entire room.
|
||||
lagPerRoomDeleteIds.push(roomId);
|
||||
continue;
|
||||
}
|
||||
// Clean room stats.
|
||||
roomInfo.cleanup(this.latestHistogramSettings, now, oldest);
|
||||
}
|
||||
for (const roomId of lagPerRoomDeleteIds) {
|
||||
this.lagPerRoom.delete(roomId);
|
||||
}
|
||||
}
|
||||
|
||||
private getOldestAcceptableData(now: Date): Date {
|
||||
return new Date(now.getTime() - this.latestHistogramSettings.bucketDurationMS * this.latestHistogramSettings.bucketNumber)
|
||||
}
|
||||
private updateLatestHistogramSettings() {
|
||||
this.latestHistogramSettings = Object.freeze({
|
||||
bucketDurationMS: this.settings.bucketDuration.value,
|
||||
bucketNumber: this.settings.bucketNumber.value,
|
||||
});
|
||||
};
|
||||
|
||||
/**
|
||||
* Return (mostly) human-readable lag status.
|
||||
*/
|
||||
public async statusCommand(mjolnir: Mjolnir, subcommand: string[]): Promise<{html: string, text: string} | null> {
|
||||
const roomId = subcommand[0] || "*";
|
||||
const localDomain = new UserID(await mjolnir.client.getUserId()).domain;
|
||||
const annotatedStats = (roomInfo: RoomInfo) => {
|
||||
const stats = roomInfo.globalStats()?.round();
|
||||
if (!stats) {
|
||||
return null;
|
||||
}
|
||||
const isLocalDomainOnAlert = roomInfo.isServerOnAlert(localDomain);
|
||||
const numberOfServersOnAlert = roomInfo.alerts;
|
||||
if (isLocalDomainOnAlert) {
|
||||
(stats as any)["warning"] = "Local homeserver is lagging";
|
||||
} else if (numberOfServersOnAlert > this.settings.numberOfLaggingFederatedHomeserversEnterWarningZone.value) {
|
||||
(stats as any)["warning"] = `${numberOfServersOnAlert} homeservers are lagging`;
|
||||
}
|
||||
return stats;
|
||||
};
|
||||
let text;
|
||||
let html;
|
||||
if (roomId === "*") {
|
||||
// Collate data from all protected rooms.
|
||||
const result: any = {};
|
||||
|
||||
for (const [perRoomId, perRoomInfo] of this.lagPerRoom.entries()) {
|
||||
const key = await mjolnir.client.getPublishedAlias(perRoomId) || perRoomId;
|
||||
result[key] = annotatedStats(perRoomInfo);
|
||||
}
|
||||
text = JSON.stringify(result, null, 2);
|
||||
html = `<code>${JSON.stringify(result, null, " ")}</code>`;
|
||||
} else {
|
||||
// Fetch data from a specific room.
|
||||
const roomInfo = this.lagPerRoom.get(roomId);
|
||||
if (!roomInfo) {
|
||||
html = text = `Either ${roomId} is unmonitored or it has received no messages in a while`;
|
||||
} else {
|
||||
// Fetch data from all remote homeservers.
|
||||
const stats = annotatedStats(roomInfo);
|
||||
if (!stats) {
|
||||
html = text = `No recent messages in room ${roomId}`;
|
||||
} else {
|
||||
text = JSON.stringify(stats, null, 2);
|
||||
html = `<code>${JSON.stringify(stats, null, " ")}</code>`;
|
||||
}
|
||||
}
|
||||
}
|
||||
return {
|
||||
text,
|
||||
html
|
||||
}
|
||||
}
|
||||
}
|
@ -23,31 +23,34 @@ import { AbstractProtectionSetting } from "./ProtectionSettings";
|
||||
*
|
||||
* Protections are guaranteed to be run before redaction handlers.
|
||||
*/
|
||||
export interface IProtection {
|
||||
readonly name: string;
|
||||
readonly description: string;
|
||||
enabled: boolean;
|
||||
settings: { [setting: string]: AbstractProtectionSetting<any, any> };
|
||||
/*
|
||||
* Handle a single event from a protected room, to decide if we need to
|
||||
* respond to it
|
||||
*/
|
||||
handleEvent(mjolnir: Mjolnir, roomId: string, event: any): Promise<any>;
|
||||
/*
|
||||
* Handle a single reported event from a protecte room, to decide if we
|
||||
* need to respond to it
|
||||
*/
|
||||
handleReport(mjolnir: Mjolnir, roomId: string, reporterId: string, reason: string, event: any): Promise<any>;
|
||||
}
|
||||
export abstract class Protection implements IProtection {
|
||||
export abstract class Protection {
|
||||
abstract readonly name: string
|
||||
abstract readonly description: string;
|
||||
enabled = false;
|
||||
abstract settings: { [setting: string]: AbstractProtectionSetting<any, any> };
|
||||
|
||||
|
||||
/*
|
||||
* Handle a single event from a protected room, to decide if we need to
|
||||
* respond to it
|
||||
*/
|
||||
handleEvent(mjolnir: Mjolnir, roomId: string, event: any): Promise<any> {
|
||||
return Promise.resolve(null);
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle a single reported event from a protecte room, to decide if we
|
||||
* need to respond to it
|
||||
*/
|
||||
handleReport(mjolnir: Mjolnir, roomId: string, reporterId: string, event: any, reason?: string): Promise<any> {
|
||||
return Promise.resolve(null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return status information for `!mjolnir status ${protectionName}`.
|
||||
*/
|
||||
async statusCommand(mjolnir: Mjolnir, subcommand: string[]): Promise<{html: string, text: string} | null> {
|
||||
// By default, protections don't have any status to show.
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
@ -14,13 +14,24 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
import { EventEmitter } from "events";
|
||||
import { default as parseDuration } from "parse-duration";
|
||||
|
||||
// Define a few aliases to simplify parsing durations.
|
||||
|
||||
parseDuration["milliseconds"] = parseDuration["millis"] = parseDuration["ms"];
|
||||
parseDuration["days"] = parseDuration["day"];
|
||||
parseDuration["weeks"] = parseDuration["week"] = parseDuration["wk"];
|
||||
parseDuration["months"] = parseDuration["month"];
|
||||
parseDuration["years"] = parseDuration["year"];
|
||||
|
||||
export class ProtectionSettingValidationError extends Error {};
|
||||
|
||||
/*
|
||||
* @param TChange Type for individual pieces of data (e.g. `string`)
|
||||
* @param TValue Type for overall value of this setting (e.g. `string[]`)
|
||||
*/
|
||||
export class AbstractProtectionSetting<TChange, TValue> {
|
||||
export class AbstractProtectionSetting<TChange, TValue> extends EventEmitter {
|
||||
// the current value of this setting
|
||||
value: TValue
|
||||
|
||||
@ -50,6 +61,7 @@ export class AbstractProtectionSetting<TChange, TValue> {
|
||||
*/
|
||||
setValue(data: TValue) {
|
||||
this.value = data;
|
||||
this.emit("set", data);
|
||||
}
|
||||
}
|
||||
export class AbstractProtectionListSetting<TChange, TValue> extends AbstractProtectionSetting<TChange, TValue> {
|
||||
@ -88,10 +100,30 @@ export class StringListProtectionSetting extends AbstractProtectionListSetting<s
|
||||
fromString = (data: string): string => data;
|
||||
validate = (data: string): boolean => true;
|
||||
addValue(data: string): string[] {
|
||||
return [...this.value, data];
|
||||
this.emit("add", data);
|
||||
this.value.push(data);
|
||||
return this.value;
|
||||
}
|
||||
removeValue(data: string): string[] {
|
||||
return this.value.filter(i => i !== data);
|
||||
this.emit("remove", data);
|
||||
this.value = this.value.filter(i => i !== data);
|
||||
return this.value;
|
||||
}
|
||||
}
|
||||
|
||||
export class StringSetProtectionSetting extends AbstractProtectionListSetting<string, Set<String>> {
|
||||
value: Set<string> = new Set();
|
||||
fromString = (data: string): string => data;
|
||||
validate = (data: string): boolean => true;
|
||||
addValue(data: string): Set<string> {
|
||||
this.emit("add", data);
|
||||
this.value.add(data);
|
||||
return this.value;
|
||||
}
|
||||
removeValue(data: string): Set<string> {
|
||||
this.emit("remove", data);
|
||||
this.value.delete(data);
|
||||
return this.value;
|
||||
}
|
||||
}
|
||||
|
||||
@ -125,5 +157,30 @@ export class NumberProtectionSetting extends AbstractProtectionSetting<number, n
|
||||
&& (this.min === undefined || this.min <= data)
|
||||
&& (this.max === undefined || data <= this.max))
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* A setting holding durations, in ms.
|
||||
*
|
||||
* When parsing, the setting expects a unit, e.g. "1ms".
|
||||
*/
|
||||
export class DurationMSProtectionSetting extends AbstractProtectionSetting<number, number> {
|
||||
constructor(
|
||||
defaultValue: number,
|
||||
public readonly minMS: number|undefined = undefined,
|
||||
public readonly maxMS: number|undefined = undefined
|
||||
) {
|
||||
super();
|
||||
this.setValue(defaultValue);
|
||||
}
|
||||
|
||||
fromString(data: string) {
|
||||
let number = parseDuration(data);
|
||||
return isNaN(number) ? undefined : number;
|
||||
}
|
||||
validate(data: number) {
|
||||
return (!isNaN(data)
|
||||
&& (this.minMS === undefined || this.minMS <= data)
|
||||
&& (this.maxMS === undefined || data <= this.maxMS))
|
||||
}
|
||||
}
|
||||
|
@ -15,18 +15,20 @@ limitations under the License.
|
||||
*/
|
||||
|
||||
import { FirstMessageIsImage } from "./FirstMessageIsImage";
|
||||
import { IProtection } from "./IProtection";
|
||||
import { Protection } from "./IProtection";
|
||||
import { BasicFlooding } from "./BasicFlooding";
|
||||
import { DetectFederationLag } from "./DetectFederationLag";
|
||||
import { WordList } from "./WordList";
|
||||
import { MessageIsVoice } from "./MessageIsVoice";
|
||||
import { MessageIsMedia } from "./MessageIsMedia";
|
||||
import { TrustedReporters } from "./TrustedReporters";
|
||||
|
||||
export const PROTECTIONS: IProtection[] = [
|
||||
export const PROTECTIONS: Protection[] = [
|
||||
new FirstMessageIsImage(),
|
||||
new BasicFlooding(),
|
||||
new WordList(),
|
||||
new MessageIsVoice(),
|
||||
new MessageIsMedia(),
|
||||
new TrustedReporters()
|
||||
new TrustedReporters(),
|
||||
new DetectFederationLag(),
|
||||
];
|
||||
|
266
test/integration/detectFederationLagTest.ts
Normal file
266
test/integration/detectFederationLagTest.ts
Normal file
@ -0,0 +1,266 @@
|
||||
import { strict as assert } from "assert";
|
||||
|
||||
import { UserID } from "matrix-bot-sdk";
|
||||
import { Suite } from "mocha";
|
||||
import { Mjolnir } from "../../src/Mjolnir";
|
||||
import { DetectFederationLag, LAG_STATE_EVENT } from "../../src/protections/DetectFederationLag";
|
||||
import { getFirstReply } from "./commands/commandUtils";
|
||||
import { newTestUser } from "./clientHelper";
|
||||
|
||||
const LOCAL_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS = 180_000;
|
||||
const LOCAL_HOMESERVER_LAG_EXIT_WARNING_ZONE_MS = 100_000;
|
||||
const FEDERATED_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS = 300_000;
|
||||
const FEDERATED_HOMESERVER_LAG_EXIT_WARNING_ZONE_MS = 200_000;
|
||||
const BUCKET_DURATION_MS = 100;
|
||||
const SAMPLE_SIZE = 100;
|
||||
const NUMBER_OF_LAGGING_FEDERATED_HOMESERVERS_ENTER_WARNING_ZONE = 2;
|
||||
|
||||
const RE_STATS = /(\{(:?.|\n)*\})[^}]*$/m;
|
||||
|
||||
describe("Test: DetectFederationLag protection", function() {
|
||||
// In this entire test, we call `handleEvent` directly, injecting
|
||||
// - events that simulate lag;
|
||||
// - a progression through time, to make sure that histograms get processed.
|
||||
beforeEach(async function() {
|
||||
// Setup an instance of DetectFederationLag
|
||||
this.detector = new DetectFederationLag();
|
||||
await this.mjolnir.registerProtection(this.detector);
|
||||
await this.mjolnir.enableProtection("DetectFederationLag");
|
||||
|
||||
// Setup a moderator.
|
||||
this.moderator = await newTestUser({ name: { contains: "moderator" } });
|
||||
await this.moderator.joinRoom(this.mjolnir.managementRoomId);
|
||||
|
||||
const SETTINGS = {
|
||||
// The protection should kick in immediately.
|
||||
initialDelayGrace: 0,
|
||||
// Make histograms progress quickly.
|
||||
bucketDuration: BUCKET_DURATION_MS,
|
||||
// Three homeservers should be sufficient to raise an alert.
|
||||
numberOfLaggingFederatedHomeserversEnterWarningZone: NUMBER_OF_LAGGING_FEDERATED_HOMESERVERS_ENTER_WARNING_ZONE,
|
||||
|
||||
localHomeserverLagEnterWarningZone: LOCAL_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS,
|
||||
localHomeserverLagExitWarningZone: LOCAL_HOMESERVER_LAG_EXIT_WARNING_ZONE_MS,
|
||||
|
||||
federatedHomeserverLagEnterWarningZone: FEDERATED_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS,
|
||||
federatedHomeserverLagExitWarningZone: FEDERATED_HOMESERVER_LAG_EXIT_WARNING_ZONE_MS,
|
||||
};
|
||||
for (let key of Object.keys(SETTINGS)) {
|
||||
this.detector.settings[key].setValue(SETTINGS[key]);
|
||||
}
|
||||
this.localDomain = new UserID(await this.mjolnir.client.getUserId()).domain;
|
||||
this.protectedRoomId = `!room1:${this.localDomain}`;
|
||||
this.mjolnir.addProtectedRoom(this.protectedRoomId);
|
||||
|
||||
this.simulateLag = async (senders: string[], lag: number, start: Date) => {
|
||||
const content = {};
|
||||
const origin_server_ts = start.getTime() - lag;
|
||||
for (let i = 0; i < SAMPLE_SIZE; ++i) {
|
||||
// We call directly `this.detector.handleEvent` to be able to forge old values of `origin_server_ts`.
|
||||
await this.detector.handleEvent(this.mjolnir, this.protectedRoomId, {
|
||||
sender: senders[i % senders.length],
|
||||
origin_server_ts,
|
||||
content,
|
||||
},
|
||||
// Make sure that time progresses through histogram buckets.
|
||||
simulateDate(start, i)
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
this.getAlertEvent = async () => {
|
||||
try {
|
||||
let event = await this.mjolnir.client.getRoomStateEvent(this.mjolnir.managementRoomId, LAG_STATE_EVENT, this.protectedRoomId);
|
||||
if (Object.keys(event).length == 0) {
|
||||
// Event was redacted.
|
||||
return null;
|
||||
}
|
||||
return event;
|
||||
} catch (ex) {
|
||||
// No such event.
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
this.getCommandStatus = async () => {
|
||||
const protectedRoomReply = await getFirstReply(this.mjolnir.client, this.mjolnir.managementRoomId, () => {
|
||||
const command = `!mjolnir status protection DetectFederationLag ${this.protectedRoomId}`;
|
||||
return this.moderator.sendMessage(this.mjolnir.managementRoomId, { msgtype: 'm.text', body: command });
|
||||
});
|
||||
const globalReply = await getFirstReply(this.mjolnir.client, this.mjolnir.managementRoomId, () => {
|
||||
const command = `!mjolnir status protection DetectFederationLag *`;
|
||||
return this.moderator.sendMessage(this.mjolnir.managementRoomId, { msgtype: 'm.text', body: command });
|
||||
});
|
||||
const protectedRoomStatsStr = protectedRoomReply.content.body.match(RE_STATS)[0];
|
||||
const globalStatsStr = globalReply.content.body.match(RE_STATS)[0];
|
||||
return {
|
||||
protectedRoomStats: protectedRoomStatsStr ? JSON.parse(protectedRoomStatsStr) : null,
|
||||
globalStats: globalStatsStr ? JSON.parse(globalStatsStr) : null,
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
afterEach(async function() {
|
||||
await this.detector.cleanup();
|
||||
this.detector.dispose();
|
||||
await this.moderator?.stop();
|
||||
});
|
||||
|
||||
let simulateDate = (start: Date, progress: number = SAMPLE_SIZE) =>
|
||||
new Date(start.getTime() + 2 * progress * BUCKET_DURATION_MS / SAMPLE_SIZE);
|
||||
|
||||
it('DetectFederationLag doesn\'t detect lag when there isn\'t any', async function() {
|
||||
this.timeout(60000);
|
||||
const MULTIPLIERS = [0, 0.5, 0.9];
|
||||
|
||||
// In this test, all the events we send have a lag < {local, federated}HomeserverLagEnterWarningZoneMS.
|
||||
const start = new Date();
|
||||
|
||||
// Ensure that no alert has been emitted yet.
|
||||
assert.equal(await this.getAlertEvent(), null, "Initially, there should be no alert");
|
||||
|
||||
// First, let's send events from the local homeserver.
|
||||
const LOCAL_SENDERS = [`@local_user:${this.localDomain}`];
|
||||
for (let multiplier of MULTIPLIERS) {
|
||||
const LAG = multiplier * LOCAL_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS;
|
||||
await this.simulateLag(LOCAL_SENDERS, LAG, start);
|
||||
assert.equal(await this.getAlertEvent(), null, `We have sent lots of local pseudo-events with a small lag of ${LAG}, there should be NO alert`);
|
||||
}
|
||||
|
||||
// Three distinct remote servers should be sufficient to trigger an alert, if they all lag.
|
||||
const REMOTE_SENDERS = [
|
||||
"@user2:left.example.com",
|
||||
"@user3:right.example.com",
|
||||
"@user4:middle.example.com",
|
||||
];
|
||||
for (let multiplier of MULTIPLIERS) {
|
||||
const LAG = multiplier * FEDERATED_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS;
|
||||
await this.simulateLag(REMOTE_SENDERS, LAG, start);
|
||||
assert.equal(await this.getAlertEvent(), null, `We have sent lots of remote pseudo-events with a small lag of ${LAG}, there should be NO alert`);
|
||||
}
|
||||
|
||||
const {protectedRoomStats, globalStats} = await this.getCommandStatus();
|
||||
assert.ok(protectedRoomStats, "We should see stats for our room");
|
||||
assert.ok(protectedRoomStats.min >= 0, `min ${protectedRoomStats.min} >= 0`);
|
||||
assert.ok(protectedRoomStats.min < protectedRoomStats.max);
|
||||
assert.ok(protectedRoomStats.mean > 0);
|
||||
assert.ok(protectedRoomStats.mean < protectedRoomStats.max);
|
||||
assert.ok(protectedRoomStats.median < protectedRoomStats.max);
|
||||
assert.ok(protectedRoomStats.median > 0);
|
||||
assert.ok(protectedRoomStats.max >= MULTIPLIERS[MULTIPLIERS.length - 1] * FEDERATED_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS);
|
||||
assert.ok(protectedRoomStats.max < FEDERATED_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS);
|
||||
assert.deepEqual(globalStats, { [this.protectedRoomId]: protectedRoomStats });
|
||||
});
|
||||
|
||||
it('DetectFederationLag detects lag on local homeserver', async function() {
|
||||
this.timeout(60000);
|
||||
// In this test, all the events we send have a lag > localHomeserverLagEnterWarningZoneMS.
|
||||
const start = new Date();
|
||||
const stop = simulateDate(start);
|
||||
|
||||
// Ensure that no alert has been emitted yet.
|
||||
assert.equal(await this.getAlertEvent(), null, "Initially, there should be no alert");
|
||||
|
||||
// Simulate lagging events from the local homeserver. This should trigger an alarm.
|
||||
const SENDERS = [`@local_user_1:${this.localDomain}`];
|
||||
await this.simulateLag(SENDERS, 1.5 * LOCAL_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS, start);
|
||||
|
||||
let lagEvent = await this.getAlertEvent();
|
||||
console.debug(lagEvent);
|
||||
|
||||
assert(lagEvent, "Local lag should be reported");
|
||||
assert.equal(JSON.stringify(lagEvent.domains), JSON.stringify([this.localDomain]), "Lag report should mention only the local domain");
|
||||
assert.equal(lagEvent.roomId, this.protectedRoomId, "Lag report should mention the right room");
|
||||
assert(new Date(lagEvent.since) >= start, "Lag report should have happened since `now`");
|
||||
assert(new Date(lagEvent.since) < stop, "Lag should have been detected before the end of the bombardment");
|
||||
|
||||
{
|
||||
const {protectedRoomStats, globalStats} = await this.getCommandStatus();
|
||||
assert.ok(protectedRoomStats, "We should see stats for our room");
|
||||
assert.ok(protectedRoomStats.min >= 0, `min ${protectedRoomStats.min} >= 0`);
|
||||
assert.ok(protectedRoomStats.min < protectedRoomStats.max);
|
||||
assert.ok(protectedRoomStats.mean > 0);
|
||||
assert.ok(protectedRoomStats.mean < protectedRoomStats.max);
|
||||
assert.ok(protectedRoomStats.median < protectedRoomStats.max);
|
||||
assert.ok(protectedRoomStats.median > 0);
|
||||
assert.ok(protectedRoomStats.max >= 1.5 * LOCAL_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS);
|
||||
assert.deepEqual(globalStats, { [this.protectedRoomId]: protectedRoomStats })
|
||||
}
|
||||
|
||||
// Simulate non-lagging events from the local homeserver. After a while, this should rescind the alarm.
|
||||
// We switch to a new (pseudo-)user to simplify reading logs.
|
||||
const SENDERS_2 = [`@local_user_2:${this.localDomain}`];
|
||||
const start2 = new Date(stop.getTime() + 1_000);
|
||||
await this.simulateLag(SENDERS_2, 0.75 * LOCAL_HOMESERVER_LAG_EXIT_WARNING_ZONE_MS, start2);
|
||||
|
||||
assert.equal(await this.getAlertEvent(), null, "The alert should now be rescinded");
|
||||
|
||||
{
|
||||
const {protectedRoomStats, globalStats} = await this.getCommandStatus();
|
||||
assert.ok(protectedRoomStats, "We should see stats for our room");
|
||||
assert.ok(protectedRoomStats.min >= 0, `min ${protectedRoomStats.min} >= 0`);
|
||||
assert.ok(protectedRoomStats.min < protectedRoomStats.max);
|
||||
assert.ok(protectedRoomStats.mean > 0);
|
||||
assert.ok(protectedRoomStats.mean < protectedRoomStats.max);
|
||||
assert.ok(protectedRoomStats.median < protectedRoomStats.max);
|
||||
assert.ok(protectedRoomStats.median > 0);
|
||||
assert.ok(protectedRoomStats.max >= 0.75 * LOCAL_HOMESERVER_LAG_EXIT_WARNING_ZONE_MS);
|
||||
assert.ok(protectedRoomStats.max < FEDERATED_HOMESERVER_LAG_EXIT_WARNING_ZONE_MS);
|
||||
assert.deepEqual(globalStats, { [this.protectedRoomId]: protectedRoomStats })
|
||||
}
|
||||
});
|
||||
|
||||
it('DetectFederationLag doesn\'t report lag when only one federated homeserver lags', async function() {
|
||||
this.timeout(60000);
|
||||
// In this test, all the events we send have a lag > federatedHomeserverLagEnterWarningZoneMS.
|
||||
const start = new Date();
|
||||
|
||||
// Ensure that no alert has been emitted yet.
|
||||
assert.equal(await this.getAlertEvent(), null, "Initially, there should be no alert");
|
||||
|
||||
// First, let's send events from the local homeserver.
|
||||
const SENDERS = ["@left:left.example.com"];
|
||||
await this.simulateLag(SENDERS, 1.5 * FEDERATED_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS, start);
|
||||
|
||||
let lagEvent = await this.getAlertEvent();
|
||||
assert.equal(lagEvent, null, "With only one federated homeserver lagging, we shouldn't report any lag");
|
||||
});
|
||||
|
||||
it('DetectFederationLag reports lag when three federated homeservers lag', async function() {
|
||||
this.timeout(60000);
|
||||
// In this test, all the events we send have a lag > federatedHomeserverLagEnterWarningZoneMS.
|
||||
const start = new Date();
|
||||
const stop = simulateDate(start);
|
||||
|
||||
// Ensure that no alert has been emitted yet.
|
||||
assert.equal(await this.getAlertEvent(), null, "Initially, there should be no alert");
|
||||
|
||||
// Simulate lagging events from remote homeservers. This should trigger an alarm.
|
||||
const SENDERS = [
|
||||
"@left:left.example.com",
|
||||
"@middle:middle.example.com",
|
||||
"@right:right.example.com",
|
||||
];
|
||||
await this.simulateLag(SENDERS, 1.5 * FEDERATED_HOMESERVER_LAG_ENTER_WARNING_ZONE_MS, start);
|
||||
|
||||
let lagEvent = await this.getAlertEvent();
|
||||
console.debug(lagEvent);
|
||||
assert(lagEvent, "Local lag should be reported");
|
||||
assert.equal(JSON.stringify(lagEvent.domains.sort()), JSON.stringify(["left.example.com", "middle.example.com", "right.example.com"]), "Lag report should mention only the local domain");
|
||||
assert.equal(lagEvent.roomId, this.protectedRoomId, "Lag report should mention the right room");
|
||||
assert(new Date(lagEvent.since) >= start, "Lag report should have happened since `now`");
|
||||
assert(new Date(lagEvent.since) < stop, "Lag should have been detected before the end of the bombardment");
|
||||
|
||||
// Simulate non-lagging events from remote homeservers. After a while, this should rescind the alarm.
|
||||
// We switch to new (pseudo-)users to simplify reading logs.
|
||||
const SENDERS_2 = [
|
||||
"@left_2:left.example.com",
|
||||
"@middle_2:middle.example.com",
|
||||
"@right_2:right.example.com",
|
||||
];
|
||||
const start2 = new Date(stop.getTime() + 1_000);
|
||||
await this.simulateLag(SENDERS_2, 0.75 * FEDERATED_HOMESERVER_LAG_EXIT_WARNING_ZONE_MS, start2);
|
||||
|
||||
assert.equal(await this.getAlertEvent(), null, "The alert should now be rescinded");
|
||||
});
|
||||
});
|
@ -9,6 +9,7 @@ import { makeMjolnir, teardownManagementRoom } from "./mjolnirSetupUtils";
|
||||
export const mochaHooks = {
|
||||
beforeEach: [
|
||||
async function() {
|
||||
console.error("---- entering test", JSON.stringify(this.currentTest.title)); // Makes MatrixClient error logs a bit easier to parse.
|
||||
console.log("mochaHooks.beforeEach");
|
||||
// Sometimes it takes a little longer to register users.
|
||||
this.timeout(10000)
|
||||
@ -33,6 +34,7 @@ export const mochaHooks = {
|
||||
]);
|
||||
// remove alias from management room and leave it.
|
||||
await teardownManagementRoom(this.mjolnir.client, this.mjolnir.managementRoomId, config.managementRoom);
|
||||
console.error("---- completed test", JSON.stringify(this.currentTest.title), "\n\n"); // Makes MatrixClient error logs a bit easier to parse.
|
||||
}
|
||||
]
|
||||
};
|
||||
|
17
yarn.lock
17
yarn.lock
@ -91,6 +91,11 @@
|
||||
"@types/connect" "*"
|
||||
"@types/node" "*"
|
||||
|
||||
"@types/config@0.0.41":
|
||||
version "0.0.41"
|
||||
resolved "https://registry.yarnpkg.com/@types/config/-/config-0.0.41.tgz#c8237ab09730380801f3643beaefa077ca5f3c28"
|
||||
integrity sha512-HjXUmIld0gwvyG8MU/17QtLzOyuMX4jbGuijmS9sWsob5xxgZ/hY9cbRCaHIHqTQ3HMLhwS3F8uXq3Bt9zgzHA==
|
||||
|
||||
"@types/connect@*":
|
||||
version "3.4.35"
|
||||
resolved "https://registry.yarnpkg.com/@types/connect/-/connect-3.4.35.tgz#5fcf6ae445e4021d1fc2219a4873cc73a3bb2ad1"
|
||||
@ -122,6 +127,11 @@
|
||||
"@types/qs" "*"
|
||||
"@types/serve-static" "*"
|
||||
|
||||
"@types/html-to-text@^8.0.1":
|
||||
version "8.0.1"
|
||||
resolved "https://registry.yarnpkg.com/@types/html-to-text/-/html-to-text-8.0.1.tgz#e449513df2283b1adedc85bdc2f6b7187f32972a"
|
||||
integrity sha512-0B/OifmJYmk5r9z9+KJtGWOF0LEjbTN4D2QeCh+mAw81JkJwC83NvNWUZFEqRT5PpnjX7vX0ab1SMGcwCs3Lag==
|
||||
|
||||
"@types/istanbul-lib-coverage@*", "@types/istanbul-lib-coverage@^2.0.0":
|
||||
version "2.0.3"
|
||||
resolved "https://registry.yarnpkg.com/@types/istanbul-lib-coverage/-/istanbul-lib-coverage-2.0.3.tgz#4ba8ddb720221f432e443bd5f9117fd22cfd4762"
|
||||
@ -827,7 +837,7 @@ escalade@^3.1.1:
|
||||
resolved "https://registry.yarnpkg.com/escalade/-/escalade-3.1.1.tgz#d8cfdc7000965c5a0174b4a82eaa5c0552742e40"
|
||||
integrity sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==
|
||||
|
||||
escape-html@^1.0.3, escape-html@~1.0.3:
|
||||
escape-html@~1.0.3:
|
||||
version "1.0.3"
|
||||
resolved "https://registry.yarnpkg.com/escape-html/-/escape-html-1.0.3.tgz#0258eae4d3d0c0974de1c169188ef0051d1d1988"
|
||||
integrity sha1-Aljq5NPQwJdN4cFpGI7wBR0dGYg=
|
||||
@ -2005,6 +2015,11 @@ parent-module@^1.0.0:
|
||||
dependencies:
|
||||
callsites "^3.0.0"
|
||||
|
||||
parse-duration@^1.0.2:
|
||||
version "1.0.2"
|
||||
resolved "https://registry.yarnpkg.com/parse-duration/-/parse-duration-1.0.2.tgz#b9aa7d3a1363cc7e8845bea8fd3baf8a11df5805"
|
||||
integrity sha512-Dg27N6mfok+ow1a2rj/nRjtCfaKrHUZV2SJpEn/s8GaVUSlf4GGRCRP1c13Hj+wfPKVMrFDqLMLITkYKgKxyyg==
|
||||
|
||||
parse-srcset@^1.0.2:
|
||||
version "1.0.2"
|
||||
resolved "https://registry.yarnpkg.com/parse-srcset/-/parse-srcset-1.0.2.tgz#f2bd221f6cc970a938d88556abc589caaaa2bde1"
|
||||
|
Loading…
Reference in New Issue
Block a user