From bccb18225baac1840799431abf670ec0f497538b Mon Sep 17 00:00:00 2001 From: Emi Simpson Date: Sun, 21 Jun 2020 13:56:11 -0400 Subject: [PATCH 1/8] Added unconfigurable wordlist protection --- src/protections/WordList.ts | 88 ++++++++++++++++++++++++++++++++++ src/protections/protections.ts | 6 +++ 2 files changed, 94 insertions(+) create mode 100644 src/protections/WordList.ts diff --git a/src/protections/WordList.ts b/src/protections/WordList.ts new file mode 100644 index 0000000..7c2842b --- /dev/null +++ b/src/protections/WordList.ts @@ -0,0 +1,88 @@ +/* +Copyright 2019, 2020 The Matrix.org Foundation C.I.C. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +import { IProtection } from "./IProtection"; +import { Mjolnir } from "../Mjolnir"; +import { LogLevel, LogService } from "matrix-bot-sdk"; +import { logMessage } from "../LogProxy"; +import config from "../config"; +import { isTrueJoinEvent } from "../utils"; + +export class WordList implements IProtection { + + private justJoined: { [roomId: string]: { [username: string]: Date} } = {}; + private badWords: RegExp = new RegExp(/.*(poopyhead).*/i) + + constructor() { + } + + public get name(): string { + return 'WordList'; + } + + public async handleEvent(mjolnir: Mjolnir, roomId: string, event: any): Promise { + if (!this.justJoined[roomId]) this.justJoined[roomId] = {}; + + const content = event['content'] || {}; + + // When a new member logs in, store the time they joined. 
This will be useful + // when we need to check if a message was sent within 20 minutes of joining + if (event['type'] === 'm.room.member') { + if (isTrueJoinEvent(event)) { + const now = new Date(); + this.justJoined[roomId][event['state_key']] = now; + LogService.info("WordList", `${event['state_key']} joined ${roomId} at ${now.toDateString()}`); + } else if (content['membership'] == 'leave' || content['membership'] == 'ban') { + delete this.justJoined[roomId][event['sender']] + } + + return; // stop processing (membership event spam is another problem) + } + + if (event['type'] === 'm.room.message') { + const message = content['formatted_body'] || content['body'] || null; + + const joinTime = this.justJoined[roomId][event['sender']] + if (joinTime) { // Disregard if the user isn't recently joined + + // Check if they did join recently, was it within 20 minutes + const now = new Date(); + if (now.valueOf() - joinTime.valueOf() > 20 * 60 * 1000) { + delete this.justJoined[roomId][event['sender']] // Remove the user + LogService.info("WordList", `${event['sender']} is no longer considered suspect`); + return + } + + // Perform the test + if (message && this.badWords.test(message)) { + await logMessage(LogLevel.WARN, "WordList", `Banning ${event['sender']} for word list violation in ${roomId}.`); + if (!config.noop) { + await mjolnir.client.banUser(event['sender'], roomId, "Word list violation"); + } else { + await logMessage(LogLevel.WARN, "WordList", `Tried to ban ${event['sender']} in ${roomId} but Mjolnir is running in no-op mode`, roomId); + } + + // Redact the event + if (!config.noop) { + await mjolnir.client.redactEvent(roomId, event['event_id'], "spam"); + } else { + await logMessage(LogLevel.WARN, "WordList", `Tried to redact ${event['event_id']} in ${roomId} but Mjolnir is running in no-op mode`, roomId); + } + } + } + } + } +} diff --git a/src/protections/protections.ts b/src/protections/protections.ts index 894a221..612a1b2 100644 --- 
a/src/protections/protections.ts +++ b/src/protections/protections.ts @@ -17,6 +17,7 @@ limitations under the License. import { FirstMessageIsImage } from "./FirstMessageIsImage"; import { IProtection } from "./IProtection"; import { BasicFlooding, MAX_PER_MINUTE } from "./BasicFlooding"; +import { WordList } from "./WordList"; export const PROTECTIONS: PossibleProtections = { [new FirstMessageIsImage().name]: { @@ -28,6 +29,11 @@ export const PROTECTIONS: PossibleProtections = { description: "If a user posts more than " + MAX_PER_MINUTE + " messages in 60s they'll be " + "banned for spam. This does not publish the ban to any of your ban lists.", factory: () => new BasicFlooding(), + }, + [new WordList().name]: { + description: "If a user posts a monitored word within 20 minutes of joining, they " + + "will be banned from that room. This will not publish the ban to a ban list.", + factory: () => new WordList(), } }; From c2fbf0934acfa57ff64d453fefa436eced364a12 Mon Sep 17 00:00:00 2001 From: Emi Simpson Date: Sun, 21 Jun 2020 15:18:34 -0400 Subject: [PATCH 2/8] Added configuration for wordlist protection --- config/default.yaml | 18 ++++++++ src/config.ts | 12 ++++++ src/protections/WordList.ts | 86 ++++++++++++++++++++++--------------- 3 files changed, 82 insertions(+), 34 deletions(-) diff --git a/config/default.yaml b/config/default.yaml index 9b91a02..d7efb97 100644 --- a/config/default.yaml +++ b/config/default.yaml @@ -107,6 +107,24 @@ commands: additionalPrefixes: - "mjolnir_bot" +# Configuration specific to certain toggleable protections +protections: + # Configuration for the wordlist plugin, which can ban users based if they say certain + # blocked words shortly after joining. + wordlist: + # A list of words which should be monitored by the bot. These will match if any part + # of the word is present in the message in any case. e.g. "poop" also matches + # "poOPYHEad". Additionally, regular expressions can be used. 
+ words: + - "nigger" + - "faggot" + - "tranny" + - "retard" + # How long after a user joins the server should the bot monitor their messages. After + # this time, users can say words from the wordlist without being banned automatically. + # Set to zero to disable (users will always be banned if they say a bad word) + minutesBeforeTrusting: 20 + # Options for monitoring the health of the bot health: # healthz options. These options are best for use in container environments diff --git a/src/config.ts b/src/config.ts index 0673a1c..d24321e 100644 --- a/src/config.ts +++ b/src/config.ts @@ -43,6 +43,12 @@ interface IConfig { allowNoPrefix: boolean; additionalPrefixes: string[]; }; + protections: { + wordlist: { + words: string[]; + minutesBeforeTrusting: number; + }; + }; health: { healthz: { enabled: boolean; @@ -89,6 +95,12 @@ const defaultConfig: IConfig = { allowNoPrefix: false, additionalPrefixes: [], }, + protections: { + wordlist: { + words: ["nigger", "faggot", "tranny", "retard"], + minutesBeforeTrusting: 20 + } + }, health: { healthz: { enabled: false, diff --git a/src/protections/WordList.ts b/src/protections/WordList.ts index 7c2842b..2ab10ff 100644 --- a/src/protections/WordList.ts +++ b/src/protections/WordList.ts @@ -24,9 +24,14 @@ import { isTrueJoinEvent } from "../utils"; export class WordList implements IProtection { private justJoined: { [roomId: string]: { [username: string]: Date} } = {}; - private badWords: RegExp = new RegExp(/.*(poopyhead).*/i) + private badWords: RegExp; constructor() { + // Create a mega-regex from all the tiny baby regexs + this.badWords = new RegExp( + "(" + config.protections.wordlist.words.join(")|(")+ ")", + "i" + ) } public get name(): string { @@ -34,53 +39,66 @@ export class WordList implements IProtection { } public async handleEvent(mjolnir: Mjolnir, roomId: string, event: any): Promise { - if (!this.justJoined[roomId]) this.justJoined[roomId] = {}; const content = event['content'] || {}; + const mbt = 
config.protections.wordlist.minutesBeforeTrusting; - // When a new member logs in, store the time they joined. This will be useful - // when we need to check if a message was sent within 20 minutes of joining - if (event['type'] === 'm.room.member') { - if (isTrueJoinEvent(event)) { - const now = new Date(); - this.justJoined[roomId][event['state_key']] = now; - LogService.info("WordList", `${event['state_key']} joined ${roomId} at ${now.toDateString()}`); - } else if (content['membership'] == 'leave' || content['membership'] == 'ban') { - delete this.justJoined[roomId][event['sender']] + if (mbt > 0) { + if (!this.justJoined[roomId]) this.justJoined[roomId] = {}; + + // When a new member logs in, store the time they joined. This will be useful + // when we need to check if a message was sent within 20 minutes of joining + if (event['type'] === 'm.room.member') { + if (isTrueJoinEvent(event)) { + const now = new Date(); + this.justJoined[roomId][event['state_key']] = now; + LogService.info("WordList", `${event['state_key']} joined ${roomId} at ${now.toDateString()}`); + } else if (content['membership'] == 'leave' || content['membership'] == 'ban') { + delete this.justJoined[roomId][event['sender']] + } + + return; } - - return; // stop processing (membership event spam is another problem) } if (event['type'] === 'm.room.message') { const message = content['formatted_body'] || content['body'] || null; - const joinTime = this.justJoined[roomId][event['sender']] - if (joinTime) { // Disregard if the user isn't recently joined + // Check conditions first + if (mbt > 0) { + const joinTime = this.justJoined[roomId][event['sender']] + if (joinTime) { // Disregard if the user isn't recently joined - // Check if they did join recently, was it within 20 minutes - const now = new Date(); - if (now.valueOf() - joinTime.valueOf() > 20 * 60 * 1000) { - delete this.justJoined[roomId][event['sender']] // Remove the user - LogService.info("WordList", `${event['sender']} is no 
longer considered suspect`); + // Check if they did join recently, was it within the timeframe + const now = new Date(); + if (now.valueOf() - joinTime.valueOf() > mbt * 60 * 1000) { + delete this.justJoined[roomId][event['sender']] // Remove the user + LogService.info("WordList", `${event['sender']} is no longer considered suspect`); + return + } + + } else { + // The user isn't in the recently joined users list, no need to keep + // looking return } + } - // Perform the test - if (message && this.badWords.test(message)) { - await logMessage(LogLevel.WARN, "WordList", `Banning ${event['sender']} for word list violation in ${roomId}.`); - if (!config.noop) { - await mjolnir.client.banUser(event['sender'], roomId, "Word list violation"); - } else { - await logMessage(LogLevel.WARN, "WordList", `Tried to ban ${event['sender']} in ${roomId} but Mjolnir is running in no-op mode`, roomId); - } - // Redact the event - if (!config.noop) { - await mjolnir.client.redactEvent(roomId, event['event_id'], "spam"); - } else { - await logMessage(LogLevel.WARN, "WordList", `Tried to redact ${event['event_id']} in ${roomId} but Mjolnir is running in no-op mode`, roomId); - } + // Perform the test + if (message && this.badWords.test(message)) { + await logMessage(LogLevel.WARN, "WordList", `Banning ${event['sender']} for word list violation in ${roomId}.`); + if (!config.noop) { + await mjolnir.client.banUser(event['sender'], roomId, "Word list violation"); + } else { + await logMessage(LogLevel.WARN, "WordList", `Tried to ban ${event['sender']} in ${roomId} but Mjolnir is running in no-op mode`, roomId); + } + + // Redact the event + if (!config.noop) { + await mjolnir.client.redactEvent(roomId, event['event_id'], "spam"); + } else { + await logMessage(LogLevel.WARN, "WordList", `Tried to redact ${event['event_id']} in ${roomId} but Mjolnir is running in no-op mode`, roomId); } } } From 1e47be497b7e59a9ffc2a2a4582b3e1c617bdaf0 Mon Sep 17 00:00:00 2001 From: Emi Simpson Date: Sun, 
21 Jun 2020 15:21:50 -0400 Subject: [PATCH 3/8] Fix lint Signed-off-by: Emi Simpson --- src/protections/WordList.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/protections/WordList.ts b/src/protections/WordList.ts index 2ab10ff..80c1701 100644 --- a/src/protections/WordList.ts +++ b/src/protections/WordList.ts @@ -29,7 +29,7 @@ export class WordList implements IProtection { constructor() { // Create a mega-regex from all the tiny baby regexs this.badWords = new RegExp( - "(" + config.protections.wordlist.words.join(")|(")+ ")", + "(" + config.protections.wordlist.words.join(")|(") + ")", "i" ) } @@ -53,7 +53,7 @@ export class WordList implements IProtection { const now = new Date(); this.justJoined[roomId][event['state_key']] = now; LogService.info("WordList", `${event['state_key']} joined ${roomId} at ${now.toDateString()}`); - } else if (content['membership'] == 'leave' || content['membership'] == 'ban') { + } else if (content['membership'] === 'leave' || content['membership'] === 'ban') { delete this.justJoined[roomId][event['sender']] } From 6fa93e3bae136bac5930540c69a7ab9236a02caa Mon Sep 17 00:00:00 2001 From: Emi Tatsuo Date: Sun, 21 Jun 2020 16:26:51 -0400 Subject: [PATCH 4/8] Corrected description of WordList to be time-independent --- src/protections/protections.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/protections/protections.ts b/src/protections/protections.ts index 612a1b2..ba81b5d 100644 --- a/src/protections/protections.ts +++ b/src/protections/protections.ts @@ -31,7 +31,7 @@ export const PROTECTIONS: PossibleProtections = { factory: () => new BasicFlooding(), }, [new WordList().name]: { - description: "If a user posts a monitored word within 20 minutes of joining, they " + + description: "If a user posts a monitored word a set amount of time after joining, they " + "will be banned from that room. 
This will not publish the ban to a ban list.", factory: () => new WordList(), } From 4c0bbd845daa80cc0eb0414cbc0da06ac7920047 Mon Sep 17 00:00:00 2001 From: Emi Tatsuo Date: Sat, 31 Oct 2020 09:23:18 -0400 Subject: [PATCH 5/8] Removed default words from word list in favor of more neutral words --- config/default.yaml | 8 ++++---- src/config.ts | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/config/default.yaml b/config/default.yaml index d7efb97..5a160d2 100644 --- a/config/default.yaml +++ b/config/default.yaml @@ -116,10 +116,10 @@ protections: # of the word is present in the message in any case. e.g. "poop" also matches # "poOPYHEad". Additionally, regular expressions can be used. words: - - "nigger" - - "faggot" - - "tranny" - - "retard" + - "CaSe" + - "InSeNsAtIve" + - "WoRd" + - "LiSt" # How long after a user joins the server should the bot monitor their messages. After # this time, users can say words from the wordlist without being banned automatically. # Set to zero to disable (users will always be banned if they say a bad word) diff --git a/src/config.ts b/src/config.ts index d24321e..6dac3b7 100644 --- a/src/config.ts +++ b/src/config.ts @@ -97,7 +97,7 @@ const defaultConfig: IConfig = { }, protections: { wordlist: { - words: ["nigger", "faggot", "tranny", "retard"], + words: [], minutesBeforeTrusting: 20 } }, From 50f23520019dccca1936cefa1c88c1ebf525a22e Mon Sep 17 00:00:00 2001 From: Emi Tatsuo Date: Sat, 31 Oct 2020 09:28:50 -0400 Subject: [PATCH 6/8] Refactored to more verbose variable names; Updated copyright --- src/protections/WordList.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/protections/WordList.ts b/src/protections/WordList.ts index 80c1701..88015eb 100644 --- a/src/protections/WordList.ts +++ b/src/protections/WordList.ts @@ -1,5 +1,5 @@ /* -Copyright 2019, 2020 The Matrix.org Foundation C.I.C. +Copyright 2020 Emi Tatsuo Simpson et al. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -41,9 +41,9 @@ export class WordList implements IProtection { public async handleEvent(mjolnir: Mjolnir, roomId: string, event: any): Promise { const content = event['content'] || {}; - const mbt = config.protections.wordlist.minutesBeforeTrusting; + const minsBeforeTrusting = config.protections.wordlist.minutesBeforeTrusting; - if (mbt > 0) { + if (minsBeforeTrusting > 0) { if (!this.justJoined[roomId]) this.justJoined[roomId] = {}; // When a new member logs in, store the time they joined. This will be useful @@ -65,13 +65,13 @@ export class WordList implements IProtection { const message = content['formatted_body'] || content['body'] || null; // Check conditions first - if (mbt > 0) { + if (minsBeforeTrusting > 0) { const joinTime = this.justJoined[roomId][event['sender']] if (joinTime) { // Disregard if the user isn't recently joined // Check if they did join recently, was it within the timeframe const now = new Date(); - if (now.valueOf() - joinTime.valueOf() > mbt * 60 * 1000) { + if (now.valueOf() - joinTime.valueOf() > minsBeforeTrusting * 60 * 1000) { delete this.justJoined[roomId][event['sender']] // Remove the user LogService.info("WordList", `${event['sender']} is no longer considered suspect`); return From 44a15f78517b89438c388e56799fcdde3b89f872 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Sat, 31 Oct 2020 09:32:49 -0400 Subject: [PATCH 7/8] Corrected whitespace in config.yml --- config/default.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/config/default.yaml b/config/default.yaml index 5a160d2..74e4431 100644 --- a/config/default.yaml +++ b/config/default.yaml @@ -120,6 +120,7 @@ protections: - "InSeNsAtIve" - "WoRd" - "LiSt" + # How long after a user joins the server should the bot monitor their messages. After # this time, users can say words from the wordlist without being banned automatically. 
# Set to zero to disable (users will always be banned if they say a bad word) From afe1c8f5a27d4b3294ce7b1e1521d4536977c9cb Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 4 Nov 2020 12:19:32 -0700 Subject: [PATCH 8/8] Update config/default.yaml --- config/default.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/default.yaml b/config/default.yaml index 74e4431..9e9505c 100644 --- a/config/default.yaml +++ b/config/default.yaml @@ -113,8 +113,8 @@ protections: # blocked words shortly after joining. wordlist: # A list of words which should be monitored by the bot. These will match if any part - # of the word is present in the message in any case. e.g. "poop" also matches - # "poOPYHEad". Additionally, regular expressions can be used. + # of the word is present in the message in any case. e.g. "hello" also matches + # "HEllO". Additionally, regular expressions can be used. words: - "CaSe" - "InSeNsAtIve"