diff --git a/config/default.yaml b/config/default.yaml index 3edc54b..9b91a02 100644 --- a/config/default.yaml +++ b/config/default.yaml @@ -106,3 +106,34 @@ commands: # as display names or prefixed with exclamation points. additionalPrefixes: - "mjolnir_bot" + +# Options for monitoring the health of the bot +health: + # healthz options. These options are best for use in container environments + # like Kubernetes to detect how healthy the service is. The bot will report + # that it is unhealthy until it is able to process user requests. Typically + # this means that it'll flag itself as unhealthy for a number of minutes + # before saying "Now monitoring rooms" and flagging itself healthy. + # + # Health is flagged through HTTP status codes, defined below. + healthz: + # Whether the healthz integration should be enabled (default false) + enabled: false + + # The port to expose the webserver on. Defaults to 8080. + port: 8080 + + # The address to listen for requests on. Defaults to all addresses. + address: "0.0.0.0" + + # The path to expose the monitoring endpoint at. Defaults to `/healthz` + endpoint: "/healthz" + + # The HTTP status code which reports that the bot is healthy/ready to + # process requests. Typically this should not be changed. Defaults to + # 200. + healthyStatus: 200 + + # The HTTP status code which reports that the bot is not healthy/ready. + # Defaults to 418. 
+ unhealthyStatus: 418 diff --git a/src/Mjolnir.ts b/src/Mjolnir.ts index 1030a9c..fd577b5 100644 --- a/src/Mjolnir.ts +++ b/src/Mjolnir.ts @@ -26,6 +26,7 @@ import ErrorCache, { ERROR_KIND_FATAL, ERROR_KIND_PERMISSION } from "./ErrorCach import { IProtection } from "./protections/IProtection"; import { PROTECTIONS } from "./protections/protections"; import { AutomaticRedactionQueue } from "./queues/AutomaticRedactionQueue"; +import { Healthz } from "./health/healthz"; export const STATE_NOT_STARTED = "not_started"; export const STATE_CHECKING_PERMISSIONS = "checking_permissions"; @@ -169,7 +170,18 @@ export class Mjolnir { } }).then(async () => { this.currentState = STATE_RUNNING; + Healthz.isHealthy = true; await logMessage(LogLevel.INFO, "Mjolnir@startup", "Startup complete. Now monitoring rooms."); + }).catch(async err => { + try { + LogService.error("Mjolnir", "Error during startup:"); + LogService.error("Mjolnir", err); + await logMessage(LogLevel.ERROR, "Mjolnir@startup", "Startup failed due to error - see console"); + } catch (e) { + // If we failed to handle the error, just crash + console.error(e); + process.exit(1); + } }); } diff --git a/src/config.ts b/src/config.ts index fb39bd5..0673a1c 100644 --- a/src/config.ts +++ b/src/config.ts @@ -43,6 +43,16 @@ interface IConfig { allowNoPrefix: boolean; additionalPrefixes: string[]; }; + health: { + healthz: { + enabled: boolean; + port: number; + address: string; + endpoint: string; + healthyStatus: number; + unhealthyStatus: number; + }; + }; /** * Config options only set at runtime. Try to avoid using the objects @@ -79,6 +89,16 @@ const defaultConfig: IConfig = { allowNoPrefix: false, additionalPrefixes: [], }, + health: { + healthz: { + enabled: false, + port: 8080, + address: "0.0.0.0", + endpoint: "/healthz", + healthyStatus: 200, + unhealthyStatus: 418, + }, + }, // Needed to make the interface happy. 
RUNTIME: {
diff --git a/src/health/healthz.ts b/src/health/healthz.ts
new file mode 100644
index 0000000..7f00309
--- /dev/null
+++ b/src/health/healthz.ts
@@ -0,0 +1,71 @@
+/*
+Copyright 2020 The Matrix.org Foundation C.I.C.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+import config from "../config";
+import * as http from "http";
+import { LogService } from "matrix-bot-sdk";
+
+/**
+ * Tracks the bot's health as an HTTP status code and can serve it over a
+ * small HTTP server configured by the `health.healthz` config section.
+ */
+export class Healthz {
+    // Status code to report; undefined until `isHealthy` is first assigned.
+    private static healthCode: number | undefined;
+
+    /**
+     * Records the health state as the configured healthy/unhealthy status code.
+     * @param val True to report `healthyStatus`, false to report `unhealthyStatus`.
+     */
+    public static set isHealthy(val: boolean) {
+        Healthz.healthCode = val ? config.health.healthz.healthyStatus : config.health.healthz.unhealthyStatus;
+    }
+
+    /**
+     * @returns True when the recorded status code equals the configured `healthyStatus`.
+     */
+    public static get isHealthy(): boolean {
+        return Healthz.healthCode === config.health.healthz.healthyStatus;
+    }
+
+    /**
+     * Starts the health webserver on the configured address and port. Requests
+     * to the configured endpoint path receive the current health status code;
+     * requests to any other path receive a 404.
+     */
+    public static listen() {
+        const server = http.createServer((req, res) => {
+            // Compare only the path: drop any query string before matching.
+            const path = (req.url || "/").split("?")[0];
+            if (path !== config.health.healthz.endpoint) {
+                res.writeHead(404);
+                res.end("not found");
+                return;
+            }
+
+            // writeHead() throws on an undefined status, so report unhealthy
+            // if nothing has assigned `isHealthy` yet.
+            const code = Healthz.healthCode !== undefined
+                ? Healthz.healthCode
+                : config.health.healthz.unhealthyStatus;
+            res.writeHead(code);
+            res.end(`health code: ${code}`);
+        });
+        server.listen(config.health.healthz.port, config.health.healthz.address, () => {
+            LogService.info("Healthz", `Listening for health requests on ${config.health.healthz.address}:${config.health.healthz.port}`);
+        });
+    }
+}
diff --git a/src/index.ts b/src/index.ts
index 52b6744..989545a 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -1,5 +1,5 @@
 /*
-Copyright 2019 The Matrix.org Foundation C.I.C.
+Copyright 2019, 2020 The Matrix.org Foundation C.I.C.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -30,6 +30,7 @@ import { Mjolnir } from "./Mjolnir"; import { logMessage } from "./LogProxy"; import { MembershipEvent } from "matrix-bot-sdk/lib/models/events/MembershipEvent"; import * as htmlEscape from "escape-html"; +import { Healthz } from "./health/healthz"; config.RUNTIME = {client: null}; @@ -38,6 +39,11 @@ LogService.setLevel(LogLevel.fromString(config.logLevel, LogLevel.DEBUG)); LogService.info("index", "Starting bot..."); +Healthz.isHealthy = false; // start off unhealthy +if (config.health.healthz.enabled) { + Healthz.listen(); +} + (async function () { const storage = new SimpleFsStorageProvider(path.join(config.dataPath, "bot.json"));