From 121d4cf98f94b44ea6c2d5abe04bc06a20b4ccdd Mon Sep 17 00:00:00 2001 From: Gnuxie <50846879+Gnuxie@users.noreply.github.com> Date: Tue, 9 Aug 2022 10:57:38 +0100 Subject: [PATCH] Mjolnir would apply stale ACL to rooms during batching (#331) * banListTest would applyACL before rules appeared in `/state`. Mjolnir will call applyServerAcls several times while a policy list is being updated, sometimes concurrently. This means a request to set a server ACL in a room which has old data can finish after a more recent recent request with the correct ACL. This means that the old ACL gets applied to the rooms (for a moment). This is a follow up from https://github.com/matrix-org/mjolnir/pull/314/commits/551065815e65e6117d7e8105afd6f5da9b87c8af * Only allow one invocation of applyServerAcls at a time as to not conflict with each other by using a promise chain. We don't use the throttle queue because we don't want to be blocked by other background tasks. We don't make another throttle queue because we don't want throttling and we don't want to delay the ACL application, which can happen even with throttle time of 0. --- src/Mjolnir.ts | 9 +++++++++ src/actions/ApplyAcl.ts | 10 ++++++++++ test/integration/banListTest.ts | 11 +++++++---- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/Mjolnir.ts b/src/Mjolnir.ts index 91525f2..5070bdb 100644 --- a/src/Mjolnir.ts +++ b/src/Mjolnir.ts @@ -104,6 +104,15 @@ export class Mjolnir { private webapis: WebAPIs; private protectedRoomActivityTracker: ProtectedRoomActivityTracker; public taskQueue: ThrottlingQueue; + /** + * Used to provide mutual exclusion when synchronizing rooms with the state of a policy list. + * This is because requests operating with rules from an older version of the list that are slow + * could race & give the room an inconsistent state. An example is if we add multiple m.policy.rule.server rules, + * which would cause several requests to a room to send a new m.room.server_acl event. + * These requests could finish in any order, which has left rooms with an inconsistent server_acl event + * until Mjolnir synchronises the room with its policy lists again, which can be in the region of hours. + */ + public aclChain: Promise = Promise.resolve(); /* * Config-enabled polling of reports in Synapse, so Mjolnir can react to reports */ diff --git a/src/actions/ApplyAcl.ts b/src/actions/ApplyAcl.ts index 44767ce..13c7f4d 100644 --- a/src/actions/ApplyAcl.ts +++ b/src/actions/ApplyAcl.ts @@ -31,6 +31,16 @@ import { ERROR_KIND_FATAL, ERROR_KIND_PERMISSION } from "../ErrorCache"; * @param {Mjolnir} mjolnir The Mjolnir client to apply the ACLs with. */ export async function applyServerAcls(lists: PolicyList[], roomIds: string[], mjolnir: Mjolnir): Promise { + // we need to provide mutual exclusion so that we do not have requests updating the m.room.server_acl event + // finish out of order and therefore leave the room out of sync with the policy lists. + return new Promise((resolve, reject) => { + mjolnir.aclChain = mjolnir.aclChain + .then(() => _applyServerAcls(lists, roomIds, mjolnir)) + .then(resolve, reject); + }); +} + +async function _applyServerAcls(lists: PolicyList[], roomIds: string[], mjolnir: Mjolnir): Promise { const serverName: string = new UserID(await mjolnir.client.getUserId()).domain; // Construct a server ACL first diff --git a/test/integration/banListTest.ts b/test/integration/banListTest.ts index 349aa37..eb3984b 100644 --- a/test/integration/banListTest.ts +++ b/test/integration/banListTest.ts @@ -261,20 +261,23 @@ describe('Test: ACL updates will batch when rules are added in succession.', fun mjolnir.joinRoom(banListId); this.mjolnir!.watchList(Permalinks.forRoom(banListId)); const acl = new ServerAcl(serverName).denyIpAddresses().allowServer("*"); - for (let i = 0; i < 200; i++) { + const evilServerCount = 200; + for (let i = 0; i < evilServerCount; i++) { const badServer = `${i}.evil.com`; acl.denyServer(badServer); await createPolicyRule(moderator, banListId, RULE_SERVER, badServer, `Rule #${i}`); // Give them a bit of a spread over time. await new Promise(resolve => setTimeout(resolve, 5)); } - // give the events a chance to appear in the response to `/state`, since this is a problem. - await new Promise(resolve => setTimeout(resolve, 2000)); - // We do this because it should force us to wait until all the ACL events have been applied. // Even if that does mean the last few events will not go through batching... await this.mjolnir!.syncLists(); + // At this point we check that the state within Mjolnir is internally consistent, this is just because debugging the following + // is a pita. + const list: PolicyList = this.mjolnir.policyLists[0]!; + assert.equal(list.serverRules.length, evilServerCount, `There should be ${evilServerCount} rules in here`); + // Check each of the protected rooms for ACL events and make sure they were batched and are correct. await Promise.all(protectedRooms.map(async room => { const roomAcl = await mjolnir.getRoomStateEvent(room, "m.room.server_acl", "");