Add ability to run multiple pusher instances (#7855)

This reuses the same scheme as federation sender sharding
This commit is contained in:
Erik Johnston 2020-07-16 14:06:28 +01:00 committed by GitHub
parent a827838706
commit 649a7ead5c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 293 additions and 82 deletions

View file

@ -19,9 +19,11 @@ import argparse
import errno
import os
from collections import OrderedDict
from hashlib import sha256
from textwrap import dedent
from typing import Any, MutableMapping, Optional
from typing import Any, List, MutableMapping, Optional
import attr
import yaml
@ -717,4 +719,36 @@ def find_config_files(search_paths):
return config_files
__all__ = ["Config", "RootConfig"]
@attr.s
class ShardedWorkerHandlingConfig:
    """Decides which worker instance owns a given piece of sharded work.

    The federation senders, for example, use this to pick the instance that
    sends traffic to a particular destination; that destination is what gets
    passed as the `key` argument below.
    """

    # Names of the instances the work is split between.
    instances = attr.ib(type=List[str])

    def should_handle(self, instance_name: str, key: str) -> bool:
        """Return True if `instance_name` is the instance responsible for `key`.
        """
        shards = self.instances

        # With no instance list, or only a single entry, there is nothing to
        # shard over, so the answer is yes regardless of `instance_name`.
        if not shards:
            return True
        shard_count = len(shards)
        if shard_count == 1:
            return True

        # Hash the key, read the digest as an integer and reduce it modulo the
        # number of instances; the instance sitting at that index owns the key.
        #
        # (Strictly speaking the modulo makes the distribution very slightly
        # non-uniform, but the hash is so large that the bias is negligible.)
        digest = sha256(key.encode("utf8")).digest()
        slot = int.from_bytes(digest, byteorder="little") % shard_count
        return shards[slot] == instance_name
# Names exported by a wildcard import (`from ... import *`) of this module.
__all__ = ["Config", "RootConfig", "ShardedWorkerHandlingConfig"]