From 0940b67eb83d7a819ebe8ffb88eb2826e679f41d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Damir=20Jeli=C4=87?= Date: Mon, 17 Jun 2019 15:23:06 +0200 Subject: [PATCH] pantalaimon: Add the ability to index only encrypted rooms. Since some public rooms can be quite large, downloading the room history for such rooms can be quite resource intensive. Replicating the whole room history locally for such rooms might be undesirable. This patch adds the ability to only download room history and index messages for encrypted rooms. Do note that the search API supports searching in multiple rooms, thus if the indexing is restricted to encrypted rooms a search across multiple rooms will need to make a search request to the server as well as a local search. This mode is currently unsupported. --- docs/man/pantalaimon.5 | 7 +++++++ pantalaimon/client.py | 17 ++++++++++++----- pantalaimon/config.py | 6 +++++- pantalaimon/daemon.py | 30 +++++++++++++++++++++++++++--- 4 files changed, 51 insertions(+), 9 deletions(-) diff --git a/docs/man/pantalaimon.5 b/docs/man/pantalaimon.5 index 216afc7..a443ab8 100644 --- a/docs/man/pantalaimon.5 +++ b/docs/man/pantalaimon.5 @@ -66,6 +66,13 @@ Homeserver, note that this will make the search much slower. If this is set to .Nm pantalaimon will not make any additional HTTP requests and will leave some data fields in the search response empty. Defaults to "Off". +.It Cm IndexEncryptedOnly +A configuration option to decide if +.Nm pantalaimon +should fetch the history for +unencrypted rooms as well as for encrypted ones. If True, only the history for +encrypted rooms is fetched and indexed. Search requests for non-encrypted +rooms are forwarded to the Homeserver. 
.El .Pp Aditional to the homeserver section a special section with the name diff --git a/pantalaimon/client.py b/pantalaimon/client.py index a8e2143..bdf8ea0 100644 --- a/pantalaimon/client.py +++ b/pantalaimon/client.py @@ -180,6 +180,9 @@ class PanClient(AsyncClient): display_name = room.user_name(event.sender) avatar_url = room.avatar_url(event.sender) + if not room.encrypted and self.pan_conf.index_encrypted_only: + return + self.index.add_event(event, room.room_id, display_name, avatar_url) @property @@ -296,13 +299,18 @@ class PanClient(AsyncClient): self.next_batch ) - for room_id, room in response.rooms.join.items(): - if room.timeline.limited: + for room_id, room_info in response.rooms.join.items(): + if room_info.timeline.limited: + room = self.rooms[room_id] + + if not room.encrypted and self.pan_conf.index_encrypted_only: + continue + logger.info("Room {} had a limited timeline, queueing " "room for history fetching.".format( - self.rooms[room_id].display_name + room.display_name )) - task = FetchTask(room_id, room.timeline.prev_batch) + task = FetchTask(room_id, room_info.timeline.prev_batch) self.pan_store.save_fetcher_task(self.server_name, self.user_id, task) @@ -701,7 +709,6 @@ class PanClient(AsyncClient): event_dict["context"]["start"] = context.start event_dict["context"]["end"] = context.end - validate_json(search_terms, SEARCH_TERMS_SCHEMA) search_terms = search_terms["search_categories"]["room_events"] term = search_terms["search_term"] diff --git a/pantalaimon/config.py b/pantalaimon/config.py index 7e0f8ac..c00786d 100644 --- a/pantalaimon/config.py +++ b/pantalaimon/config.py @@ -35,6 +35,7 @@ class PanConfigParser(configparser.ConfigParser): "Notifications": "on", "UseKeyring": "yes", "SearchRequests": "off", + "IndexEncryptedOnly": "True", }, converters={ "address": parse_address, @@ -114,6 +115,7 @@ class ServerConfig: ignore_verification = attr.ib(type=bool, default=False) keyring = attr.ib(type=bool, default=True) search_requests = 
attr.ib(type=bool, default=False) + index_encrypted_only = attr.ib(type=bool, default=True) @attr.s @@ -171,6 +173,7 @@ class PanConfig: keyring = section.getboolean("UseKeyring") proxy = section.geturl("Proxy") search_requests = section.getboolean("SearchRequests") + index_encrypted_only = section.getboolean("IndexEncryptedOnly") listen_tuple = (listen_address, listen_port) @@ -189,7 +192,8 @@ class PanConfig: ssl, ignore_verification, keyring, - search_requests + search_requests, + index_encrypted_only ) self.servers[section_name] = server_conf diff --git a/pantalaimon/daemon.py b/pantalaimon/daemon.py index 8c1c6e5..1695b16 100755 --- a/pantalaimon/daemon.py +++ b/pantalaimon/daemon.py @@ -29,7 +29,8 @@ from nio import (Api, EncryptionError, LoginResponse, OlmTrustError, SendRetryError) from pantalaimon.client import (InvalidLimit, InvalidOrderByError, PanClient, - UnknownRoomError) + UnknownRoomError, SEARCH_TERMS_SCHEMA, + validate_json) from pantalaimon.index import InvalidQueryError from pantalaimon.log import logger from pantalaimon.store import ClientInfo, PanStore @@ -932,12 +933,12 @@ class ProxyDaemon: return self._unknown_token try: - search_categories = await request.json() + content = await request.json() except (JSONDecodeError, ContentTypeError): return self._not_json try: - result = await client.search(search_categories) + validate_json(content, SEARCH_TERMS_SCHEMA) except ValidationError: return web.json_response( { @@ -946,6 +947,29 @@ class ProxyDaemon: }, status=400, ) + + # If we're indexing only encrypted rooms check if the search request is + # for an encrypted room, if it isn't forward it to the server. + # TODO if the search request contains no rooms, that is a search in all + # rooms or a mix of encrypted and unencrypted rooms we need to combine + # a local search with a remote search. 
+ if self.conf.index_encrypted_only: + s_filter = content["search_categories"]["room_events"]["filter"] + rooms = s_filter.get("rooms", list(client.rooms)) + + for room_id in rooms: + try: + room = client.rooms[room_id] + if room.encrypted: + break + + except KeyError: + return await self.forward_to_web(request) + else: + return await self.forward_to_web(request) + + try: + result = await client.search(content) except (InvalidOrderByError, InvalidLimit, InvalidQueryError) as e: return web.json_response( {