pantalaimon: Add the ability to index only encrypted rooms.

Since some public rooms can be quite large, downloading the room history
for such rooms can be quite resource intensive.

Replicating the whole room history locally for such rooms might be
undesirable. This patch adds the ability to only download room history
and index messages for encrypted rooms.

Do note that the search API supports searching in multiple rooms. Thus,
if the indexing is restricted to encrypted rooms, a search across
multiple rooms will need to make a search request to the server as well
as a local search. Such combined searches are currently unsupported.
This commit is contained in:
Damir Jelić 2019-06-17 15:23:06 +02:00
parent 83f62b0378
commit 0940b67eb8
4 changed files with 51 additions and 9 deletions

View File

@ -66,6 +66,13 @@ Homeserver, note that this will make the search much slower. If this is set to
.Nm pantalaimon
will not make any additional HTTP requests and will leave some data fields in
the search response empty. Defaults to "Off".
.It Cm IndexEncryptedOnly
A configuration option to decide if
.Nm pantalaimon
should fetch the history for
unencrypted rooms as well as for encrypted ones. If True, only the history for
encrypted rooms is fetched and indexed. Search requests for non-encrypted
rooms are forwarded to the Homeserver.
.El
.Pp
Additional to the homeserver section a special section with the name

View File

@ -180,6 +180,9 @@ class PanClient(AsyncClient):
display_name = room.user_name(event.sender)
avatar_url = room.avatar_url(event.sender)
if not room.encrypted and self.pan_conf.index_encrypted_only:
return
self.index.add_event(event, room.room_id, display_name, avatar_url)
@property
@ -296,13 +299,18 @@ class PanClient(AsyncClient):
self.next_batch
)
for room_id, room in response.rooms.join.items():
if room.timeline.limited:
for room_id, room_info in response.rooms.join.items():
if room_info.timeline.limited:
room = self.rooms[room_id]
if not room.encrypted and self.pan_conf.index_encrypted_only:
continue
logger.info("Room {} had a limited timeline, queueing "
"room for history fetching.".format(
self.rooms[room_id].display_name
room.display_name
))
task = FetchTask(room_id, room.timeline.prev_batch)
task = FetchTask(room_id, room_info.timeline.prev_batch)
self.pan_store.save_fetcher_task(self.server_name,
self.user_id, task)
@ -701,7 +709,6 @@ class PanClient(AsyncClient):
event_dict["context"]["start"] = context.start
event_dict["context"]["end"] = context.end
validate_json(search_terms, SEARCH_TERMS_SCHEMA)
search_terms = search_terms["search_categories"]["room_events"]
term = search_terms["search_term"]

View File

@ -35,6 +35,7 @@ class PanConfigParser(configparser.ConfigParser):
"Notifications": "on",
"UseKeyring": "yes",
"SearchRequests": "off",
"IndexEncryptedOnly": "True",
},
converters={
"address": parse_address,
@ -114,6 +115,7 @@ class ServerConfig:
ignore_verification = attr.ib(type=bool, default=False)
keyring = attr.ib(type=bool, default=True)
search_requests = attr.ib(type=bool, default=False)
index_encrypted_only = attr.ib(type=bool, default=True)
@attr.s
@ -171,6 +173,7 @@ class PanConfig:
keyring = section.getboolean("UseKeyring")
proxy = section.geturl("Proxy")
search_requests = section.getboolean("SearchRequests")
index_encrypted_only = section.getboolean("IndexEncryptedOnly")
listen_tuple = (listen_address, listen_port)
@ -189,7 +192,8 @@ class PanConfig:
ssl,
ignore_verification,
keyring,
search_requests
search_requests,
index_encrypted_only
)
self.servers[section_name] = server_conf

View File

@ -29,7 +29,8 @@ from nio import (Api, EncryptionError, LoginResponse, OlmTrustError,
SendRetryError)
from pantalaimon.client import (InvalidLimit, InvalidOrderByError, PanClient,
UnknownRoomError)
UnknownRoomError, SEARCH_TERMS_SCHEMA,
validate_json)
from pantalaimon.index import InvalidQueryError
from pantalaimon.log import logger
from pantalaimon.store import ClientInfo, PanStore
@ -932,12 +933,12 @@ class ProxyDaemon:
return self._unknown_token
try:
search_categories = await request.json()
content = await request.json()
except (JSONDecodeError, ContentTypeError):
return self._not_json
try:
result = await client.search(search_categories)
validate_json(content, SEARCH_TERMS_SCHEMA)
except ValidationError:
return web.json_response(
{
@ -946,6 +947,29 @@ class ProxyDaemon:
},
status=400,
)
# If we're indexing only encrypted rooms check if the search request is
# for an encrypted room, if it isn't forward it to the server.
# TODO if the search request contains no rooms (that is, a search in all
# rooms) or a mix of encrypted and unencrypted rooms, we need to combine
# a local search with a remote search.
if self.conf.index_encrypted_only:
s_filter = content["search_categories"]["room_events"]["filter"]
rooms = s_filter.get("rooms", list(client.rooms))
for room_id in rooms:
try:
room = client.rooms[room_id]
if room.encrypted:
break
except KeyError:
return await self.forward_to_web(request)
else:
return await self.forward_to_web(request)
try:
result = await client.search(content)
except (InvalidOrderByError, InvalidLimit, InvalidQueryError) as e:
return web.json_response(
{