Merge branch 'master' into develop

This commit is contained in:
Andrew Morgan 2022-06-28 15:19:48 +01:00
commit 6cba6a51af
5 changed files with 84 additions and 25 deletions

View File

@ -1,3 +1,24 @@
Synapse 1.61.1 (2022-06-28)
===========================
This patch release fixes a security issue regarding URL previews, affecting all prior versions of Synapse. Server administrators are encouraged to update Synapse as soon as possible. We are not aware of this vulnerability being exploited in the wild.
Server administrators who are unable to update Synapse may use the workarounds described in the linked GitHub Security Advisory below.
## Security advisory
The following issue is fixed in 1.61.1.
* [GHSA-22p3-qrh9-cx32](https://github.com/matrix-org/synapse/security/advisories/GHSA-22p3-qrh9-cx32) / [CVE-2022-31052](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-31052)
Synapse instances with the [`url_preview_enabled`](https://matrix-org.github.io/synapse/v1.61/usage/configuration/config_documentation.html#media-store) homeserver config option set to `true` are affected. URL previews of some web pages can lead to unbounded recursion, causing either the request to fail or, in some cases, the running Synapse process to crash.
Requesting URL previews requires authentication. Nevertheless, this issue can be exploited either by malicious users on the homeserver, or by remote users sending URLs for which a local user's client may automatically request a URL preview.
Homeservers with the `url_preview_enabled` configuration option set to `false` (the default) are unaffected. Instances with the `enable_media_repo` configuration option set to `false` are also unaffected, as this also disables URL preview functionality.
Fixed by [fa1308061802ac7b7d20e954ba7372c5ac292333](https://github.com/matrix-org/synapse/commit/fa1308061802ac7b7d20e954ba7372c5ac292333).
Synapse 1.61.0 (2022-06-14) Synapse 1.61.0 (2022-06-14)
=========================== ===========================

6
debian/changelog vendored
View File

@ -1,3 +1,9 @@
matrix-synapse-py3 (1.61.1) stable; urgency=medium
* New Synapse release 1.61.1.
-- Synapse Packaging team <packages@matrix.org> Tue, 28 Jun 2022 14:33:46 +0100
matrix-synapse-py3 (1.61.0) stable; urgency=medium matrix-synapse-py3 (1.61.0) stable; urgency=medium
* New Synapse release 1.61.0. * New Synapse release 1.61.0.

View File

@ -54,7 +54,7 @@ skip_gitignore = true
[tool.poetry] [tool.poetry]
name = "matrix-synapse" name = "matrix-synapse"
version = "1.61.0" version = "1.61.1"
description = "Homeserver for the Matrix decentralised comms protocol" description = "Homeserver for the Matrix decentralised comms protocol"
authors = ["Matrix.org Team and Contributors <packages@matrix.org>"] authors = ["Matrix.org Team and Contributors <packages@matrix.org>"]
license = "Apache-2.0" license = "Apache-2.0"

View File

@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import codecs import codecs
import itertools
import logging import logging
import re import re
from typing import ( from typing import (
@ -21,7 +20,7 @@ from typing import (
Dict, Dict,
Generator, Generator,
Iterable, Iterable,
Optional, List, Optional,
Set, Set,
Union, Union,
) )
@ -354,7 +353,7 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]:
from lxml import etree from lxml import etree
TAGS_TO_REMOVE = ( TAGS_TO_REMOVE = {
"header", "header",
"nav", "nav",
"aside", "aside",
@ -369,31 +368,42 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]:
"img", "img",
"picture", "picture",
etree.Comment, etree.Comment,
) }
# Split all the text nodes into paragraphs (by splitting on new # Split all the text nodes into paragraphs (by splitting on new
# lines) # lines)
text_nodes = ( text_nodes = (
re.sub(r"\s+", "\n", el).strip() re.sub(r"\s+", "\n", el).strip()
for el in _iterate_over_text(tree.find("body"), *TAGS_TO_REMOVE) for el in _iterate_over_text(tree.find("body"), TAGS_TO_REMOVE)
) )
return summarize_paragraphs(text_nodes) return summarize_paragraphs(text_nodes)
def _iterate_over_text( def _iterate_over_text(
tree: "etree.Element", *tags_to_ignore: Union[str, "etree.Comment"] tree: Optional["etree.Element"],
tags_to_ignore: Set[Union[str, "etree.Comment"]],
stack_limit: int = 1024,
) -> Generator[str, None, None]: ) -> Generator[str, None, None]:
"""Iterate over the tree returning text nodes in a depth first fashion, """Iterate over the tree returning text nodes in a depth first fashion,
skipping text nodes inside certain tags. skipping text nodes inside certain tags.
Args:
tree: The parent element to iterate. Can be None if there isn't one.
tags_to_ignore: Set of tags to ignore
stack_limit: Maximum stack size limit for depth-first traversal.
Nodes will be dropped if this limit is hit, which may truncate the
textual result.
Intended to limit the maximum working memory when generating a preview.
""" """
# This is basically a stack that we extend using itertools.chain.
# This will either consist of an element to iterate over *or* a string if tree is None:
return
# This is a stack whose items are elements to iterate over *or* strings
# to be returned. # to be returned.
elements = iter([tree]) elements: List[Union[str, "etree.Element"]] = [tree]
while True: while elements:
el = next(elements, None) el = elements.pop()
if el is None:
return
if isinstance(el, str): if isinstance(el, str):
yield el yield el
@ -407,17 +417,22 @@ def _iterate_over_text(
if el.text: if el.text:
yield el.text yield el.text
# We add to the stack all the elements children, interspersed with # We add to the stack all the element's children, interspersed with
# each child's tail text (if it exists). The tail text of a node # each child's tail text (if it exists).
# is text that comes *after* the node, so we always include it even #
# if we ignore the child node. # We iterate in reverse order so that earlier pieces of text appear
elements = itertools.chain( # closer to the top of the stack.
itertools.chain.from_iterable( # Basically a flatmap for child in el.iterchildren(reversed=True):
[child, child.tail] if child.tail else [child] if len(elements) > stack_limit:
for child in el.iterchildren() # We've hit our limit for working memory
), break
elements,
) if child.tail:
# The tail text of a node is text that comes *after* the node,
# so we always include it even if we ignore the child node.
elements.append(child.tail)
elements.append(child)
def summarize_paragraphs( def summarize_paragraphs(

View File

@ -411,6 +411,23 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
}, },
) )
def test_nested_nodes(self) -> None:
    """Text from nested inline elements must come out in document order.

    The markup nests <b>, <u>, <svg> and <strong> inside a link. The expected
    description leaves out the text inside the <svg> and keeps every other
    piece, tail text included, in its original (non-reversed) order.
    """
    expected = {
        "og:title": None,
        "og:description": "Welcome\n\nthe bold\n\nand underlined text\n\nand\n\nsome\n\ntail text",
    }
    markup = b"""
<a href="somewhere">Welcome <b>the bold <u>and underlined text <svg>
with a cheeky SVG</svg></u> and <strong>some</strong> tail text</b></a>
"""
    parsed = decode_body(markup, "http://example.com/test.html")
    self.assertEqual(parse_html_to_open_graph(parsed), expected)
class MediaEncodingTestCase(unittest.TestCase): class MediaEncodingTestCase(unittest.TestCase):
def test_meta_charset(self) -> None: def test_meta_charset(self) -> None: