Correct regex escape logic

* Fixes #7776

Implement QRegularExpression::escape within Tools::convertToRegex to allow usage on older Qt versions.

Also wrap EXACT_MODIFIER patterns in a non-capture group to prevent misinterpreted regex.
This commit is contained in:
Patrick Sean Klein 2022-05-21 16:21:33 +02:00 committed by Jonathan White
parent d1d191e2b0
commit 679b93b601
6 changed files with 136 additions and 18 deletions

View File

@ -1,6 +1,10 @@
/*
* Copyright (C) 2012 Felix Geyer <debfx@fobos.de>
* Copyright (C) 2017 Lennart Glauer <mail@lennart-glauer.de>
* Copyright (C) 2020 Giuseppe D'Angelo <dangelog@gmail.com>.
* Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com,
* author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>
* Copyright (C) 2021 The Qt Company Ltd.
* Copyright (C) 2021 KeePassXC Team <team@keepassxc.org>
*
* This program is free software: you can redistribute it and/or modify
@ -296,8 +300,59 @@ namespace Tools
return true;
}
// Escape regex symbols
auto regexEscape = QRegularExpression(R"re(([-[\]{}()+.,\\\/^$#|*?]))re");
/****************************************************************************
*
* Copyright (C) 2020 Giuseppe D'Angelo <dangelog@gmail.com>.
* Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com,
* author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>
* Copyright (C) 2021 The Qt Company Ltd. Contact: https://www.qt.io/licensing/
*
* This function is part of the QtCore module of the Qt Toolkit. And subject to the
* following licenses.
*
* GNU General Public License Usage
* Alternatively, this function may be used under the terms of the GNU
* General Public License version 2.0 or (at your option) the GNU General
* Public license version 3 or any later version approved by the KDE Free
* Qt Foundation. The licenses are as published by the Free Software
* Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
* included in the packaging of this file. Please review the following
* information to ensure the GNU General Public License requirements will
* be met: https://www.gnu.org/licenses/gpl-2.0.html and
* https://www.gnu.org/licenses/gpl-3.0.html.
*/
/**
 * Returns a copy of str in which every UTF-16 code unit outside of [a-zA-Z0-9_]
 * is preceded by a backslash (same idea as Perl's quotemeta, cf. perldoc -f quotemeta).
 *
 * @param str The text that should be matched literally.
 * @return The escaped text.
 */
QString escapeRegex(const QString& str)
{
    // Code units that are copied through without a leading backslash.
    const auto isPlainChar = [](const QChar ch) {
        return (ch >= u'a' && ch <= u'z') || (ch >= u'A' && ch <= u'Z') || (ch >= u'0' && ch <= u'9')
               || ch == u'_';
    };

    const auto length = str.size();
    QString escaped;
    // Worst case: every code unit gains a backslash.
    escaped.reserve(length * 2);

    int pos = 0;
    while (pos < length) {
        const QChar ch = str.at(pos);
        ++pos;

        if (isPlainChar(ch)) {
            escaped.append(ch);
            continue;
        }

        // Everything else starts with a backslash.
        escaped.append(u'\\');

        if (ch == QChar::Null) {
            // Unlike Perl, a literal NUL is written as backslash + '0' rather than
            // backslash + NUL, because pcre16_compile uses a NUL-terminated string.
            escaped.append(u'0');
            continue;
        }

        escaped.append(ch);
        // Keep a surrogate pair together: the code unit following a high surrogate
        // is copied as-is instead of receiving its own backslash.
        if (ch.isHighSurrogate() && pos < length) {
            escaped.append(str.at(pos));
            ++pos;
        }
    }

    escaped.squeeze();
    return escaped;
}
QRegularExpression convertToRegex(const QString& string, int opts)
{
@ -305,7 +360,7 @@ namespace Tools
// Wildcard support (*, ?, |)
if (opts & RegexConvertOpts::WILDCARD_ALL || opts & RegexConvertOpts::ESCAPE_REGEX) {
pattern.replace(regexEscape, "\\\\1");
pattern = escapeRegex(pattern);
if (opts & RegexConvertOpts::WILDCARD_UNLIMITED_MATCH) {
pattern.replace("\\*", ".*");
@ -318,9 +373,9 @@ namespace Tools
}
}
// Exact modifier
if (opts & RegexConvertOpts::EXACT_MATCH) {
pattern = "^" + pattern + "$";
// Exact modifier
pattern = "^(?:" + pattern + ")$";
}
auto regex = QRegularExpression(pattern);

View File

@ -45,18 +45,33 @@ namespace Tools
QString envSubstitute(const QString& filepath,
QProcessEnvironment environment = QProcessEnvironment::systemEnvironment());
/**
 * Escapes all characters in regex such that they do not receive any special treatment when used
 * in a regular expression. Every character not in a-zA-Z0-9_ is prefixed with a backslash
 * (note that the underscore is left untouched as well). A NUL character is written as the two
 * characters "\0", and the code unit following a high surrogate is kept directly behind it
 * without a backslash of its own.
 * @param regex The unescaped string that should be matched literally.
 * @return An escaped string safe to use in a regular expression.
 */
QString escapeRegex(const QString& regex);
enum RegexConvertOpts
{
DEFAULT = 0,
WILDCARD_UNLIMITED_MATCH = 0x1,
WILDCARD_SINGLE_MATCH = 0x2,
WILDCARD_LOGICAL_OR = 0x4,
WILDCARD_UNLIMITED_MATCH = 1,
WILDCARD_SINGLE_MATCH = 1 << 2,
WILDCARD_LOGICAL_OR = 1 << 3,
WILDCARD_ALL = WILDCARD_UNLIMITED_MATCH | WILDCARD_SINGLE_MATCH | WILDCARD_LOGICAL_OR,
EXACT_MATCH = 0x8,
CASE_SENSITIVE = 0x16,
ESCAPE_REGEX = 0x32,
EXACT_MATCH = 1 << 4,
CASE_SENSITIVE = 1 << 5,
ESCAPE_REGEX = 1 << 6,
};
/**
 * Converts input string to a regular expression according to the options specified in opts.
 * Note that the input is escaped (see escapeRegex) when ESCAPE_REGEX or any of the WILDCARD_*
 * options is set; otherwise convertToRegex assumes a proper regular expression as input.
 * With EXACT_MATCH the resulting pattern is wrapped as ^(?:pattern)$, so that the anchors apply
 * to the whole pattern.
 * @param string The input string. Assumed to be a proper regular expression unless ESCAPE_REGEX
 *               or a WILDCARD_* option is set.
 * @param opts Tools::RegexConvertOpts options the regex will be converted with.
 * @return The regular expression built from string and opts.
 */
QRegularExpression convertToRegex(const QString& string, int opts = RegexConvertOpts::DEFAULT);
template <typename RandomAccessIterator, typename T>

View File

@ -224,7 +224,7 @@ void TestEntrySearcher::testSearchTermParser()
QCOMPARE(terms.length(), 2);
QCOMPARE(terms[0].field, EntrySearcher::Field::Url);
QCOMPARE(terms[0].regex.pattern(), QString("^.*\\.google\\.com$"));
QCOMPARE(terms[0].regex.pattern(), QString("^(?:.*\\.google\\.com)$"));
QCOMPARE(terms[1].field, EntrySearcher::Field::Username);
QCOMPARE(terms[1].regex.pattern(), QString("\\d+\\w{2}"));
@ -237,7 +237,7 @@ void TestEntrySearcher::testSearchTermParser()
QCOMPARE(terms[0].field, EntrySearcher::Field::AttributeValue);
QCOMPARE(terms[0].word, QString("abc"));
QCOMPARE(terms[0].regex.pattern(), QString("^efg$"));
QCOMPARE(terms[0].regex.pattern(), QString("^(?:efg)$"));
QCOMPARE(terms[1].field, EntrySearcher::Field::AttributeValue);
QCOMPARE(terms[1].word, QString("def"));

View File

@ -82,6 +82,11 @@ void TestFdoSecrets::testSpecialCharsInAttributeValue()
QCOMPARE(res.count(), 1);
QCOMPARE(res[0]->title(), QStringLiteral("titleB"));
}
{
const auto term = Collection::attributeToTerm("testAttribute", "v|");
const auto res = EntrySearcher().search({term}, root.data());
QCOMPARE(res.count(), 0);
}
}
void TestFdoSecrets::testDBusPathParse()

View File

@ -165,6 +165,34 @@ void TestTools::testBackupFilePatternSubstitution()
QCOMPARE(Tools::substituteBackupFilePath(pattern, dbFilePath), expectedSubstitution);
}
/**
 * Data rows for testEscapeRegex: each row pairs an input string with the
 * output expected from Tools::escapeRegex.
 */
void TestTools::testEscapeRegex_data()
{
    QTest::addColumn<QString>("input");
    QTest::addColumn<QString>("expected");

    // All characters that escapeRegex passes through unchanged: digits, the
    // underscore and the ASCII letters.
    QString all_regular_characters = "0123456789_";
    // Inclusive upper bounds, so that 'z' and 'Z' are part of the row as well
    // (a `c != 'z'` condition would stop one letter short).
    for (char c = 'a'; c <= 'z'; ++c) {
        all_regular_characters += QChar::fromLatin1(c);
    }
    for (char c = 'A'; c <= 'Z'; ++c) {
        all_regular_characters += QChar::fromLatin1(c);
    }

    QTest::newRow("Regular characters should not be escaped") << all_regular_characters << all_regular_characters;
    QTest::newRow("Special characters should be escaped") << R"(.^$*+-?()[]{}|\)"
                                                          << R"(\.\^\$\*\+\-\?\(\)\[\]\{\}\|\\)";
    // The explicit length keeps the embedded NUL (and the trailing 'c') in the input.
    QTest::newRow("Null character") << QString::fromLatin1("ab\0c", 4) << "ab\\0c";
}
void TestTools::testEscapeRegex()
{
QFETCH(QString, input);
QFETCH(QString, expected);
auto actual = Tools::escapeRegex(input);
QCOMPARE(actual, expected);
}
void TestTools::testConvertToRegex()
{
QFETCH(QString, input);
@ -185,16 +213,29 @@ void TestTools::testConvertToRegex_data()
QTest::newRow("No Options") << input << static_cast<int>(Tools::RegexConvertOpts::DEFAULT)
<< QString(R"(te|st*t?[5]^(test);',.)");
// Escape regex
QTest::newRow("Escape Regex") << input << static_cast<int>(Tools::RegexConvertOpts::ESCAPE_REGEX)
<< Tools::escapeRegex(input);
QTest::newRow("Escape Regex and exact match")
<< input << static_cast<int>(Tools::RegexConvertOpts::ESCAPE_REGEX | Tools::RegexConvertOpts::EXACT_MATCH)
<< "^(?:" + Tools::escapeRegex(input) + ")$";
// Exact match does not escape the pattern
QTest::newRow("Exact Match") << input << static_cast<int>(Tools::RegexConvertOpts::EXACT_MATCH)
<< QString(R"(^te|st*t?[5]^(test);',.$)");
<< QString(R"(^(?:te|st*t?[5]^(test);',.)$)");
// Exact match with improper regex
QTest::newRow("Exact Match") << ")av(" << static_cast<int>(Tools::RegexConvertOpts::EXACT_MATCH)
<< QString(R"(^(?:)av()$)");
QTest::newRow("Exact Match & Wildcard")
<< input << static_cast<int>(Tools::RegexConvertOpts::EXACT_MATCH | Tools::RegexConvertOpts::WILDCARD_ALL)
<< QString(R"(^te|st.*t.\[5\]\^\(test\);'\,\.$)");
<< QString(R"(^(?:te|st.*t.\[5\]\^\(test\)\;\'\,\.)$)");
QTest::newRow("Wildcard Single Match") << input << static_cast<int>(Tools::RegexConvertOpts::WILDCARD_SINGLE_MATCH)
<< QString(R"(te\|st\*t.\[5\]\^\(test\);'\,\.)");
<< QString(R"(te\|st\*t.\[5\]\^\(test\)\;\'\,\.)");
QTest::newRow("Wildcard OR") << input << static_cast<int>(Tools::RegexConvertOpts::WILDCARD_LOGICAL_OR)
<< QString(R"(te|st\*t\?\[5\]\^\(test\);'\,\.)");
<< QString(R"(te|st\*t\?\[5\]\^\(test\)\;\'\,\.)");
QTest::newRow("Wildcard Unlimited Match")
<< input << static_cast<int>(Tools::RegexConvertOpts::WILDCARD_UNLIMITED_MATCH)
<< QString(R"(te\|st.*t\?\[5\]\^\(test\);'\,\.)");
<< QString(R"(te\|st.*t\?\[5\]\^\(test\)\;\'\,\.)");
}

View File

@ -31,6 +31,8 @@ private slots:
void testValidUuid();
void testBackupFilePatternSubstitution_data();
void testBackupFilePatternSubstitution();
void testEscapeRegex();
void testEscapeRegex_data();
void testConvertToRegex();
void testConvertToRegex_data();
};