Correct regex escape logic

* Fixes #7776

Implement QRegularExpression::escape within Tools::convertToRegex to allow usage on older Qt versions.

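Also wrap EXACT_MATCH patterns in a non-capturing group so that the ^ and $ anchors apply to the whole pattern (for example, to every branch of an alternation) instead of being misinterpreted.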
This commit is contained in:
Patrick Sean Klein 2022-05-21 16:21:33 +02:00 committed by Jonathan White
parent 924eb6dbc4
commit e16c007d43
6 changed files with 136 additions and 18 deletions

View file

@ -1,6 +1,10 @@
/*
* Copyright (C) 2012 Felix Geyer <debfx@fobos.de>
* Copyright (C) 2017 Lennart Glauer <mail@lennart-glauer.de>
* Copyright (C) 2020 Giuseppe D'Angelo <dangelog@gmail.com>.
* Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com,
* author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>
* Copyright (C) 2021 The Qt Company Ltd.
* Copyright (C) 2021 KeePassXC Team <team@keepassxc.org>
*
* This program is free software: you can redistribute it and/or modify
@ -296,8 +300,59 @@ namespace Tools
return true;
}
// Escape regex symbols
auto regexEscape = QRegularExpression(R"re(([-[\]{}()+.,\\\/^$#|*?]))re");
/****************************************************************************
*
* Copyright (C) 2020 Giuseppe D'Angelo <dangelog@gmail.com>.
* Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com,
* author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>
* Copyright (C) 2021 The Qt Company Ltd. Contact: https://www.qt.io/licensing/
*
* This function is part of the QtCore module of the Qt Toolkit. And subject to the
* following licenses.
*
* GNU General Public License Usage
* Alternatively, this function may be used under the terms of the GNU
* General Public License version 2.0 or (at your option) the GNU General
* Public license version 3 or any later version approved by the KDE Free
* Qt Foundation. The licenses are as published by the Free Software
* Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
* included in the packaging of this file. Please review the following
* information to ensure the GNU General Public License requirements will
* be met: https://www.gnu.org/licenses/gpl-2.0.html and
* https://www.gnu.org/licenses/gpl-3.0.html.
*/
/**
 * Returns a copy of str in which every UTF-16 code unit outside [a-zA-Z0-9_]
 * is preceded by a backslash (cf. perldoc -f quotemeta).
 */
QString escapeRegex(const QString& str)
{
    const auto length = str.size();

    QString escaped;
    // Worst case: every code unit gains a leading backslash
    escaped.reserve(length * 2);

    // Only ASCII letters, digits and the underscore are copied through untouched
    const auto isWordChar = [](const QChar ch) {
        return (ch >= u'a' && ch <= u'z') || (ch >= u'A' && ch <= u'Z') || (ch >= u'0' && ch <= u'9')
               || ch == u'_';
    };

    int pos = 0;
    while (pos < length) {
        const QChar ch = str.at(pos);
        ++pos;

        if (isWordChar(ch)) {
            escaped.append(ch);
            continue;
        }

        // Everything else starts with a backslash
        escaped.append(u'\\');

        if (ch == QChar::Null) {
            // unlike Perl, a literal NUL must be escaped with
            // "\\0" (backslash + 0) and not "\\\0" (backslash + NUL),
            // because pcre16_compile uses a NUL-terminated string
            escaped.append(u'0');
            continue;
        }

        escaped.append(ch);

        // Keep a surrogate pair together: the code unit following a high
        // surrogate is copied as-is and not examined on its own
        if (ch.isHighSurrogate() && pos < length) {
            escaped.append(str.at(pos));
            ++pos;
        }
    }

    // Release the capacity that was reserved but not needed
    escaped.squeeze();
    return escaped;
}
QRegularExpression convertToRegex(const QString& string, int opts)
{
@ -305,7 +360,7 @@ namespace Tools
// Wildcard support (*, ?, |)
if (opts & RegexConvertOpts::WILDCARD_ALL || opts & RegexConvertOpts::ESCAPE_REGEX) {
pattern.replace(regexEscape, "\\\\1");
pattern = escapeRegex(pattern);
if (opts & RegexConvertOpts::WILDCARD_UNLIMITED_MATCH) {
pattern.replace("\\*", ".*");
@ -318,9 +373,9 @@ namespace Tools
}
}
// Exact modifier
if (opts & RegexConvertOpts::EXACT_MATCH) {
pattern = "^" + pattern + "$";
// Exact modifier
pattern = "^(?:" + pattern + ")$";
}
auto regex = QRegularExpression(pattern);

View file

@ -45,18 +45,33 @@ namespace Tools
QString envSubstitute(const QString& filepath,
QProcessEnvironment environment = QProcessEnvironment::systemEnvironment());
/**
* Escapes all characters in regex such that they do not receive any special treatment when used
* in a regular expression. Essentially, this function escapes any characters not in a-zA-Z0-9_
* (the underscore is left unescaped as well).
* @param regex The unescaped regular expression string.
* @return An escaped string safe to use in a regular expression.
*/
QString escapeRegex(const QString& regex);
// Option flags for convertToRegex. The values are meant to be combined with
// bitwise OR into an int and tested with bitwise AND.
// NOTE(review): this listing carries two sets of values for the same enumerator
// names (hex literals and shift expressions); it looks like a diff view with the
// removed and the added lines interleaved - confirm which set is the current one.
enum RegexConvertOpts
{
DEFAULT = 0,
WILDCARD_UNLIMITED_MATCH = 0x1,
WILDCARD_SINGLE_MATCH = 0x2,
WILDCARD_LOGICAL_OR = 0x4,
WILDCARD_UNLIMITED_MATCH = 1,
WILDCARD_SINGLE_MATCH = 1 << 2,
WILDCARD_LOGICAL_OR = 1 << 3,
// Union of the three wildcard flags
WILDCARD_ALL = WILDCARD_UNLIMITED_MATCH | WILDCARD_SINGLE_MATCH | WILDCARD_LOGICAL_OR,
EXACT_MATCH = 0x8,
// 0x16 is 22 (binary 10110) and 0x32 is 50 (binary 110010): neither is a single
// bit, so with the hex set these two flags share bits with each other and with
// the lower flags when tested with '&'.
CASE_SENSITIVE = 0x16,
ESCAPE_REGEX = 0x32,
// With the shift expressions every flag occupies exactly one distinct bit.
EXACT_MATCH = 1 << 4,
CASE_SENSITIVE = 1 << 5,
ESCAPE_REGEX = 1 << 6,
};
/**
* Converts input string to a regular expression according to the options specified in opts.
* Note that, unless ESCAPE_REGEX is set, convertToRegex assumes a proper regular expression as input.
* The input is also escaped when any of the WILDCARD_* options is set.
* @param string The input string. Assumed to be a proper regular expression unless ESCAPE_REGEX is set.
* @param opts Tools::RegexConvertOpts options (combined with bitwise OR) the regex will be converted with.
* @return The regular expression built from string and opts.
*/
QRegularExpression convertToRegex(const QString& string, int opts = RegexConvertOpts::DEFAULT);
template <typename RandomAccessIterator, typename T>