From 8a92cec03f3f97bfb74680a946345cc784b7b587 Mon Sep 17 00:00:00 2001 From: Felix Geyer Date: Tue, 2 Feb 2016 00:38:58 +0100 Subject: [PATCH] Keep valid surrogate pairs in stripInvalidXml10Chars(). --- src/format/KeePass2XmlWriter.cpp | 16 +++++++--- tests/TestKeePass2XmlReader.cpp | 54 ++++++++++++++++++++++++++++++-- tests/TestKeePass2XmlReader.h | 1 + 3 files changed, 64 insertions(+), 7 deletions(-) diff --git a/src/format/KeePass2XmlWriter.cpp b/src/format/KeePass2XmlWriter.cpp index 0d83935a0..1eb8dc114 100644 --- a/src/format/KeePass2XmlWriter.cpp +++ b/src/format/KeePass2XmlWriter.cpp @@ -552,11 +552,19 @@ QString KeePass2XmlWriter::colorPartToString(int value) QString KeePass2XmlWriter::stripInvalidXml10Chars(QString str) { for (int i = str.size() - 1; i >= 0; i--) { - const ushort uc = str.at(i).unicode(); + const QChar ch = str.at(i); + const ushort uc = ch.unicode(); - if ((uc < 0x20 && uc != 0x09 && uc != 0x0A && uc != 0x0D) - || (uc > 0xD7FF && uc < 0xE000) - || (uc > 0xFFFD)) + if (ch.isLowSurrogate() && i != 0 && str.at(i - 1).isHighSurrogate()) { + // keep valid surrogate pair + i--; + } + else if ((uc < 0x20 && uc != 0x09 && uc != 0x0A && uc != 0x0D) // control chracters + || (uc >= 0x7F && uc <= 0x84) // control chracters, valid but discouraged by XML + || (uc >= 0x86 && uc <= 0x9F) // control chracters, valid but discouraged by XML + || (uc > 0xFFFD) // noncharacter + || ch.isLowSurrogate() // single low surrogate + || ch.isHighSurrogate()) // single high surrogate { qWarning("Stripping invalid XML 1.0 codepoint %x", uc); str.remove(i, 1); diff --git a/tests/TestKeePass2XmlReader.cpp b/tests/TestKeePass2XmlReader.cpp index f8ca29614..dd8132708 100644 --- a/tests/TestKeePass2XmlReader.cpp +++ b/tests/TestKeePass2XmlReader.cpp @@ -68,6 +68,18 @@ QDateTime TestKeePass2XmlReader::genDT(int year, int month, int day, int hour, i return QDateTime(date, time, Qt::UTC); } +QByteArray TestKeePass2XmlReader::strToBytes(const QString& str) +{ + QByteArray result; + + for (int i = 0; i < str.size(); i++) { + result.append(str.at(i).unicode() >> 8); + result.append(str.at(i).unicode() & 0xFF); + } + + return result; +} + void TestKeePass2XmlReader::initTestCase() { QVERIFY(Crypto::init()); @@ -414,10 +426,35 @@ void TestKeePass2XmlReader::testInvalidXmlChars() { QScopedPointer dbWrite(new Database()); + QString strPlainInvalid = QString().append(QChar(0x02)).append(QChar(0x19)) + .append(QChar(0xFFFE)).append(QChar(0xFFFF)); + QString strPlainValid = QString().append(QChar(0x09)).append(QChar(0x0A)) + .append(QChar(0x20)).append(QChar(0xD7FF)) + .append(QChar(0xE000)).append(QChar(0xFFFD)); + // U+10437 in UTF-16: D801 DC37 + // high low surrogate + QString strSingleHighSurrogate1 = QString().append(QChar(0xD801)); + QString strSingleHighSurrogate2 = QString().append(QChar(0x31)).append(QChar(0xD801)).append(QChar(0x32)); + QString strHighHighSurrogate = QString().append(QChar(0xD801)).append(QChar(0xD801)); + QString strSingleLowSurrogate1 = QString().append(QChar(0xDC37)); + QString strSingleLowSurrogate2 = QString().append(QChar((0x31))).append(QChar(0xDC37)).append(QChar(0x32)); + QString strLowLowSurrogate = QString().append(QChar(0xDC37)).append(QChar(0xDC37)); + QString strSurrogateValid1 = QString().append(QChar(0xD801)).append(QChar(0xDC37)); + QString strSurrogateValid2 = QString().append(QChar(0x31)).append(QChar(0xD801)).append(QChar(0xDC37)).append(QChar(0x32)); + Entry* entry = new Entry(); entry->setUuid(Uuid::random()); - entry->setNotes(QString("a %1 b %2 c %3").arg(QChar(0x02)).arg(QChar(0xD800)).arg(QChar(0xFFFE))); entry->setGroup(dbWrite->rootGroup()); + entry->attributes()->set("PlainInvalid", strPlainInvalid); + entry->attributes()->set("PlainValid", strPlainValid); + entry->attributes()->set("SingleHighSurrogate1", strSingleHighSurrogate1); + entry->attributes()->set("SingleHighSurrogate2", strSingleHighSurrogate2); + entry->attributes()->set("HighHighSurrogate", strHighHighSurrogate); + entry->attributes()->set("SingleLowSurrogate1", strSingleLowSurrogate1); + entry->attributes()->set("SingleLowSurrogate2", strSingleLowSurrogate2); + entry->attributes()->set("LowLowSurrogate", strLowLowSurrogate); + entry->attributes()->set("SurrogateValid1", strSurrogateValid1); + entry->attributes()->set("SurrogateValid2", strSurrogateValid2); QBuffer buffer; buffer.open(QIODevice::ReadWrite); @@ -435,8 +472,19 @@ void TestKeePass2XmlReader::testInvalidXmlChars() QVERIFY(!reader.hasError()); QVERIFY(!dbRead.isNull()); QCOMPARE(dbRead->rootGroup()->entries().size(), 1); - // check that the invalid codepoints have been stripped - QCOMPARE(dbRead->rootGroup()->entries().first()->notes(), QString("a b c ")); + Entry* entryRead = dbRead->rootGroup()->entries().at(0); + EntryAttributes* attrRead = entryRead->attributes(); + + QCOMPARE(strToBytes(attrRead->value("PlainInvalid")), QByteArray()); + QCOMPARE(strToBytes(attrRead->value("PlainValid")), strToBytes(strPlainValid)); + QCOMPARE(strToBytes(attrRead->value("SingleHighSurrogate1")), QByteArray()); + QCOMPARE(strToBytes(attrRead->value("SingleHighSurrogate2")), strToBytes(QString("12"))); + QCOMPARE(strToBytes(attrRead->value("HighHighSurrogate")), QByteArray()); + QCOMPARE(strToBytes(attrRead->value("SingleLowSurrogate1")), QByteArray()); + QCOMPARE(strToBytes(attrRead->value("SingleLowSurrogate2")), strToBytes(QString("12"))); + QCOMPARE(strToBytes(attrRead->value("LowLowSurrogate")), QByteArray()); + QCOMPARE(strToBytes(attrRead->value("SurrogateValid1")), strToBytes(strSurrogateValid1)); + QCOMPARE(strToBytes(attrRead->value("SurrogateValid2")), strToBytes(strSurrogateValid2)); } void TestKeePass2XmlReader::cleanupTestCase() diff --git a/tests/TestKeePass2XmlReader.h b/tests/TestKeePass2XmlReader.h index 815dbcd65..b9be7b553 100644 --- a/tests/TestKeePass2XmlReader.h +++ b/tests/TestKeePass2XmlReader.h @@ -47,6 +47,7 @@ private Q_SLOTS: private: static QDateTime genDT(int year, int month, int day, int hour, int min, int second); + static QByteArray strToBytes(const QString& str); Database* m_db; };