diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2013-06-08 01:56:58 (GMT) |
---|---|---|
committer | The Qt Project <gerrit-noreply@qt-project.org> | 2013-06-21 00:31:38 (GMT) |
commit | 6827a3aa2bc852152cd3353be9558c683fb3085c (patch) | |
tree | 8b56250ed0ab85005463b23385698e935f762c96 | |
parent | 154823904672bd89255dd647a2fe609f8fbd74b0 (diff) | |
download | Qt-6827a3aa2bc852152cd3353be9558c683fb3085c.zip Qt-6827a3aa2bc852152cd3353be9558c683fb3085c.tar.gz Qt-6827a3aa2bc852152cd3353be9558c683fb3085c.tar.bz2 |
QUrl stringprep: fix handling of prohibited characters
RFC 3454 says about prohibited characters (section 2, "Preparation
Overview"):
3) Prohibit -- Check for any characters that are not allowed in the
output. If any are found, return an error. This is described in
section 5.
In other words, we mustn't simply strip the output of prohibited
characters. We must generate an error if they are present. We do that by
clearing the data.
We already had tests for prohibited output, but they were
indistinguishable from being stripped. So instead add some extra
characters so that we can tell whether the label was cleared.
(cherry-picked from qtbase commit 736a052d93d9c75e51e8f3da733bc8e4a50c39ce)
Change-Id: I2d95217c27be5e2d54deed0036cb009e3b7f4886
Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
-rw-r--r-- | src/corelib/io/qurl.cpp | 25 | ||||
-rw-r--r-- | tests/auto/qurl/tst_qurl.cpp | 48 |
2 files changed, 35 insertions, 38 deletions
diff --git a/src/corelib/io/qurl.cpp b/src/corelib/io/qurl.cpp index 14b9037..3dc9568 100644 --- a/src/corelib/io/qurl.cpp +++ b/src/corelib/io/qurl.cpp @@ -2438,18 +2438,20 @@ static bool isMappedToNothing(uint uc) } -static void stripProhibitedOutput(QString *str, int from) +static bool containsProhibitedOuptut(const QString *str, int from) { - ushort *out = (ushort *)str->data() + from; - const ushort *in = out; + const ushort *in = reinterpret_cast<const ushort *>(str->begin() + from); const ushort *end = (ushort *)str->data() + str->size(); - while (in < end) { + for ( ; in < end; ++in) { uint uc = *in; if (QChar(uc).isHighSurrogate() && in < end - 1) { ushort low = *(in + 1); if (QChar(low).isLowSurrogate()) { ++in; uc = QChar::surrogateToUcs4(uc, low); + } else { + // unpaired surrogates are prohibited + return true; } } if (uc <= 0xFFFF) { @@ -2474,7 +2476,7 @@ static void stripProhibitedOutput(QString *str, int from) || (uc >= 0xFDD0 && uc <= 0xFDEF) || uc == 0xFEFF || (uc >= 0xFFF9 && uc <= 0xFFFF))) { - *out++ = *in; + continue; } } else { if (!((uc >= 0x1D173 && uc <= 0x1D17A) @@ -2498,14 +2500,12 @@ static void stripProhibitedOutput(QString *str, int from) || (uc >= 0xFFFFE && uc <= 0xFFFFF) || (uc >= 0x100000 && uc <= 0x10FFFD) || (uc >= 0x10FFFE && uc <= 0x10FFFF))) { - *out++ = QChar::highSurrogate(uc); - *out++ = QChar::lowSurrogate(uc); + continue; } } - ++in; + return true; } - if (in != out) - str->truncate(out - str->utf16()); + return false; } static bool isBidirectionalRorAL(uint uc) @@ -3030,7 +3030,10 @@ void qt_nameprep(QString *source, int from) firstNonAscii > from ? firstNonAscii - 1 : from); // Strip prohibited output - stripProhibitedOutput(source, firstNonAscii); + if (containsProhibitedOuptut(source, firstNonAscii)) { + source->resize(from); + return; + } // Check for valid bidirectional characters bool containsLCat = false; diff --git a/tests/auto/qurl/tst_qurl.cpp b/tests/auto/qurl/tst_qurl.cpp index 64d8bda..cd696e4 100644 --- a/tests/auto/qurl/tst_qurl.cpp +++ b/tests/auto/qurl/tst_qurl.cpp @@ -3043,7 +3043,7 @@ void tst_QUrl::nameprep_testsuite_data() << QString() << 0 << 0; QTest::newRow("Non-ASCII multibyte space character U+1680") - << QString::fromUtf8("\xE1\x9A\x80") + << QString::fromUtf8("x\xE1\x9A\x80x") << QString() << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED; @@ -3068,12 +3068,12 @@ void tst_QUrl::nameprep_testsuite_data() << QString() << 0 << 0; QTest::newRow("Non-ASCII 8bit control character U+0085") - << QString::fromUtf8("\xC2\x85") + << QString::fromUtf8("x\xC2\x85x") << QString() << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED; QTest::newRow("Non-ASCII multibyte control character U+180E") - << QString::fromUtf8("\xE1\xA0\x8E") + << QString::fromUtf8("x\xE1\xA0\x8Ex") << QString() << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED; @@ -3083,47 +3083,47 @@ void tst_QUrl::nameprep_testsuite_data() << QString() << 0 << 0; QTest::newRow("Non-ASCII control character U+1D175") - << QString::fromUtf8("\xF0\x9D\x85\xB5") + << QString::fromUtf8("x\xF0\x9D\x85\xB5x") << QString() << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED; QTest::newRow("Plane 0 private use character U+F123") - << QString::fromUtf8("\xEF\x84\xA3") + << QString::fromUtf8("x\xEF\x84\xA3x") << QString() << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED; QTest::newRow("Plane 15 private use character U+F1234") - << QString::fromUtf8("\xF3\xB1\x88\xB4") + << QString::fromUtf8("x\xF3\xB1\x88\xB4x") << QString() << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED; QTest::newRow("Plane 16 private use character U+10F234") - << QString::fromUtf8("\xF4\x8F\x88\xB4") + << QString::fromUtf8("x\xF4\x8F\x88\xB4x") << QString() << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED; QTest::newRow("Non-character code point U+8FFFE") - << QString::fromUtf8("\xF2\x8F\xBF\xBE") + << QString::fromUtf8("x\xF2\x8F\xBF\xBEx") << QString() << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED; QTest::newRow("Non-character code point U+10FFFF") - << QString::fromUtf8("\xF4\x8F\xBF\xBF") + << QString::fromUtf8("x\xF4\x8F\xBF\xBFx") << QString() << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED; QTest::newRow("Surrogate code U+DF42") - << QString::fromUtf8("\xED\xBD\x82") + << QString::fromUtf8("x\xED\xBD\x82x") << QString() << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED; QTest::newRow("Non-plain text character U+FFFD") - << QString::fromUtf8("\xEF\xBF\xBD") + << QString::fromUtf8("x\xEF\xBF\xBDx") << QString() << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED; QTest::newRow("Ideographic description character U+2FF5") - << QString::fromUtf8("\xE2\xBF\xB5") + << QString::fromUtf8("x\xE2\xBF\xB5x") << QString() << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED; @@ -3133,22 +3133,22 @@ void tst_QUrl::nameprep_testsuite_data() << QString() << 0 << 0; QTest::newRow("Left-to-right mark U+200E") - << QString::fromUtf8("\xE2\x80\x8E") - << QString::fromUtf8("\xCC\x81") + << QString::fromUtf8("x\xE2\x80\x8Ex") + << QString() << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED; QTest::newRow("Deprecated U+202A") - << QString::fromUtf8("\xE2\x80\xAA") - << QString::fromUtf8("\xCC\x81") + << QString::fromUtf8("x\xE2\x80\xAA") + << QString() << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED; QTest::newRow("Language tagging character U+E0001") - << QString::fromUtf8("\xF3\xA0\x80\x81") - << QString::fromUtf8("\xCC\x81") + << QString::fromUtf8("x\xF3\xA0\x80\x81x") + << QString() << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED; QTest::newRow("Language tagging character U+E0042") - << QString::fromUtf8("\xF3\xA0\x81\x82") + << QString::fromUtf8("x\xF3\xA0\x81\x82x") << QString() << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED; @@ -3210,12 +3210,6 @@ void tst_QUrl::nameprep_testsuite() QFETCH(QString, out); QFETCH(QString, profile); - QEXPECT_FAIL("Left-to-right mark U+200E", - "Investigate further", Continue); - QEXPECT_FAIL("Deprecated U+202A", - "Investigate further", Continue); - QEXPECT_FAIL("Language tagging character U+E0001", - "Investigate further", Continue); qt_nameprep(&in, 0); QCOMPARE(in, out); #endif @@ -3246,9 +3240,9 @@ void tst_QUrl::nameprep_highcodes_data() << QString() << 0 << 0; } { - QChar st[] = { 'D', 0xdb40, 0xdc20, 'o', 0xd834, 0xdd7a, '\'', 0x2060, 'h' }; + QChar st[] = { 'D', 'o', '\'', 0x2060, 'h' }; QChar se[] = { 'd', 'o', '\'', 'h' }; - QTest::newRow("highcodes (D, U+E0020, o, U+1D17A, ', U+2060, h)") + QTest::newRow("highcodes (D, o, ', U+2060, h)") << QString(st, sizeof(st)/sizeof(st[0])) << QString(se, sizeof(se)/sizeof(se[0])) << QString() << 0 << 0; |