summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2013-06-08 01:56:58 (GMT)
committerThe Qt Project <gerrit-noreply@qt-project.org>2013-06-21 00:31:38 (GMT)
commit6827a3aa2bc852152cd3353be9558c683fb3085c (patch)
tree8b56250ed0ab85005463b23385698e935f762c96
parent154823904672bd89255dd647a2fe609f8fbd74b0 (diff)
downloadQt-6827a3aa2bc852152cd3353be9558c683fb3085c.zip
Qt-6827a3aa2bc852152cd3353be9558c683fb3085c.tar.gz
Qt-6827a3aa2bc852152cd3353be9558c683fb3085c.tar.bz2
QUrl stringprep: fix handling of prohibited characters
RFC 3454 says about prohibited characters (section 2, "Preparation Overview"): 3) Prohibit -- Check for any characters that are not allowed in the output. If any are found, return an error. This is described in section 5. In other words, we mustn't simply strip the output of prohibited characters. We must generate an error if they are present. We do that by clearing the data. We already had tests for prohibited output, but they were indistinguishable from being stripped. So instead add some extra characters so that we can tell whether the label was cleared. (cherry-picked from qtbase commit 736a052d93d9c75e51e8f3da733bc8e4a50c39ce) Change-Id: I2d95217c27be5e2d54deed0036cb009e3b7f4886 Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
-rw-r--r--src/corelib/io/qurl.cpp25
-rw-r--r--tests/auto/qurl/tst_qurl.cpp48
2 files changed, 35 insertions, 38 deletions
diff --git a/src/corelib/io/qurl.cpp b/src/corelib/io/qurl.cpp
index 14b9037..3dc9568 100644
--- a/src/corelib/io/qurl.cpp
+++ b/src/corelib/io/qurl.cpp
@@ -2438,18 +2438,20 @@ static bool isMappedToNothing(uint uc)
}
-static void stripProhibitedOutput(QString *str, int from)
+static bool containsProhibitedOuptut(const QString *str, int from)
{
- ushort *out = (ushort *)str->data() + from;
- const ushort *in = out;
+ const ushort *in = reinterpret_cast<const ushort *>(str->begin() + from);
const ushort *end = (ushort *)str->data() + str->size();
- while (in < end) {
+ for ( ; in < end; ++in) {
uint uc = *in;
if (QChar(uc).isHighSurrogate() && in < end - 1) {
ushort low = *(in + 1);
if (QChar(low).isLowSurrogate()) {
++in;
uc = QChar::surrogateToUcs4(uc, low);
+ } else {
+ // unpaired surrogates are prohibited
+ return true;
}
}
if (uc <= 0xFFFF) {
@@ -2474,7 +2476,7 @@ static void stripProhibitedOutput(QString *str, int from)
|| (uc >= 0xFDD0 && uc <= 0xFDEF)
|| uc == 0xFEFF
|| (uc >= 0xFFF9 && uc <= 0xFFFF))) {
- *out++ = *in;
+ continue;
}
} else {
if (!((uc >= 0x1D173 && uc <= 0x1D17A)
@@ -2498,14 +2500,12 @@ static void stripProhibitedOutput(QString *str, int from)
|| (uc >= 0xFFFFE && uc <= 0xFFFFF)
|| (uc >= 0x100000 && uc <= 0x10FFFD)
|| (uc >= 0x10FFFE && uc <= 0x10FFFF))) {
- *out++ = QChar::highSurrogate(uc);
- *out++ = QChar::lowSurrogate(uc);
+ continue;
}
}
- ++in;
+ return true;
}
- if (in != out)
- str->truncate(out - str->utf16());
+ return false;
}
static bool isBidirectionalRorAL(uint uc)
@@ -3030,7 +3030,10 @@ void qt_nameprep(QString *source, int from)
firstNonAscii > from ? firstNonAscii - 1 : from);
// Strip prohibited output
- stripProhibitedOutput(source, firstNonAscii);
+ if (containsProhibitedOuptut(source, firstNonAscii)) {
+ source->resize(from);
+ return;
+ }
// Check for valid bidirectional characters
bool containsLCat = false;
diff --git a/tests/auto/qurl/tst_qurl.cpp b/tests/auto/qurl/tst_qurl.cpp
index 64d8bda..cd696e4 100644
--- a/tests/auto/qurl/tst_qurl.cpp
+++ b/tests/auto/qurl/tst_qurl.cpp
@@ -3043,7 +3043,7 @@ void tst_QUrl::nameprep_testsuite_data()
<< QString() << 0 << 0;
QTest::newRow("Non-ASCII multibyte space character U+1680")
- << QString::fromUtf8("\xE1\x9A\x80")
+ << QString::fromUtf8("x\xE1\x9A\x80x")
<< QString()
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
@@ -3068,12 +3068,12 @@ void tst_QUrl::nameprep_testsuite_data()
<< QString() << 0 << 0;
QTest::newRow("Non-ASCII 8bit control character U+0085")
- << QString::fromUtf8("\xC2\x85")
+ << QString::fromUtf8("x\xC2\x85x")
<< QString()
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
QTest::newRow("Non-ASCII multibyte control character U+180E")
- << QString::fromUtf8("\xE1\xA0\x8E")
+ << QString::fromUtf8("x\xE1\xA0\x8Ex")
<< QString()
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
@@ -3083,47 +3083,47 @@ void tst_QUrl::nameprep_testsuite_data()
<< QString() << 0 << 0;
QTest::newRow("Non-ASCII control character U+1D175")
- << QString::fromUtf8("\xF0\x9D\x85\xB5")
+ << QString::fromUtf8("x\xF0\x9D\x85\xB5x")
<< QString()
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
QTest::newRow("Plane 0 private use character U+F123")
- << QString::fromUtf8("\xEF\x84\xA3")
+ << QString::fromUtf8("x\xEF\x84\xA3x")
<< QString()
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
QTest::newRow("Plane 15 private use character U+F1234")
- << QString::fromUtf8("\xF3\xB1\x88\xB4")
+ << QString::fromUtf8("x\xF3\xB1\x88\xB4x")
<< QString()
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
QTest::newRow("Plane 16 private use character U+10F234")
- << QString::fromUtf8("\xF4\x8F\x88\xB4")
+ << QString::fromUtf8("x\xF4\x8F\x88\xB4x")
<< QString()
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
QTest::newRow("Non-character code point U+8FFFE")
- << QString::fromUtf8("\xF2\x8F\xBF\xBE")
+ << QString::fromUtf8("x\xF2\x8F\xBF\xBEx")
<< QString()
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
QTest::newRow("Non-character code point U+10FFFF")
- << QString::fromUtf8("\xF4\x8F\xBF\xBF")
+ << QString::fromUtf8("x\xF4\x8F\xBF\xBFx")
<< QString()
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
QTest::newRow("Surrogate code U+DF42")
- << QString::fromUtf8("\xED\xBD\x82")
+ << QString::fromUtf8("x\xED\xBD\x82x")
<< QString()
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
QTest::newRow("Non-plain text character U+FFFD")
- << QString::fromUtf8("\xEF\xBF\xBD")
+ << QString::fromUtf8("x\xEF\xBF\xBDx")
<< QString()
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
QTest::newRow("Ideographic description character U+2FF5")
- << QString::fromUtf8("\xE2\xBF\xB5")
+ << QString::fromUtf8("x\xE2\xBF\xB5x")
<< QString()
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
@@ -3133,22 +3133,22 @@ void tst_QUrl::nameprep_testsuite_data()
<< QString() << 0 << 0;
QTest::newRow("Left-to-right mark U+200E")
- << QString::fromUtf8("\xE2\x80\x8E")
- << QString::fromUtf8("\xCC\x81")
+ << QString::fromUtf8("x\xE2\x80\x8Ex")
+ << QString()
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
QTest::newRow("Deprecated U+202A")
- << QString::fromUtf8("\xE2\x80\xAA")
- << QString::fromUtf8("\xCC\x81")
+ << QString::fromUtf8("x\xE2\x80\xAA")
+ << QString()
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
QTest::newRow("Language tagging character U+E0001")
- << QString::fromUtf8("\xF3\xA0\x80\x81")
- << QString::fromUtf8("\xCC\x81")
+ << QString::fromUtf8("x\xF3\xA0\x80\x81x")
+ << QString()
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
QTest::newRow("Language tagging character U+E0042")
- << QString::fromUtf8("\xF3\xA0\x81\x82")
+ << QString::fromUtf8("x\xF3\xA0\x81\x82x")
<< QString()
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
@@ -3210,12 +3210,6 @@ void tst_QUrl::nameprep_testsuite()
QFETCH(QString, out);
QFETCH(QString, profile);
- QEXPECT_FAIL("Left-to-right mark U+200E",
- "Investigate further", Continue);
- QEXPECT_FAIL("Deprecated U+202A",
- "Investigate further", Continue);
- QEXPECT_FAIL("Language tagging character U+E0001",
- "Investigate further", Continue);
qt_nameprep(&in, 0);
QCOMPARE(in, out);
#endif
@@ -3246,9 +3240,9 @@ void tst_QUrl::nameprep_highcodes_data()
<< QString() << 0 << 0;
}
{
- QChar st[] = { 'D', 0xdb40, 0xdc20, 'o', 0xd834, 0xdd7a, '\'', 0x2060, 'h' };
+ QChar st[] = { 'D', 'o', '\'', 0x2060, 'h' };
QChar se[] = { 'd', 'o', '\'', 'h' };
- QTest::newRow("highcodes (D, U+E0020, o, U+1D17A, ', U+2060, h)")
+ QTest::newRow("highcodes (D, o, ', U+2060, h)")
<< QString(st, sizeof(st)/sizeof(st[0]))
<< QString(se, sizeof(se)/sizeof(se[0]))
<< QString() << 0 << 0;