summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Ben Boeckel <ben.boeckel@kitware.com> 2019-03-14 17:26:21 (GMT)
committer: Ben Boeckel <ben.boeckel@kitware.com> 2019-03-14 17:26:21 (GMT)
commit: fb5de060bc99635a5b18b3389cc15e9937b19a0e (patch)
tree: 717ba5b7072330e1872b804ba62517d183e14584
parent: a11e5e021b5c30a0d6490cfff0399a14a3fb798a (diff)
download: CMake-fb5de060bc99635a5b18b3389cc15e9937b19a0e.zip
CMake-fb5de060bc99635a5b18b3389cc15e9937b19a0e.tar.gz
CMake-fb5de060bc99635a5b18b3389cc15e9937b19a0e.tar.bz2
cm_utf8: reject codepoints above 0x10FFFF
These codepoints are invalid because the Unicode standard limits the code space to U+0000..U+10FFFF (UTF-16 as specified today cannot encode anything higher).
-rw-r--r-- Source/cm_utf8.c 5
-rw-r--r-- Tests/CMakeLib/testUTF8.cxx 6
2 files changed, 7 insertions, 4 deletions
diff --git a/Source/cm_utf8.c b/Source/cm_utf8.c
index 2459c02..d41d097 100644
--- a/Source/cm_utf8.c
+++ b/Source/cm_utf8.c
@@ -76,6 +76,11 @@ const char* cm_utf8_decode_character(const char* first, const char* last,
return 0;
}
+ /* Invalid codepoints. */
+ if (0x10FFFF < uc) {
+ return 0;
+ }
+
*pc = uc;
return first;
}
diff --git a/Tests/CMakeLib/testUTF8.cxx b/Tests/CMakeLib/testUTF8.cxx
index f1da6df..7f52c82 100644
--- a/Tests/CMakeLib/testUTF8.cxx
+++ b/Tests/CMakeLib/testUTF8.cxx
@@ -28,10 +28,6 @@ static test_utf8_entry const good_entry[] = {
{ 4, "\xF0\xA3\x8E\xB4", 0x233B4 }, /* Example from RFC 3629. */
{ 3, "\xED\x80\x80\x00", 0xD000 }, /* Valid 0xED prefixed codepoint. */
{ 4, "\xF4\x8F\xBF\xBF", 0x10FFFF }, /* Highest valid RFC codepoint. */
- /* These are invalid according to the RFC, but accepted here. */
- { 4, "\xF4\x90\x80\x80", 0x110000 }, /* Lowest out-of-range codepoint. */
- { 4, "\xF5\x80\x80\x80",
- 0x140000 }, /* Prefix forces out-of-range codepoints. */
{ 0, { 0, 0, 0, 0, 0 }, 0 }
};
@@ -45,6 +41,8 @@ static test_utf8_char const bad_chars[] = {
"\xF0\x80\x80\x80", /* Overlong encoding. */
"\xED\xA0\x80\x00", /* UTF-16 surrogate half. */
"\xED\xBF\xBF\x00", /* UTF-16 surrogate half. */
+ "\xF4\x90\x80\x80", /* Lowest out-of-range codepoint. */
+ "\xF5\x80\x80\x80", /* Prefix forces out-of-range codepoints. */
{ 0, 0, 0, 0, 0 }
};