diff options
author | Ben Boeckel <ben.boeckel@kitware.com> | 2019-03-14 15:01:27 (GMT) |
---|---|---|
committer | Ben Boeckel <ben.boeckel@kitware.com> | 2019-03-14 17:25:24 (GMT) |
commit | a11e5e021b5c30a0d6490cfff0399a14a3fb798a (patch) | |
tree | d543d1dda68ddef6b7fb18ce4c1cb0c9b09a295b | |
parent | 7111873efd91c4ed7c38af117e50ff2ea4d8f0e5 (diff) | |
download | CMake-a11e5e021b5c30a0d6490cfff0399a14a3fb798a.zip CMake-a11e5e021b5c30a0d6490cfff0399a14a3fb798a.tar.gz CMake-a11e5e021b5c30a0d6490cfff0399a14a3fb798a.tar.bz2 |
cm_utf8: reject UTF-16 surrogate half codepoints
-rw-r--r-- | Source/cm_utf8.c | 5 | ||||
-rw-r--r-- | Tests/CMakeLib/testUTF8.cxx | 4 |
2 files changed, 7 insertions, 2 deletions
diff --git a/Source/cm_utf8.c b/Source/cm_utf8.c index 52af4a6..2459c02 100644 --- a/Source/cm_utf8.c +++ b/Source/cm_utf8.c @@ -71,6 +71,11 @@ const char* cm_utf8_decode_character(const char* first, const char* last, return 0; } + /* UTF-16 surrogate halves. */ + if (0xD800 <= uc && uc <= 0xDFFF) { + return 0; + } + *pc = uc; return first; } diff --git a/Tests/CMakeLib/testUTF8.cxx b/Tests/CMakeLib/testUTF8.cxx index e225152..f1da6df 100644 --- a/Tests/CMakeLib/testUTF8.cxx +++ b/Tests/CMakeLib/testUTF8.cxx @@ -29,8 +29,6 @@ static test_utf8_entry const good_entry[] = { { 3, "\xED\x80\x80\x00", 0xD000 }, /* Valid 0xED prefixed codepoint. */ { 4, "\xF4\x8F\xBF\xBF", 0x10FFFF }, /* Highest valid RFC codepoint. */ /* These are invalid according to the RFC, but accepted here. */ - { 3, "\xED\xA0\x80\x00", 0xD800 }, /* UTF-16 surrogate half. */ - { 3, "\xED\xBF\xBF\x00", 0xDFFF }, /* UTF-16 surrogate half. */ { 4, "\xF4\x90\x80\x80", 0x110000 }, /* Lowest out-of-range codepoint. */ { 4, "\xF5\x80\x80\x80", 0x140000 }, /* Prefix forces out-of-range codepoints. */ @@ -45,6 +43,8 @@ static test_utf8_char const bad_chars[] = { "\xE0\x00\x00\x00", /* Missing continuation bytes. */ "\xE0\x80\x80\x00", /* Overlong encoding. */ "\xF0\x80\x80\x80", /* Overlong encoding. */ + "\xED\xA0\x80\x00", /* UTF-16 surrogate half. */ + "\xED\xBF\xBF\x00", /* UTF-16 surrogate half. */ { 0, 0, 0, 0, 0 } }; |