From 0fab7463a3318c4fb204e1d463c34ca2616bf201 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans"
Date: Fri, 10 Jul 2020 07:26:55 +0000
Subject: Addendum to [60fab362ce]: "Also don't allow surrogates in \U??????". This change was only meant for builds with TCL_UTF_MAX > 3.

---
 generic/tclParse.c | 2 ++
 tests/utf.test | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/generic/tclParse.c b/generic/tclParse.c
index 78f3a9e..7a51dae 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -942,9 +942,11 @@ TclParseBackslash(
 * No hexdigits -> This is just "U".
 */
 result = 'U';
+#if TCL_UTF_MAX > 3
 } else if ((result & ~0x7FF) == 0xD800) {
 /* Upper or lower surrogate, not allowed in this syntax. */
 result = 0xFFFD;
+#endif
 }
 break;
 case '\n':
diff --git a/tests/utf.test b/tests/utf.test
index 14b2198..2f5f7f3 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -78,8 +78,8 @@ test utf-1.11 {Tcl_UniCharToUtf: 3 byte sequence, low surrogate} testbytestring
 test utf-1.12 {Tcl_UniCharToUtf: 4 byte sequence, high/low surrogate} {pairsTo4bytes testbytestring} {
 expr {"\uD842\uDC42" eq [testbytestring \xF0\xA0\xA1\x82]}
 } 1
-test utf-1.13 {Tcl_UniCharToUtf: Invalid surrogate} {Uesc testbytestring} {
- expr {"\UD842" eq [testbytestring \xEF\xBF\xBD]}
+test utf-1.13 {Tcl_UniCharToUtf: Invalid surrogate} Uesc {
+ expr {"\UD842" eq "\uD842"}
 } 1
 
 test utf-2.1 {Tcl_UtfToUniChar: low ascii} {
-- 
cgit v0.12