summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2020-07-10 08:32:16 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2020-07-10 08:32:16 (GMT)
commit9fca62d042d94b5c90dca523c57a9f6b83eb1cd2 (patch)
tree007b49df46bbb2f7ef9890d9c6320ebb5adb12c9
parent5079cec18ef8f675f3efaa9d7b928755ca6c136d (diff)
parent0fab7463a3318c4fb204e1d463c34ca2616bf201 (diff)
downloadtcl-9fca62d042d94b5c90dca523c57a9f6b83eb1cd2.zip
tcl-9fca62d042d94b5c90dca523c57a9f6b83eb1cd2.tar.gz
tcl-9fca62d042d94b5c90dca523c57a9f6b83eb1cd2.tar.bz2
Merge 8.6. Fix documentation for \Uhhhhhhhh escape sequences > U+FFFF, which are no longer "reserved for the future": It's already working in 8.7.
-rw-r--r--doc/Tcl.n5
-rw-r--r--tests/utf.test4
2 files changed, 6 insertions, 3 deletions
diff --git a/doc/Tcl.n b/doc/Tcl.n
index 0eb51b9..48a3488 100644
--- a/doc/Tcl.n
+++ b/doc/Tcl.n
@@ -223,7 +223,10 @@ before this range overflows, or when the maximum of eight digits
is reached. The upper bits of the Unicode character will be 0.
.RS
.PP
-The range U+010000\(enU+10FFFD is reserved for the future.
+The range U+00D800\(enU+00DFFF is reserved for surrogates, which
+are illegal on their own. Therefore, such sequences will result in
+the replacement character U+FFFD. Surrogate pairs should be
+encoded as a single \e\fBU\fIhhhhhhhh\fR character.
.RE
.PP
Backslash substitution is not performed on words enclosed in braces,
diff --git a/tests/utf.test b/tests/utf.test
index fdbc4e1..c3be5ba 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -79,8 +79,8 @@ test utf-1.11 {Tcl_UniCharToUtf: 3 byte sequence, low surrogate} testbytestring
test utf-1.12 {Tcl_UniCharToUtf: 4 byte sequence, high/low surrogate} {pairsTo4bytes testbytestring} {
expr {"\uD842\uDC42" eq [testbytestring \xF0\xA0\xA1\x82]}
} 1
-test utf-1.13 {Tcl_UniCharToUtf: Invalid surrogate} {Uesc testbytestring} {
- expr {"\UD842" eq [testbytestring \xEF\xBF\xBD]}
+test utf-1.13 {Tcl_UniCharToUtf: Invalid surrogate} Uesc {
+ expr {"\UD842" eq "\uD842"}
} 1
test utf-2.1 {Tcl_UtfToUniChar: low ascii} {