summaryrefslogtreecommitdiffstats
path: root/generic/tclParse.c
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2019-12-02 20:26:58 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2019-12-02 20:26:58 (GMT)
commit4f6ea8aef90242e6052cf743e6684e312c84873c (patch)
tree24b906f5cb76769c17139a95cb70b759f52346d5 /generic/tclParse.c
parent90493b40122f01179e4f0152f055e44631b10e22 (diff)
downloadtcl-4f6ea8aef90242e6052cf743e6684e312c84873c.zip
tcl-4f6ea8aef90242e6052cf743e6684e312c84873c.tar.gz
tcl-4f6ea8aef90242e6052cf743e6684e312c84873c.tar.bz2
If TCL_UTF_MAX>=4, make TclParseBackslash combine two surrogate escapes (a high-surrogate \uXXXX immediately followed by a low-surrogate \uXXXX) so they appear as one 4-byte UTF-8 sequence from the start. Add a test case for this.
Diffstat (limited to 'generic/tclParse.c')
-rw-r--r--generic/tclParse.c10
1 files changed, 10 insertions, 0 deletions
diff --git a/generic/tclParse.c b/generic/tclParse.c
index 1532c05..4f30f8b 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -920,6 +920,16 @@ TclParseBackslash(
* No hexadigits -> This is just "u".
*/
result = 'u';
+#if TCL_UTF_MAX > 3
+ } else if (((result & 0xDC00) == 0xD800) && (count == 6) && (p[5] == '\\') && (p[6] == 'u') && (numBytes >= 10)) {
+ /* If high surrogate is immediately followed by a low surrogate escape, combine them. */
+ int low;
+ int count2 = TclParseHex(p+7, 4, &low);
+ if ((low & 0xDC00) == 0xDC00) {
+ result = ((result & 0x3FF)<<10 | (low & 0x3FF)) + 0x10000;
+ count += count2 + 2;
+ }
+#endif
}
break;
case 'U':