summary | refs | log | tree | commit | diff | stats
path: root/generic
diff options
context:
space:
mode:
author: jan.nijtmans <nijtmans@users.sourceforge.net> 2019-12-02 20:26:58 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2019-12-02 20:26:58 (GMT)
commit: 4f6ea8aef90242e6052cf743e6684e312c84873c (patch)
tree: 24b906f5cb76769c17139a95cb70b759f52346d5 /generic
parent: 90493b40122f01179e4f0152f055e44631b10e22 (diff)
download: tcl-4f6ea8aef90242e6052cf743e6684e312c84873c.zip
          tcl-4f6ea8aef90242e6052cf743e6684e312c84873c.tar.gz
          tcl-4f6ea8aef90242e6052cf743e6684e312c84873c.tar.bz2
If TCL_UTF_MAX>=4, make TclParseBackslash combine two surrogate escapes so that they appear as a single 4-byte UTF-8 sequence from the start. Add a test case for this.
Diffstat (limited to 'generic')
-rw-r--r--  generic/tclParse.c  10
-rw-r--r--  generic/tclUtf.c     2
-rw-r--r--  generic/tclUtil.c    2
3 files changed, 12 insertions, 2 deletions
diff --git a/generic/tclParse.c b/generic/tclParse.c
index 1532c05..4f30f8b 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -920,6 +920,16 @@ TclParseBackslash(
* No hexadigits -> This is just "u".
*/
result = 'u';
+#if TCL_UTF_MAX > 3
+ } else if (((result & 0xDC00) == 0xD800) && (count == 6) && (p[5] == '\\') && (p[6] == 'u') && (numBytes >= 10)) {
+ /* If high surrogate is immediately followed by a low surrogate escape, combine them. */
+ int low;
+ int count2 = TclParseHex(p+7, 4, &low);
+ if ((low & 0xDC00) == 0xDC00) {
+ result = ((result & 0x3FF)<<10 | (low & 0x3FF)) + 0x10000;
+ count += count2 + 2;
+ }
+#endif
}
break;
case 'U':
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 9c2ef03..ce80bd0 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -846,7 +846,7 @@ Tcl_UtfBackslash(
* We ate a whole line. Pay the price of a strlen()
*/
- result = TclParseBackslash(src, (int)strlen(src), &numRead, dst);
+ result = TclParseBackslash(src, strlen(src), &numRead, dst);
}
if (readPtr != NULL) {
*readPtr = numRead;
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 61c1973..41b3481 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -1649,7 +1649,7 @@ Tcl_Backslash(
int *readPtr) /* Fill in with number of characters read from
* src, unless NULL. */
{
- char buf[TCL_UTF_MAX] = "";
+ char buf[4] = "";
Tcl_UniChar ch = 0;
Tcl_UtfBackslash(src, readPtr, buf);