diff options
| author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2019-12-02 20:26:58 (GMT) |
|---|---|---|
| committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2019-12-02 20:26:58 (GMT) |
| commit | 4f6ea8aef90242e6052cf743e6684e312c84873c (patch) | |
| tree | 24b906f5cb76769c17139a95cb70b759f52346d5 /generic | |
| parent | 90493b40122f01179e4f0152f055e44631b10e22 (diff) | |
| download | tcl-4f6ea8aef90242e6052cf743e6684e312c84873c.zip tcl-4f6ea8aef90242e6052cf743e6684e312c84873c.tar.gz tcl-4f6ea8aef90242e6052cf743e6684e312c84873c.tar.bz2 | |
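If TCL_UTF_MAX>=4, make TclParseBackslash combine a high-surrogate escape that is immediately followed by a low-surrogate escape (two consecutive \uXXXX sequences), so that the pair is emitted as one 4-byte UTF-8 sequence from the start. Add a test case for this.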
Diffstat (limited to 'generic')
| -rw-r--r-- | generic/tclParse.c | 10 | ||||
| -rw-r--r-- | generic/tclUtf.c | 2 | ||||
| -rw-r--r-- | generic/tclUtil.c | 2 |
3 files changed, 12 insertions, 2 deletions
diff --git a/generic/tclParse.c b/generic/tclParse.c index 1532c05..4f30f8b 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -920,6 +920,16 @@ TclParseBackslash( * No hexadigits -> This is just "u". */ result = 'u'; +#if TCL_UTF_MAX > 3 + } else if (((result & 0xDC00) == 0xD800) && (count == 6) && (p[5] == '\\') && (p[6] == 'u') && (numBytes >= 10)) { + /* If high surrogate is immediately followed by a low surrogate escape, combine them. */ + int low; + int count2 = TclParseHex(p+7, 4, &low); + if ((low & 0xDC00) == 0xDC00) { + result = ((result & 0x3FF)<<10 | (low & 0x3FF)) + 0x10000; + count += count2 + 2; + } +#endif } break; case 'U': diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 9c2ef03..ce80bd0 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -846,7 +846,7 @@ Tcl_UtfBackslash( * We ate a whole line. Pay the price of a strlen() */ - result = TclParseBackslash(src, (int)strlen(src), &numRead, dst); + result = TclParseBackslash(src, strlen(src), &numRead, dst); } if (readPtr != NULL) { *readPtr = numRead; diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 61c1973..41b3481 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -1649,7 +1649,7 @@ Tcl_Backslash( int *readPtr) /* Fill in with number of characters read from * src, unless NULL. */ { - char buf[TCL_UTF_MAX] = ""; + char buf[4] = ""; Tcl_UniChar ch = 0; Tcl_UtfBackslash(src, readPtr, buf); |
