diff options
| author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2019-12-02 20:26:58 (GMT) |
|---|---|---|
| committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2019-12-02 20:26:58 (GMT) |
| commit | 4f6ea8aef90242e6052cf743e6684e312c84873c (patch) | |
| tree | 24b906f5cb76769c17139a95cb70b759f52346d5 /generic/tclParse.c | |
| parent | 90493b40122f01179e4f0152f055e44631b10e22 (diff) | |
| download | tcl-4f6ea8aef90242e6052cf743e6684e312c84873c.zip tcl-4f6ea8aef90242e6052cf743e6684e312c84873c.tar.gz tcl-4f6ea8aef90242e6052cf743e6684e312c84873c.tar.bz2 | |
If TCL_UTF_MAX>=4, make TclParseBackslash combine a high-surrogate \u escape that is immediately followed by a low-surrogate \u escape, so that the pair appears as a single 4-byte UTF-8 sequence from the start. Add a test case for this.
Diffstat (limited to 'generic/tclParse.c')
| -rw-r--r-- | generic/tclParse.c | 10 |
1 files changed, 10 insertions, 0 deletions
diff --git a/generic/tclParse.c b/generic/tclParse.c index 1532c05..4f30f8b 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -920,6 +920,16 @@ TclParseBackslash( * No hexadigits -> This is just "u". */ result = 'u'; +#if TCL_UTF_MAX > 3 + } else if (((result & 0xDC00) == 0xD800) && (count == 6) && (p[5] == '\\') && (p[6] == 'u') && (numBytes >= 10)) { + /* If high surrogate is immediately followed by a low surrogate escape, combine them. */ + int low; + int count2 = TclParseHex(p+7, 4, &low); + if ((low & 0xDC00) == 0xDC00) { + result = ((result & 0x3FF)<<10 | (low & 0x3FF)) + 0x10000; + count += count2 + 2; + } +#endif } break; case 'U': |
