diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-04-28 06:54:05 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-04-28 06:54:05 (GMT) |
commit | 15b634b5e53cbe2414e4f73a679522c70631c6cd (patch) | |
tree | 5fc9ec91effafa406bda821080fded62c59b0734 /generic | |
parent | 215662c8ba97ab0ec4818e0b0bb0440be801219f (diff) | |
download | tcl-15b634b5e53cbe2414e4f73a679522c70631c6cd.zip tcl-15b634b5e53cbe2414e4f73a679522c70631c6cd.tar.gz tcl-15b634b5e53cbe2414e4f73a679522c70631c6cd.tar.bz2 |
Backport parsing of surrogate-pair change from 8.6 (only for TCL_UTF_MAX=4)
Adapt test cases accordingly. Renumber and split test cases so that the numbering more closely matches the numbering in 8.6/8.7/9.0
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tclParse.c | 18 |
1 file changed, 15 insertions, 3 deletions
diff --git a/generic/tclParse.c b/generic/tclParse.c index cfd6337..0b9b14f 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -912,7 +912,7 @@ TclParseBackslash( count += ParseHex(p+1, numBytes-2, &result); if (count == 2) { /* - * No hexadigits -> This is just "x". + * No hexdigits -> This is just "x". */ result = 'x'; @@ -927,9 +927,21 @@ TclParseBackslash( count += ParseHex(p+1, (numBytes > 5) ? 4 : numBytes-2, &result); if (count == 2) { /* - * No hexadigits -> This is just "u". + * No hexdigits -> This is just "u". */ result = 'u'; +#if TCL_UTF_MAX > 3 + } else if (((result & 0xDC00) == 0xD800) && (count == 6) + && (p[5] == '\\') && (p[6] == 'u') && (numBytes >= 10)) { + /* If high surrogate is immediately followed by a low surrogate + * escape, combine them into one character. */ + int low; + int count2 = ParseHex(p+7, 4, &low); + if ((count2 == 4) && ((low & 0xDC00) == 0xDC00)) { + result = ((result & 0x3FF)<<10 | (low & 0x3FF)) + 0x10000; + count += count2 + 2; + } +#endif } break; #if TCL_UTF_MAX > 3 @@ -937,7 +949,7 @@ TclParseBackslash( count += ParseHex(p+1, (numBytes > 9) ? 8 : numBytes-2, &result); if (count == 2) { /* - * No hexadigits -> This is just "U". + * No hexdigits -> This is just "U". */ result = 'U'; } else if ((result | 0x7FF) == 0xDFFF) { |