summary | refs | log | tree | commit | diff | stats
path: root/generic
diff options
context:
space:
mode:
author: jan.nijtmans <nijtmans@users.sourceforge.net> 2020-04-28 06:54:05 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2020-04-28 06:54:05 (GMT)
commit: 15b634b5e53cbe2414e4f73a679522c70631c6cd (patch)
tree: 5fc9ec91effafa406bda821080fded62c59b0734 /generic
parent: 215662c8ba97ab0ec4818e0b0bb0440be801219f (diff)
download: tcl-15b634b5e53cbe2414e4f73a679522c70631c6cd.zip
tcl-15b634b5e53cbe2414e4f73a679522c70631c6cd.tar.gz
tcl-15b634b5e53cbe2414e4f73a679522c70631c6cd.tar.bz2
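Backport the surrogate-pair parsing change from 8.6 (only for TCL_UTF_MAX=4): a \uXXXX high-surrogate escape immediately followed by a \uXXXX low-surrogate escape is combined into a single character.
Adapt the test cases accordingly. Renumber and split the test cases so that their numbering more closely matches the numbering in 8.6/8.7/9.0.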
Diffstat (limited to 'generic')
-rw-r--r-- generic/tclParse.c 18
1 file changed, 15 insertions, 3 deletions
diff --git a/generic/tclParse.c b/generic/tclParse.c
index cfd6337..0b9b14f 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -912,7 +912,7 @@ TclParseBackslash(
count += ParseHex(p+1, numBytes-2, &result);
if (count == 2) {
/*
- * No hexadigits -> This is just "x".
+ * No hexdigits -> This is just "x".
*/
result = 'x';
@@ -927,9 +927,21 @@ TclParseBackslash(
count += ParseHex(p+1, (numBytes > 5) ? 4 : numBytes-2, &result);
if (count == 2) {
/*
- * No hexadigits -> This is just "u".
+ * No hexdigits -> This is just "u".
*/
result = 'u';
+#if TCL_UTF_MAX > 3
+ } else if (((result & 0xDC00) == 0xD800) && (count == 6)
+ && (p[5] == '\\') && (p[6] == 'u') && (numBytes >= 10)) {
+ /* If high surrogate is immediately followed by a low surrogate
+ * escape, combine them into one character. */
+ int low;
+ int count2 = ParseHex(p+7, 4, &low);
+ if ((count2 == 4) && ((low & 0xDC00) == 0xDC00)) {
+ result = ((result & 0x3FF)<<10 | (low & 0x3FF)) + 0x10000;
+ count += count2 + 2;
+ }
+#endif
}
break;
#if TCL_UTF_MAX > 3
@@ -937,7 +949,7 @@ TclParseBackslash(
count += ParseHex(p+1, (numBytes > 9) ? 8 : numBytes-2, &result);
if (count == 2) {
/*
- * No hexadigits -> This is just "U".
+ * No hexdigits -> This is just "U".
*/
result = 'U';
} else if ((result | 0x7FF) == 0xDFFF) {