summaryrefslogtreecommitdiffstats
path: root/generic
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2019-09-16 21:18:21 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2019-09-16 21:18:21 (GMT)
commitcaa904131ac249bfd2991302520766b895bcf9a7 (patch)
treefa1338bf88c332d4e21274f0223d7a9e3c4557c4 /generic
parent746ce9535f6108a5a56579ec0b81cbde9a055b9a (diff)
downloadtcl-caa904131ac249bfd2991302520766b895bcf9a7.zip
tcl-caa904131ac249bfd2991302520766b895bcf9a7.tar.gz
tcl-caa904131ac249bfd2991302520766b895bcf9a7.tar.bz2
Bugfix in Tcl_UtfPrev/Tcl_UtfNext: When handling 4-byte UTF-8 byte sequences, these functions should be able to move back/forward 4 bytes if TCL_UTF_MAX <= 4. Update comment accordingly.
Bugfix in Tcl_UtfFindFirst/Tcl_UtfFindLast: These functions should be able to find both the high surrogate (if asked for) and the full character (the combination of both surrogates).
Diffstat (limited to 'generic')
-rw-r--r--generic/tclUtf.c14
1 files changed, 7 insertions, 7 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 0a275d7..9c2ef03 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -275,7 +275,7 @@ Tcl_UniCharToUtfDString(
* Tcl_UtfCharComplete() before calling this routine to ensure that
* enough bytes remain in the string.
*
- * If TCL_UTF_MAX == 4, special handling of Surrogate pairs is done:
+ * If TCL_UTF_MAX <= 4, special handling of Surrogate pairs is done:
* For any UTF-8 string containing a character outside of the BMP, the
* first call to this function will fill *chPtr with the high surrogate
* and generate a return value of 0. Calling Tcl_UtfToUniChar again
@@ -584,8 +584,8 @@ Tcl_UtfFindFirst(
while (1) {
len = TclUtfToUniChar(src, &find);
fullchar = find;
-#if TCL_UTF_MAX == 4
- if ((ch >= 0xD800) && (len < 3)) {
+#if TCL_UTF_MAX <= 4
+ if ((fullchar != ch) && (find >= 0xD800) && (len < 3)) {
len += TclUtfToUniChar(src + len, &find);
fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000;
}
@@ -632,8 +632,8 @@ Tcl_UtfFindLast(
while (1) {
len = TclUtfToUniChar(src, &find);
fullchar = find;
-#if TCL_UTF_MAX == 4
- if ((ch >= 0xD800) && (len < 3)) {
+#if TCL_UTF_MAX <= 4
+ if ((fullchar != ch) && (find >= 0xD800) && (len < 3)) {
len += TclUtfToUniChar(src + len, &find);
fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000;
}
@@ -675,7 +675,7 @@ Tcl_UtfNext(
Tcl_UniChar ch = 0;
int len = TclUtfToUniChar(src, &ch);
-#if TCL_UTF_MAX == 4
+#if TCL_UTF_MAX <= 4
if ((ch >= 0xD800) && (len < 3)) {
len += TclUtfToUniChar(src + len, &ch);
}
@@ -714,7 +714,7 @@ Tcl_UtfPrev(
int i, byte;
look = --src;
- for (i = 0; i < TCL_UTF_MAX; i++) {
+ for (i = 0; i < 4; i++) {
if (look < start) {
if (src < start) {
src = start;