diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2019-09-16 21:18:21 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2019-09-16 21:18:21 (GMT) |
commit | caa904131ac249bfd2991302520766b895bcf9a7 (patch) | |
tree | fa1338bf88c332d4e21274f0223d7a9e3c4557c4 /generic | |
parent | 746ce9535f6108a5a56579ec0b81cbde9a055b9a (diff) | |
download | tcl-caa904131ac249bfd2991302520766b895bcf9a7.zip tcl-caa904131ac249bfd2991302520766b895bcf9a7.tar.gz tcl-caa904131ac249bfd2991302520766b895bcf9a7.tar.bz2 |
Bugfix in Tcl_UtfPrev/Tcl_UtfNext: When handling 4-byte UTF-8 byte sequences, those should be able to move back/forward 4 bytes if TCL_UTF_MAX <= 4. Update comment accordingly.
Bugfix in Tcl_UtfFindFirst/Tcl_UtfFindLast: These functions should be able to find both the high surrogate (if asked for) and the full character (the combination of both surrogates).
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tclUtf.c | 14 |
1 files changed, 7 insertions, 7 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 0a275d7..9c2ef03 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -275,7 +275,7 @@ Tcl_UniCharToUtfDString( * Tcl_UtfCharComplete() before calling this routine to ensure that * enough bytes remain in the string. * - * If TCL_UTF_MAX == 4, special handling of Surrogate pairs is done: + * If TCL_UTF_MAX <= 4, special handling of Surrogate pairs is done: * For any UTF-8 string containing a character outside of the BMP, the * first call to this function will fill *chPtr with the high surrogate * and generate a return value of 0. Calling Tcl_UtfToUniChar again @@ -584,8 +584,8 @@ Tcl_UtfFindFirst( while (1) { len = TclUtfToUniChar(src, &find); fullchar = find; -#if TCL_UTF_MAX == 4 - if ((ch >= 0xD800) && (len < 3)) { +#if TCL_UTF_MAX <= 4 + if ((fullchar != ch) && (find >= 0xD800) && (len < 3)) { len += TclUtfToUniChar(src + len, &find); fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000; } @@ -632,8 +632,8 @@ Tcl_UtfFindLast( while (1) { len = TclUtfToUniChar(src, &find); fullchar = find; -#if TCL_UTF_MAX == 4 - if ((ch >= 0xD800) && (len < 3)) { +#if TCL_UTF_MAX <= 4 + if ((fullchar != ch) && (find >= 0xD800) && (len < 3)) { len += TclUtfToUniChar(src + len, &find); fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000; } @@ -675,7 +675,7 @@ Tcl_UtfNext( Tcl_UniChar ch = 0; int len = TclUtfToUniChar(src, &ch); -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX <= 4 if ((ch >= 0xD800) && (len < 3)) { len += TclUtfToUniChar(src + len, &ch); } @@ -714,7 +714,7 @@ Tcl_UtfPrev( int i, byte; look = --src; - for (i = 0; i < TCL_UTF_MAX; i++) { + for (i = 0; i < 4; i++) { if (look < start) { if (src < start) { src = start; |