summary | refs | log | tree | commit | diff | stats
path: root/generic
diff options
context:
space:
mode:
author dgp <dgp@users.sourceforge.net> 2020-04-25 14:37:45 (GMT)
committer dgp <dgp@users.sourceforge.net> 2020-04-25 14:37:45 (GMT)
commit 2a1570025d97969b9bf6de7f1ea9ba6b06758723 (patch)
tree 0b532bcc3bc4013506fe7722689525f2e2482abe /generic
parent e90c15381f0d9deab3c49a3c17e9cdc3d3e9d26c (diff)
parent e598b0d32c0f6d2ba5356cbb58d8dcc56c1b2772 (diff)
download tcl-2a1570025d97969b9bf6de7f1ea9ba6b06758723.zip
tcl-2a1570025d97969b9bf6de7f1ea9ba6b06758723.tar.gz
tcl-2a1570025d97969b9bf6de7f1ea9ba6b06758723.tar.bz2
merge 8.6
Diffstat (limited to 'generic')
-rw-r--r-- generic/tclUtf.c 31
1 files changed, 26 insertions, 5 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 53d51e5..4f2a3a6 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -579,7 +579,7 @@ Tcl_NumUtfChars(
int length) /* The length of the string in bytes, or -1
* for strlen(string). */
{
- Tcl_UniChar ch;
+ Tcl_UniChar ch = 0;
register int i = 0;
/*
@@ -590,20 +590,33 @@ Tcl_NumUtfChars(
*/
if (length < 0) {
- while ((*src != '\0') && (i < INT_MAX)) {
+ while (*src != '\0') {
src += TclUtfToUniChar(src, &ch);
i++;
}
+ if (i < 0) i = INT_MAX; /* Bug [2738427] */
} else {
register const char *endPtr = src + length - TCL_UTF_MAX;
while (src < endPtr) {
- src += TclUtfToUniChar(src, &ch);
+ if (((unsigned)(unsigned char)*src - 0xF0) < 5) {
+ /* treat F0 - F4 as single character */
+ ch = 0;
+ src++;
+ } else {
+ src += TclUtfToUniChar(src, &ch);
+ }
i++;
}
endPtr += TCL_UTF_MAX;
while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
- src += TclUtfToUniChar(src, &ch);
+ if (((unsigned)(unsigned char)*src - 0xF0) < 5) {
+ /* treat F0 - F4 as single character */
+ ch = 0;
+ src++;
+ } else {
+ src += TclUtfToUniChar(src, &ch);
+ }
i++;
}
if (src < endPtr) {
@@ -931,11 +944,19 @@ Tcl_UtfAtIndex(
register const char *src, /* The UTF-8 string. */
register int index) /* The position of the desired character. */
{
- Tcl_UniChar ch;
+ Tcl_UniChar ch = 0;
+ int len = 0;
while (index-- > 0) {
+ len = TclUtfToUniChar(src, &ch);
+ src += len;
+ }
+#if TCL_UTF_MAX == 4
+ if ((ch >= 0xD800) && (len < 3)) {
+ /* Index points at character following high Surrogate */
src += TclUtfToUniChar(src, &ch);
}
+#endif
return src;
}