summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--doc/ToUpper.310
-rw-r--r--doc/Utf.315
-rw-r--r--doc/string.n68
-rw-r--r--generic/regc_locale.c2
-rw-r--r--generic/tcl.decls20
-rw-r--r--generic/tclCmdIL.c4
-rw-r--r--generic/tclCmdMZ.c173
-rw-r--r--generic/tclCompExpr.c8
-rw-r--r--generic/tclDecls.h56
-rw-r--r--generic/tclEncoding.c4
-rw-r--r--generic/tclExecute.c2
-rw-r--r--generic/tclIndexObj.c2
-rw-r--r--generic/tclInt.h17
-rw-r--r--generic/tclParse.c8
-rw-r--r--generic/tclStringObj.c6
-rw-r--r--generic/tclStubInit.c36
-rw-r--r--generic/tclTest.c5
-rw-r--r--generic/tclUtf.c128
-rw-r--r--generic/tclUtil.c14
-rw-r--r--tests/safe-stock86.test117
-rw-r--r--tests/string.test106
-rw-r--r--tests/utf.test150
22 files changed, 605 insertions, 346 deletions
diff --git a/doc/ToUpper.3 b/doc/ToUpper.3
index fd9ddfb..5456538 100644
--- a/doc/ToUpper.3
+++ b/doc/ToUpper.3
@@ -8,7 +8,7 @@
.so man.macros
.BS
.SH NAME
-Tcl_UniCharToUpper, Tcl_UniCharToLower, Tcl_UniCharToTitle, Tcl_UtfToUpper, Tcl_UtfToLower, Tcl_UtfToTitle \- routines for manipulating the case of Unicode characters and UTF-8 strings
+Tcl_UniCharToUpper, Tcl_UniCharToLower, Tcl_UniCharFold, Tcl_UniCharToTitle, Tcl_UtfToUpper, Tcl_UtfToLower, Tcl_UtfToTitle \- routines for manipulating the case of Unicode characters and UTF-8 strings
.SH SYNOPSIS
.nf
\fB#include <tcl.h>\fR
@@ -17,6 +17,9 @@ int
\fBTcl_UniCharToUpper\fR(\fIch\fR)
.sp
int
+\fBTcl_UniCharFold\fR(\fIch\fR)
+.sp
+int
\fBTcl_UniCharToLower\fR(\fIch\fR)
.sp
int
@@ -52,6 +55,11 @@ If \fIch\fR represents an upper-case character,
character. If no lower-case character is defined, it returns the
character unchanged.
.PP
+If \fIch\fR represents an upper-case or lower-case character,
+\fBTcl_UniCharFold\fR returns the corresponding folded
+character. If no upper-case or lower-case character is defined, it returns the
+character unchanged.
+.PP
If \fIch\fR represents a lower-case character,
\fBTcl_UniCharToTitle\fR returns the corresponding title-case
character. If no title-case character is defined, it returns the
diff --git a/doc/Utf.3 b/doc/Utf.3
index 263d4dd..6ebf57d 100644
--- a/doc/Utf.3
+++ b/doc/Utf.3
@@ -233,10 +233,10 @@ characters.
.PP
\fBTcl_UtfCharComplete\fR returns 1 if the source UTF-8 string \fIsrc\fR
of \fIlength\fR bytes is long enough to be decoded by
-\fBTcl_UtfToUniChar\fR, or 0 otherwise. This function does not guarantee
-that the UTF-8 string is properly formed. This routine is used by
-procedures that are operating on a byte at a time and need to know if a
-full Unicode character has been seen.
+\fBTcl_UtfToUniChar\fR/\fBTcl_UtfNext\fR, or 0 otherwise. This function
+does not guarantee that the UTF-8 string is properly formed. This routine
+is used by procedures that are operating on a byte at a time and need to
+know if a full Unicode character has been seen.
.PP
\fBTcl_NumUtfChars\fR corresponds to \fBstrlen\fR for UTF-8 strings. It
returns the number of Tcl_UniChars that are represented by the UTF-8 string
@@ -257,7 +257,8 @@ Given \fIsrc\fR, a pointer to some location in a UTF-8 string,
\fBTcl_UtfNext\fR returns a pointer to the next UTF-8 character in the
string. The caller must not ask for the next character after the last
character in the string if the string is not terminated by a null
-character.
+character. \fBTcl_UtfCharComplete\fR can be used in that case to
+make sure enough bytes are available before calling \fBTcl_UtfNext\fR.
.PP
\fBTcl_UtfPrev\fR is used to step backward through but not beyond the
UTF-8 string that begins at \fIstart\fR. If the UTF-8 string is made
@@ -274,12 +275,12 @@ always a pointer to a location in the string. It always returns a pointer to
a byte that begins a character when scanning for characters beginning
from \fIstart\fR. When \fIsrc\fR is greater than \fIstart\fR, it
always returns a pointer less than \fIsrc\fR and greater than or
-equal to (\fIsrc\fR - \fBTCL_UTF_MAX\fR). The character that begins
+equal to (\fIsrc\fR - \fB4\fR). The character that begins
at the returned pointer is the first one that either includes the
byte \fIsrc[-1]\fR, or might include it if the right trail bytes are
present at \fIsrc\fR and greater. \fBTcl_UtfPrev\fR never reads the
byte \fIsrc[0]\fR nor the byte \fIstart[-1]\fR nor the byte
-\fIsrc[-\fBTCL_UTF_MAX\fI-1]\fR.
+\fIsrc[-\fB5\fI]\fR.
.PP
\fBTcl_UniCharAtIndex\fR corresponds to a C string array dereference or the
Pascal Ord() function. It returns the Unicode character represented at the
diff --git a/doc/string.n b/doc/string.n
index 7cd53ca..f9cc373 100644
--- a/doc/string.n
+++ b/doc/string.n
@@ -33,6 +33,20 @@ and is more efficient than building a list of arguments and using
\fBjoin\fR with an empty join string.
.RE
.TP
+\fBstring charend \fIstring charIndex\fR
+.
+Returns the index of the next character just after position \fIcharIndex\fR
+in the \fIstring\fR. \fIcharIndex\fR may be specified using the forms in
+\fBSTRING INDICES\fR. If position \fIcharIndex\fR of \fIstring\fR holds a
+character > U+FFFF, the returned index will be 2 higher than \fIcharIndex\fR.
+.TP
+\fBstring charstart \fIstring charIndex\fR
+.
+Returns the index of the character containing \fIcharIndex\fR in the \fIstring\fR.
+\fIcharIndex\fR may be specified using the forms in \fBSTRING INDICES\fR.
+Normally this will return \fIcharIndex\fR, except if position \fIcharIndex\fR-1
+holds a chraracter > U+FFFF: In that case the returned index will be one higher.
+.TP
\fBstring compare\fR ?\fB\-nocase\fR? ?\fB\-length\fI length\fR? \fIstring1 string2\fR
.
Perform a character-by-character comparison of strings \fIstring1\fR
@@ -223,6 +237,24 @@ number of bytes used to store the string. If the value is a
byte array value (such as those returned from reading a binary encoded
channel), then this will return the actual byte length of the value.
.TP
+\fBstring lineend \fIstring charIndex\fR
+.
+Returns the index of the character just after the last one in the word
+containing character \fIcharIndex\fR of \fIstring\fR. \fIcharIndex\fR
+may be specified using the forms in \fBSTRING INDICES\fR. A word is
+considered to be any contiguous range of alphanumeric (Unicode letters
+or decimal digits) or underscore (Unicode connector punctuation)
+characters, or any single character other than these.
+.TP
+\fBstring linestart \fIstring charIndex\fR
+.
+Returns the index of the first character in the word containing character
+\fIcharIndex\fR of \fIstring\fR. \fIcharIndex\fR may be specified using the
+forms in \fBSTRING INDICES\fR. A word is considered to be any contiguous
+range of alphanumeric (Unicode letters or decimal digits) or underscore
+(Unicode connector punctuation) characters, or any single character other than
+these.
+.TP
\fBstring map\fR ?\fB\-nocase\fR? \fImapping string\fR
.
Replaces substrings in \fIstring\fR based on the key-value pairs in
@@ -371,6 +403,24 @@ characters present in the string given by \fIchars\fR are removed. If
\fIchars\fR is not specified then white space is removed (any character
for which \fBstring is space\fR returns 1, and "\e0").
.TP
+\fBstring wordend \fIstring charIndex\fR
+.
+Returns the index of the character just after the last one in the word
+containing character \fIcharIndex\fR of \fIstring\fR. \fIcharIndex\fR
+may be specified using the forms in \fBSTRING INDICES\fR. A word is
+considered to be any contiguous range of alphanumeric (Unicode letters
+or decimal digits) or underscore (Unicode connector punctuation)
+characters, or any single character other than these.
+.TP
+\fBstring wordstart \fIstring charIndex\fR
+.
+Returns the index of the first character in the word containing character
+\fIcharIndex\fR of \fIstring\fR. \fIcharIndex\fR may be specified using the
+forms in \fBSTRING INDICES\fR. A word is considered to be any contiguous
+range of alphanumeric (Unicode letters or decimal digits) or underscore
+(Unicode connector punctuation) characters, or any single character other than
+these.
+.TP
\fBstring trimright \fIstring\fR ?\fIchars\fR?
.
Returns a value equal to \fIstring\fR except that any trailing
@@ -422,24 +472,6 @@ encoding and then apply \fBstring length\fR to that.
\fBstring length\fR [encoding convertto utf-8 $theString]
.CE
.RE
-.TP
-\fBstring wordend \fIstring charIndex\fR
-.
-Returns the index of the character just after the last one in the word
-containing character \fIcharIndex\fR of \fIstring\fR. \fIcharIndex\fR
-may be specified using the forms in \fBSTRING INDICES\fR. A word is
-considered to be any contiguous range of alphanumeric (Unicode letters
-or decimal digits) or underscore (Unicode connector punctuation)
-characters, or any single character other than these.
-.TP
-\fBstring wordstart \fIstring charIndex\fR
-.
-Returns the index of the first character in the word containing character
-\fIcharIndex\fR of \fIstring\fR. \fIcharIndex\fR may be specified using the
-forms in \fBSTRING INDICES\fR. A word is considered to be any contiguous
-range of alphanumeric (Unicode letters or decimal digits) or underscore
-(Unicode connector punctuation) characters, or any single character other than
-these.
.SH "STRING INDICES"
.PP
When referring to indices into a string (e.g., for \fBstring index\fR
diff --git a/generic/regc_locale.c b/generic/regc_locale.c
index c90dd64..cc4681b 100644
--- a/generic/regc_locale.c
+++ b/generic/regc_locale.c
@@ -1269,7 +1269,7 @@ casecmp(
size_t len) /* exact length of comparison */
{
for (; len > 0; len--, x++, y++) {
- if ((*x!=*y) && (Tcl_UniCharToLower(*x) != Tcl_UniCharToLower(*y))) {
+ if ((*x!=*y) && (Tcl_UniCharFold(*x) != Tcl_UniCharFold(*y))) {
return 1;
}
}
diff --git a/generic/tcl.decls b/generic/tcl.decls
index e49ed66..3785558 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -1163,7 +1163,7 @@ declare 325 {
const char *Tcl_UtfAtIndex(const char *src, int index)
}
declare 326 {
- int Tcl_UtfCharComplete(const char *src, int length)
+ int TclUtfCharComplete(const char *src, int length)
}
declare 327 {
int Tcl_UtfBackslash(const char *src, int *readPtr, char *dst)
@@ -1175,10 +1175,10 @@ declare 329 {
const char *Tcl_UtfFindLast(const char *src, int ch)
}
declare 330 {
- const char *Tcl_UtfNext(const char *src)
+ const char *TclUtfNext(const char *src)
}
declare 331 {
- const char *Tcl_UtfPrev(const char *src, const char *start)
+ const char *TclUtfPrev(const char *src, const char *start)
}
declare 332 {
int Tcl_UtfToExternal(Tcl_Interp *interp, Tcl_Encoding encoding,
@@ -2402,6 +2402,20 @@ declare 648 {
int length, Tcl_DString *dsPtr)
}
+# TIP #575
+declare 649 {
+ int Tcl_UtfCharComplete(const char *src, int length)
+}
+declare 650 {
+ const char *Tcl_UtfNext(const char *src)
+}
+declare 651 {
+ const char *Tcl_UtfPrev(const char *src, const char *start)
+}
+declare 652 {
+ int Tcl_UniCharFold(int ch)
+}
+
# ----- BASELINE -- FOR -- 8.7.0 ----- #
##############################################################################
diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c
index da8dc65..cf900a3 100644
--- a/generic/tclCmdIL.c
+++ b/generic/tclCmdIL.c
@@ -4782,8 +4782,8 @@ DictionaryCompare(
* other interesting punctuations occur).
*/
- uniLeftLower = Tcl_UniCharToLower(uniLeft);
- uniRightLower = Tcl_UniCharToLower(uniRight);
+ uniLeftLower = Tcl_UniCharFold(uniLeft);
+ uniRightLower = Tcl_UniCharFold(uniRight);
} else {
diff = UCHAR(*left) - UCHAR(*right);
break;
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index c47490a..cf4240a 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -629,10 +629,10 @@ Tcl_RegsubObjCmd(
wlen = 0;
}
} else {
- wsrclc = Tcl_UniCharToLower(*wsrc);
+ wsrclc = Tcl_UniCharFold(*wsrc);
for (p = wfirstChar = wstring; wstring < wend; wstring++) {
if ((*wstring == *wsrc ||
- (nocase && Tcl_UniCharToLower(*wstring)==wsrclc)) &&
+ (nocase && Tcl_UniCharFold(*wstring)==wsrclc)) &&
(slen==1 || (strCmpFn(wstring, wsrc,
(unsigned long) slen) == 0))) {
if (numMatches == 0) {
@@ -2096,10 +2096,10 @@ StringMapCmd(
ustring1 = end;
} else {
mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen);
- u2lc = (nocase ? Tcl_UniCharToLower(*ustring2) : 0);
+ u2lc = (nocase ? Tcl_UniCharFold(*ustring2) : 0);
for (; ustring1 < end; ustring1++) {
if (((*ustring1 == *ustring2) ||
- (nocase&&Tcl_UniCharToLower(*ustring1)==u2lc)) &&
+ (nocase&&Tcl_UniCharFold(*ustring1)==u2lc)) &&
(length2==1 || strCmpFn(ustring1, ustring2,
(unsigned long) length2) == 0)) {
if (p != ustring1) {
@@ -2134,7 +2134,7 @@ StringMapCmd(
mapStrings[index] = Tcl_GetUnicodeFromObj(mapElemv[index],
mapLens+index);
if (nocase && ((index % 2) == 0)) {
- u2lc[index/2] = Tcl_UniCharToLower(*mapStrings[index]);
+ u2lc[index/2] = Tcl_UniCharFold(*mapStrings[index]);
}
}
for (p = ustring1; ustring1 < end; ustring1++) {
@@ -2146,7 +2146,7 @@ StringMapCmd(
ustring2 = mapStrings[index];
length2 = mapLens[index];
if ((length2 > 0) && ((*ustring1 == *ustring2) || (nocase &&
- (Tcl_UniCharToLower(*ustring1) == u2lc[index/2]))) &&
+ (Tcl_UniCharFold(*ustring1) == u2lc[index/2]))) &&
/* Restrict max compare length. */
(end-ustring1 >= length2) && ((length2 == 1) ||
!strCmpFn(ustring2, ustring1, length2))) {
@@ -2500,8 +2500,8 @@ StringStartCmd(
Tcl_Obj *const objv[]) /* Argument objects. */
{
int ch;
- const char *p, *string;
- int cur, index, length, numChars;
+ const Tcl_UniChar *p, *string;
+ int cur, index, length;
Tcl_Obj *obj;
if (objc != 3) {
@@ -2509,32 +2509,30 @@ StringStartCmd(
return TCL_ERROR;
}
- string = TclGetStringFromObj(objv[1], &length);
- numChars = Tcl_NumUtfChars(string, length);
- if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) {
+ string = Tcl_GetUnicodeFromObj(objv[1], &length);
+ if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) {
return TCL_ERROR;
}
- string = TclGetStringFromObj(objv[1], &length);
- if (index >= numChars) {
- index = numChars - 1;
+ if (index >= length) {
+ index = length - 1;
}
cur = 0;
if (index > 0) {
- p = Tcl_UtfAtIndex(string, index);
+ p = &string[index];
- TclUtfToUCS4(p, &ch);
+ (void)TclUniCharToUCS4(p, &ch);
for (cur = index; cur >= 0; cur--) {
int delta = 0;
- const char *next;
+ const Tcl_UniChar *next;
if (!Tcl_UniCharIsWordChar(ch)) {
break;
}
- next = TclUtfPrev(p, string);
+ next = TclUCS4Prev(p, string);
do {
next += delta;
- delta = TclUtfToUCS4(next, &ch);
+ delta = TclUniCharToUCS4(next, &ch);
} while (next + delta < p);
p = next;
}
@@ -2550,6 +2548,64 @@ StringStartCmd(
/*
*----------------------------------------------------------------------
*
+ * StringCharStartCmd --
+ *
+ * This procedure is invoked to process the "string charstart" Tcl
+ * command. See the user documentation for details on what it does.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+StringCharStartCmd(
+ TCL_UNUSED(ClientData),
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+#if TCL_UTF_MAX <= 3
+ const Tcl_UniChar *src;
+#else
+ const char *src;
+#endif
+ int index, length;
+
+ if (objc != 3) {
+ Tcl_WrongNumArgs(interp, 1, objv, "string index");
+ return TCL_ERROR;
+ }
+
+#if TCL_UTF_MAX <= 3
+ src = Tcl_GetUnicodeFromObj(objv[1], &length);
+#else
+ src = Tcl_GetStringFromObj(objv[1], &length);
+ length = Tcl_NumUtfChars(src, length);
+#endif
+ if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ if (index >= length) {
+ index = length;
+ } else if (index < 0) {
+ index = -1;
+#if TCL_UTF_MAX <= 3
+ } else if ((index > 0) && ((src[index-1] & 0xFC00) == 0xD800) && ((src[index] & 0xFC00) == 0xDC00)) {
+ index--;
+#endif
+ }
+ Tcl_SetObjResult(interp, Tcl_NewWideIntObj(index));
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* StringEndCmd --
*
* This procedure is invoked to process the "string wordend" Tcl command.
@@ -2572,8 +2628,8 @@ StringEndCmd(
Tcl_Obj *const objv[]) /* Argument objects. */
{
int ch;
- const char *p, *end, *string;
- int cur, index, length, numChars;
+ const Tcl_UniChar *p, *end, *string;
+ int cur, index, length;
Tcl_Obj *obj;
if (objc != 3) {
@@ -2581,20 +2637,18 @@ StringEndCmd(
return TCL_ERROR;
}
- string = TclGetStringFromObj(objv[1], &length);
- numChars = Tcl_NumUtfChars(string, length);
- if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) {
+ string = Tcl_GetUnicodeFromObj(objv[1], &length);
+ if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) {
return TCL_ERROR;
}
- string = TclGetStringFromObj(objv[1], &length);
if (index < 0) {
index = 0;
}
- if (index < numChars) {
- p = Tcl_UtfAtIndex(string, index);
+ if (index < length) {
+ p = &string[index];
end = string+length;
for (cur = index; p < end; cur++) {
- p += TclUtfToUCS4(p, &ch);
+ p += TclUniCharToUCS4(p, &ch);
if (!Tcl_UniCharIsWordChar(ch)) {
break;
}
@@ -2603,7 +2657,7 @@ StringEndCmd(
cur++;
}
} else {
- cur = numChars;
+ cur = length;
}
TclNewIntObj(obj, cur);
Tcl_SetObjResult(interp, obj);
@@ -2613,6 +2667,65 @@ StringEndCmd(
/*
*----------------------------------------------------------------------
*
+ * StringCharEndCmd --
+ *
+ * This procedure is invoked to process the "string charend" Tcl command.
+ * See the user documentation for details on what it does.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+StringCharEndCmd(
+ TCL_UNUSED(ClientData),
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+#if TCL_UTF_MAX <= 3
+ const Tcl_UniChar *src;
+#else
+ const char *src;
+#endif
+ int index, length;
+
+ if (objc != 3) {
+ Tcl_WrongNumArgs(interp, 1, objv, "string index");
+ return TCL_ERROR;
+ }
+
+#if TCL_UTF_MAX <= 3
+ src = Tcl_GetUnicodeFromObj(objv[1], &length);
+#else
+ src = Tcl_GetStringFromObj(objv[1], &length);
+ length = Tcl_NumUtfChars(src, length);
+#endif
+ if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ if (++index < 0) {
+ index = 0;
+ }
+ if (index >= length) {
+ index = length;
+#if TCL_UTF_MAX <= 3
+ } else if ((index > 0) && ((src[index-1] & 0xFC00) == 0xD800) && ((src[index] & 0xFC00) == 0xDC00)) {
+ index++;
+#endif
+ }
+ Tcl_SetObjResult(interp, Tcl_NewWideIntObj(index));
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* StringEqualCmd --
*
* This procedure is invoked to process the "string equal" Tcl command.
@@ -3312,6 +3425,8 @@ TclInitStringCmd(
static const EnsembleImplMap stringImplMap[] = {
{"bytelength", StringBytesCmd, TclCompileBasic1ArgCmd, NULL, NULL, 0},
{"cat", StringCatCmd, TclCompileStringCatCmd, NULL, NULL, 0},
+ {"charend", StringCharEndCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0},
+ {"charstart", StringCharStartCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0},
{"compare", StringCmpCmd, TclCompileStringCmpCmd, NULL, NULL, 0},
{"equal", StringEqualCmd, TclCompileStringEqualCmd, NULL, NULL, 0},
{"first", StringFirstCmd, TclCompileStringFirstCmd, NULL, NULL, 0},
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c
index aabd764..f35038f 100644
--- a/generic/tclCompExpr.c
+++ b/generic/tclCompExpr.c
@@ -1895,7 +1895,7 @@ ParseLexeme(
{
const char *end;
int scanned;
- Tcl_UniChar ch = 0;
+ int ch;
Tcl_Obj *literal = NULL;
unsigned char byte;
@@ -2103,13 +2103,13 @@ ParseLexeme(
if (!TclIsBareword(*start) || *start == '_') {
if (Tcl_UtfCharComplete(start, numBytes)) {
- scanned = TclUtfToUniChar(start, &ch);
+ scanned = TclUtfToUCS4(start, &ch);
} else {
- char utfBytes[4];
+ char utfBytes[8];
memcpy(utfBytes, start, numBytes);
utfBytes[numBytes] = '\0';
- scanned = TclUtfToUniChar(utfBytes, &ch);
+ scanned = TclUtfToUCS4(utfBytes, &ch);
}
*lexemePtr = INVALID;
Tcl_DecrRefCount(literal);
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index 9ea6838..31bb2d4 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -999,7 +999,7 @@ EXTERN int Tcl_UniCharToUtf(int ch, char *buf);
/* 325 */
EXTERN const char * Tcl_UtfAtIndex(const char *src, int index);
/* 326 */
-EXTERN int Tcl_UtfCharComplete(const char *src, int length);
+EXTERN int TclUtfCharComplete(const char *src, int length);
/* 327 */
EXTERN int Tcl_UtfBackslash(const char *src, int *readPtr,
char *dst);
@@ -1008,9 +1008,9 @@ EXTERN const char * Tcl_UtfFindFirst(const char *src, int ch);
/* 329 */
EXTERN const char * Tcl_UtfFindLast(const char *src, int ch);
/* 330 */
-EXTERN const char * Tcl_UtfNext(const char *src);
+EXTERN const char * TclUtfNext(const char *src);
/* 331 */
-EXTERN const char * Tcl_UtfPrev(const char *src, const char *start);
+EXTERN const char * TclUtfPrev(const char *src, const char *start);
/* 332 */
EXTERN int Tcl_UtfToExternal(Tcl_Interp *interp,
Tcl_Encoding encoding, const char *src,
@@ -1920,6 +1920,14 @@ EXTERN char * Tcl_UniCharToUtfDString(const int *uniStr,
/* 648 */
EXTERN int * Tcl_UtfToUniCharDString(const char *src, int length,
Tcl_DString *dsPtr);
+/* 649 */
+EXTERN int Tcl_UtfCharComplete(const char *src, int length);
+/* 650 */
+EXTERN const char * Tcl_UtfNext(const char *src);
+/* 651 */
+EXTERN const char * Tcl_UtfPrev(const char *src, const char *start);
+/* 652 */
+EXTERN int Tcl_UniCharFold(int ch);
typedef struct {
const struct TclPlatStubs *tclPlatStubs;
@@ -2281,12 +2289,12 @@ typedef struct TclStubs {
int (*tcl_UniCharToUpper) (int ch); /* 323 */
int (*tcl_UniCharToUtf) (int ch, char *buf); /* 324 */
const char * (*tcl_UtfAtIndex) (const char *src, int index); /* 325 */
- int (*tcl_UtfCharComplete) (const char *src, int length); /* 326 */
+ int (*tclUtfCharComplete) (const char *src, int length); /* 326 */
int (*tcl_UtfBackslash) (const char *src, int *readPtr, char *dst); /* 327 */
const char * (*tcl_UtfFindFirst) (const char *src, int ch); /* 328 */
const char * (*tcl_UtfFindLast) (const char *src, int ch); /* 329 */
- const char * (*tcl_UtfNext) (const char *src); /* 330 */
- const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 331 */
+ const char * (*tclUtfNext) (const char *src); /* 330 */
+ const char * (*tclUtfPrev) (const char *src, const char *start); /* 331 */
int (*tcl_UtfToExternal) (Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); /* 332 */
char * (*tcl_UtfToExternalDString) (Tcl_Encoding encoding, const char *src, int srcLen, Tcl_DString *dsPtr); /* 333 */
int (*tcl_UtfToLower) (char *src); /* 334 */
@@ -2604,6 +2612,10 @@ typedef struct TclStubs {
int (*tcl_UtfToUniChar) (const char *src, int *chPtr); /* 646 */
char * (*tcl_UniCharToUtfDString) (const int *uniStr, int uniLength, Tcl_DString *dsPtr); /* 647 */
int * (*tcl_UtfToUniCharDString) (const char *src, int length, Tcl_DString *dsPtr); /* 648 */
+ int (*tcl_UtfCharComplete) (const char *src, int length); /* 649 */
+ const char * (*tcl_UtfNext) (const char *src); /* 650 */
+ const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 651 */
+ int (*tcl_UniCharFold) (int ch); /* 652 */
} TclStubs;
extern const TclStubs *tclStubsPtr;
@@ -3286,18 +3298,18 @@ extern const TclStubs *tclStubsPtr;
(tclStubsPtr->tcl_UniCharToUtf) /* 324 */
#define Tcl_UtfAtIndex \
(tclStubsPtr->tcl_UtfAtIndex) /* 325 */
-#define Tcl_UtfCharComplete \
- (tclStubsPtr->tcl_UtfCharComplete) /* 326 */
+#define TclUtfCharComplete \
+ (tclStubsPtr->tclUtfCharComplete) /* 326 */
#define Tcl_UtfBackslash \
(tclStubsPtr->tcl_UtfBackslash) /* 327 */
#define Tcl_UtfFindFirst \
(tclStubsPtr->tcl_UtfFindFirst) /* 328 */
#define Tcl_UtfFindLast \
(tclStubsPtr->tcl_UtfFindLast) /* 329 */
-#define Tcl_UtfNext \
- (tclStubsPtr->tcl_UtfNext) /* 330 */
-#define Tcl_UtfPrev \
- (tclStubsPtr->tcl_UtfPrev) /* 331 */
+#define TclUtfNext \
+ (tclStubsPtr->tclUtfNext) /* 330 */
+#define TclUtfPrev \
+ (tclStubsPtr->tclUtfPrev) /* 331 */
#define Tcl_UtfToExternal \
(tclStubsPtr->tcl_UtfToExternal) /* 332 */
#define Tcl_UtfToExternalDString \
@@ -3932,6 +3944,14 @@ extern const TclStubs *tclStubsPtr;
(tclStubsPtr->tcl_UniCharToUtfDString) /* 647 */
#define Tcl_UtfToUniCharDString \
(tclStubsPtr->tcl_UtfToUniCharDString) /* 648 */
+#define Tcl_UtfCharComplete \
+ (tclStubsPtr->tcl_UtfCharComplete) /* 649 */
+#define Tcl_UtfNext \
+ (tclStubsPtr->tcl_UtfNext) /* 650 */
+#define Tcl_UtfPrev \
+ (tclStubsPtr->tcl_UtfPrev) /* 651 */
+#define Tcl_UniCharFold \
+ (tclStubsPtr->tcl_UniCharFold) /* 652 */
#endif /* defined(USE_TCL_STUBS) */
@@ -4177,10 +4197,16 @@ extern const TclStubs *tclStubsPtr;
#define Tcl_Close(interp, chan) Tcl_CloseEx(interp, chan, 0)
#endif
-#if defined(USE_TCL_STUBS) && (TCL_UTF_MAX > 3)
+#undef TclUtfCharComplete
+#undef TclUtfNext
+#undef TclUtfPrev
+#if defined(USE_TCL_STUBS) && (TCL_UTF_MAX < 4) && !defined(TCL_NO_DEPRECATED)
# undef Tcl_UtfCharComplete
-# define Tcl_UtfCharComplete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \
- ? ((length) >= 4) : tclStubsPtr->tcl_UtfCharComplete((src), (length)))
+# undef Tcl_UtfNext
+# undef Tcl_UtfPrev
+# define Tcl_UtfCharComplete (tclStubsPtr->tclUtfCharComplete)
+# define Tcl_UtfNext (tclStubsPtr->tclUtfNext)
+# define Tcl_UtfPrev (tclStubsPtr->tclUtfPrev)
#endif
#define Tcl_CreateSlave Tcl_CreateChild
#define Tcl_GetSlave Tcl_GetChild
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 48ab3cf..9718f37 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2323,7 +2323,7 @@ UtfToUtfProc(
dstEnd = dst + dstLen - TCL_UTF_MAX;
for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
- if ((src > srcClose) && (!TclUCS4Complete(src, srcEnd - src))) {
+ if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
/*
* If there is more string to follow, this will ensure that the
* last UTF-8 character in the source buffer hasn't been cut off.
@@ -2353,7 +2353,7 @@ UtfToUtfProc(
*dst++ = 0;
*chPtr = 0; /* reset surrogate handling */
src += 2;
- } else if (!TclUCS4Complete(src, srcEnd - src)) {
+ } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
/*
* Always check before using TclUtfToUCS4. Not doing can so
* cause it run beyond the end of the buffer! If we happen such an
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index 54c147d..d1d7037 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -5323,7 +5323,7 @@ TEBCresume(
} else if (TclIsPureByteArray(valuePtr)) {
objResultPtr = Tcl_NewByteArrayObj(
Tcl_GetByteArrayFromObj(valuePtr, NULL)+index, 1);
- } else if (valuePtr->bytes && length == valuePtr->length) {
+ } else if (valuePtr->bytes && length == valuePtr->length && !(valuePtr->bytes[index] & 0x80)) {
objResultPtr = Tcl_NewStringObj((const char *)
valuePtr->bytes+index, 1);
} else {
diff --git a/generic/tclIndexObj.c b/generic/tclIndexObj.c
index 6ae2075..dc1fe24 100644
--- a/generic/tclIndexObj.c
+++ b/generic/tclIndexObj.c
@@ -785,7 +785,7 @@ PrefixLongestObjCmd(
* Adjust in case we stopped in the middle of a UTF char.
*/
- resultLength = TclUtfPrev(&resultString[i+1],
+ resultLength = Tcl_UtfPrev(&resultString[i+1],
resultString) - resultString;
break;
}
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 9dde88b..71459d1 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3252,15 +3252,11 @@ MODULE_SCOPE int TclUtfCount(int ch);
#if TCL_UTF_MAX > 3
# define TclUtfToUCS4 Tcl_UtfToUniChar
# define TclUniCharToUCS4(src, ptr) (*ptr = *(src),1)
-# define TclUCS4Complete Tcl_UtfCharComplete
-# define TclChar16Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \
- ? ((length) >= 3) : Tcl_UtfCharComplete((src), (length)))
+# define TclUCS4Prev(src, ptr) (((src) > (ptr)) ? ((src) - 1) : (src))
#else
- MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr);
- MODULE_SCOPE int TclUniCharToUCS4(const Tcl_UniChar *src, int *ucs4Ptr);
-# define TclUCS4Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \
- ? ((length) >= 4) : Tcl_UtfCharComplete((src), (length)))
-# define TclChar16Complete Tcl_UtfCharComplete
+ MODULE_SCOPE int TclUtfToUCS4(const char *, int *);
+ MODULE_SCOPE int TclUniCharToUCS4(const Tcl_UniChar *, int *);
+ MODULE_SCOPE const Tcl_UniChar *TclUCS4Prev(const Tcl_UniChar *, const Tcl_UniChar *);
#endif
MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(ClientData clientData);
MODULE_SCOPE Tcl_Obj * TclpFilesystemPathType(Tcl_Obj *pathPtr);
@@ -4700,11 +4696,6 @@ MODULE_SCOPE const TclFileAttrProcs tclpFileAttrProcs[];
(numChars) = _count; \
} while (0);
-#define TclUtfPrev(src, start) \
- (((src) < (start) + 2) ? (start) : \
- ((unsigned char) *((src) - 1)) < 0x80 ? (src) - 1 : \
- Tcl_UtfPrev(src, start))
-
/*
*----------------------------------------------------------------
* Macro that encapsulates the logic that determines when it is safe to
diff --git a/generic/tclParse.c b/generic/tclParse.c
index daad31d..d56a41d 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -789,7 +789,7 @@ TclParseBackslash(
* written. At most 4 bytes will be written there. */
{
const char *p = src+1;
- Tcl_UniChar unichar = 0;
+ int unichar;
int result;
int count;
char buf[4] = "";
@@ -936,13 +936,13 @@ TclParseBackslash(
*/
if (Tcl_UtfCharComplete(p, numBytes - 1)) {
- count = TclUtfToUniChar(p, &unichar) + 1; /* +1 for '\' */
+ count = TclUtfToUCS4(p, &unichar) + 1; /* +1 for '\' */
} else {
- char utfBytes[4];
+ char utfBytes[8];
memcpy(utfBytes, p, numBytes - 1);
utfBytes[numBytes - 1] = '\0';
- count = TclUtfToUniChar(utfBytes, &unichar) + 1;
+ count = TclUtfToUCS4(utfBytes, &unichar) + 1;
}
result = unichar;
break;
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 03aceaf..522a740 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -1171,10 +1171,10 @@ Tcl_AppendLimitedToObj(
}
eLen = strlen(ellipsis);
while (eLen > limit) {
- eLen = TclUtfPrev(ellipsis+eLen, ellipsis) - ellipsis;
+ eLen = Tcl_UtfPrev(ellipsis+eLen, ellipsis) - ellipsis;
}
- toCopy = TclUtfPrev(bytes+limit+1-eLen, bytes) - bytes;
+ toCopy = Tcl_UtfPrev(bytes+limit+1-eLen, bytes) - bytes;
}
/*
@@ -2616,7 +2616,7 @@ AppendPrintfToObjVA(
* multi-byte characters.
*/
- q = TclUtfPrev(end, bytes);
+ q = Tcl_UtfPrev(end, bytes);
if (!Tcl_UtfCharComplete(q, (int)(end - q))) {
end = q;
}
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index 36cb9b5..903ad10 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -88,6 +88,32 @@ static void uniCodePanic(void) {
# define Tcl_UniCharNcmp (int(*)(const Tcl_UniChar *, const Tcl_UniChar *, unsigned long))(void *)uniCodePanic
#endif
+#define TclUtfCharComplete UtfCharComplete
+#define TclUtfNext UtfNext
+#define TclUtfPrev UtfPrev
+
+static int TclUtfCharComplete(const char *src, int length) {
+ if ((unsigned)((unsigned char)*(src) - 0xF0) < 5) {
+ return length < 3;
+ }
+ return Tcl_UtfCharComplete(src, length);
+}
+
+static const char *TclUtfNext(const char *src) {
+ if ((unsigned)((unsigned char)*(src) - 0xF0) < 5) {
+ return src + 1;
+ }
+ return Tcl_UtfNext(src);
+}
+
+static const char *TclUtfPrev(const char *src, const char *start) {
+ if ((src >= start + 3) && ((src[-1] & 0xC0) == 0x80)
+ && ((src[-2] & 0xC0) == 0x80) && ((src[-3] & 0xC0) == 0x80)) {
+ return src - 3;
+ }
+ return Tcl_UtfPrev(src, start);
+}
+
#define TclBN_mp_add mp_add
#define TclBN_mp_and mp_and
#define TclBN_mp_clamp mp_clamp
@@ -1549,12 +1575,12 @@ const TclStubs tclStubs = {
Tcl_UniCharToUpper, /* 323 */
Tcl_UniCharToUtf, /* 324 */
Tcl_UtfAtIndex, /* 325 */
- Tcl_UtfCharComplete, /* 326 */
+ TclUtfCharComplete, /* 326 */
Tcl_UtfBackslash, /* 327 */
Tcl_UtfFindFirst, /* 328 */
Tcl_UtfFindLast, /* 329 */
- Tcl_UtfNext, /* 330 */
- Tcl_UtfPrev, /* 331 */
+ TclUtfNext, /* 330 */
+ TclUtfPrev, /* 331 */
Tcl_UtfToExternal, /* 332 */
Tcl_UtfToExternalDString, /* 333 */
Tcl_UtfToLower, /* 334 */
@@ -1872,6 +1898,10 @@ const TclStubs tclStubs = {
Tcl_UtfToUniChar, /* 646 */
Tcl_UniCharToUtfDString, /* 647 */
Tcl_UtfToUniCharDString, /* 648 */
+ Tcl_UtfCharComplete, /* 649 */
+ Tcl_UtfNext, /* 650 */
+ Tcl_UtfPrev, /* 651 */
+ Tcl_UniCharFold, /* 652 */
};
/* !END!: Do not edit above this line. */
diff --git a/generic/tclTest.c b/generic/tclTest.c
index a8ca463..0c14e8f 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -19,6 +19,9 @@
#ifndef USE_TCL_STUBS
# define USE_TCL_STUBS
#endif
+#ifndef TCL_NO_DEPRECATED
+# define TCL_NO_DEPRECATED
+#endif
#include "tclInt.h"
#ifdef TCL_WITH_EXTERNAL_TOMMATH
# include "tommath.h"
@@ -6960,7 +6963,7 @@ TestUtfPrevCmd(
} else {
offset = numBytes;
}
- result = TclUtfPrev(bytes + offset, bytes);
+ result = Tcl_UtfPrev(bytes + offset, bytes);
Tcl_SetObjResult(interp, Tcl_NewIntObj(result - bytes));
return TCL_OK;
}
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 11bde5c..807e087 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -64,20 +64,12 @@ static const unsigned char totalBytes[256] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-/* Tcl_UtfCharComplete() might point to 2nd byte of valid 4-byte sequence */
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
-/* End of "continuation byte section" */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
-#if TCL_UTF_MAX > 3
- 4,4,4,4,4,
-#else
- 1,1,1,1,1,
-#endif
- 1,1,1,1,1,1,1,1,1,1,1
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1
};
-
+
static const unsigned char complete[256] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@@ -88,15 +80,9 @@ static const unsigned char complete[256] = {
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/* End of "continuation byte section" */
2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
-#if TCL_UTF_MAX > 3
- 4,4,4,4,4,
-#else
- 3,3,3,3,3,
-#endif
- 1,1,1,1,1,1,1,1,1,1,1
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1
};
-
+
/*
* Functions used only in this module.
*/
@@ -694,7 +680,7 @@ Tcl_UtfToUniCharDString(
p += TclUtfToUCS4(p, &ch);
*w++ = ch;
}
- while ((p < endPtr) && TclUCS4Complete(p, endPtr-p)) {
+ while ((p < endPtr) && Tcl_UtfCharComplete(p, endPtr-p)) {
p += TclUtfToUCS4(p, &ch);
*w++ = ch;
}
@@ -752,7 +738,7 @@ Tcl_UtfToChar16DString(
*w++ = ch;
}
while (p < endPtr) {
- if (TclChar16Complete(p, endPtr-p)) {
+ if (Tcl_UtfCharComplete(p, endPtr-p)) {
p += Tcl_UtfToChar16(p, &ch);
*w++ = ch;
} else {
@@ -833,7 +819,7 @@ Tcl_NumUtfChars(
/* Pointer to the end of string. Never read endPtr[0] */
const char *endPtr = src + length;
/* Pointer to last byte where optimization still can be used */
- const char *optPtr = endPtr - TCL_UTF_MAX;
+ const char *optPtr = endPtr - 4;
/*
* Optimize away the call in this loop. Justified because...
@@ -1064,7 +1050,7 @@ Tcl_UtfPrev(
* it (the fallback) is correct.
*/
- || (trailBytesSeen >= complete[byte])) {
+ || (trailBytesSeen >= totalBytes[byte])) {
/*
* That is, (1 + trailBytesSeen > needed).
* We've examined more bytes than needed to complete
@@ -1105,19 +1091,14 @@ Tcl_UtfPrev(
/* Continue the search backwards... */
look--;
- } while (trailBytesSeen < TCL_UTF_MAX);
+ } while (trailBytesSeen < 4);
/*
- * We've seen TCL_UTF_MAX trail bytes, so we know there will not be a
+ * We've seen 4 trail bytes, so we know there will not be a
* properly formed byte sequence to find, and we can stop looking,
- * accepting the fallback (for TCL_UTF_MAX > 3) or just go back as
- * far as we can.
+ * accepting the fallback.
*/
-#if TCL_UTF_MAX > 3
return fallback;
-#else
- return src - TCL_UTF_MAX;
-#endif
}
/*
@@ -1576,8 +1557,8 @@ Tcl_UtfNcasecmp(
return -ch2;
}
#endif
- ch1 = Tcl_UniCharToLower(ch1);
- ch2 = Tcl_UniCharToLower(ch2);
+ ch1 = Tcl_UniCharFold(ch1);
+ ch2 = Tcl_UniCharFold(ch2);
if (ch1 != ch2) {
return (ch1 - ch2);
}
@@ -1671,8 +1652,8 @@ TclUtfCasecmp(
return -ch2;
}
#endif
- ch1 = Tcl_UniCharToLower(ch1);
- ch2 = Tcl_UniCharToLower(ch2);
+ ch1 = Tcl_UniCharFold(ch1);
+ ch2 = Tcl_UniCharFold(ch2);
if (ch1 != ch2) {
return ch1 - ch2;
}
@@ -1744,6 +1725,38 @@ Tcl_UniCharToLower(
/* Clear away extension bits, if any */
return ch & 0x1FFFFF;
}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_UniCharFold --
+ *
+ * Compute the lowercase equivalent of the given Unicode character.
+ *
+ * Results:
+ * Returns the lowercase Unicode character.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharFold(
+ int ch) /* Unicode character to convert. */
+{
+ if (!UNICODE_OUT_OF_RANGE(ch)) {
+ int info = GetUniCharInfo(ch);
+ int mode = GetCaseType(info);
+
+ if ((mode & 0x02) && (mode != 0x7)) {
+ ch += GetDelta(info);
+ }
+ }
+ /* Clear away extension bits, if any */
+ return ch & 0x1FFFFF;
+}
/*
*----------------------------------------------------------------------
@@ -1885,8 +1898,8 @@ Tcl_UniCharNcasecmp(
{
for ( ; numChars != 0; numChars--, ucs++, uct++) {
if (*ucs != *uct) {
- Tcl_UniChar lcs = Tcl_UniCharToLower(*ucs);
- Tcl_UniChar lct = Tcl_UniCharToLower(*uct);
+ Tcl_UniChar lcs = Tcl_UniCharFold(*ucs);
+ Tcl_UniChar lct = Tcl_UniCharFold(*uct);
if (lcs != lct) {
return (lcs - lct);
@@ -2274,7 +2287,7 @@ Tcl_UniCharCaseMatch(
return 1;
}
if (nocase) {
- p = Tcl_UniCharToLower(p);
+ p = Tcl_UniCharFold(p);
}
while (1) {
/*
@@ -2326,13 +2339,13 @@ Tcl_UniCharCaseMatch(
Tcl_UniChar startChar, endChar;
uniPattern++;
- ch1 = (nocase ? Tcl_UniCharToLower(*uniStr) : *uniStr);
+ ch1 = (nocase ? Tcl_UniCharFold(*uniStr) : *uniStr);
uniStr++;
while (1) {
if ((*uniPattern == ']') || (*uniPattern == 0)) {
return 0;
}
- startChar = (nocase ? Tcl_UniCharToLower(*uniPattern)
+ startChar = (nocase ? Tcl_UniCharFold(*uniPattern)
: *uniPattern);
uniPattern++;
if (*uniPattern == '-') {
@@ -2340,7 +2353,7 @@ Tcl_UniCharCaseMatch(
if (*uniPattern == 0) {
return 0;
}
- endChar = (nocase ? Tcl_UniCharToLower(*uniPattern)
+ endChar = (nocase ? Tcl_UniCharFold(*uniPattern)
: *uniPattern);
uniPattern++;
if (((startChar <= ch1) && (ch1 <= endChar))
@@ -2382,8 +2395,8 @@ Tcl_UniCharCaseMatch(
*/
if (nocase) {
- if (Tcl_UniCharToLower(*uniStr) !=
- Tcl_UniCharToLower(*uniPattern)) {
+ if (Tcl_UniCharFold(*uniStr) !=
+ Tcl_UniCharFold(*uniPattern)) {
return 0;
}
} else if (*uniStr != *uniPattern) {
@@ -2466,7 +2479,7 @@ TclUniCharMatch(
}
p = *pattern;
if (nocase) {
- p = Tcl_UniCharToLower(p);
+ p = Tcl_UniCharFold(p);
}
while (1) {
/*
@@ -2478,7 +2491,7 @@ TclUniCharMatch(
if ((p != '[') && (p != '?') && (p != '\\')) {
if (nocase) {
while ((string < stringEnd) && (p != *string)
- && (p != Tcl_UniCharToLower(*string))) {
+ && (p != Tcl_UniCharFold(*string))) {
string++;
}
} else {
@@ -2519,20 +2532,20 @@ TclUniCharMatch(
Tcl_UniChar ch1, startChar, endChar;
pattern++;
- ch1 = (nocase ? Tcl_UniCharToLower(*string) : *string);
+ ch1 = (nocase ? Tcl_UniCharFold(*string) : *string);
string++;
while (1) {
if ((*pattern == ']') || (pattern == patternEnd)) {
return 0;
}
- startChar = (nocase ? Tcl_UniCharToLower(*pattern) : *pattern);
+ startChar = (nocase ? Tcl_UniCharFold(*pattern) : *pattern);
pattern++;
if (*pattern == '-') {
pattern++;
if (pattern == patternEnd) {
return 0;
}
- endChar = (nocase ? Tcl_UniCharToLower(*pattern)
+ endChar = (nocase ? Tcl_UniCharFold(*pattern)
: *pattern);
pattern++;
if (((startChar <= ch1) && (ch1 <= endChar))
@@ -2574,7 +2587,7 @@ TclUniCharMatch(
*/
if (nocase) {
- if (Tcl_UniCharToLower(*string) != Tcl_UniCharToLower(*pattern)) {
+ if (Tcl_UniCharFold(*string) != Tcl_UniCharFold(*pattern)) {
return 0;
}
} else if (*string != *pattern) {
@@ -2629,12 +2642,25 @@ TclUniCharToUCS4(
* by the Tcl_UniChar string. */
{
if (((src[0] & 0xFC00) == 0xD800) && ((src[1] & 0xFC00) == 0xDC00)) {
- *ucs4Ptr = (((src[0] & 0x3FF) << 10) | (src[01] & 0x3FF)) + 0x10000;
+ *ucs4Ptr = (((src[0] & 0x3FF) << 10) | (src[1] & 0x3FF)) + 0x10000;
return 2;
}
*ucs4Ptr = src[0];
return 1;
}
+
+const Tcl_UniChar *TclUCS4Prev(const Tcl_UniChar *src, const Tcl_UniChar *ptr) {
+ if (src <= ptr + 1) {
+ return ptr;
+ }
+ if (((src[-1] & 0xFC00) == 0xDC00) && ((src[-2] & 0xFC00) == 0xD800)) {
+ return src - 2;
+ }
+ return src - 1;
+}
+
+
+
#endif
/*
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 170a85e..0cf5d98 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -1707,7 +1707,7 @@ TclTrimRight(
const char *q = trim;
int pInc = 0, bytesLeft = numTrim;
- pp = TclUtfPrev(p, bytes);
+ pp = Tcl_UtfPrev(p, bytes);
do {
pp += pInc;
pInc = TclUtfToUCS4(pp, &ch1);
@@ -2208,7 +2208,7 @@ Tcl_StringCaseMatch(
} else {
TclUtfToUCS4(pattern, &ch2);
if (nocase) {
- ch2 = Tcl_UniCharToLower(ch2);
+ ch2 = Tcl_UniCharFold(ch2);
}
}
@@ -2223,7 +2223,7 @@ Tcl_StringCaseMatch(
if (nocase) {
while (*str) {
charLen = TclUtfToUCS4(str, &ch1);
- if (ch2==ch1 || ch2==Tcl_UniCharToLower(ch1)) {
+ if (ch2==ch1 || ch2==Tcl_UniCharFold(ch1)) {
break;
}
str += charLen;
@@ -2282,7 +2282,7 @@ Tcl_StringCaseMatch(
} else {
str += TclUtfToUCS4(str, &ch1);
if (nocase) {
- ch1 = Tcl_UniCharToLower(ch1);
+ ch1 = Tcl_UniCharFold(ch1);
}
}
while (1) {
@@ -2296,7 +2296,7 @@ Tcl_StringCaseMatch(
} else {
pattern += TclUtfToUCS4(pattern, &startChar);
if (nocase) {
- startChar = Tcl_UniCharToLower(startChar);
+ startChar = Tcl_UniCharFold(startChar);
}
}
if (*pattern == '-') {
@@ -2311,7 +2311,7 @@ Tcl_StringCaseMatch(
} else {
pattern += TclUtfToUCS4(pattern, &endChar);
if (nocase) {
- endChar = Tcl_UniCharToLower(endChar);
+ endChar = Tcl_UniCharFold(endChar);
}
}
if (((startChar <= ch1) && (ch1 <= endChar))
@@ -2360,7 +2360,7 @@ Tcl_StringCaseMatch(
str += TclUtfToUCS4(str, &ch1);
pattern += TclUtfToUCS4(pattern, &ch2);
if (nocase) {
- if (Tcl_UniCharToLower(ch1) != Tcl_UniCharToLower(ch2)) {
+ if (Tcl_UniCharFold(ch1) != Tcl_UniCharFold(ch2)) {
return 0;
}
} else if (ch1 != ch2) {
diff --git a/tests/safe-stock86.test b/tests/safe-stock86.test
deleted file mode 100644
index 72e9d34..0000000
--- a/tests/safe-stock86.test
+++ /dev/null
@@ -1,117 +0,0 @@
-# safe-stock86.test --
-#
-# This file contains tests for safe Tcl that were previously in the file
-# safe.test, and use files and packages of stock Tcl 8.6 to perform the tests.
-# These files may be changed or disappear in future revisions of Tcl,
-# for example package http 1.0 will be removed from Tcl 8.7.
-#
-# The tests are replaced in safe.tcl with tests that use files provided in the
-# tests directory. Test numbering is for comparison with similar tests in
-# safe.test.
-#
-# Sourcing this file into tcl runs the tests and generates output for errors.
-# No output means no errors were found.
-#
-# Copyright (c) 1995-1996 Sun Microsystems, Inc.
-# Copyright (c) 1998-1999 by Scriptics Corporation.
-#
-# See the file "license.terms" for information on usage and redistribution of
-# this file, and for a DISCLAIMER OF ALL WARRANTIES.
-
-package require Tcl 8.5-
-
-if {[lsearch [namespace children] ::tcltest] == -1} {
- package require tcltest 2
- namespace import -force ::tcltest::*
-}
-
-foreach i [interp slaves] {
- interp delete $i
-}
-
-set SaveAutoPath $::auto_path
-set ::auto_path [info library]
-set TestsDir [file normalize [file dirname [info script]]]
-set PathMapp [list $tcl_library TCLLIB $TestsDir TESTSDIR]
-
-proc mapList {map listIn} {
- set listOut {}
- foreach element $listIn {
- lappend listOut [string map $map $element]
- }
- return $listOut
-}
-
-# Force actual loading of the safe package because we use un-exported (and
-# thus un-autoindexed) APIs in this test result arguments:
-catch {safe::interpConfigure}
-
-# testing that nested and statics do what is advertised (we use a static
-# package - Tcltest - but it might be absent if we're in standard tclsh)
-
-testConstraint TcltestPackage [expr {![catch {package require Tcltest}]}]
-
-# high level general test
-test safe-stock86-7.1 {tests that everything works at high level, uses http 2} -body {
- set i [safe::interpCreate]
- # no error shall occur:
- # (because the default access_path shall include 1st level sub dirs so
- # package require in a slave works like in the master)
- set v [interp eval $i {package require http 2}]
- # no error shall occur:
- interp eval $i {http::config}
- safe::interpDelete $i
- set v
-} -match glob -result 2.*
-test safe-stock86-7.2 {tests specific path and interpFind/AddToAccessPath, uses http1.0} -body {
- set i [safe::interpCreate -nostat -nested 1 -accessPath [list [info library]]]
- # should not add anything (p0)
- set token1 [safe::interpAddToAccessPath $i [info library]]
- # should add as p1
- set token2 [safe::interpAddToAccessPath $i "/dummy/unixlike/test/path"]
- set confA [safe::interpConfigure $i]
- set mappA [mapList $PathMapp [dict get $confA -accessPath]]
- # an error shall occur (http is not anymore in the secure 0-level
- # provided deep path)
- list $token1 $token2 -- \
- [catch {interp eval $i {package require http 1}} msg] $msg -- \
- $mappA -- [safe::interpDelete $i]
-} -match glob -result {{$p(:0:)} {$p(:*:)} -- 1 {can't find package http 1} --\
- {TCLLIB */dummy/unixlike/test/path} -- {}}
-# Disable because http 1 is no longer present in the Tcl 8.7 distribution.
-test safe-stock86-7.4 {tests specific path and positive search, uses http1.0} -constraints nonPortable -body {
- set i [safe::interpCreate -nostat -nested 1 -accessPath [list [info library]]]
- # should not add anything (p0)
- set token1 [safe::interpAddToAccessPath $i [info library]]
- # should add as p1
- set token2 [safe::interpAddToAccessPath $i [file join [info library] http1.0]]
- set confA [safe::interpConfigure $i]
- set mappA [mapList $PathMapp [dict get $confA -accessPath]]
- # this time, unlike test safe-stock86-7.2, http should be found
- list $token1 $token2 -- \
- [catch {interp eval $i {package require http 1}} msg] $msg -- \
- $mappA -- [safe::interpDelete $i]
-} -match glob -result {{$p(:0:)} {$p(:*:)} -- 0 1.0 -- {TCLLIB *TCLLIB/http1.0} -- {}}
-
-# The following test checks whether the definition of tcl_endOfWord can be
-# obtained from auto_loading. It was previously test "safe-5.1".
-test safe-stock86-9.8 {test auto-loading in safe interpreters, was test 5.1} -setup {
- catch {safe::interpDelete a}
- safe::interpCreate a
-} -body {
- interp eval a {tcl_endOfWord "" 0}
-} -cleanup {
- safe::interpDelete a
-} -result -1
-
-set ::auto_path $SaveAutoPath
-unset SaveAutoPath TestsDir PathMapp
-rename mapList {}
-
-# cleanup
-::tcltest::cleanupTests
-return
-
-# Local Variables:
-# mode: tcl
-# End:
diff --git a/tests/string.test b/tests/string.test
index 4a8746d..47aec29 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -74,7 +74,7 @@ if {$noComp} {
test string-1.1.$noComp {error conditions} {
list [catch {run {string gorp a b}} msg] $msg
-} {1 {unknown or ambiguous subcommand "gorp": must be bytelength, cat, compare, equal, first, index, insert, is, last, length, map, match, range, repeat, replace, reverse, tolower, totitle, toupper, trim, trimleft, trimright, wordend, or wordstart}}
+} {1 {unknown or ambiguous subcommand "gorp": must be bytelength, cat, charend, charstart, compare, equal, first, index, insert, is, last, length, map, match, range, repeat, replace, reverse, tolower, totitle, toupper, trim, trimleft, trimright, wordend, or wordstart}}
test string-1.2.$noComp {error conditions} {
list [catch {run {string}} msg] $msg
} {1 {wrong # args: should be "string subcommand ?arg ...?"}}
@@ -1818,7 +1818,7 @@ test string-20.1.$noComp {string trimright errors} {
} {1 {wrong # args: should be "string trimright string ?chars?"}}
test string-20.2.$noComp {string trimright errors} {
list [catch {run {string trimg a}} msg] $msg
-} {1 {unknown or ambiguous subcommand "trimg": must be bytelength, cat, compare, equal, first, index, insert, is, last, length, map, match, range, repeat, replace, reverse, tolower, totitle, toupper, trim, trimleft, trimright, wordend, or wordstart}}
+} {1 {unknown or ambiguous subcommand "trimg": must be bytelength, cat, charend, charstart, compare, equal, first, index, insert, is, last, length, map, match, range, repeat, replace, reverse, tolower, totitle, toupper, trim, trimleft, trimright, wordend, or wordstart}}
test string-20.3.$noComp {string trimright} {
run {string trimright " XYZ "}
} { XYZ}
@@ -1911,7 +1911,7 @@ test string-21.16.$noComp {string wordend, unicode} -constraints utf16 -body {
test string-22.1.$noComp {string wordstart} -body {
list [catch {run {string word a}} msg] $msg
-} -result {1 {unknown or ambiguous subcommand "word": must be bytelength, cat, compare, equal, first, index, insert, is, last, length, map, match, range, repeat, replace, reverse, tolower, totitle, toupper, trim, trimleft, trimright, wordend, or wordstart}}
+} -result {1 {unknown or ambiguous subcommand "word": must be bytelength, cat, charend, charstart, compare, equal, first, index, insert, is, last, length, map, match, range, repeat, replace, reverse, tolower, totitle, toupper, trim, trimleft, trimright, wordend, or wordstart}}
test string-22.2.$noComp {string wordstart} -body {
list [catch {run {string wordstart a}} msg] $msg
} -result {1 {wrong # args: should be "string wordstart string index"}}
@@ -2550,6 +2550,106 @@ test string-32.17.$noComp {string is dict, valid dict packed in invalid dict} {
string is dict {{a b c d e f g h}}
} 0
+test string-33.1.$noComp {string charend} -body {
+ list [catch {run {string charend a}} msg] $msg
+} -result {1 {wrong # args: should be "string charend string index"}}
+test string-33.2.$noComp {string charend} -body {
+ list [catch {run {string charend a b c}} msg] $msg
+} -result {1 {wrong # args: should be "string charend string index"}}
+test string-33.3.$noComp {string charend} -body {
+ list [catch {run {string charend a gorp}} msg] $msg
+} -result {1 {bad index "gorp": must be integer?[+-]integer? or end?[+-]integer?}}
+test string-33.4.$noComp {string charend} -body {
+ run {string charend abc. -1}
+} -result 0
+test string-33.5.$noComp {string charend} -body {
+ run {string charend abc. 100}
+} -result 4
+test string-33.6.$noComp {string charend} -body {
+ run {string charend "word_one two three" 2}
+} -result 3
+test string-33.7.$noComp {string charend} -body {
+ run {string charend "one .&# three" 5}
+} -result 6
+test string-33.8.$noComp {string charend} -body {
+ run {string worde "x.y" 0}
+} -result 1
+test string-33.9.$noComp {string charend} -body {
+ run {string worde "x.y" end-1}
+} -result 2
+test string-33.10.$noComp {string charend, unicode} -body {
+ run {string charend "xyz\xC7de fg" 0}
+} -result 1
+test string-33.11.$noComp {string charend, unicode} -body {
+ run {string charend "xyz\uC700de fg" 0}
+} -result 1
+test string-33.12.$noComp {string charend, unicode} -body {
+ run {string charend "xyz\u203Fde fg" 0}
+} -result 1
+test string-33.13.$noComp {string charend, unicode} -body {
+ run {string charend "xyz\u2045de fg" 0}
+} -result 1
+test string-33.14.$noComp {string charend, unicode} -body {
+ run {string charend "\uC700\uC700 abc" 8}
+} -result 6
+test string-33.15.$noComp {string charend, unicode} -constraints utf16 -body {
+ run {string charend "\U1D7CA\U1D7CA abc" 0}
+} -result 2
+test string-33.16.$noComp {string charend, unicode} -constraints utf16 -body {
+ run {string charend "\U1D7CA\U1D7CA abc" 10}
+} -result 8
+
+test string-34.1.$noComp {string charstart} -body {
+ list [catch {run {string word a}} msg] $msg
+} -match regexp -result {1 {unknown or ambiguous subcommand "word": must be (bytelength, |)cat, charend, charstart, compare, equal, first, index, insert, is, last, length, map, match, range, repeat, replace, reverse, tolower, totitle, toupper, trim, trimleft, trimright, wordend, or wordstart}}
+test string-34.2.$noComp {string charstart} -body {
+ list [catch {run {string charstart a}} msg] $msg
+} -result {1 {wrong # args: should be "string charstart string index"}}
+test string-34.3.$noComp {string charstart} -body {
+ list [catch {run {string charstart a b c}} msg] $msg
+} -result {1 {wrong # args: should be "string charstart string index"}}
+test string-34.4.$noComp {string charstart} -body {
+ list [catch {run {string charstart a gorp}} msg] $msg
+} -result {1 {bad index "gorp": must be integer?[+-]integer? or end?[+-]integer?}}
+test string-34.5.$noComp {string charstart} -body {
+ run {string charstart "one two three_words" 400}
+} -result 19
+test string-34.6.$noComp {string charstart} -body {
+ run {string charstart "one two three_words" 2}
+} -result 2
+test string-34.7.$noComp {string charstart} -body {
+ run {string charstart "one two three_words" -2}
+} -result -1
+test string-34.8.$noComp {string charstart} -body {
+ run {string charstart "one .*&^ three" 6}
+} -result 6
+test string-34.9.$noComp {string charstart} -body {
+ run {string charstart "one two three" 4}
+} -result 4
+test string-34.10.$noComp {string charstart} -body {
+ run {string charstart "one two three" end-5}
+} -result 7
+test string-34.11.$noComp {string charstart, unicode} -body {
+ run {string charstart "one tw\xC7o three" 7}
+} -result 7
+test string-34.12.$noComp {string charstart, unicode} -body {
+ run {string charstart "ab\uC700\uC700 cdef ghi" 12}
+} -result 12
+test string-34.13.$noComp {string charstart, unicode} -body {
+ run {string charstart "\uC700\uC700 abc" 8}
+} -result 6
+test string-34.14.$noComp {string charstart, invalid UTF-8} -constraints testbytestring -body {
+ # See Bug c61818e4c9
+ set demo [testbytestring "abc def\xE0\xA9ghi"]
+ run {string index $demo [string charstart $demo 10]}
+} -result h
+test string-34.15.$noComp {string charstart, unicode} -body {
+ run {string charstart "\U1D7CA\U1D7CA abc" 0}
+} -result 0
+test string-34.16.$noComp {string charstart, unicode} -constraints utf16 -body {
+ run {string charstart "\U1D7CA\U1D7CA abc" 10}
+} -result 8
+
}; # foreach noComp {0 1}
# cleanup
diff --git a/tests/utf.test b/tests/utf.test
index 935830c..3a75726 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -216,7 +216,10 @@ test utf-6.9 {Tcl_UtfNext} {testutfnext testbytestring} {
test utf-6.10 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xA0]G
} 1
-test utf-6.11 {Tcl_UtfNext} {testutfnext testbytestring} {
+test utf-6.11.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
+ testutfnext [testbytestring \xA0\xA0\x00]
+} 1
+test utf-6.11.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xA0\xA0\x00]
} 2
test utf-6.12 {Tcl_UtfNext} {testutfnext testbytestring} {
@@ -273,19 +276,19 @@ test utf-6.28 {Tcl_UtfNext} {testutfnext testbytestring} {
test utf-6.29 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xE8\xF8]
} 1
-test utf-6.30.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.30.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2]
} 1
-test utf-6.30.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.30.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2]
} -1
test utf-6.31 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2]G
} 1
-test utf-6.32.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.32.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0]
} 1
-test utf-6.32.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.32.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0]
} -1
test utf-6.33 {Tcl_UtfNext} {testutfnext testbytestring} {
@@ -396,10 +399,10 @@ test utf-6.67 {Tcl_UtfNext} {testutfnext testbytestring} {
test utf-6.68 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\xA0]G
} 1
-test utf-6.69.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.69.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0]
} 1
-test utf-6.69.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.69.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0]
} 4
test utf-6.70 {Tcl_UtfNext} {testutfnext testbytestring} {
@@ -414,40 +417,40 @@ test utf-6.72 {Tcl_UtfNext} {testutfnext testbytestring} {
test utf-6.73 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\xA0\xF8]
} 1
-test utf-6.74.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.74.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0]G
} 1
-test utf-6.74.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.74.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0]G
} 4
-test utf-6.75.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.75.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0]
} 1
-test utf-6.75.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.75.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0]
} 4
-test utf-6.76.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.76.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xD0]
} 1
-test utf-6.76.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.76.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xD0]
} 4
-test utf-6.77.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.77.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xE8]
} 1
-test utf-6.77.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.77.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xE8]
} 4
-test utf-6.78.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.78.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xF2]
} 1
-test utf-6.78.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.78.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xF2]
} 4
-test utf-6.79.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.79.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0G\xF8]
} 1
-test utf-6.79.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.79.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0G\xF8]
} 4
test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext {
@@ -471,37 +474,55 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} {
test utf-6.86 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} {
testutfnext [testbytestring \xF0\x80\x80\x80]
} 1
-test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF0\x90\x80\x80]
} 1
-test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring ucs4} {
+test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF0\x90\x80\x80]
} 4
-test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring} {
+test utf-6.88.0 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring ucs2} {
+ testutfnext [testbytestring \xA0\xA0\x00]
+} 1
+test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xA0\xA0\x00]
} 2
-test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring} {
+test utf-6.89.0 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring ucs2} {
+ testutfnext [testbytestring \x80\x80\x00]
+} 2
+test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \x80\x80\x00]
} 2
-test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF4\x8F\xBF\xBF]
} 1
-test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring ucs4} {
+test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF4\x8F\xBF\xBF]
} 4
test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring} {
testutfnext [testbytestring \xF4\x90\x80\x80]
} 1
-test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring} {
+test utf-6.92.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring ucs2} {
+ testutfnext [testbytestring \xA0\xA0\xA0]
+} 1
+test utf-6.92.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xA0\xA0\xA0]
} 3
-test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring} {
+test utf-6.93.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring ucs2} {
+ testutfnext [testbytestring \x80\x80\x80]
+} 1
+test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \x80\x80\x80]
} 3
-test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring} {
+test utf-6.94.0 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring ucs2} {
+ testutfnext [testbytestring \xA0\xA0\xA0\xA0]
+} 1
+test utf-6.94.1 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xA0\xA0\xA0\xA0]
} 3
-test utf-6.95 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring} {
+test utf-6.95.0 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring ucs2} {
+ testutfnext [testbytestring \x80\x80\x80\x80]
+} 1
+test utf-6.95.1 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \x80\x80\x80\x80]
} 3
@@ -661,30 +682,33 @@ test utf-7.17.1 {Tcl_UtfPrev} {testutfprev testbytestring} {
test utf-7.17.2 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xD0\xA0\xA0\xF8] 4
} 3
-test utf-7.18.0 {Tcl_UtfPrev} {testutfprev testbytestring utf16} {
+test utf-7.18.0 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xA0\xA0\xA0]
-} 1
-test utf-7.18.1 {Tcl_UtfPrev} {testutfprev testbytestring utf16} {
+} 3
+test utf-7.18.1 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xA0\xA0\xA0\xA0] 4
-} 1
-test utf-7.18.2 {Tcl_UtfPrev} {testutfprev testbytestring utf16} {
+} 3
+test utf-7.18.2 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xA0\xA0\xA0\xF8] 4
-} 1
-test utf-7.19 {Tcl_UtfPrev} {testutfprev testbytestring utf16} {
+} 3
+test utf-7.19 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF8\xA0\xA0\xA0]
-} 2
-test utf-7.20 {Tcl_UtfPrev} {testutfprev testbytestring utf16} {
+} 4
+test utf-7.20.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} {
testutfprev A[testbytestring \xF2\xA0\xA0\xA0]
-} 2
-test utf-7.21 {Tcl_UtfPrev} {testutfprev testbytestring utf16} {
+} 4
+test utf-7.20.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} {
+ testutfprev A[testbytestring \xF2\xA0\xA0\xA0]
+} 1
+test utf-7.21 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A\u8820[testbytestring \xA0]
-} 2
-test utf-7.22 {Tcl_UtfPrev} {testutfprev testbytestring utf16} {
+} 4
+test utf-7.22 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xD0\xA0\xA0\xA0]
-} 2
-test utf-7.23 {Tcl_UtfPrev} {testutfprev testbytestring utf16} {
+} 4
+test utf-7.23 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xA0\xA0\xA0\xA0]
-} 2
+} 4
test utf-7.24 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
testutfprev A[testbytestring \xC0\x81]
} 2
@@ -706,9 +730,9 @@ test utf-7.28 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
test utf-7.28.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
testutfprev A[testbytestring \xE0\x80\x80] 2
} 1
-test utf-7.29 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring utf16} {
+test utf-7.29 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF0\x80\x80\x80]
-} 2
+} 4
test utf-7.30 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF0\x80\x80\x80] 4
} 3
@@ -736,9 +760,12 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
test utf-7.38 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
testutfprev A[testbytestring \xE0\xA0\x80] 2
} 1
-test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring utf16} {
+test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} {
testutfprev A[testbytestring \xF0\x90\x80\x80]
-} 2
+} 4
+test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring fullutf} {
+ testutfprev A[testbytestring \xF0\x90\x80\x80]
+} 1
test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} {
testutfprev A[testbytestring \xF0\x90\x80\x80] 4
} 3
@@ -763,9 +790,9 @@ test utf-7.44 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestrin
test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring} {
testutfprev [testbytestring \xA0\xA0\xA0]
} 2
-test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring utf16} {
+test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring} {
testutfprev [testbytestring \xA0\xA0\xA0\xA0]
-} 1
+} 3
test utf-7.47 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev testbytestring} {
testutfprev [testbytestring \xE8\xA0]
} 0
@@ -775,27 +802,30 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} tes
test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev testbytestring} {
testutfprev [testbytestring \xE8\xA0\x00] 2
} 0
-test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring utf16} {
+test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} {
testutfprev A[testbytestring \xF4\x8F\xBF\xBF]
-} 2
-test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} {
+} 4
+test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} {
+ testutfprev A[testbytestring \xF4\x8F\xBF\xBF]
+} 1
+test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} {
testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 4
} 3
-test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} {
+test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} {
testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 4
} 1
-test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} {
+test utf-7.48.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} {
testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 3
} 2
-test utf-7.48.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} {
+test utf-7.48.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} {
testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 3
} 1
-test utf-7.48.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} {
+test utf-7.48.6 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 2
} 1
-test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring utf16} {
+test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF4\x90\x80\x80]
-} 2
+} 4
test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF4\x90\x80\x80] 4
} 3