summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--doc/Utf.311
-rw-r--r--generic/tcl.decls17
-rw-r--r--generic/tclCompExpr.c2
-rw-r--r--generic/tclDecls.h51
-rw-r--r--generic/tclEncoding.c4
-rw-r--r--generic/tclIndexObj.c2
-rw-r--r--generic/tclInt.h11
-rw-r--r--generic/tclParse.c2
-rw-r--r--generic/tclStringObj.c6
-rw-r--r--generic/tclStubInit.c35
-rw-r--r--generic/tclTest.c5
-rw-r--r--generic/tclUtf.c47
-rw-r--r--generic/tclUtil.c6
-rw-r--r--tests/utf.test150
14 files changed, 205 insertions, 144 deletions
diff --git a/doc/Utf.3 b/doc/Utf.3
index 263d4dd..f00ddf8 100644
--- a/doc/Utf.3
+++ b/doc/Utf.3
@@ -233,10 +233,10 @@ characters.
.PP
\fBTcl_UtfCharComplete\fR returns 1 if the source UTF-8 string \fIsrc\fR
of \fIlength\fR bytes is long enough to be decoded by
-\fBTcl_UtfToUniChar\fR, or 0 otherwise. This function does not guarantee
-that the UTF-8 string is properly formed. This routine is used by
-procedures that are operating on a byte at a time and need to know if a
-full Unicode character has been seen.
+\fBTcl_UtfToUniChar\fR/\fBTcl_UtfNext\fR, or 0 otherwise. This function
+does not guarantee that the UTF-8 string is properly formed. This routine
+is used by procedures that are operating on a byte at a time and need to
+know if a full Unicode character has been seen.
.PP
\fBTcl_NumUtfChars\fR corresponds to \fBstrlen\fR for UTF-8 strings. It
returns the number of Tcl_UniChars that are represented by the UTF-8 string
@@ -257,7 +257,8 @@ Given \fIsrc\fR, a pointer to some location in a UTF-8 string,
\fBTcl_UtfNext\fR returns a pointer to the next UTF-8 character in the
string. The caller must not ask for the next character after the last
character in the string if the string is not terminated by a null
-character.
+character. \fBTcl_UtfCharComplete\fR can be used in that case to
+make sure enough bytes are available before calling \fBTcl_UtfNext\fR.
.PP
\fBTcl_UtfPrev\fR is used to step backward through but not beyond the
UTF-8 string that begins at \fIstart\fR. If the UTF-8 string is made
diff --git a/generic/tcl.decls b/generic/tcl.decls
index 5895946..b9744ba 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -1163,7 +1163,7 @@ declare 325 {
const char *Tcl_UtfAtIndex(const char *src, int index)
}
declare 326 {
- int Tcl_UtfCharComplete(const char *src, int length)
+ int TclUtfCharComplete(const char *src, int length)
}
declare 327 {
int Tcl_UtfBackslash(const char *src, int *readPtr, char *dst)
@@ -1175,10 +1175,10 @@ declare 329 {
const char *Tcl_UtfFindLast(const char *src, int ch)
}
declare 330 {
- const char *Tcl_UtfNext(const char *src)
+ const char *TclUtfNext(const char *src)
}
declare 331 {
- const char *Tcl_UtfPrev(const char *src, const char *start)
+ const char *TclUtfPrev(const char *src, const char *start)
}
declare 332 {
int Tcl_UtfToExternal(Tcl_Interp *interp, Tcl_Encoding encoding,
@@ -2413,6 +2413,17 @@ declare 653 {
unsigned char *TclGetByteArrayFromObj(Tcl_Obj *objPtr, size_t *lengthPtr)
}
+# TIP #575
+declare 654 {
+ int Tcl_UtfCharComplete(const char *src, int length)
+}
+declare 655 {
+ const char *Tcl_UtfNext(const char *src)
+}
+declare 656 {
+ const char *Tcl_UtfPrev(const char *src, const char *start)
+}
+
# ----- BASELINE -- FOR -- 8.7.0 ----- #
##############################################################################
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c
index fa15fba..03aebe3 100644
--- a/generic/tclCompExpr.c
+++ b/generic/tclCompExpr.c
@@ -2145,7 +2145,7 @@ ParseLexeme(
*/
if (!TclIsBareword(*start) || *start == '_') {
- if (TclUCS4Complete(start, numBytes)) {
+ if (Tcl_UtfCharComplete(start, numBytes)) {
scanned = TclUtfToUCS4(start, &ch);
} else {
char utfBytes[8];
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index dc39657..68a7c66 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -999,7 +999,7 @@ EXTERN int Tcl_UniCharToUtf(int ch, char *buf);
/* 325 */
EXTERN const char * Tcl_UtfAtIndex(const char *src, int index);
/* 326 */
-EXTERN int Tcl_UtfCharComplete(const char *src, int length);
+EXTERN int TclUtfCharComplete(const char *src, int length);
/* 327 */
EXTERN int Tcl_UtfBackslash(const char *src, int *readPtr,
char *dst);
@@ -1008,9 +1008,9 @@ EXTERN const char * Tcl_UtfFindFirst(const char *src, int ch);
/* 329 */
EXTERN const char * Tcl_UtfFindLast(const char *src, int ch);
/* 330 */
-EXTERN const char * Tcl_UtfNext(const char *src);
+EXTERN const char * TclUtfNext(const char *src);
/* 331 */
-EXTERN const char * Tcl_UtfPrev(const char *src, const char *start);
+EXTERN const char * TclUtfPrev(const char *src, const char *start);
/* 332 */
EXTERN int Tcl_UtfToExternal(Tcl_Interp *interp,
Tcl_Encoding encoding, const char *src,
@@ -1931,6 +1931,12 @@ EXTERN Tcl_UniChar * TclGetUnicodeFromObj(Tcl_Obj *objPtr,
/* 653 */
EXTERN unsigned char * TclGetByteArrayFromObj(Tcl_Obj *objPtr,
size_t *lengthPtr);
+/* 654 */
+EXTERN int Tcl_UtfCharComplete(const char *src, int length);
+/* 655 */
+EXTERN const char * Tcl_UtfNext(const char *src);
+/* 656 */
+EXTERN const char * Tcl_UtfPrev(const char *src, const char *start);
typedef struct {
const struct TclPlatStubs *tclPlatStubs;
@@ -2292,12 +2298,12 @@ typedef struct TclStubs {
int (*tcl_UniCharToUpper) (int ch); /* 323 */
int (*tcl_UniCharToUtf) (int ch, char *buf); /* 324 */
const char * (*tcl_UtfAtIndex) (const char *src, int index); /* 325 */
- int (*tcl_UtfCharComplete) (const char *src, int length); /* 326 */
+ int (*tclUtfCharComplete) (const char *src, int length); /* 326 */
int (*tcl_UtfBackslash) (const char *src, int *readPtr, char *dst); /* 327 */
const char * (*tcl_UtfFindFirst) (const char *src, int ch); /* 328 */
const char * (*tcl_UtfFindLast) (const char *src, int ch); /* 329 */
- const char * (*tcl_UtfNext) (const char *src); /* 330 */
- const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 331 */
+ const char * (*tclUtfNext) (const char *src); /* 330 */
+ const char * (*tclUtfPrev) (const char *src, const char *start); /* 331 */
int (*tcl_UtfToExternal) (Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); /* 332 */
char * (*tcl_UtfToExternalDString) (Tcl_Encoding encoding, const char *src, int srcLen, Tcl_DString *dsPtr); /* 333 */
int (*tcl_UtfToLower) (char *src); /* 334 */
@@ -2620,6 +2626,9 @@ typedef struct TclStubs {
char * (*tclGetStringFromObj) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 651 */
Tcl_UniChar * (*tclGetUnicodeFromObj) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 652 */
unsigned char * (*tclGetByteArrayFromObj) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 653 */
+ int (*tcl_UtfCharComplete) (const char *src, int length); /* 654 */
+ const char * (*tcl_UtfNext) (const char *src); /* 655 */
+ const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 656 */
} TclStubs;
extern const TclStubs *tclStubsPtr;
@@ -3302,18 +3311,18 @@ extern const TclStubs *tclStubsPtr;
(tclStubsPtr->tcl_UniCharToUtf) /* 324 */
#define Tcl_UtfAtIndex \
(tclStubsPtr->tcl_UtfAtIndex) /* 325 */
-#define Tcl_UtfCharComplete \
- (tclStubsPtr->tcl_UtfCharComplete) /* 326 */
+#define TclUtfCharComplete \
+ (tclStubsPtr->tclUtfCharComplete) /* 326 */
#define Tcl_UtfBackslash \
(tclStubsPtr->tcl_UtfBackslash) /* 327 */
#define Tcl_UtfFindFirst \
(tclStubsPtr->tcl_UtfFindFirst) /* 328 */
#define Tcl_UtfFindLast \
(tclStubsPtr->tcl_UtfFindLast) /* 329 */
-#define Tcl_UtfNext \
- (tclStubsPtr->tcl_UtfNext) /* 330 */
-#define Tcl_UtfPrev \
- (tclStubsPtr->tcl_UtfPrev) /* 331 */
+#define TclUtfNext \
+ (tclStubsPtr->tclUtfNext) /* 330 */
+#define TclUtfPrev \
+ (tclStubsPtr->tclUtfPrev) /* 331 */
#define Tcl_UtfToExternal \
(tclStubsPtr->tcl_UtfToExternal) /* 332 */
#define Tcl_UtfToExternalDString \
@@ -3956,6 +3965,12 @@ extern const TclStubs *tclStubsPtr;
(tclStubsPtr->tclGetUnicodeFromObj) /* 652 */
#define TclGetByteArrayFromObj \
(tclStubsPtr->tclGetByteArrayFromObj) /* 653 */
+#define Tcl_UtfCharComplete \
+ (tclStubsPtr->tcl_UtfCharComplete) /* 654 */
+#define Tcl_UtfNext \
+ (tclStubsPtr->tcl_UtfNext) /* 655 */
+#define Tcl_UtfPrev \
+ (tclStubsPtr->tcl_UtfPrev) /* 656 */
#endif /* defined(USE_TCL_STUBS) */
@@ -4230,10 +4245,16 @@ extern const TclStubs *tclStubsPtr;
#define Tcl_Close(interp, chan) Tcl_CloseEx(interp, chan, 0)
#endif
-#if defined(USE_TCL_STUBS) && (TCL_UTF_MAX > 3)
+#undef TclUtfCharComplete
+#undef TclUtfNext
+#undef TclUtfPrev
+#if defined(USE_TCL_STUBS) && (TCL_UTF_MAX < 4) && !defined(TCL_NO_DEPRECATED)
# undef Tcl_UtfCharComplete
-# define Tcl_UtfCharComplete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \
- ? ((length) >= 4) : tclStubsPtr->tcl_UtfCharComplete((src), (length)))
+# undef Tcl_UtfNext
+# undef Tcl_UtfPrev
+# define Tcl_UtfCharComplete (tclStubsPtr->tclUtfCharComplete)
+# define Tcl_UtfNext (tclStubsPtr->tclUtfNext)
+# define Tcl_UtfPrev (tclStubsPtr->tclUtfPrev)
#endif
#define Tcl_CreateSlave Tcl_CreateChild
#define Tcl_GetSlave Tcl_GetChild
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index c4ef159..ea2d6fa 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2323,7 +2323,7 @@ UtfToUtfProc(
dstEnd = dst + dstLen - TCL_UTF_MAX;
for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
- if ((src > srcClose) && (!TclUCS4Complete(src, srcEnd - src))) {
+ if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
/*
* If there is more string to follow, this will ensure that the
* last UTF-8 character in the source buffer hasn't been cut off.
@@ -2353,7 +2353,7 @@ UtfToUtfProc(
*dst++ = 0;
*chPtr = 0; /* reset surrogate handling */
src += 2;
- } else if (!TclUCS4Complete(src, srcEnd - src)) {
+ } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
/*
* Always check before using TclUtfToUCS4. Not doing can so
* cause it run beyond the end of the buffer! If we happen such an
diff --git a/generic/tclIndexObj.c b/generic/tclIndexObj.c
index f08278b..30d6cbd 100644
--- a/generic/tclIndexObj.c
+++ b/generic/tclIndexObj.c
@@ -785,7 +785,7 @@ PrefixLongestObjCmd(
* Adjust in case we stopped in the middle of a UTF char.
*/
- resultLength = TclUtfPrev(&resultString[i+1],
+ resultLength = Tcl_UtfPrev(&resultString[i+1],
resultString) - resultString;
break;
}
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 66bbc17..96315fb 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3253,16 +3253,10 @@ MODULE_SCOPE int TclUtfCount(int ch);
# define TclUtfToUCS4 Tcl_UtfToUniChar
# define TclUniCharToUCS4(src, ptr) (*ptr = *(src),1)
# define TclUCS4Prev(src, ptr) (((src) > (ptr)) ? ((src) - 1) : (src))
-# define TclUCS4Complete Tcl_UtfCharComplete
-# define TclChar16Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \
- ? ((length) >= 3) : Tcl_UtfCharComplete((src), (length)))
#else
MODULE_SCOPE int TclUtfToUCS4(const char *, int *);
MODULE_SCOPE int TclUniCharToUCS4(const Tcl_UniChar *, int *);
MODULE_SCOPE const Tcl_UniChar *TclUCS4Prev(const Tcl_UniChar *, const Tcl_UniChar *);
-# define TclUCS4Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \
- ? ((length) >= 4) : Tcl_UtfCharComplete((src), (length)))
-# define TclChar16Complete Tcl_UtfCharComplete
#endif
MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(ClientData clientData);
MODULE_SCOPE Tcl_Obj * TclpFilesystemPathType(Tcl_Obj *pathPtr);
@@ -4696,11 +4690,6 @@ MODULE_SCOPE const TclFileAttrProcs tclpFileAttrProcs[];
(numChars) = _count; \
} while (0);
-#define TclUtfPrev(src, start) \
- (((src) < (start) + 2) ? (start) : \
- ((unsigned char) *((src) - 1)) < 0x80 ? (src) - 1 : \
- Tcl_UtfPrev(src, start))
-
/*
*----------------------------------------------------------------
* Macro that encapsulates the logic that determines when it is safe to
diff --git a/generic/tclParse.c b/generic/tclParse.c
index df6c9bf..4de0356 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -935,7 +935,7 @@ TclParseBackslash(
* #217987] test subst-3.2
*/
- if (TclUCS4Complete(p, numBytes - 1)) {
+ if (Tcl_UtfCharComplete(p, numBytes - 1)) {
count = TclUtfToUCS4(p, &unichar) + 1; /* +1 for '\' */
} else {
char utfBytes[8];
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 50b473a..bb718ce 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -1199,10 +1199,10 @@ Tcl_AppendLimitedToObj(
}
eLen = strlen(ellipsis);
while (eLen > limit) {
- eLen = TclUtfPrev(ellipsis+eLen, ellipsis) - ellipsis;
+ eLen = Tcl_UtfPrev(ellipsis+eLen, ellipsis) - ellipsis;
}
- toCopy = TclUtfPrev(bytes+limit+1-eLen, bytes) - bytes;
+ toCopy = Tcl_UtfPrev(bytes+limit+1-eLen, bytes) - bytes;
}
/*
@@ -2644,7 +2644,7 @@ AppendPrintfToObjVA(
* multi-byte characters.
*/
- q = TclUtfPrev(end, bytes);
+ q = Tcl_UtfPrev(end, bytes);
if (!Tcl_UtfCharComplete(q, (int)(end - q))) {
end = q;
}
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index dd7dc26..7321805 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -90,6 +90,32 @@ static void uniCodePanic(void) {
# define Tcl_UniCharNcmp (int(*)(const Tcl_UniChar *, const Tcl_UniChar *, unsigned long))(void *)uniCodePanic
#endif
+#define TclUtfCharComplete UtfCharComplete
+#define TclUtfNext UtfNext
+#define TclUtfPrev UtfPrev
+
+static int TclUtfCharComplete(const char *src, int length) { /* legacy stub entry 326 */
+ if ((unsigned)((unsigned char)*(src) - 0xF0) < 5) { /* lead byte in 0xF0..0xF4 */
+ return length >= 3; /* same rule as the removed TclChar16Complete macro: 3 bytes suffice */
+ }
+ return Tcl_UtfCharComplete(src, length); /* every other lead byte: defer to the new implementation */
+}
+
+static const char *TclUtfNext(const char *src) { /* legacy stub entry 330 */
+ if ((unsigned)((unsigned char)*(src) - 0xF0) < 5) { /* lead byte in 0xF0..0xF4 */
+ return src + 1; /* advance over this one byte only; NOTE(review): presumably the pre-TIP-575 TCL_UTF_MAX < 4 behaviour - confirm */
+ }
+ return Tcl_UtfNext(src); /* every other byte: defer to the new implementation */
+}
+
+static const char *TclUtfPrev(const char *src, const char *start) { /* legacy stub entry 331 */
+ if ((src >= start + 3) && ((src[-1] & 0xC0) == 0x80) /* at least 3 bytes precede src ... */
+ && ((src[-2] & 0xC0) == 0x80) && ((src[-3] & 0xC0) == 0x80)) { /* ... and all three are 10xxxxxx trail bytes */
+ return src - 3; /* step back exactly 3 bytes; NOTE(review): appears to mirror the removed "src - TCL_UTF_MAX" fallback - confirm */
+ }
+ return Tcl_UtfPrev(src, start); /* otherwise defer to the new implementation */
+}
+
#define TclBN_mp_add mp_add
#define TclBN_mp_and mp_and
#define TclBN_mp_clamp mp_clamp
@@ -1575,12 +1601,12 @@ const TclStubs tclStubs = {
Tcl_UniCharToUpper, /* 323 */
Tcl_UniCharToUtf, /* 324 */
Tcl_UtfAtIndex, /* 325 */
- Tcl_UtfCharComplete, /* 326 */
+ TclUtfCharComplete, /* 326 */
Tcl_UtfBackslash, /* 327 */
Tcl_UtfFindFirst, /* 328 */
Tcl_UtfFindLast, /* 329 */
- Tcl_UtfNext, /* 330 */
- Tcl_UtfPrev, /* 331 */
+ TclUtfNext, /* 330 */
+ TclUtfPrev, /* 331 */
Tcl_UtfToExternal, /* 332 */
Tcl_UtfToExternalDString, /* 333 */
Tcl_UtfToLower, /* 334 */
@@ -1903,6 +1929,9 @@ const TclStubs tclStubs = {
TclGetStringFromObj, /* 651 */
TclGetUnicodeFromObj, /* 652 */
TclGetByteArrayFromObj, /* 653 */
+ Tcl_UtfCharComplete, /* 654 */
+ Tcl_UtfNext, /* 655 */
+ Tcl_UtfPrev, /* 656 */
};
/* !END!: Do not edit above this line. */
diff --git a/generic/tclTest.c b/generic/tclTest.c
index 45b5ca3..1c35ab2 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -19,6 +19,9 @@
#ifndef USE_TCL_STUBS
# define USE_TCL_STUBS
#endif
+#ifndef TCL_NO_DEPRECATED
+# define TCL_NO_DEPRECATED
+#endif
#include "tclInt.h"
#ifdef TCL_WITH_EXTERNAL_TOMMATH
# include "tommath.h"
@@ -6962,7 +6965,7 @@ TestUtfPrevCmd(
} else {
offset = numBytes;
}
- result = TclUtfPrev(bytes + offset, bytes);
+ result = Tcl_UtfPrev(bytes + offset, bytes);
Tcl_SetObjResult(interp, Tcl_NewIntObj(result - bytes));
return TCL_OK;
}
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 5db7343..017688b 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -64,20 +64,12 @@ static const unsigned char totalBytes[256] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-/* Tcl_UtfCharComplete() might point to 2nd byte of valid 4-byte sequence */
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
-/* End of "continuation byte section" */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
-#if TCL_UTF_MAX > 3
- 4,4,4,4,4,
-#else
- 1,1,1,1,1,
-#endif
- 1,1,1,1,1,1,1,1,1,1,1
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1
};
-
+
static const unsigned char complete[256] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@@ -88,15 +80,9 @@ static const unsigned char complete[256] = {
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/* End of "continuation byte section" */
2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
-#if TCL_UTF_MAX > 3
- 4,4,4,4,4,
-#else
- 3,3,3,3,3,
-#endif
- 1,1,1,1,1,1,1,1,1,1,1
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1
};
-
+
/*
* Functions used only in this module.
*/
@@ -694,7 +680,7 @@ Tcl_UtfToUniCharDString(
p += TclUtfToUCS4(p, &ch);
*w++ = ch;
}
- while ((p < endPtr) && TclUCS4Complete(p, endPtr-p)) {
+ while ((p < endPtr) && Tcl_UtfCharComplete(p, endPtr-p)) {
p += TclUtfToUCS4(p, &ch);
*w++ = ch;
}
@@ -752,7 +738,7 @@ Tcl_UtfToChar16DString(
*w++ = ch;
}
while (p < endPtr) {
- if (TclChar16Complete(p, endPtr-p)) {
+ if (Tcl_UtfCharComplete(p, endPtr-p)) {
p += Tcl_UtfToChar16(p, &ch);
*w++ = ch;
} else {
@@ -833,7 +819,7 @@ Tcl_NumUtfChars(
/* Pointer to the end of string. Never read endPtr[0] */
const char *endPtr = src + length;
/* Pointer to last byte where optimization still can be used */
- const char *optPtr = endPtr - TCL_UTF_MAX;
+ const char *optPtr = endPtr - 4;
/*
* Optimize away the call in this loop. Justified because...
@@ -1064,7 +1050,7 @@ Tcl_UtfPrev(
* it (the fallback) is correct.
*/
- || (trailBytesSeen >= complete[byte])) {
+ || (trailBytesSeen >= totalBytes[byte])) {
/*
* That is, (1 + trailBytesSeen > needed).
* We've examined more bytes than needed to complete
@@ -1105,19 +1091,14 @@ Tcl_UtfPrev(
/* Continue the search backwards... */
look--;
- } while (trailBytesSeen < TCL_UTF_MAX);
+ } while (trailBytesSeen < 4);
/*
- * We've seen TCL_UTF_MAX trail bytes, so we know there will not be a
+ * We've seen 4 trail bytes, so we know there will not be a
* properly formed byte sequence to find, and we can stop looking,
- * accepting the fallback (for TCL_UTF_MAX > 3) or just go back as
- * far as we can.
+ * accepting the fallback.
*/
-#if TCL_UTF_MAX > 3
return fallback;
-#else
- return src - TCL_UTF_MAX;
-#endif
}
/*
@@ -1744,7 +1725,7 @@ Tcl_UniCharToLower(
/* Clear away extension bits, if any */
return ch & 0x1FFFFF;
}
-
+
/*
*----------------------------------------------------------------------
*
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 1904e2f..d5ec040 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -1707,11 +1707,7 @@ TclTrimRight(
const char *q = trim;
int pInc = 0, bytesLeft = numTrim;
- pp = TclUtfPrev(p, bytes);
-#if TCL_UTF_MAX < 4 /* Needed because TclUtfPrev() cannot always jump back */
- /* sufficiently. See [d43f96c1a8] */
- pp = TclUtfPrev(pp, bytes);
-#endif
+ pp = Tcl_UtfPrev(p, bytes);
do {
pp += pInc;
pInc = TclUtfToUCS4(pp, &ch1);
diff --git a/tests/utf.test b/tests/utf.test
index b6c23ba..d4db773 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -216,7 +216,10 @@ test utf-6.9 {Tcl_UtfNext} {testutfnext testbytestring} {
test utf-6.10 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xA0]G
} 1
-test utf-6.11 {Tcl_UtfNext} {testutfnext testbytestring} {
+test utf-6.11.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
+ testutfnext [testbytestring \xA0\xA0\x00]
+} 1
+test utf-6.11.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xA0\xA0\x00]
} 2
test utf-6.12 {Tcl_UtfNext} {testutfnext testbytestring} {
@@ -273,19 +276,19 @@ test utf-6.28 {Tcl_UtfNext} {testutfnext testbytestring} {
test utf-6.29 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xE8\xF8]
} 1
-test utf-6.30.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.30.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2]
} 1
-test utf-6.30.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.30.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\x00]
} 1
test utf-6.31 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2]G
} 1
-test utf-6.32.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.32.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0]
} 1
-test utf-6.32.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.32.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\x00]
} 1
test utf-6.33 {Tcl_UtfNext} {testutfnext testbytestring} {
@@ -396,10 +399,10 @@ test utf-6.67 {Tcl_UtfNext} {testutfnext testbytestring} {
test utf-6.68 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\xA0]G
} 1
-test utf-6.69.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.69.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0]
} 1
-test utf-6.69.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.69.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0]
} 4
test utf-6.70 {Tcl_UtfNext} {testutfnext testbytestring} {
@@ -414,40 +417,40 @@ test utf-6.72 {Tcl_UtfNext} {testutfnext testbytestring} {
test utf-6.73 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\xA0\xF8]
} 1
-test utf-6.74.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.74.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0]G
} 1
-test utf-6.74.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.74.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0]G
} 4
-test utf-6.75.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.75.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0]
} 1
-test utf-6.75.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.75.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0]
} 4
-test utf-6.76.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.76.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xD0]
} 1
-test utf-6.76.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.76.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xD0]
} 4
-test utf-6.77.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.77.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xE8]
} 1
-test utf-6.77.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.77.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xE8]
} 4
-test utf-6.78.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.78.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xF2]
} 1
-test utf-6.78.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.78.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xF2]
} 4
-test utf-6.79.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.79.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0G\xF8]
} 1
-test utf-6.79.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.79.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0G\xF8]
} 4
test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext {
@@ -471,37 +474,55 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} {
test utf-6.86 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} {
testutfnext [testbytestring \xF0\x80\x80\x80]
} 1
-test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF0\x90\x80\x80]
} 1
-test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring ucs4} {
+test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF0\x90\x80\x80]
} 4
-test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring} {
+test utf-6.88.0 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring ucs2} {
+ testutfnext [testbytestring \xA0\xA0\x00]
+} 1
+test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xA0\xA0\x00]
} 2
-test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring} {
+test utf-6.89.0 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring ucs2} {
+ testutfnext [testbytestring \x80\x80\x00]
+} 2
+test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \x80\x80\x00]
} 2
-test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF4\x8F\xBF\xBF]
} 1
-test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring ucs4} {
+test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF4\x8F\xBF\xBF]
} 4
test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring} {
testutfnext [testbytestring \xF4\x90\x80\x80]
} 1
-test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring} {
+test utf-6.92.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring ucs2} {
+ testutfnext [testbytestring \xA0\xA0\xA0]
+} 1
+test utf-6.92.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xA0\xA0\xA0]
} 3
-test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring} {
+test utf-6.93.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring ucs2} {
+ testutfnext [testbytestring \x80\x80\x80]
+} 1
+test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \x80\x80\x80]
} 3
-test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring} {
+test utf-6.94.0 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring ucs2} {
+ testutfnext [testbytestring \xA0\xA0\xA0\xA0]
+} 1
+test utf-6.94.1 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xA0\xA0\xA0\xA0]
} 3
-test utf-6.95 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring} {
+test utf-6.95.0 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring ucs2} {
+ testutfnext [testbytestring \x80\x80\x80\x80]
+} 1
+test utf-6.95.1 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \x80\x80\x80\x80]
} 3
@@ -661,30 +682,33 @@ test utf-7.17.1 {Tcl_UtfPrev} {testutfprev testbytestring} {
test utf-7.17.2 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xD0\xA0\xA0\xF8] 4
} 3
-test utf-7.18.0 {Tcl_UtfPrev} {testutfprev testbytestring utf16} {
+test utf-7.18.0 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xA0\xA0\xA0]
-} 1
-test utf-7.18.1 {Tcl_UtfPrev} {testutfprev testbytestring utf16} {
+} 3
+test utf-7.18.1 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xA0\xA0\xA0\xA0] 4
-} 1
-test utf-7.18.2 {Tcl_UtfPrev} {testutfprev testbytestring utf16} {
+} 3
+test utf-7.18.2 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xA0\xA0\xA0\xF8] 4
-} 1
-test utf-7.19 {Tcl_UtfPrev} {testutfprev testbytestring utf16} {
+} 3
+test utf-7.19 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF8\xA0\xA0\xA0]
-} 2
-test utf-7.20 {Tcl_UtfPrev} {testutfprev testbytestring utf16} {
+} 4
+test utf-7.20.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} {
testutfprev A[testbytestring \xF2\xA0\xA0\xA0]
-} 2
-test utf-7.21 {Tcl_UtfPrev} {testutfprev testbytestring utf16} {
+} 4
+test utf-7.20.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} {
+ testutfprev A[testbytestring \xF2\xA0\xA0\xA0]
+} 1
+test utf-7.21 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A\u8820[testbytestring \xA0]
-} 2
-test utf-7.22 {Tcl_UtfPrev} {testutfprev testbytestring utf16} {
+} 4
+test utf-7.22 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xD0\xA0\xA0\xA0]
-} 2
-test utf-7.23 {Tcl_UtfPrev} {testutfprev testbytestring utf16} {
+} 4
+test utf-7.23 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xA0\xA0\xA0\xA0]
-} 2
+} 4
test utf-7.24 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
testutfprev A[testbytestring \xC0\x81]
} 2
@@ -706,9 +730,9 @@ test utf-7.28 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
test utf-7.28.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
testutfprev A[testbytestring \xE0\x80\x80] 2
} 1
-test utf-7.29 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring utf16} {
+test utf-7.29 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF0\x80\x80\x80]
-} 2
+} 4
test utf-7.30 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF0\x80\x80\x80] 4
} 3
@@ -736,9 +760,12 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
test utf-7.38 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
testutfprev A[testbytestring \xE0\xA0\x80] 2
} 1
-test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring utf16} {
+test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} {
testutfprev A[testbytestring \xF0\x90\x80\x80]
-} 2
+} 4
+test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring fullutf} {
+ testutfprev A[testbytestring \xF0\x90\x80\x80]
+} 1
test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} {
testutfprev A[testbytestring \xF0\x90\x80\x80] 4
} 3
@@ -763,9 +790,9 @@ test utf-7.44 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestrin
test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring} {
testutfprev [testbytestring \xA0\xA0\xA0]
} 2
-test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring utf16} {
+test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring} {
testutfprev [testbytestring \xA0\xA0\xA0\xA0]
-} 1
+} 3
test utf-7.47 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev testbytestring} {
testutfprev [testbytestring \xE8\xA0]
} 0
@@ -775,27 +802,30 @@ test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} tes
test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev testbytestring} {
testutfprev [testbytestring \xE8\xA0\x00] 2
} 0
-test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring utf16} {
+test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} {
testutfprev A[testbytestring \xF4\x8F\xBF\xBF]
-} 2
-test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} {
+} 4
+test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} {
+ testutfprev A[testbytestring \xF4\x8F\xBF\xBF]
+} 1
+test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} {
testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 4
} 3
-test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} {
+test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} {
testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 4
} 1
-test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} {
+test utf-7.48.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} {
testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 3
} 2
-test utf-7.48.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} {
+test utf-7.48.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} {
testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 3
} 1
-test utf-7.48.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} {
+test utf-7.48.6 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 2
} 1
-test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring utf16} {
+test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF4\x90\x80\x80]
-} 2
+} 4
test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF4\x90\x80\x80] 4
} 3