summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2021-03-15 12:31:41 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2021-03-15 12:31:41 (GMT)
commitc646a67fd295620422863a3ab3ac74eff316aec3 (patch)
tree4bbdf6fc46ff8f43aae43f83aee84ba9441588b8
parent3345ff1956ae6d7e9411917d2619c0f88ccefd2a (diff)
parent6e657cfb83595bc0481d833b708554a44e2142fb (diff)
downloadtcl-c646a67fd295620422863a3ab3ac74eff316aec3.zip
tcl-c646a67fd295620422863a3ab3ac74eff316aec3.tar.gz
tcl-c646a67fd295620422863a3ab3ac74eff316aec3.tar.bz2
Merge 8.7 (this is the TIP #575 implementation for Tcl 9.0)
-rw-r--r--doc/Utf.315
-rw-r--r--generic/tcl.decls16
-rw-r--r--generic/tclCompExpr.c2
-rw-r--r--generic/tclDecls.h56
-rw-r--r--generic/tclEncoding.c4
-rw-r--r--generic/tclIndexObj.c2
-rw-r--r--generic/tclInt.h11
-rw-r--r--generic/tclParse.c2
-rw-r--r--generic/tclStringObj.c6
-rw-r--r--generic/tclStubInit.c17
-rw-r--r--generic/tclTest.c5
-rw-r--r--generic/tclUtf.c49
-rw-r--r--generic/tclUtil.c6
-rw-r--r--tests/utf.test109
14 files changed, 141 insertions, 159 deletions
diff --git a/doc/Utf.3 b/doc/Utf.3
index e7e4a92..e8dee13 100644
--- a/doc/Utf.3
+++ b/doc/Utf.3
@@ -231,10 +231,10 @@ characters.
.PP
\fBTcl_UtfCharComplete\fR returns 1 if the source UTF-8 string \fIsrc\fR
of \fIlength\fR bytes is long enough to be decoded by
-\fBTcl_UtfToUniChar\fR, or 0 otherwise. This function does not guarantee
-that the UTF-8 string is properly formed. This routine is used by
-procedures that are operating on a byte at a time and need to know if a
-full Unicode character has been seen.
+\fBTcl_UtfToUniChar\fR/\fBTcl_UtfNext\fR, or 0 otherwise. This function
+does not guarantee that the UTF-8 string is properly formed. This routine
+is used by procedures that are operating on a byte at a time and need to
+know if a full Unicode character has been seen.
.PP
\fBTcl_NumUtfChars\fR corresponds to \fBstrlen\fR for UTF-8 strings. It
returns the number of Tcl_UniChars that are represented by the UTF-8 string
@@ -255,7 +255,8 @@ Given \fIsrc\fR, a pointer to some location in a UTF-8 string,
\fBTcl_UtfNext\fR returns a pointer to the next UTF-8 character in the
string. The caller must not ask for the next character after the last
character in the string if the string is not terminated by a null
-character.
+character. \fBTcl_UtfCharComplete\fR can be used in that case to
+make sure enough bytes are available before calling \fBTcl_UtfNext\fR.
.PP
\fBTcl_UtfPrev\fR is used to step backward through but not beyond the
UTF-8 string that begins at \fIstart\fR. If the UTF-8 string is made
@@ -272,12 +273,12 @@ always a pointer to a location in the string. It always returns a pointer to
a byte that begins a character when scanning for characters beginning
from \fIstart\fR. When \fIsrc\fR is greater than \fIstart\fR, it
always returns a pointer less than \fIsrc\fR and greater than or
-equal to (\fIsrc\fR - \fBTCL_UTF_MAX\fR). The character that begins
+equal to (\fIsrc\fR - 4). The character that begins
at the returned pointer is the first one that either includes the
byte \fIsrc[-1]\fR, or might include it if the right trail bytes are
present at \fIsrc\fR and greater. \fBTcl_UtfPrev\fR never reads the
byte \fIsrc[0]\fR nor the byte \fIstart[-1]\fR nor the byte
-\fIsrc[-\fBTCL_UTF_MAX\fI-1]\fR.
+\fIsrc[-5]\fR.
.PP
\fBTcl_UniCharAtIndex\fR corresponds to a C string array dereference or the
Pascal Ord() function. It returns the Unicode character represented at the
diff --git a/generic/tcl.decls b/generic/tcl.decls
index 5b1c3d2..790d9fb 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -1209,7 +1209,7 @@ declare 325 {
const char *Tcl_UtfAtIndex(const char *src, size_t index)
}
declare 326 {
- int Tcl_UtfCharComplete(const char *src, size_t length)
+ int TclUtfCharComplete(const char *src, size_t length)
}
declare 327 {
size_t Tcl_UtfBackslash(const char *src, int *readPtr, char *dst)
@@ -1221,10 +1221,10 @@ declare 329 {
const char *Tcl_UtfFindLast(const char *src, int ch)
}
declare 330 {
- const char *Tcl_UtfNext(const char *src)
+ const char *TclUtfNext(const char *src)
}
declare 331 {
- const char *Tcl_UtfPrev(const char *src, const char *start)
+ const char *TclUtfPrev(const char *src, const char *start)
}
declare 332 {
int Tcl_UtfToExternal(Tcl_Interp *interp, Tcl_Encoding encoding,
@@ -2475,8 +2475,16 @@ declare 652 {
declare 653 {
unsigned char *Tcl_GetByteArrayFromObj(Tcl_Obj *objPtr, size_t *lengthPtr)
}
+
+# TIP #575
+declare 654 {
+ int Tcl_UtfCharComplete(const char *src, size_t length)
+}
+declare 655 {
+ const char *Tcl_UtfNext(const char *src)
+}
declare 656 {
- void TclUnusedStubEntry(void)
+ const char *Tcl_UtfPrev(const char *src, const char *start)
}
# ----- BASELINE -- FOR -- 8.7.0 ----- #
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c
index 847a240..8de3e7d 100644
--- a/generic/tclCompExpr.c
+++ b/generic/tclCompExpr.c
@@ -2148,7 +2148,7 @@ ParseLexeme(
if (!TclIsBareword(*start) || *start == '_') {
size_t scanned;
- if (TclUCS4Complete(start, numBytes)) {
+ if (Tcl_UtfCharComplete(start, numBytes)) {
scanned = TclUtfToUCS4(start, &ch);
} else {
char utfBytes[8];
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index a1454a5..d29d344 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -859,7 +859,7 @@ EXTERN int Tcl_UniCharToUtf(int ch, char *buf);
/* 325 */
EXTERN const char * Tcl_UtfAtIndex(const char *src, size_t index);
/* 326 */
-EXTERN int Tcl_UtfCharComplete(const char *src, size_t length);
+EXTERN int TclUtfCharComplete(const char *src, size_t length);
/* 327 */
EXTERN size_t Tcl_UtfBackslash(const char *src, int *readPtr,
char *dst);
@@ -868,9 +868,9 @@ EXTERN const char * Tcl_UtfFindFirst(const char *src, int ch);
/* 329 */
EXTERN const char * Tcl_UtfFindLast(const char *src, int ch);
/* 330 */
-EXTERN const char * Tcl_UtfNext(const char *src);
+EXTERN const char * TclUtfNext(const char *src);
/* 331 */
-EXTERN const char * Tcl_UtfPrev(const char *src, const char *start);
+EXTERN const char * TclUtfPrev(const char *src, const char *start);
/* 332 */
EXTERN int Tcl_UtfToExternal(Tcl_Interp *interp,
Tcl_Encoding encoding, const char *src,
@@ -1740,10 +1740,12 @@ EXTERN Tcl_UniChar * Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr,
/* 653 */
EXTERN unsigned char * Tcl_GetByteArrayFromObj(Tcl_Obj *objPtr,
size_t *lengthPtr);
-/* Slot 654 is reserved */
-/* Slot 655 is reserved */
+/* 654 */
+EXTERN int Tcl_UtfCharComplete(const char *src, size_t length);
+/* 655 */
+EXTERN const char * Tcl_UtfNext(const char *src);
/* 656 */
-EXTERN void TclUnusedStubEntry(void);
+EXTERN const char * Tcl_UtfPrev(const char *src, const char *start);
typedef struct {
const struct TclPlatStubs *tclPlatStubs;
@@ -2081,12 +2083,12 @@ typedef struct TclStubs {
int (*tcl_UniCharToUpper) (int ch); /* 323 */
int (*tcl_UniCharToUtf) (int ch, char *buf); /* 324 */
const char * (*tcl_UtfAtIndex) (const char *src, size_t index); /* 325 */
- int (*tcl_UtfCharComplete) (const char *src, size_t length); /* 326 */
+ int (*tclUtfCharComplete) (const char *src, size_t length); /* 326 */
size_t (*tcl_UtfBackslash) (const char *src, int *readPtr, char *dst); /* 327 */
const char * (*tcl_UtfFindFirst) (const char *src, int ch); /* 328 */
const char * (*tcl_UtfFindLast) (const char *src, int ch); /* 329 */
- const char * (*tcl_UtfNext) (const char *src); /* 330 */
- const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 331 */
+ const char * (*tclUtfNext) (const char *src); /* 330 */
+ const char * (*tclUtfPrev) (const char *src, const char *start); /* 331 */
int (*tcl_UtfToExternal) (Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, size_t srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, size_t dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); /* 332 */
char * (*tcl_UtfToExternalDString) (Tcl_Encoding encoding, const char *src, size_t srcLen, Tcl_DString *dsPtr); /* 333 */
int (*tcl_UtfToLower) (char *src); /* 334 */
@@ -2409,9 +2411,9 @@ typedef struct TclStubs {
char * (*tcl_GetStringFromObj) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 651 */
Tcl_UniChar * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 652 */
unsigned char * (*tcl_GetByteArrayFromObj) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 653 */
- void (*reserved654)(void);
- void (*reserved655)(void);
- void (*tclUnusedStubEntry) (void); /* 656 */
+ int (*tcl_UtfCharComplete) (const char *src, size_t length); /* 654 */
+ const char * (*tcl_UtfNext) (const char *src); /* 655 */
+ const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 656 */
} TclStubs;
extern const TclStubs *tclStubsPtr;
@@ -3030,18 +3032,18 @@ extern const TclStubs *tclStubsPtr;
(tclStubsPtr->tcl_UniCharToUtf) /* 324 */
#define Tcl_UtfAtIndex \
(tclStubsPtr->tcl_UtfAtIndex) /* 325 */
-#define Tcl_UtfCharComplete \
- (tclStubsPtr->tcl_UtfCharComplete) /* 326 */
+#define TclUtfCharComplete \
+ (tclStubsPtr->tclUtfCharComplete) /* 326 */
#define Tcl_UtfBackslash \
(tclStubsPtr->tcl_UtfBackslash) /* 327 */
#define Tcl_UtfFindFirst \
(tclStubsPtr->tcl_UtfFindFirst) /* 328 */
#define Tcl_UtfFindLast \
(tclStubsPtr->tcl_UtfFindLast) /* 329 */
-#define Tcl_UtfNext \
- (tclStubsPtr->tcl_UtfNext) /* 330 */
-#define Tcl_UtfPrev \
- (tclStubsPtr->tcl_UtfPrev) /* 331 */
+#define TclUtfNext \
+ (tclStubsPtr->tclUtfNext) /* 330 */
+#define TclUtfPrev \
+ (tclStubsPtr->tclUtfPrev) /* 331 */
#define Tcl_UtfToExternal \
(tclStubsPtr->tcl_UtfToExternal) /* 332 */
#define Tcl_UtfToExternalDString \
@@ -3668,10 +3670,12 @@ extern const TclStubs *tclStubsPtr;
(tclStubsPtr->tcl_GetUnicodeFromObj) /* 652 */
#define Tcl_GetByteArrayFromObj \
(tclStubsPtr->tcl_GetByteArrayFromObj) /* 653 */
-/* Slot 654 is reserved */
-/* Slot 655 is reserved */
-#define TclUnusedStubEntry \
- (tclStubsPtr->tclUnusedStubEntry) /* 656 */
+#define Tcl_UtfCharComplete \
+ (tclStubsPtr->tcl_UtfCharComplete) /* 654 */
+#define Tcl_UtfNext \
+ (tclStubsPtr->tcl_UtfNext) /* 655 */
+#define Tcl_UtfPrev \
+ (tclStubsPtr->tcl_UtfPrev) /* 656 */
#endif /* defined(USE_TCL_STUBS) */
@@ -3922,11 +3926,9 @@ extern const TclStubs *tclStubsPtr;
#define Tcl_Close(interp, chan) Tcl_CloseEx(interp, chan, 0)
-#if defined(USE_TCL_STUBS) && (TCL_UTF_MAX <= 3)
-# undef Tcl_UtfCharComplete
-# define Tcl_UtfCharComplete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \
- ? ((length) >= TCL_UTF_MAX) : tclStubsPtr->tcl_UtfCharComplete((src), (length)))
-#endif
+#undef TclUtfCharComplete
+#undef TclUtfNext
+#undef TclUtfPrev
#ifndef TCL_NO_DEPRECATED
# define Tcl_CreateSlave Tcl_CreateChild
# define Tcl_GetSlave Tcl_GetChild
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 1a19652..2601614 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2261,7 +2261,7 @@ UtfToUtfProc(
dstEnd = dst + dstLen - TCL_UTF_MAX;
for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
- if ((src > srcClose) && (!TclUCS4Complete(src, srcEnd - src))) {
+ if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
/*
* If there is more string to follow, this will ensure that the
* last UTF-8 character in the source buffer hasn't been cut off.
@@ -2289,7 +2289,7 @@ UtfToUtfProc(
*dst++ = 0;
src += 2;
- } else if (!TclUCS4Complete(src, srcEnd - src)) {
+ } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
/*
* Always check before using TclUtfToUCS4. Not doing can so
* cause it run beyond the end of the buffer! If we happen such an
diff --git a/generic/tclIndexObj.c b/generic/tclIndexObj.c
index 02d0e31..3a9e07f 100644
--- a/generic/tclIndexObj.c
+++ b/generic/tclIndexObj.c
@@ -718,7 +718,7 @@ PrefixLongestObjCmd(
* Adjust in case we stopped in the middle of a UTF char.
*/
- resultLength = TclUtfPrev(&resultString[i+1],
+ resultLength = Tcl_UtfPrev(&resultString[i+1],
resultString) - resultString;
break;
}
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 0d545bc..30e3ab7 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3184,16 +3184,10 @@ MODULE_SCOPE size_t TclUtfCount(int ch);
# define TclUtfToUCS4 Tcl_UtfToUniChar
# define TclUniCharToUCS4(src, ptr) (*ptr = *(src),1)
# define TclUCS4Prev(src, ptr) (((src) > (ptr)) ? ((src) - 1) : (src))
-# define TclUCS4Complete Tcl_UtfCharComplete
-# define TclChar16Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \
- ? ((length) >= 3) : Tcl_UtfCharComplete((src), (length)))
#else
MODULE_SCOPE int TclUtfToUCS4(const char *, int *);
MODULE_SCOPE int TclUniCharToUCS4(const Tcl_UniChar *, int *);
MODULE_SCOPE const Tcl_UniChar *TclUCS4Prev(const Tcl_UniChar *, const Tcl_UniChar *);
-# define TclUCS4Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \
- ? ((length) >= 4) : Tcl_UtfCharComplete((src), (length)))
-# define TclChar16Complete Tcl_UtfCharComplete
#endif
MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(void *clientData);
MODULE_SCOPE Tcl_Obj * TclpFilesystemPathType(Tcl_Obj *pathPtr);
@@ -4629,11 +4623,6 @@ MODULE_SCOPE const TclFileAttrProcs tclpFileAttrProcs[];
(numChars) = _count; \
} while (0);
-#define TclUtfPrev(src, start) \
- (((src) < (start) + 2) ? (start) : \
- ((unsigned char) *((src) - 1)) < 0x80 ? (src) - 1 : \
- Tcl_UtfPrev(src, start))
-
/*
*----------------------------------------------------------------
* Macro that encapsulates the logic that determines when it is safe to
diff --git a/generic/tclParse.c b/generic/tclParse.c
index f894906..614401f 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -936,7 +936,7 @@ TclParseBackslash(
* #217987] test subst-3.2
*/
- if (TclUCS4Complete(p, numBytes - 1)) {
+ if (Tcl_UtfCharComplete(p, numBytes - 1)) {
count = TclUtfToUCS4(p, &unichar) + 1; /* +1 for '\' */
} else {
char utfBytes[8];
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 78e4c1d..78eafa7 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -1127,10 +1127,10 @@ Tcl_AppendLimitedToObj(
}
eLen = strlen(ellipsis);
while (eLen > limit) {
- eLen = TclUtfPrev(ellipsis+eLen, ellipsis) - ellipsis;
+ eLen = Tcl_UtfPrev(ellipsis+eLen, ellipsis) - ellipsis;
}
- toCopy = TclUtfPrev(bytes+limit+1-eLen, bytes) - bytes;
+ toCopy = Tcl_UtfPrev(bytes+limit+1-eLen, bytes) - bytes;
}
/*
@@ -2537,7 +2537,7 @@ AppendPrintfToObjVA(
* multi-byte characters.
*/
- q = TclUtfPrev(end, bytes);
+ q = Tcl_UtfPrev(end, bytes);
if (!Tcl_UtfCharComplete(q, (end - q))) {
end = q;
}
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index e1ba73e..c9169bb 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -78,6 +78,11 @@ static void uniCodePanic() {
# define Tcl_SetUnicodeObj (void(*)(Tcl_Obj *, const Tcl_UniChar *, size_t))(void *)uniCodePanic
#endif
+#define TclUtfCharComplete Tcl_UtfCharComplete
+#define TclUtfNext Tcl_UtfNext
+#define TclUtfPrev Tcl_UtfPrev
+
+
#define TclBN_mp_add mp_add
#define TclBN_mp_add_d mp_add_d
#define TclBN_mp_and mp_and
@@ -1086,12 +1091,12 @@ const TclStubs tclStubs = {
Tcl_UniCharToUpper, /* 323 */
Tcl_UniCharToUtf, /* 324 */
Tcl_UtfAtIndex, /* 325 */
- Tcl_UtfCharComplete, /* 326 */
+ TclUtfCharComplete, /* 326 */
Tcl_UtfBackslash, /* 327 */
Tcl_UtfFindFirst, /* 328 */
Tcl_UtfFindLast, /* 329 */
- Tcl_UtfNext, /* 330 */
- Tcl_UtfPrev, /* 331 */
+ TclUtfNext, /* 330 */
+ TclUtfPrev, /* 331 */
Tcl_UtfToExternal, /* 332 */
Tcl_UtfToExternalDString, /* 333 */
Tcl_UtfToLower, /* 334 */
@@ -1414,9 +1419,9 @@ const TclStubs tclStubs = {
Tcl_GetStringFromObj, /* 651 */
Tcl_GetUnicodeFromObj, /* 652 */
Tcl_GetByteArrayFromObj, /* 653 */
- 0, /* 654 */
- 0, /* 655 */
- TclUnusedStubEntry, /* 656 */
+ Tcl_UtfCharComplete, /* 654 */
+ Tcl_UtfNext, /* 655 */
+ Tcl_UtfPrev, /* 656 */
};
/* !END!: Do not edit above this line. */
diff --git a/generic/tclTest.c b/generic/tclTest.c
index df61dbb..5c719c1 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -19,6 +19,9 @@
#ifndef USE_TCL_STUBS
# define USE_TCL_STUBS
#endif
+#ifndef TCL_NO_DEPRECATED
+# define TCL_NO_DEPRECATED
+#endif
#include "tclInt.h"
#ifdef TCL_WITH_EXTERNAL_TOMMATH
# include "tommath.h"
@@ -6963,7 +6966,7 @@ TestUtfPrevCmd(
} else {
offset = numBytes;
}
- result = TclUtfPrev(bytes + offset, bytes);
+ result = Tcl_UtfPrev(bytes + offset, bytes);
Tcl_SetObjResult(interp, Tcl_NewWideIntObj(result - bytes));
return TCL_OK;
}
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 447a5c9..9e49e87 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -64,20 +64,12 @@ static const unsigned char totalBytes[256] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-/* Tcl_UtfCharComplete() might point to 2nd byte of valid 4-byte sequence */
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
-/* End of "continuation byte section" */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
-#if TCL_UTF_MAX > 3
- 4,4,4,4,4,
-#else
- 1,1,1,1,1,
-#endif
- 1,1,1,1,1,1,1,1,1,1,1
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1
};
-
+
static const unsigned char complete[256] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@@ -88,17 +80,9 @@ static const unsigned char complete[256] = {
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/* End of "continuation byte section" */
2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
-#if TCL_UTF_MAX > 3
- 4,4,4,4,4,
-#else
- /* Tcl_UtfToUniChar() accesses src[1] and src[2] to check whether
- * the UTF-8 sequence is valid, so we cannot use 1 here. */
- 3,3,3,3,3,
-#endif
- 1,1,1,1,1,1,1,1,1,1,1
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1
};
-
+
/*
* Functions used only in this module.
*/
@@ -697,7 +681,7 @@ Tcl_UtfToUniCharDString(
p += TclUtfToUCS4(p, &ch);
*w++ = ch;
}
- while ((p < endPtr) && TclUCS4Complete(p, endPtr-p)) {
+ while ((p < endPtr) && Tcl_UtfCharComplete(p, endPtr-p)) {
p += TclUtfToUCS4(p, &ch);
*w++ = ch;
}
@@ -755,7 +739,7 @@ Tcl_UtfToChar16DString(
*w++ = ch;
}
while (p < endPtr) {
- if (TclChar16Complete(p, endPtr-p)) {
+ if (Tcl_UtfCharComplete(p, endPtr-p)) {
p += Tcl_UtfToChar16(p, &ch);
*w++ = ch;
} else {
@@ -836,7 +820,7 @@ Tcl_NumUtfChars(
/* Pointer to the end of string. Never read endPtr[0] */
const char *endPtr = src + length;
/* Pointer to last byte where optimization still can be used */
- const char *optPtr = endPtr - TCL_UTF_MAX;
+ const char *optPtr = endPtr - 4;
/*
* Optimize away the call in this loop. Justified because...
@@ -1071,7 +1055,7 @@ Tcl_UtfPrev(
* it (the fallback) is correct.
*/
- || (trailBytesSeen >= complete[byte])) {
+ || (trailBytesSeen >= totalBytes[byte])) {
/*
* That is, (1 + trailBytesSeen > needed).
* We've examined more bytes than needed to complete
@@ -1112,19 +1096,14 @@ Tcl_UtfPrev(
/* Continue the search backwards... */
look--;
- } while (trailBytesSeen < TCL_UTF_MAX);
+ } while (trailBytesSeen < 4);
/*
- * We've seen TCL_UTF_MAX trail bytes, so we know there will not be a
+ * We've seen 4 trail bytes, so we know there will not be a
* properly formed byte sequence to find, and we can stop looking,
- * accepting the fallback (for TCL_UTF_MAX > 3) or just go back as
- * far as we can.
+ * accepting the fallback.
*/
-#if TCL_UTF_MAX > 3
return fallback;
-#else
- return src - TCL_UTF_MAX;
-#endif
}
/*
@@ -1757,7 +1736,7 @@ Tcl_UniCharToLower(
/* Clear away extension bits, if any */
return ch & 0x1FFFFF;
}
-
+
/*
*----------------------------------------------------------------------
*
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 55b12ac..c9c3b8f 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -1655,11 +1655,7 @@ TclTrimRight(
const char *q = trim;
size_t pInc = 0, bytesLeft = numTrim;
- pp = TclUtfPrev(p, bytes);
-#if TCL_UTF_MAX < 4 /* Needed because TclUtfPrev() cannot always jump back */
- /* sufficiently. See [d43f96c1a8] */
- pp = TclUtfPrev(pp, bytes);
-#endif
+ pp = Tcl_UtfPrev(p, bytes);
do {
pp += pInc;
pInc = TclUtfToUCS4(pp, &ch1);
diff --git a/tests/utf.test b/tests/utf.test
index b6c4597..05e30c7 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -21,7 +21,6 @@ testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}]
testConstraint utf16 [expr {[string length [format %c 0x10000]] == 2}]
testConstraint ucs4 [expr {[testConstraint fullutf]
&& [string length [format %c 0x10000]] == 1}]
-testConstraint ucs2_utf16 [expr {![testConstraint ucs4]}]
testConstraint Uesc [expr {"\U0041" eq "A"}]
testConstraint pre388 [expr {"\x741" eq "A"}]
@@ -285,19 +284,19 @@ test utf-6.28 {Tcl_UtfNext} {testutfnext testbytestring} {
test utf-6.29 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xE8\xF8]
} 1
-test utf-6.30.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.30.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2]
} 1
-test utf-6.30.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.30.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\x00]
} 1
test utf-6.31 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2]G
} 1
-test utf-6.32.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.32.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0]
} 1
-test utf-6.32.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.32.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\x00]
} 1
test utf-6.33 {Tcl_UtfNext} {testutfnext testbytestring} {
@@ -408,10 +407,10 @@ test utf-6.67 {Tcl_UtfNext} {testutfnext testbytestring} {
test utf-6.68 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\xA0]G
} 1
-test utf-6.69.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.69.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0]
} 1
-test utf-6.69.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.69.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0]
} 4
test utf-6.70 {Tcl_UtfNext} {testutfnext testbytestring} {
@@ -426,40 +425,40 @@ test utf-6.72 {Tcl_UtfNext} {testutfnext testbytestring} {
test utf-6.73 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext [testbytestring \xF2\xA0\xA0\xF8]
} 1
-test utf-6.74.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.74.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0]G
} 1
-test utf-6.74.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.74.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0]G
} 4
-test utf-6.75.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.75.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0]
} 1
-test utf-6.75.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.75.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0]
} 4
-test utf-6.76.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.76.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xD0]
} 1
-test utf-6.76.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.76.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xD0]
} 4
-test utf-6.77.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.77.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xE8]
} 1
-test utf-6.77.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.77.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xE8]
} 4
-test utf-6.78.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.78.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xF2]
} 1
-test utf-6.78.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.78.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xF2]
} 4
-test utf-6.79.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.79.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0G\xF8]
} 1
-test utf-6.79.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} {
+test utf-6.79.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0G\xF8]
} 4
test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext {
@@ -483,28 +482,28 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} {
test utf-6.86 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} {
testutfnext [testbytestring \xF0\x80\x80\x80]
} 1
-test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF0\x90\x80\x80]
} 1
-test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring ucs4} {
+test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF0\x90\x80\x80]
} 4
test utf-6.88.0 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xA0\xA0\x00]
} 1
-test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring} {
+test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xA0\xA0\x00]
} 2
test utf-6.89.0 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \x80\x80\x00]
} 1
-test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring} {
+test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \x80\x80\x00]
} 2
-test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF4\x8F\xBF\xBF]
} 1
-test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring ucs4} {
+test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF4\x8F\xBF\xBF]
} 4
test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring} {
@@ -513,25 +512,25 @@ test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytest
test utf-6.92.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xA0\xA0\xA0]
} 1
-test utf-6.92.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring} {
+test utf-6.92.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xA0\xA0\xA0]
} 3
test utf-6.93.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \x80\x80\x80]
} 1
-test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring} {
+test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \x80\x80\x80]
} 3
test utf-6.94.0 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xA0\xA0\xA0\xA0]
} 1
-test utf-6.94.1 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring} {
+test utf-6.94.1 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xA0\xA0\xA0\xA0]
} 3
test utf-6.95.0 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \x80\x80\x80\x80]
} 1
-test utf-6.95.1 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring} {
+test utf-6.95.1 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \x80\x80\x80\x80]
} 3
@@ -691,33 +690,33 @@ test utf-7.17.1 {Tcl_UtfPrev} {testutfprev testbytestring} {
test utf-7.17.2 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xD0\xA0\xA0\xF8] 4
} 3
-test utf-7.18.0 {Tcl_UtfPrev} {testutfprev testbytestring utf16} {
+test utf-7.18.0 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xA0\xA0\xA0]
-} 1
-test utf-7.18.1 {Tcl_UtfPrev} {testutfprev testbytestring utf16} {
+} 3
+test utf-7.18.1 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xA0\xA0\xA0\xA0] 4
-} 1
-test utf-7.18.2 {Tcl_UtfPrev} {testutfprev testbytestring utf16} {
+} 3
+test utf-7.18.2 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xA0\xA0\xA0\xF8] 4
-} 1
-test utf-7.19 {Tcl_UtfPrev} {testutfprev testbytestring utf16} {
+} 3
+test utf-7.19 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF8\xA0\xA0\xA0]
-} 2
+} 4
test utf-7.20.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} {
testutfprev A[testbytestring \xF2\xA0\xA0\xA0]
} 4
-test utf-7.20.1 {Tcl_UtfPrev} {testutfprev testbytestring utf16} {
+test utf-7.20.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} {
testutfprev A[testbytestring \xF2\xA0\xA0\xA0]
-} 2
-test utf-7.21 {Tcl_UtfPrev} {testutfprev testbytestring utf16} {
+} 1
+test utf-7.21 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A\u8820[testbytestring \xA0]
-} 2
-test utf-7.22 {Tcl_UtfPrev} {testutfprev testbytestring utf16} {
+} 4
+test utf-7.22 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xD0\xA0\xA0\xA0]
-} 2
-test utf-7.23 {Tcl_UtfPrev} {testutfprev testbytestring utf16} {
+} 4
+test utf-7.23 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xA0\xA0\xA0\xA0]
-} 2
+} 4
test utf-7.24 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
testutfprev A[testbytestring \xC0\x81]
} 2
@@ -739,9 +738,9 @@ test utf-7.28 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
test utf-7.28.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
testutfprev A[testbytestring \xE0\x80\x80] 2
} 1
-test utf-7.29 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring utf16} {
+test utf-7.29 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF0\x80\x80\x80]
-} 2
+} 4
test utf-7.30 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF0\x80\x80\x80] 4
} 3
@@ -772,9 +771,9 @@ test utf-7.38 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} {
testutfprev A[testbytestring \xF0\x90\x80\x80]
} 4
-test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring utf16} {
+test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring fullutf} {
testutfprev A[testbytestring \xF0\x90\x80\x80]
-} 2
+} 1
test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} {
testutfprev A[testbytestring \xF0\x90\x80\x80] 4
} 3
@@ -799,9 +798,9 @@ test utf-7.44 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestrin
test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring} {
testutfprev [testbytestring \xA0\xA0\xA0]
} 2
-test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring utf16} {
+test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring} {
testutfprev [testbytestring \xA0\xA0\xA0\xA0]
-} 1
+} 3
test utf-7.47 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev testbytestring} {
testutfprev [testbytestring \xE8\xA0]
} 0
@@ -814,9 +813,9 @@ test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {
test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} {
testutfprev A[testbytestring \xF4\x8F\xBF\xBF]
} 4
-test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring utf16} {
+test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} {
testutfprev A[testbytestring \xF4\x8F\xBF\xBF]
-} 2
+} 1
test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} {
testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 4
} 3
@@ -832,9 +831,9 @@ test utf-7.48.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbyte
test utf-7.48.6 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 2
} 1
-test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring utf16} {
+test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF4\x90\x80\x80]
-} 2
+} 4
test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF4\x90\x80\x80] 4
} 3