summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--generic/tcl.decls17
-rw-r--r--generic/tclCmdMZ.c2
-rw-r--r--generic/tclCompExpr.c8
-rw-r--r--generic/tclDecls.h51
-rw-r--r--generic/tclEncoding.c4
-rw-r--r--generic/tclInt.h14
-rw-r--r--generic/tclParse.c8
-rw-r--r--generic/tclStringObj.c6
-rw-r--r--generic/tclStubInit.c34
-rw-r--r--generic/tclTest.c8
-rw-r--r--generic/tclUtf.c14
-rw-r--r--generic/tclUtil.c4
12 files changed, 105 insertions, 65 deletions
diff --git a/generic/tcl.decls b/generic/tcl.decls
index a550411..4ccedd1 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -1163,7 +1163,7 @@ declare 325 {
const char *Tcl_UtfAtIndex(const char *src, int index)
}
declare 326 {
- int Tcl_UtfCharComplete(const char *src, int length)
+ int TclUtfCharComplete(const char *src, int length)
}
declare 327 {
int Tcl_UtfBackslash(const char *src, int *readPtr, char *dst)
@@ -1175,10 +1175,10 @@ declare 329 {
const char *Tcl_UtfFindLast(const char *src, int ch)
}
declare 330 {
- const char *Tcl_UtfNext(const char *src)
+ const char *TclUtfNext(const char *src)
}
declare 331 {
- const char *Tcl_UtfPrev(const char *src, const char *start)
+ const char *TclUtfPrev(const char *src, const char *start)
}
declare 332 {
int Tcl_UtfToExternal(Tcl_Interp *interp, Tcl_Encoding encoding,
@@ -2402,6 +2402,17 @@ declare 648 {
int length, Tcl_DString *dsPtr)
}
+# TIP #575
+declare 649 {
+ int Tcl_UtfCharComplete(const char *src, int length)
+}
+declare 650 {
+ const char *Tcl_UtfNext(const char *src)
+}
+declare 651 {
+ const char *Tcl_UtfPrev(const char *src, const char *start)
+}
+
# ----- BASELINE -- FOR -- 8.7.0 ----- #
##############################################################################
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 56df0dd..8f0465d 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -2529,7 +2529,7 @@ StringStartCmd(
break;
}
- next = TclUtfPrev(p, string);
+ next = Tcl_UtfPrev(p, string);
do {
next += delta;
delta = TclUtfToUCS4(next, &ch);
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c
index 4fb41fc..4d448b9 100644
--- a/generic/tclCompExpr.c
+++ b/generic/tclCompExpr.c
@@ -1893,7 +1893,7 @@ ParseLexeme(
{
const char *end;
int scanned;
- Tcl_UniChar ch = 0;
+ int ch;
Tcl_Obj *literal = NULL;
unsigned char byte;
@@ -2101,13 +2101,13 @@ ParseLexeme(
if (!TclIsBareword(*start) || *start == '_') {
if (Tcl_UtfCharComplete(start, numBytes)) {
- scanned = TclUtfToUniChar(start, &ch);
+ scanned = TclUtfToUCS4(start, &ch);
} else {
- char utfBytes[4];
+ char utfBytes[8];
memcpy(utfBytes, start, numBytes);
utfBytes[numBytes] = '\0';
- scanned = TclUtfToUniChar(utfBytes, &ch);
+ scanned = TclUtfToUCS4(utfBytes, &ch);
}
*lexemePtr = INVALID;
Tcl_DecrRefCount(literal);
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index c713469..7c1b22b 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -1000,7 +1000,7 @@ EXTERN int Tcl_UniCharToUtf(int ch, char *buf);
/* 325 */
EXTERN const char * Tcl_UtfAtIndex(const char *src, int index);
/* 326 */
-EXTERN int Tcl_UtfCharComplete(const char *src, int length);
+EXTERN int TclUtfCharComplete(const char *src, int length);
/* 327 */
EXTERN int Tcl_UtfBackslash(const char *src, int *readPtr,
char *dst);
@@ -1009,9 +1009,9 @@ EXTERN const char * Tcl_UtfFindFirst(const char *src, int ch);
/* 329 */
EXTERN const char * Tcl_UtfFindLast(const char *src, int ch);
/* 330 */
-EXTERN const char * Tcl_UtfNext(const char *src);
+EXTERN const char * TclUtfNext(const char *src);
/* 331 */
-EXTERN const char * Tcl_UtfPrev(const char *src, const char *start);
+EXTERN const char * TclUtfPrev(const char *src, const char *start);
/* 332 */
EXTERN int Tcl_UtfToExternal(Tcl_Interp *interp,
Tcl_Encoding encoding, const char *src,
@@ -1921,6 +1921,12 @@ EXTERN char * Tcl_UniCharToUtfDString(const int *uniStr,
/* 648 */
EXTERN int * Tcl_UtfToUniCharDString(const char *src, int length,
Tcl_DString *dsPtr);
+/* 649 */
+EXTERN int Tcl_UtfCharComplete(const char *src, int length);
+/* 650 */
+EXTERN const char * Tcl_UtfNext(const char *src);
+/* 651 */
+EXTERN const char * Tcl_UtfPrev(const char *src, const char *start);
typedef struct {
const struct TclPlatStubs *tclPlatStubs;
@@ -2282,12 +2288,12 @@ typedef struct TclStubs {
int (*tcl_UniCharToUpper) (int ch); /* 323 */
int (*tcl_UniCharToUtf) (int ch, char *buf); /* 324 */
const char * (*tcl_UtfAtIndex) (const char *src, int index); /* 325 */
- int (*tcl_UtfCharComplete) (const char *src, int length); /* 326 */
+ int (*tclUtfCharComplete) (const char *src, int length); /* 326 */
int (*tcl_UtfBackslash) (const char *src, int *readPtr, char *dst); /* 327 */
const char * (*tcl_UtfFindFirst) (const char *src, int ch); /* 328 */
const char * (*tcl_UtfFindLast) (const char *src, int ch); /* 329 */
- const char * (*tcl_UtfNext) (const char *src); /* 330 */
- const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 331 */
+ const char * (*tclUtfNext) (const char *src); /* 330 */
+ const char * (*tclUtfPrev) (const char *src, const char *start); /* 331 */
int (*tcl_UtfToExternal) (Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); /* 332 */
char * (*tcl_UtfToExternalDString) (Tcl_Encoding encoding, const char *src, int srcLen, Tcl_DString *dsPtr); /* 333 */
int (*tcl_UtfToLower) (char *src); /* 334 */
@@ -2605,6 +2611,9 @@ typedef struct TclStubs {
int (*tcl_UtfToUniChar) (const char *src, int *chPtr); /* 646 */
char * (*tcl_UniCharToUtfDString) (const int *uniStr, int uniLength, Tcl_DString *dsPtr); /* 647 */
int * (*tcl_UtfToUniCharDString) (const char *src, int length, Tcl_DString *dsPtr); /* 648 */
+ int (*tcl_UtfCharComplete) (const char *src, int length); /* 649 */
+ const char * (*tcl_UtfNext) (const char *src); /* 650 */
+ const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 651 */
} TclStubs;
extern const TclStubs *tclStubsPtr;
@@ -3287,18 +3296,18 @@ extern const TclStubs *tclStubsPtr;
(tclStubsPtr->tcl_UniCharToUtf) /* 324 */
#define Tcl_UtfAtIndex \
(tclStubsPtr->tcl_UtfAtIndex) /* 325 */
-#define Tcl_UtfCharComplete \
- (tclStubsPtr->tcl_UtfCharComplete) /* 326 */
+#define TclUtfCharComplete \
+ (tclStubsPtr->tclUtfCharComplete) /* 326 */
#define Tcl_UtfBackslash \
(tclStubsPtr->tcl_UtfBackslash) /* 327 */
#define Tcl_UtfFindFirst \
(tclStubsPtr->tcl_UtfFindFirst) /* 328 */
#define Tcl_UtfFindLast \
(tclStubsPtr->tcl_UtfFindLast) /* 329 */
-#define Tcl_UtfNext \
- (tclStubsPtr->tcl_UtfNext) /* 330 */
-#define Tcl_UtfPrev \
- (tclStubsPtr->tcl_UtfPrev) /* 331 */
+#define TclUtfNext \
+ (tclStubsPtr->tclUtfNext) /* 330 */
+#define TclUtfPrev \
+ (tclStubsPtr->tclUtfPrev) /* 331 */
#define Tcl_UtfToExternal \
(tclStubsPtr->tcl_UtfToExternal) /* 332 */
#define Tcl_UtfToExternalDString \
@@ -3933,6 +3942,12 @@ extern const TclStubs *tclStubsPtr;
(tclStubsPtr->tcl_UniCharToUtfDString) /* 647 */
#define Tcl_UtfToUniCharDString \
(tclStubsPtr->tcl_UtfToUniCharDString) /* 648 */
+#define Tcl_UtfCharComplete \
+ (tclStubsPtr->tcl_UtfCharComplete) /* 649 */
+#define Tcl_UtfNext \
+ (tclStubsPtr->tcl_UtfNext) /* 650 */
+#define Tcl_UtfPrev \
+ (tclStubsPtr->tcl_UtfPrev) /* 651 */
#endif /* defined(USE_TCL_STUBS) */
@@ -4178,10 +4193,16 @@ extern const TclStubs *tclStubsPtr;
#define Tcl_Close(interp, chan) Tcl_CloseEx(interp, chan, 0)
#endif
-#if defined(USE_TCL_STUBS) && (TCL_UTF_MAX > 3)
+#undef TclUtfCharComplete
+#undef TclUtfNext
+#undef TclUtfPrev
+#if defined(USE_TCL_STUBS) && (TCL_UTF_MAX < 4) && !defined(TCL_NO_DEPRECATED)
# undef Tcl_UtfCharComplete
-# define Tcl_UtfCharComplete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \
- ? ((length) >= 4) : tclStubsPtr->tcl_UtfCharComplete((src), (length)))
+# undef Tcl_UtfNext
+# undef Tcl_UtfPrev
+# define Tcl_UtfCharComplete (tclStubsPtr->tclUtfCharComplete)
+# define Tcl_UtfNext (tclStubsPtr->tclUtfNext)
+# define Tcl_UtfPrev (tclStubsPtr->tclUtfPrev)
#endif
#endif /* _TCLDECLS */
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index ae02821..784d8d6 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2321,7 +2321,7 @@ UtfToUtfProc(
dstEnd = dst + dstLen - TCL_UTF_MAX;
for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
- if ((src > srcClose) && (!TclUCS4Complete(src, srcEnd - src))) {
+ if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
/*
* If there is more string to follow, this will ensure that the
* last UTF-8 character in the source buffer hasn't been cut off.
@@ -2351,7 +2351,7 @@ UtfToUtfProc(
*dst++ = 0;
*chPtr = 0; /* reset surrogate handling */
src += 2;
- } else if (!TclUCS4Complete(src, srcEnd - src)) {
+ } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
/*
* Always check before using TclUtfToUCS4. Not doing can so
* cause it run beyond the end of the buffer! If we happen such an
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 78d9f93..1b95754 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3252,14 +3252,8 @@ MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct);
MODULE_SCOPE int TclUtfCount(int ch);
#if TCL_UTF_MAX > 3
# define TclUtfToUCS4 Tcl_UtfToUniChar
-# define TclUCS4Complete Tcl_UtfCharComplete
-# define TclChar16Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \
- ? ((length) >= 3) : Tcl_UtfCharComplete((src), (length)))
#else
MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr);
-# define TclUCS4Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \
- ? ((length) >= 4) : Tcl_UtfCharComplete((src), (length)))
-# define TclChar16Complete Tcl_UtfCharComplete
#endif
MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(ClientData clientData);
MODULE_SCOPE Tcl_Obj * TclpFilesystemPathType(Tcl_Obj *pathPtr);
@@ -4695,14 +4689,6 @@ MODULE_SCOPE const TclFileAttrProcs tclpFileAttrProcs[];
(numChars) = _count; \
} while (0);
-#define TclUtfPrev(src, start) \
- (((src) < (start) + 2) ? (start) : \
- ((unsigned char) *((src) - 1)) < 0x80 ? (src) - 1 : \
- Tcl_UtfPrev(src, start))
-
-#define TclUtfNext(src) \
- ((((unsigned char) *(src)) < 0x80) ? (src) + 1 : Tcl_UtfNext(src))
-
/*
*----------------------------------------------------------------
* Macro that encapsulates the logic that determines when it is safe to
diff --git a/generic/tclParse.c b/generic/tclParse.c
index 132e804..49ee348 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -789,7 +789,7 @@ TclParseBackslash(
* written. At most 4 bytes will be written there. */
{
const char *p = src+1;
- Tcl_UniChar unichar = 0;
+ int unichar;
int result;
int count;
char buf[4] = "";
@@ -936,13 +936,13 @@ TclParseBackslash(
*/
if (Tcl_UtfCharComplete(p, numBytes - 1)) {
- count = TclUtfToUniChar(p, &unichar) + 1; /* +1 for '\' */
+ count = TclUtfToUCS4(p, &unichar) + 1; /* +1 for '\' */
} else {
- char utfBytes[4];
+ char utfBytes[8];
memcpy(utfBytes, p, numBytes - 1);
utfBytes[numBytes - 1] = '\0';
- count = TclUtfToUniChar(utfBytes, &unichar) + 1;
+ count = TclUtfToUCS4(utfBytes, &unichar) + 1;
}
result = unichar;
break;
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 78e49f9..2025674 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -1171,10 +1171,10 @@ Tcl_AppendLimitedToObj(
}
eLen = strlen(ellipsis);
while (eLen > limit) {
- eLen = TclUtfPrev(ellipsis+eLen, ellipsis) - ellipsis;
+ eLen = Tcl_UtfPrev(ellipsis+eLen, ellipsis) - ellipsis;
}
- toCopy = TclUtfPrev(bytes+limit+1-eLen, bytes) - bytes;
+ toCopy = Tcl_UtfPrev(bytes+limit+1-eLen, bytes) - bytes;
}
/*
@@ -2614,7 +2614,7 @@ AppendPrintfToObjVA(
* multi-byte characters.
*/
- q = TclUtfPrev(end, bytes);
+ q = Tcl_UtfPrev(end, bytes);
if (!Tcl_UtfCharComplete(q, (int)(end - q))) {
end = q;
}
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index 2d2bc63..ae9a4e3 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -88,6 +88,31 @@ static void uniCodePanic(void) {
# define Tcl_UniCharNcmp (int(*)(const Tcl_UniChar *, const Tcl_UniChar *, unsigned long))(void *)uniCodePanic
#endif
+#define TclUtfCharComplete UtfCharComplete
+#define TclUtfNext UtfNext
+#define TclUtfPrev UtfPrev
+
+static int TclUtfCharComplete(const char *src, int length) {
+ if ((unsigned)((unsigned char)*(src) - 0xF0) < 5) {
+ return length < 5;
+ }
+ return Tcl_UtfCharComplete(src, length);
+}
+
+static const char *TclUtfNext(const char *src) {
+ if ((unsigned)((unsigned char)*(src) - 0xF0) < 5) {
+ return src + 1;
+ }
+ return Tcl_UtfNext(src);
+}
+
+static const char *TclUtfPrev(const char *src, const char *start) {
+ if (((unsigned)((unsigned char)*(src) - 0xF0) < 5) && (src >= start)) {
+ return src - 1;
+ }
+ return Tcl_UtfPrev(src, start);
+}
+
#define TclBN_mp_add mp_add
#define TclBN_mp_and mp_and
#define TclBN_mp_clamp mp_clamp
@@ -1549,12 +1574,12 @@ const TclStubs tclStubs = {
Tcl_UniCharToUpper, /* 323 */
Tcl_UniCharToUtf, /* 324 */
Tcl_UtfAtIndex, /* 325 */
- Tcl_UtfCharComplete, /* 326 */
+ TclUtfCharComplete, /* 326 */
Tcl_UtfBackslash, /* 327 */
Tcl_UtfFindFirst, /* 328 */
Tcl_UtfFindLast, /* 329 */
- Tcl_UtfNext, /* 330 */
- Tcl_UtfPrev, /* 331 */
+ TclUtfNext, /* 330 */
+ TclUtfPrev, /* 331 */
Tcl_UtfToExternal, /* 332 */
Tcl_UtfToExternalDString, /* 333 */
Tcl_UtfToLower, /* 334 */
@@ -1872,6 +1897,9 @@ const TclStubs tclStubs = {
Tcl_UtfToUniChar, /* 646 */
Tcl_UniCharToUtfDString, /* 647 */
Tcl_UtfToUniCharDString, /* 648 */
+ Tcl_UtfCharComplete, /* 649 */
+ Tcl_UtfNext, /* 650 */
+ Tcl_UtfPrev, /* 651 */
};
/* !END!: Do not edit above this line. */
diff --git a/generic/tclTest.c b/generic/tclTest.c
index 1f6882f..78645b6 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -6844,10 +6844,10 @@ TestUtfNextCmd(
memcpy(buffer + 1, bytes, numBytes);
buffer[0] = buffer[numBytes + 1] = buffer[numBytes + 2] = buffer[numBytes + 3] = '\xA0';
- first = result = TclUtfNext(buffer + 1);
+ first = result = Tcl_UtfNext(buffer + 1);
while ((buffer[0] = *p++) != '\0') {
/* Run Tcl_UtfNext with many more possible bytes at src[-1], all should give the same result */
- result = TclUtfNext(buffer + 1);
+ result = Tcl_UtfNext(buffer + 1);
if (first != result) {
Tcl_AppendResult(interp, "Tcl_UtfNext is not supposed to read src[-1]", NULL);
return TCL_ERROR;
@@ -6856,7 +6856,7 @@ TestUtfNextCmd(
p = tobetested;
while ((buffer[numBytes + 1] = *p++) != '\0') {
/* Run Tcl_UtfNext with many more possible bytes at src[end], all should give the same result */
- result = TclUtfNext(buffer + 1);
+ result = Tcl_UtfNext(buffer + 1);
if (first != result) {
first = buffer;
break;
@@ -6904,7 +6904,7 @@ TestUtfPrevCmd(
} else {
offset = numBytes;
}
- result = TclUtfPrev(bytes + offset, bytes);
+ result = Tcl_UtfPrev(bytes + offset, bytes);
Tcl_SetObjResult(interp, Tcl_NewIntObj(result - bytes));
return TCL_OK;
}
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 7c09283..6f03053 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -88,13 +88,7 @@ static const unsigned char complete[256] = {
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/* End of "continuation byte section" */
2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
-#if TCL_UTF_MAX > 3
- 4,4,4,4,4,
-#else
- 3,3,3,3,3,
-#endif
- 1,1,1,1,1,1,1,1,1,1,1
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1
};
/*
@@ -694,7 +688,7 @@ Tcl_UtfToUniCharDString(
p += TclUtfToUCS4(p, &ch);
*w++ = ch;
}
- while ((p < endPtr) && TclUCS4Complete(p, endPtr-p)) {
+ while ((p < endPtr) && Tcl_UtfCharComplete(p, endPtr-p)) {
p += TclUtfToUCS4(p, &ch);
*w++ = ch;
}
@@ -752,7 +746,7 @@ Tcl_UtfToChar16DString(
*w++ = ch;
}
while (p < endPtr) {
- if (TclChar16Complete(p, endPtr-p)) {
+ if (Tcl_UtfCharComplete(p, endPtr-p)) {
p += Tcl_UtfToChar16(p, &ch);
*w++ = ch;
} else {
@@ -833,7 +827,7 @@ Tcl_NumUtfChars(
/* Pointer to the end of string. Never read endPtr[0] */
const char *endPtr = src + length;
/* Pointer to last byte where optimization still can be used */
- const char *optPtr = endPtr - TCL_UTF_MAX;
+ const char *optPtr = endPtr - 4;
/*
* Optimize away the call in this loop. Justified because...
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index ebc8656..40b249d 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -1707,7 +1707,7 @@ TclTrimRight(
const char *q = trim;
int pInc = 0, bytesLeft = numTrim;
- pp = TclUtfPrev(p, bytes);
+ pp = Tcl_UtfPrev(p, bytes);
do {
pp += pInc;
pInc = TclUtfToUCS4(pp, &ch1);
@@ -1858,7 +1858,7 @@ TclTrim(
* that we will not trim. Skip over it. */
if (numBytes > 0) {
const char *first = bytes + trimLeft;
- bytes = TclUtfNext(first);
+ bytes = Tcl_UtfNext(first);
numBytes -= (bytes - first);
if (numBytes > 0) {