summaryrefslogtreecommitdiffstats
path: root/generic
diff options
context:
space:
mode:
Diffstat (limited to 'generic')
-rw-r--r--generic/regc_locale.c2
-rw-r--r--generic/tcl.decls20
-rw-r--r--generic/tclCmdIL.c4
-rw-r--r--generic/tclCmdMZ.c173
-rw-r--r--generic/tclCompExpr.c8
-rw-r--r--generic/tclDecls.h56
-rw-r--r--generic/tclEncoding.c4
-rw-r--r--generic/tclExecute.c2
-rw-r--r--generic/tclIndexObj.c2
-rw-r--r--generic/tclInt.h17
-rw-r--r--generic/tclParse.c8
-rw-r--r--generic/tclStringObj.c6
-rw-r--r--generic/tclStubInit.c36
-rw-r--r--generic/tclTest.c5
-rw-r--r--generic/tclUtf.c128
-rw-r--r--generic/tclUtil.c14
16 files changed, 345 insertions, 140 deletions
diff --git a/generic/regc_locale.c b/generic/regc_locale.c
index c90dd64..cc4681b 100644
--- a/generic/regc_locale.c
+++ b/generic/regc_locale.c
@@ -1269,7 +1269,7 @@ casecmp(
size_t len) /* exact length of comparison */
{
for (; len > 0; len--, x++, y++) {
- if ((*x!=*y) && (Tcl_UniCharToLower(*x) != Tcl_UniCharToLower(*y))) {
+ if ((*x!=*y) && (Tcl_UniCharFold(*x) != Tcl_UniCharFold(*y))) {
return 1;
}
}
diff --git a/generic/tcl.decls b/generic/tcl.decls
index e49ed66..3785558 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -1163,7 +1163,7 @@ declare 325 {
const char *Tcl_UtfAtIndex(const char *src, int index)
}
declare 326 {
- int Tcl_UtfCharComplete(const char *src, int length)
+ int TclUtfCharComplete(const char *src, int length)
}
declare 327 {
int Tcl_UtfBackslash(const char *src, int *readPtr, char *dst)
@@ -1175,10 +1175,10 @@ declare 329 {
const char *Tcl_UtfFindLast(const char *src, int ch)
}
declare 330 {
- const char *Tcl_UtfNext(const char *src)
+ const char *TclUtfNext(const char *src)
}
declare 331 {
- const char *Tcl_UtfPrev(const char *src, const char *start)
+ const char *TclUtfPrev(const char *src, const char *start)
}
declare 332 {
int Tcl_UtfToExternal(Tcl_Interp *interp, Tcl_Encoding encoding,
@@ -2402,6 +2402,20 @@ declare 648 {
int length, Tcl_DString *dsPtr)
}
+# TIP #575
+declare 649 {
+ int Tcl_UtfCharComplete(const char *src, int length)
+}
+declare 650 {
+ const char *Tcl_UtfNext(const char *src)
+}
+declare 651 {
+ const char *Tcl_UtfPrev(const char *src, const char *start)
+}
+declare 652 {
+ int Tcl_UniCharFold(int ch)
+}
+
# ----- BASELINE -- FOR -- 8.7.0 ----- #
##############################################################################
diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c
index da8dc65..cf900a3 100644
--- a/generic/tclCmdIL.c
+++ b/generic/tclCmdIL.c
@@ -4782,8 +4782,8 @@ DictionaryCompare(
* other interesting punctuations occur).
*/
- uniLeftLower = Tcl_UniCharToLower(uniLeft);
- uniRightLower = Tcl_UniCharToLower(uniRight);
+ uniLeftLower = Tcl_UniCharFold(uniLeft);
+ uniRightLower = Tcl_UniCharFold(uniRight);
} else {
diff = UCHAR(*left) - UCHAR(*right);
break;
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index c47490a..cf4240a 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -629,10 +629,10 @@ Tcl_RegsubObjCmd(
wlen = 0;
}
} else {
- wsrclc = Tcl_UniCharToLower(*wsrc);
+ wsrclc = Tcl_UniCharFold(*wsrc);
for (p = wfirstChar = wstring; wstring < wend; wstring++) {
if ((*wstring == *wsrc ||
- (nocase && Tcl_UniCharToLower(*wstring)==wsrclc)) &&
+ (nocase && Tcl_UniCharFold(*wstring)==wsrclc)) &&
(slen==1 || (strCmpFn(wstring, wsrc,
(unsigned long) slen) == 0))) {
if (numMatches == 0) {
@@ -2096,10 +2096,10 @@ StringMapCmd(
ustring1 = end;
} else {
mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen);
- u2lc = (nocase ? Tcl_UniCharToLower(*ustring2) : 0);
+ u2lc = (nocase ? Tcl_UniCharFold(*ustring2) : 0);
for (; ustring1 < end; ustring1++) {
if (((*ustring1 == *ustring2) ||
- (nocase&&Tcl_UniCharToLower(*ustring1)==u2lc)) &&
+ (nocase&&Tcl_UniCharFold(*ustring1)==u2lc)) &&
(length2==1 || strCmpFn(ustring1, ustring2,
(unsigned long) length2) == 0)) {
if (p != ustring1) {
@@ -2134,7 +2134,7 @@ StringMapCmd(
mapStrings[index] = Tcl_GetUnicodeFromObj(mapElemv[index],
mapLens+index);
if (nocase && ((index % 2) == 0)) {
- u2lc[index/2] = Tcl_UniCharToLower(*mapStrings[index]);
+ u2lc[index/2] = Tcl_UniCharFold(*mapStrings[index]);
}
}
for (p = ustring1; ustring1 < end; ustring1++) {
@@ -2146,7 +2146,7 @@ StringMapCmd(
ustring2 = mapStrings[index];
length2 = mapLens[index];
if ((length2 > 0) && ((*ustring1 == *ustring2) || (nocase &&
- (Tcl_UniCharToLower(*ustring1) == u2lc[index/2]))) &&
+ (Tcl_UniCharFold(*ustring1) == u2lc[index/2]))) &&
/* Restrict max compare length. */
(end-ustring1 >= length2) && ((length2 == 1) ||
!strCmpFn(ustring2, ustring1, length2))) {
@@ -2500,8 +2500,8 @@ StringStartCmd(
Tcl_Obj *const objv[]) /* Argument objects. */
{
int ch;
- const char *p, *string;
- int cur, index, length, numChars;
+ const Tcl_UniChar *p, *string;
+ int cur, index, length;
Tcl_Obj *obj;
if (objc != 3) {
@@ -2509,32 +2509,30 @@ StringStartCmd(
return TCL_ERROR;
}
- string = TclGetStringFromObj(objv[1], &length);
- numChars = Tcl_NumUtfChars(string, length);
- if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) {
+ string = Tcl_GetUnicodeFromObj(objv[1], &length);
+ if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) {
return TCL_ERROR;
}
- string = TclGetStringFromObj(objv[1], &length);
- if (index >= numChars) {
- index = numChars - 1;
+ if (index >= length) {
+ index = length - 1;
}
cur = 0;
if (index > 0) {
- p = Tcl_UtfAtIndex(string, index);
+ p = &string[index];
- TclUtfToUCS4(p, &ch);
+ (void)TclUniCharToUCS4(p, &ch);
for (cur = index; cur >= 0; cur--) {
int delta = 0;
- const char *next;
+ const Tcl_UniChar *next;
if (!Tcl_UniCharIsWordChar(ch)) {
break;
}
- next = TclUtfPrev(p, string);
+ next = TclUCS4Prev(p, string);
do {
next += delta;
- delta = TclUtfToUCS4(next, &ch);
+ delta = TclUniCharToUCS4(next, &ch);
} while (next + delta < p);
p = next;
}
@@ -2550,6 +2548,64 @@ StringStartCmd(
/*
*----------------------------------------------------------------------
*
+ * StringCharStartCmd --
+ *
+ * This procedure is invoked to process the "string charstart" Tcl
+ * command. See the user documentation for details on what it does.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+StringCharStartCmd(
+ TCL_UNUSED(ClientData),
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+#if TCL_UTF_MAX <= 3
+ const Tcl_UniChar *src;
+#else
+ const char *src;
+#endif
+ int index, length;
+
+ if (objc != 3) {
+ Tcl_WrongNumArgs(interp, 1, objv, "string index");
+ return TCL_ERROR;
+ }
+
+#if TCL_UTF_MAX <= 3
+ src = Tcl_GetUnicodeFromObj(objv[1], &length);
+#else
+ src = Tcl_GetStringFromObj(objv[1], &length);
+ length = Tcl_NumUtfChars(src, length);
+#endif
+ if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ if (index >= length) {
+ index = length;
+ } else if (index < 0) {
+ index = -1;
+#if TCL_UTF_MAX <= 3
+ } else if ((index > 0) && ((src[index-1] & 0xFC00) == 0xD800) && ((src[index] & 0xFC00) == 0xDC00)) {
+ index--;
+#endif
+ }
+ Tcl_SetObjResult(interp, Tcl_NewWideIntObj(index));
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* StringEndCmd --
*
* This procedure is invoked to process the "string wordend" Tcl command.
@@ -2572,8 +2628,8 @@ StringEndCmd(
Tcl_Obj *const objv[]) /* Argument objects. */
{
int ch;
- const char *p, *end, *string;
- int cur, index, length, numChars;
+ const Tcl_UniChar *p, *end, *string;
+ int cur, index, length;
Tcl_Obj *obj;
if (objc != 3) {
@@ -2581,20 +2637,18 @@ StringEndCmd(
return TCL_ERROR;
}
- string = TclGetStringFromObj(objv[1], &length);
- numChars = Tcl_NumUtfChars(string, length);
- if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) {
+ string = Tcl_GetUnicodeFromObj(objv[1], &length);
+ if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) {
return TCL_ERROR;
}
- string = TclGetStringFromObj(objv[1], &length);
if (index < 0) {
index = 0;
}
- if (index < numChars) {
- p = Tcl_UtfAtIndex(string, index);
+ if (index < length) {
+ p = &string[index];
end = string+length;
for (cur = index; p < end; cur++) {
- p += TclUtfToUCS4(p, &ch);
+ p += TclUniCharToUCS4(p, &ch);
if (!Tcl_UniCharIsWordChar(ch)) {
break;
}
@@ -2603,7 +2657,7 @@ StringEndCmd(
cur++;
}
} else {
- cur = numChars;
+ cur = length;
}
TclNewIntObj(obj, cur);
Tcl_SetObjResult(interp, obj);
@@ -2613,6 +2667,65 @@ StringEndCmd(
/*
*----------------------------------------------------------------------
*
+ * StringCharEndCmd --
+ *
+ * This procedure is invoked to process the "string charend" Tcl command.
+ * See the user documentation for details on what it does.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+StringCharEndCmd(
+ TCL_UNUSED(ClientData),
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+#if TCL_UTF_MAX <= 3
+ const Tcl_UniChar *src;
+#else
+ const char *src;
+#endif
+ int index, length;
+
+ if (objc != 3) {
+ Tcl_WrongNumArgs(interp, 1, objv, "string index");
+ return TCL_ERROR;
+ }
+
+#if TCL_UTF_MAX <= 3
+ src = Tcl_GetUnicodeFromObj(objv[1], &length);
+#else
+ src = Tcl_GetStringFromObj(objv[1], &length);
+ length = Tcl_NumUtfChars(src, length);
+#endif
+ if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ if (++index < 0) {
+ index = 0;
+ }
+ if (index >= length) {
+ index = length;
+#if TCL_UTF_MAX <= 3
+ } else if ((index > 0) && ((src[index-1] & 0xFC00) == 0xD800) && ((src[index] & 0xFC00) == 0xDC00)) {
+ index++;
+#endif
+ }
+ Tcl_SetObjResult(interp, Tcl_NewWideIntObj(index));
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* StringEqualCmd --
*
* This procedure is invoked to process the "string equal" Tcl command.
@@ -3312,6 +3425,8 @@ TclInitStringCmd(
static const EnsembleImplMap stringImplMap[] = {
{"bytelength", StringBytesCmd, TclCompileBasic1ArgCmd, NULL, NULL, 0},
{"cat", StringCatCmd, TclCompileStringCatCmd, NULL, NULL, 0},
+ {"charend", StringCharEndCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0},
+ {"charstart", StringCharStartCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0},
{"compare", StringCmpCmd, TclCompileStringCmpCmd, NULL, NULL, 0},
{"equal", StringEqualCmd, TclCompileStringEqualCmd, NULL, NULL, 0},
{"first", StringFirstCmd, TclCompileStringFirstCmd, NULL, NULL, 0},
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c
index aabd764..f35038f 100644
--- a/generic/tclCompExpr.c
+++ b/generic/tclCompExpr.c
@@ -1895,7 +1895,7 @@ ParseLexeme(
{
const char *end;
int scanned;
- Tcl_UniChar ch = 0;
+ int ch;
Tcl_Obj *literal = NULL;
unsigned char byte;
@@ -2103,13 +2103,13 @@ ParseLexeme(
if (!TclIsBareword(*start) || *start == '_') {
if (Tcl_UtfCharComplete(start, numBytes)) {
- scanned = TclUtfToUniChar(start, &ch);
+ scanned = TclUtfToUCS4(start, &ch);
} else {
- char utfBytes[4];
+ char utfBytes[8];
memcpy(utfBytes, start, numBytes);
utfBytes[numBytes] = '\0';
- scanned = TclUtfToUniChar(utfBytes, &ch);
+ scanned = TclUtfToUCS4(utfBytes, &ch);
}
*lexemePtr = INVALID;
Tcl_DecrRefCount(literal);
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index 9ea6838..31bb2d4 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -999,7 +999,7 @@ EXTERN int Tcl_UniCharToUtf(int ch, char *buf);
/* 325 */
EXTERN const char * Tcl_UtfAtIndex(const char *src, int index);
/* 326 */
-EXTERN int Tcl_UtfCharComplete(const char *src, int length);
+EXTERN int TclUtfCharComplete(const char *src, int length);
/* 327 */
EXTERN int Tcl_UtfBackslash(const char *src, int *readPtr,
char *dst);
@@ -1008,9 +1008,9 @@ EXTERN const char * Tcl_UtfFindFirst(const char *src, int ch);
/* 329 */
EXTERN const char * Tcl_UtfFindLast(const char *src, int ch);
/* 330 */
-EXTERN const char * Tcl_UtfNext(const char *src);
+EXTERN const char * TclUtfNext(const char *src);
/* 331 */
-EXTERN const char * Tcl_UtfPrev(const char *src, const char *start);
+EXTERN const char * TclUtfPrev(const char *src, const char *start);
/* 332 */
EXTERN int Tcl_UtfToExternal(Tcl_Interp *interp,
Tcl_Encoding encoding, const char *src,
@@ -1920,6 +1920,14 @@ EXTERN char * Tcl_UniCharToUtfDString(const int *uniStr,
/* 648 */
EXTERN int * Tcl_UtfToUniCharDString(const char *src, int length,
Tcl_DString *dsPtr);
+/* 649 */
+EXTERN int Tcl_UtfCharComplete(const char *src, int length);
+/* 650 */
+EXTERN const char * Tcl_UtfNext(const char *src);
+/* 651 */
+EXTERN const char * Tcl_UtfPrev(const char *src, const char *start);
+/* 652 */
+EXTERN int Tcl_UniCharFold(int ch);
typedef struct {
const struct TclPlatStubs *tclPlatStubs;
@@ -2281,12 +2289,12 @@ typedef struct TclStubs {
int (*tcl_UniCharToUpper) (int ch); /* 323 */
int (*tcl_UniCharToUtf) (int ch, char *buf); /* 324 */
const char * (*tcl_UtfAtIndex) (const char *src, int index); /* 325 */
- int (*tcl_UtfCharComplete) (const char *src, int length); /* 326 */
+ int (*tclUtfCharComplete) (const char *src, int length); /* 326 */
int (*tcl_UtfBackslash) (const char *src, int *readPtr, char *dst); /* 327 */
const char * (*tcl_UtfFindFirst) (const char *src, int ch); /* 328 */
const char * (*tcl_UtfFindLast) (const char *src, int ch); /* 329 */
- const char * (*tcl_UtfNext) (const char *src); /* 330 */
- const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 331 */
+ const char * (*tclUtfNext) (const char *src); /* 330 */
+ const char * (*tclUtfPrev) (const char *src, const char *start); /* 331 */
int (*tcl_UtfToExternal) (Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); /* 332 */
char * (*tcl_UtfToExternalDString) (Tcl_Encoding encoding, const char *src, int srcLen, Tcl_DString *dsPtr); /* 333 */
int (*tcl_UtfToLower) (char *src); /* 334 */
@@ -2604,6 +2612,10 @@ typedef struct TclStubs {
int (*tcl_UtfToUniChar) (const char *src, int *chPtr); /* 646 */
char * (*tcl_UniCharToUtfDString) (const int *uniStr, int uniLength, Tcl_DString *dsPtr); /* 647 */
int * (*tcl_UtfToUniCharDString) (const char *src, int length, Tcl_DString *dsPtr); /* 648 */
+ int (*tcl_UtfCharComplete) (const char *src, int length); /* 649 */
+ const char * (*tcl_UtfNext) (const char *src); /* 650 */
+ const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 651 */
+ int (*tcl_UniCharFold) (int ch); /* 652 */
} TclStubs;
extern const TclStubs *tclStubsPtr;
@@ -3286,18 +3298,18 @@ extern const TclStubs *tclStubsPtr;
(tclStubsPtr->tcl_UniCharToUtf) /* 324 */
#define Tcl_UtfAtIndex \
(tclStubsPtr->tcl_UtfAtIndex) /* 325 */
-#define Tcl_UtfCharComplete \
- (tclStubsPtr->tcl_UtfCharComplete) /* 326 */
+#define TclUtfCharComplete \
+ (tclStubsPtr->tclUtfCharComplete) /* 326 */
#define Tcl_UtfBackslash \
(tclStubsPtr->tcl_UtfBackslash) /* 327 */
#define Tcl_UtfFindFirst \
(tclStubsPtr->tcl_UtfFindFirst) /* 328 */
#define Tcl_UtfFindLast \
(tclStubsPtr->tcl_UtfFindLast) /* 329 */
-#define Tcl_UtfNext \
- (tclStubsPtr->tcl_UtfNext) /* 330 */
-#define Tcl_UtfPrev \
- (tclStubsPtr->tcl_UtfPrev) /* 331 */
+#define TclUtfNext \
+ (tclStubsPtr->tclUtfNext) /* 330 */
+#define TclUtfPrev \
+ (tclStubsPtr->tclUtfPrev) /* 331 */
#define Tcl_UtfToExternal \
(tclStubsPtr->tcl_UtfToExternal) /* 332 */
#define Tcl_UtfToExternalDString \
@@ -3932,6 +3944,14 @@ extern const TclStubs *tclStubsPtr;
(tclStubsPtr->tcl_UniCharToUtfDString) /* 647 */
#define Tcl_UtfToUniCharDString \
(tclStubsPtr->tcl_UtfToUniCharDString) /* 648 */
+#define Tcl_UtfCharComplete \
+ (tclStubsPtr->tcl_UtfCharComplete) /* 649 */
+#define Tcl_UtfNext \
+ (tclStubsPtr->tcl_UtfNext) /* 650 */
+#define Tcl_UtfPrev \
+ (tclStubsPtr->tcl_UtfPrev) /* 651 */
+#define Tcl_UniCharFold \
+ (tclStubsPtr->tcl_UniCharFold) /* 652 */
#endif /* defined(USE_TCL_STUBS) */
@@ -4177,10 +4197,16 @@ extern const TclStubs *tclStubsPtr;
#define Tcl_Close(interp, chan) Tcl_CloseEx(interp, chan, 0)
#endif
-#if defined(USE_TCL_STUBS) && (TCL_UTF_MAX > 3)
+#undef TclUtfCharComplete
+#undef TclUtfNext
+#undef TclUtfPrev
+#if defined(USE_TCL_STUBS) && (TCL_UTF_MAX < 4) && !defined(TCL_NO_DEPRECATED)
# undef Tcl_UtfCharComplete
-# define Tcl_UtfCharComplete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \
- ? ((length) >= 4) : tclStubsPtr->tcl_UtfCharComplete((src), (length)))
+# undef Tcl_UtfNext
+# undef Tcl_UtfPrev
+# define Tcl_UtfCharComplete (tclStubsPtr->tclUtfCharComplete)
+# define Tcl_UtfNext (tclStubsPtr->tclUtfNext)
+# define Tcl_UtfPrev (tclStubsPtr->tclUtfPrev)
#endif
#define Tcl_CreateSlave Tcl_CreateChild
#define Tcl_GetSlave Tcl_GetChild
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 48ab3cf..9718f37 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2323,7 +2323,7 @@ UtfToUtfProc(
dstEnd = dst + dstLen - TCL_UTF_MAX;
for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
- if ((src > srcClose) && (!TclUCS4Complete(src, srcEnd - src))) {
+ if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
/*
* If there is more string to follow, this will ensure that the
* last UTF-8 character in the source buffer hasn't been cut off.
@@ -2353,7 +2353,7 @@ UtfToUtfProc(
*dst++ = 0;
*chPtr = 0; /* reset surrogate handling */
src += 2;
- } else if (!TclUCS4Complete(src, srcEnd - src)) {
+ } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
/*
* Always check before using TclUtfToUCS4. Not doing can so
* cause it run beyond the end of the buffer! If we happen such an
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index 54c147d..d1d7037 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -5323,7 +5323,7 @@ TEBCresume(
} else if (TclIsPureByteArray(valuePtr)) {
objResultPtr = Tcl_NewByteArrayObj(
Tcl_GetByteArrayFromObj(valuePtr, NULL)+index, 1);
- } else if (valuePtr->bytes && length == valuePtr->length) {
+ } else if (valuePtr->bytes && length == valuePtr->length && !(valuePtr->bytes[index] & 0x80)) {
objResultPtr = Tcl_NewStringObj((const char *)
valuePtr->bytes+index, 1);
} else {
diff --git a/generic/tclIndexObj.c b/generic/tclIndexObj.c
index 6ae2075..dc1fe24 100644
--- a/generic/tclIndexObj.c
+++ b/generic/tclIndexObj.c
@@ -785,7 +785,7 @@ PrefixLongestObjCmd(
* Adjust in case we stopped in the middle of a UTF char.
*/
- resultLength = TclUtfPrev(&resultString[i+1],
+ resultLength = Tcl_UtfPrev(&resultString[i+1],
resultString) - resultString;
break;
}
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 9dde88b..71459d1 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3252,15 +3252,11 @@ MODULE_SCOPE int TclUtfCount(int ch);
#if TCL_UTF_MAX > 3
# define TclUtfToUCS4 Tcl_UtfToUniChar
# define TclUniCharToUCS4(src, ptr) (*ptr = *(src),1)
-# define TclUCS4Complete Tcl_UtfCharComplete
-# define TclChar16Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \
- ? ((length) >= 3) : Tcl_UtfCharComplete((src), (length)))
+# define TclUCS4Prev(src, ptr) (((src) > (ptr)) ? ((src) - 1) : (src))
#else
- MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr);
- MODULE_SCOPE int TclUniCharToUCS4(const Tcl_UniChar *src, int *ucs4Ptr);
-# define TclUCS4Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \
- ? ((length) >= 4) : Tcl_UtfCharComplete((src), (length)))
-# define TclChar16Complete Tcl_UtfCharComplete
+ MODULE_SCOPE int TclUtfToUCS4(const char *, int *);
+ MODULE_SCOPE int TclUniCharToUCS4(const Tcl_UniChar *, int *);
+ MODULE_SCOPE const Tcl_UniChar *TclUCS4Prev(const Tcl_UniChar *, const Tcl_UniChar *);
#endif
MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(ClientData clientData);
MODULE_SCOPE Tcl_Obj * TclpFilesystemPathType(Tcl_Obj *pathPtr);
@@ -4700,11 +4696,6 @@ MODULE_SCOPE const TclFileAttrProcs tclpFileAttrProcs[];
(numChars) = _count; \
} while (0);
-#define TclUtfPrev(src, start) \
- (((src) < (start) + 2) ? (start) : \
- ((unsigned char) *((src) - 1)) < 0x80 ? (src) - 1 : \
- Tcl_UtfPrev(src, start))
-
/*
*----------------------------------------------------------------
* Macro that encapsulates the logic that determines when it is safe to
diff --git a/generic/tclParse.c b/generic/tclParse.c
index daad31d..d56a41d 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -789,7 +789,7 @@ TclParseBackslash(
* written. At most 4 bytes will be written there. */
{
const char *p = src+1;
- Tcl_UniChar unichar = 0;
+ int unichar;
int result;
int count;
char buf[4] = "";
@@ -936,13 +936,13 @@ TclParseBackslash(
*/
if (Tcl_UtfCharComplete(p, numBytes - 1)) {
- count = TclUtfToUniChar(p, &unichar) + 1; /* +1 for '\' */
+ count = TclUtfToUCS4(p, &unichar) + 1; /* +1 for '\' */
} else {
- char utfBytes[4];
+ char utfBytes[8];
memcpy(utfBytes, p, numBytes - 1);
utfBytes[numBytes - 1] = '\0';
- count = TclUtfToUniChar(utfBytes, &unichar) + 1;
+ count = TclUtfToUCS4(utfBytes, &unichar) + 1;
}
result = unichar;
break;
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 03aceaf..522a740 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -1171,10 +1171,10 @@ Tcl_AppendLimitedToObj(
}
eLen = strlen(ellipsis);
while (eLen > limit) {
- eLen = TclUtfPrev(ellipsis+eLen, ellipsis) - ellipsis;
+ eLen = Tcl_UtfPrev(ellipsis+eLen, ellipsis) - ellipsis;
}
- toCopy = TclUtfPrev(bytes+limit+1-eLen, bytes) - bytes;
+ toCopy = Tcl_UtfPrev(bytes+limit+1-eLen, bytes) - bytes;
}
/*
@@ -2616,7 +2616,7 @@ AppendPrintfToObjVA(
* multi-byte characters.
*/
- q = TclUtfPrev(end, bytes);
+ q = Tcl_UtfPrev(end, bytes);
if (!Tcl_UtfCharComplete(q, (int)(end - q))) {
end = q;
}
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index 36cb9b5..903ad10 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -88,6 +88,32 @@ static void uniCodePanic(void) {
# define Tcl_UniCharNcmp (int(*)(const Tcl_UniChar *, const Tcl_UniChar *, unsigned long))(void *)uniCodePanic
#endif
+#define TclUtfCharComplete UtfCharComplete
+#define TclUtfNext UtfNext
+#define TclUtfPrev UtfPrev
+
+static int TclUtfCharComplete(const char *src, int length) {
+ if ((unsigned)((unsigned char)*(src) - 0xF0) < 5) {
+ return length < 3;
+ }
+ return Tcl_UtfCharComplete(src, length);
+}
+
+static const char *TclUtfNext(const char *src) {
+ if ((unsigned)((unsigned char)*(src) - 0xF0) < 5) {
+ return src + 1;
+ }
+ return Tcl_UtfNext(src);
+}
+
+static const char *TclUtfPrev(const char *src, const char *start) {
+ if ((src >= start + 3) && ((src[-1] & 0xC0) == 0x80)
+ && ((src[-2] & 0xC0) == 0x80) && ((src[-3] & 0xC0) == 0x80)) {
+ return src - 3;
+ }
+ return Tcl_UtfPrev(src, start);
+}
+
#define TclBN_mp_add mp_add
#define TclBN_mp_and mp_and
#define TclBN_mp_clamp mp_clamp
@@ -1549,12 +1575,12 @@ const TclStubs tclStubs = {
Tcl_UniCharToUpper, /* 323 */
Tcl_UniCharToUtf, /* 324 */
Tcl_UtfAtIndex, /* 325 */
- Tcl_UtfCharComplete, /* 326 */
+ TclUtfCharComplete, /* 326 */
Tcl_UtfBackslash, /* 327 */
Tcl_UtfFindFirst, /* 328 */
Tcl_UtfFindLast, /* 329 */
- Tcl_UtfNext, /* 330 */
- Tcl_UtfPrev, /* 331 */
+ TclUtfNext, /* 330 */
+ TclUtfPrev, /* 331 */
Tcl_UtfToExternal, /* 332 */
Tcl_UtfToExternalDString, /* 333 */
Tcl_UtfToLower, /* 334 */
@@ -1872,6 +1898,10 @@ const TclStubs tclStubs = {
Tcl_UtfToUniChar, /* 646 */
Tcl_UniCharToUtfDString, /* 647 */
Tcl_UtfToUniCharDString, /* 648 */
+ Tcl_UtfCharComplete, /* 649 */
+ Tcl_UtfNext, /* 650 */
+ Tcl_UtfPrev, /* 651 */
+ Tcl_UniCharFold, /* 652 */
};
/* !END!: Do not edit above this line. */
diff --git a/generic/tclTest.c b/generic/tclTest.c
index a8ca463..0c14e8f 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -19,6 +19,9 @@
#ifndef USE_TCL_STUBS
# define USE_TCL_STUBS
#endif
+#ifndef TCL_NO_DEPRECATED
+# define TCL_NO_DEPRECATED
+#endif
#include "tclInt.h"
#ifdef TCL_WITH_EXTERNAL_TOMMATH
# include "tommath.h"
@@ -6960,7 +6963,7 @@ TestUtfPrevCmd(
} else {
offset = numBytes;
}
- result = TclUtfPrev(bytes + offset, bytes);
+ result = Tcl_UtfPrev(bytes + offset, bytes);
Tcl_SetObjResult(interp, Tcl_NewIntObj(result - bytes));
return TCL_OK;
}
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 11bde5c..807e087 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -64,20 +64,12 @@ static const unsigned char totalBytes[256] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-/* Tcl_UtfCharComplete() might point to 2nd byte of valid 4-byte sequence */
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
-/* End of "continuation byte section" */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
-#if TCL_UTF_MAX > 3
- 4,4,4,4,4,
-#else
- 1,1,1,1,1,
-#endif
- 1,1,1,1,1,1,1,1,1,1,1
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1
};
-
+
static const unsigned char complete[256] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@@ -88,15 +80,9 @@ static const unsigned char complete[256] = {
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/* End of "continuation byte section" */
2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
-#if TCL_UTF_MAX > 3
- 4,4,4,4,4,
-#else
- 3,3,3,3,3,
-#endif
- 1,1,1,1,1,1,1,1,1,1,1
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1
};
-
+
/*
* Functions used only in this module.
*/
@@ -694,7 +680,7 @@ Tcl_UtfToUniCharDString(
p += TclUtfToUCS4(p, &ch);
*w++ = ch;
}
- while ((p < endPtr) && TclUCS4Complete(p, endPtr-p)) {
+ while ((p < endPtr) && Tcl_UtfCharComplete(p, endPtr-p)) {
p += TclUtfToUCS4(p, &ch);
*w++ = ch;
}
@@ -752,7 +738,7 @@ Tcl_UtfToChar16DString(
*w++ = ch;
}
while (p < endPtr) {
- if (TclChar16Complete(p, endPtr-p)) {
+ if (Tcl_UtfCharComplete(p, endPtr-p)) {
p += Tcl_UtfToChar16(p, &ch);
*w++ = ch;
} else {
@@ -833,7 +819,7 @@ Tcl_NumUtfChars(
/* Pointer to the end of string. Never read endPtr[0] */
const char *endPtr = src + length;
/* Pointer to last byte where optimization still can be used */
- const char *optPtr = endPtr - TCL_UTF_MAX;
+ const char *optPtr = endPtr - 4;
/*
* Optimize away the call in this loop. Justified because...
@@ -1064,7 +1050,7 @@ Tcl_UtfPrev(
* it (the fallback) is correct.
*/
- || (trailBytesSeen >= complete[byte])) {
+ || (trailBytesSeen >= totalBytes[byte])) {
/*
* That is, (1 + trailBytesSeen > needed).
* We've examined more bytes than needed to complete
@@ -1105,19 +1091,14 @@ Tcl_UtfPrev(
/* Continue the search backwards... */
look--;
- } while (trailBytesSeen < TCL_UTF_MAX);
+ } while (trailBytesSeen < 4);
/*
- * We've seen TCL_UTF_MAX trail bytes, so we know there will not be a
+ * We've seen 4 trail bytes, so we know there will not be a
* properly formed byte sequence to find, and we can stop looking,
- * accepting the fallback (for TCL_UTF_MAX > 3) or just go back as
- * far as we can.
+ * accepting the fallback.
*/
-#if TCL_UTF_MAX > 3
return fallback;
-#else
- return src - TCL_UTF_MAX;
-#endif
}
/*
@@ -1576,8 +1557,8 @@ Tcl_UtfNcasecmp(
return -ch2;
}
#endif
- ch1 = Tcl_UniCharToLower(ch1);
- ch2 = Tcl_UniCharToLower(ch2);
+ ch1 = Tcl_UniCharFold(ch1);
+ ch2 = Tcl_UniCharFold(ch2);
if (ch1 != ch2) {
return (ch1 - ch2);
}
@@ -1671,8 +1652,8 @@ TclUtfCasecmp(
return -ch2;
}
#endif
- ch1 = Tcl_UniCharToLower(ch1);
- ch2 = Tcl_UniCharToLower(ch2);
+ ch1 = Tcl_UniCharFold(ch1);
+ ch2 = Tcl_UniCharFold(ch2);
if (ch1 != ch2) {
return ch1 - ch2;
}
@@ -1744,6 +1725,38 @@ Tcl_UniCharToLower(
/* Clear away extension bits, if any */
return ch & 0x1FFFFF;
}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_UniCharFold --
+ *
+ * Compute the lowercase equivalent of the given Unicode character.
+ *
+ * Results:
+ * Returns the lowercase Unicode character.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharFold(
+ int ch) /* Unicode character to convert. */
+{
+ if (!UNICODE_OUT_OF_RANGE(ch)) {
+ int info = GetUniCharInfo(ch);
+ int mode = GetCaseType(info);
+
+ if ((mode & 0x02) && (mode != 0x7)) {
+ ch += GetDelta(info);
+ }
+ }
+ /* Clear away extension bits, if any */
+ return ch & 0x1FFFFF;
+}
/*
*----------------------------------------------------------------------
@@ -1885,8 +1898,8 @@ Tcl_UniCharNcasecmp(
{
for ( ; numChars != 0; numChars--, ucs++, uct++) {
if (*ucs != *uct) {
- Tcl_UniChar lcs = Tcl_UniCharToLower(*ucs);
- Tcl_UniChar lct = Tcl_UniCharToLower(*uct);
+ Tcl_UniChar lcs = Tcl_UniCharFold(*ucs);
+ Tcl_UniChar lct = Tcl_UniCharFold(*uct);
if (lcs != lct) {
return (lcs - lct);
@@ -2274,7 +2287,7 @@ Tcl_UniCharCaseMatch(
return 1;
}
if (nocase) {
- p = Tcl_UniCharToLower(p);
+ p = Tcl_UniCharFold(p);
}
while (1) {
/*
@@ -2326,13 +2339,13 @@ Tcl_UniCharCaseMatch(
Tcl_UniChar startChar, endChar;
uniPattern++;
- ch1 = (nocase ? Tcl_UniCharToLower(*uniStr) : *uniStr);
+ ch1 = (nocase ? Tcl_UniCharFold(*uniStr) : *uniStr);
uniStr++;
while (1) {
if ((*uniPattern == ']') || (*uniPattern == 0)) {
return 0;
}
- startChar = (nocase ? Tcl_UniCharToLower(*uniPattern)
+ startChar = (nocase ? Tcl_UniCharFold(*uniPattern)
: *uniPattern);
uniPattern++;
if (*uniPattern == '-') {
@@ -2340,7 +2353,7 @@ Tcl_UniCharCaseMatch(
if (*uniPattern == 0) {
return 0;
}
- endChar = (nocase ? Tcl_UniCharToLower(*uniPattern)
+ endChar = (nocase ? Tcl_UniCharFold(*uniPattern)
: *uniPattern);
uniPattern++;
if (((startChar <= ch1) && (ch1 <= endChar))
@@ -2382,8 +2395,8 @@ Tcl_UniCharCaseMatch(
*/
if (nocase) {
- if (Tcl_UniCharToLower(*uniStr) !=
- Tcl_UniCharToLower(*uniPattern)) {
+ if (Tcl_UniCharFold(*uniStr) !=
+ Tcl_UniCharFold(*uniPattern)) {
return 0;
}
} else if (*uniStr != *uniPattern) {
@@ -2466,7 +2479,7 @@ TclUniCharMatch(
}
p = *pattern;
if (nocase) {
- p = Tcl_UniCharToLower(p);
+ p = Tcl_UniCharFold(p);
}
while (1) {
/*
@@ -2478,7 +2491,7 @@ TclUniCharMatch(
if ((p != '[') && (p != '?') && (p != '\\')) {
if (nocase) {
while ((string < stringEnd) && (p != *string)
- && (p != Tcl_UniCharToLower(*string))) {
+ && (p != Tcl_UniCharFold(*string))) {
string++;
}
} else {
@@ -2519,20 +2532,20 @@ TclUniCharMatch(
Tcl_UniChar ch1, startChar, endChar;
pattern++;
- ch1 = (nocase ? Tcl_UniCharToLower(*string) : *string);
+ ch1 = (nocase ? Tcl_UniCharFold(*string) : *string);
string++;
while (1) {
if ((*pattern == ']') || (pattern == patternEnd)) {
return 0;
}
- startChar = (nocase ? Tcl_UniCharToLower(*pattern) : *pattern);
+ startChar = (nocase ? Tcl_UniCharFold(*pattern) : *pattern);
pattern++;
if (*pattern == '-') {
pattern++;
if (pattern == patternEnd) {
return 0;
}
- endChar = (nocase ? Tcl_UniCharToLower(*pattern)
+ endChar = (nocase ? Tcl_UniCharFold(*pattern)
: *pattern);
pattern++;
if (((startChar <= ch1) && (ch1 <= endChar))
@@ -2574,7 +2587,7 @@ TclUniCharMatch(
*/
if (nocase) {
- if (Tcl_UniCharToLower(*string) != Tcl_UniCharToLower(*pattern)) {
+ if (Tcl_UniCharFold(*string) != Tcl_UniCharFold(*pattern)) {
return 0;
}
} else if (*string != *pattern) {
@@ -2629,12 +2642,25 @@ TclUniCharToUCS4(
* by the Tcl_UniChar string. */
{
if (((src[0] & 0xFC00) == 0xD800) && ((src[1] & 0xFC00) == 0xDC00)) {
- *ucs4Ptr = (((src[0] & 0x3FF) << 10) | (src[01] & 0x3FF)) + 0x10000;
+ *ucs4Ptr = (((src[0] & 0x3FF) << 10) | (src[1] & 0x3FF)) + 0x10000;
return 2;
}
*ucs4Ptr = src[0];
return 1;
}
+
+const Tcl_UniChar *TclUCS4Prev(const Tcl_UniChar *src, const Tcl_UniChar *ptr) {
+ if (src <= ptr + 1) {
+ return ptr;
+ }
+ if (((src[-1] & 0xFC00) == 0xDC00) && ((src[-2] & 0xFC00) == 0xD800)) {
+ return src - 2;
+ }
+ return src - 1;
+}
+
+
+
#endif
/*
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 170a85e..0cf5d98 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -1707,7 +1707,7 @@ TclTrimRight(
const char *q = trim;
int pInc = 0, bytesLeft = numTrim;
- pp = TclUtfPrev(p, bytes);
+ pp = Tcl_UtfPrev(p, bytes);
do {
pp += pInc;
pInc = TclUtfToUCS4(pp, &ch1);
@@ -2208,7 +2208,7 @@ Tcl_StringCaseMatch(
} else {
TclUtfToUCS4(pattern, &ch2);
if (nocase) {
- ch2 = Tcl_UniCharToLower(ch2);
+ ch2 = Tcl_UniCharFold(ch2);
}
}
@@ -2223,7 +2223,7 @@ Tcl_StringCaseMatch(
if (nocase) {
while (*str) {
charLen = TclUtfToUCS4(str, &ch1);
- if (ch2==ch1 || ch2==Tcl_UniCharToLower(ch1)) {
+ if (ch2==ch1 || ch2==Tcl_UniCharFold(ch1)) {
break;
}
str += charLen;
@@ -2282,7 +2282,7 @@ Tcl_StringCaseMatch(
} else {
str += TclUtfToUCS4(str, &ch1);
if (nocase) {
- ch1 = Tcl_UniCharToLower(ch1);
+ ch1 = Tcl_UniCharFold(ch1);
}
}
while (1) {
@@ -2296,7 +2296,7 @@ Tcl_StringCaseMatch(
} else {
pattern += TclUtfToUCS4(pattern, &startChar);
if (nocase) {
- startChar = Tcl_UniCharToLower(startChar);
+ startChar = Tcl_UniCharFold(startChar);
}
}
if (*pattern == '-') {
@@ -2311,7 +2311,7 @@ Tcl_StringCaseMatch(
} else {
pattern += TclUtfToUCS4(pattern, &endChar);
if (nocase) {
- endChar = Tcl_UniCharToLower(endChar);
+ endChar = Tcl_UniCharFold(endChar);
}
}
if (((startChar <= ch1) && (ch1 <= endChar))
@@ -2360,7 +2360,7 @@ Tcl_StringCaseMatch(
str += TclUtfToUCS4(str, &ch1);
pattern += TclUtfToUCS4(pattern, &ch2);
if (nocase) {
- if (Tcl_UniCharToLower(ch1) != Tcl_UniCharToLower(ch2)) {
+ if (Tcl_UniCharFold(ch1) != Tcl_UniCharFold(ch2)) {
return 0;
}
} else if (ch1 != ch2) {