summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--generic/tcl.decls9
-rw-r--r--generic/tclDecls.h15
-rw-r--r--generic/tclInt.h12
-rw-r--r--generic/tclStringObj.c8
-rw-r--r--generic/tclStubInit.c3
-rw-r--r--generic/tclUtf.c57
-rw-r--r--tests/string.test2
7 files changed, 96 insertions, 10 deletions
diff --git a/generic/tcl.decls b/generic/tcl.decls
index f5b2e78..fa844e0 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -2446,6 +2446,15 @@ declare 660 {
declare 668 {
int Tcl_UniCharLen(const int *uniStr)
}
+declare 669 {
+ int TclNumUtfChars(const char *src, int length)
+}
+declare 670 {
+ int TclGetCharLength(Tcl_Obj *objPtr)
+}
+declare 671 {
+ const char *TclUtfAtIndex(const char *src, int index)
+}
# ----- BASELINE -- FOR -- 8.7.0 ----- #
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index 1952641..9f5e798 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -1957,6 +1957,12 @@ EXTERN int Tcl_AsyncMarkFromSignal(Tcl_AsyncHandler async,
/* Slot 667 is reserved */
/* 668 */
EXTERN int Tcl_UniCharLen(const int *uniStr);
+/* 669 */
+EXTERN int TclNumUtfChars(const char *src, int length);
+/* 670 */
+EXTERN int TclGetCharLength(Tcl_Obj *objPtr);
+/* 671 */
+EXTERN const char * TclUtfAtIndex(const char *src, int index);
typedef struct {
const struct TclPlatStubs *tclPlatStubs;
@@ -2661,6 +2667,9 @@ typedef struct TclStubs {
void (*reserved666)(void);
void (*reserved667)(void);
int (*tcl_UniCharLen) (const int *uniStr); /* 668 */
+ int (*tclNumUtfChars) (const char *src, int length); /* 669 */
+ int (*tclGetCharLength) (Tcl_Obj *objPtr); /* 670 */
+ const char * (*tclUtfAtIndex) (const char *src, int index); /* 671 */
} TclStubs;
extern const TclStubs *tclStubsPtr;
@@ -4020,6 +4029,12 @@ extern const TclStubs *tclStubsPtr;
/* Slot 667 is reserved */
#define Tcl_UniCharLen \
(tclStubsPtr->tcl_UniCharLen) /* 668 */
+#define TclNumUtfChars \
+ (tclStubsPtr->tclNumUtfChars) /* 669 */
+#define TclGetCharLength \
+ (tclStubsPtr->tclGetCharLength) /* 670 */
+#define TclUtfAtIndex \
+ (tclStubsPtr->tclUtfAtIndex) /* 671 */
#endif /* defined(USE_TCL_STUBS) */
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 538bca3..73d6386 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3322,12 +3322,12 @@ MODULE_SCOPE int TclZipfs_Init(Tcl_Interp *interp);
#if TCL_UTF_MAX > 3
MODULE_SCOPE int *TclGetUnicodeFromObj_(Tcl_Obj *, int *);
MODULE_SCOPE Tcl_Obj *TclNewUnicodeObj(const int *, int);
- MODULE_SCOPE int TclGetCharLength(Tcl_Obj *);
MODULE_SCOPE void TclAppendUnicodeToObj(Tcl_Obj *, const int *, int);
MODULE_SCOPE int TclUniCharNcasecmp(const int *, const int *, unsigned long);
MODULE_SCOPE int TclUniCharCaseMatch(const int *, const int *, int);
MODULE_SCOPE int TclUniCharNcmp(const int *, const int *, unsigned long);
- MODULE_SCOPE const char *TclUtfAtIndex(const char *, int);
+# undef Tcl_NumUtfChars
+# define Tcl_NumUtfChars TclNumUtfChars
# undef Tcl_GetCharLength
# define Tcl_GetCharLength TclGetCharLength
# undef Tcl_UtfAtIndex
@@ -3335,11 +3335,15 @@ MODULE_SCOPE int TclZipfs_Init(Tcl_Interp *interp);
#else
# define TclGetUnicodeFromObj_ Tcl_GetUnicodeFromObj
# define TclNewUnicodeObj Tcl_NewUnicodeObj
-# define TclGetCharLength Tcl_GetCharLength
# define TclAppendUnicodeToObj Tcl_AppendUnicodeToObj
# define TclUniCharNcasecmp Tcl_UniCharNcasecmp
# define TclUniCharCaseMatch Tcl_UniCharCaseMatch
# define TclUniCharNcmp Tcl_UniCharNcmp
+# undef TclNumUtfChars
+# define TclNumUtfChars Tcl_NumUtfChars
+# undef TclGetCharLength
+# define TclGetCharLength Tcl_GetCharLength
+# undef TclUtfAtIndex
# define TclUtfAtIndex Tcl_UtfAtIndex
#endif
@@ -4764,7 +4768,7 @@ MODULE_SCOPE const TclFileAttrProcs tclpFileAttrProcs[];
*----------------------------------------------------------------
*/
-#define TclNumUtfChars(numChars, bytes, numBytes) \
+#define TclNumUtfChars_UNUSED(numChars, bytes, numBytes) \
do { \
int _count, _i = (numBytes); \
unsigned char *_str = (unsigned char *) (bytes); \
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 6417e1b..2b11877 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -637,7 +637,7 @@ TclGetCharLength(
*/
if (numChars == -1) {
- TclNumUtfChars(numChars, objPtr->bytes, objPtr->length);
+ numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length);
stringPtr->numChars = numChars;
}
return numChars;
@@ -782,7 +782,7 @@ Tcl_GetUniChar(
*/
if (stringPtr->numChars == -1) {
- TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length);
+ stringPtr->numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length);
}
if (stringPtr->numChars == objPtr->length) {
return (unsigned char) objPtr->bytes[index];
@@ -991,7 +991,7 @@ Tcl_GetRange(
*/
if (stringPtr->numChars == -1) {
- TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length);
+ stringPtr->numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length);
}
if (stringPtr->numChars == objPtr->length) {
if (last < 0 || last >= stringPtr->numChars) {
@@ -4447,7 +4447,7 @@ ExtendUnicodeRepWithString(
numOrigChars = stringPtr->numChars;
}
if (numAppendChars == -1) {
- TclNumUtfChars(numAppendChars, bytes, numBytes);
+ numAppendChars = Tcl_NumUtfChars(bytes, numBytes);
}
needed = numOrigChars + numAppendChars;
uniCharStringCheckLimits(needed);
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index e3ebb8b..09163c3 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -1937,6 +1937,9 @@ const TclStubs tclStubs = {
0, /* 666 */
0, /* 667 */
Tcl_UniCharLen, /* 668 */
+ TclNumUtfChars, /* 669 */
+ TclGetCharLength, /* 670 */
+ TclUtfAtIndex, /* 671 */
};
/* !END!: Do not edit above this line. */
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 4dd1e09..eda317f 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -799,7 +799,7 @@ Tcl_UtfCharComplete(
*/
int
-Tcl_NumUtfChars(
+TclNumUtfChars(
const char *src, /* The UTF-8 string to measure. */
int length) /* The length of the string in bytes, or -1
* for strlen(string). */
@@ -850,6 +850,61 @@ Tcl_NumUtfChars(
return i;
}
+#if TCL_UTF_MAX > 3
+#undef Tcl_NumUtfChars
+int
+Tcl_NumUtfChars(
+ const char *src, /* The UTF-8 string to measure. */
+ int length) /* The length of the string in bytes, or -1
+ * for strlen(string). */
+{
+ unsigned short ch = 0;
+ int i = 0;
+
+ if (length < 0) {
+ /* string is NUL-terminated, so TclUtfToUniChar calls are safe. */
+ while ((*src != '\0') && (i < INT_MAX)) {
+ src += Tcl_UtfToChar16(src, &ch);
+ i++;
+ }
+ } else {
+ /* Will return value between 0 and length. No overflow checks. */
+
+ /* Pointer to the end of string. Never read endPtr[0] */
+ const char *endPtr = src + length;
+ /* Pointer to last byte where optimization still can be used */
+ const char *optPtr = endPtr - 4;
+
+ /*
+ * Optimize away the call in this loop. Justified because...
+ * when (src <= optPtr), (endPtr - src) >= (endPtr - optPtr)
+ * By initialization above (endPtr - optPtr) = TCL_UTF_MAX
+ * So (endPtr - src) >= TCL_UTF_MAX, and passing that to
+ * Tcl_UtfCharComplete we know will cause return of 1.
+ */
+ while (src <= optPtr
+ /* && Tcl_UtfCharComplete(src, endPtr - src) */ ) {
+ src += Tcl_UtfToChar16(src, &ch);
+ i++;
+ }
+ /* Loop over the remaining string where call must happen */
+ while (src < endPtr) {
+ if (Tcl_UtfCharComplete(src, endPtr - src)) {
+ src += Tcl_UtfToChar16(src, &ch);
+ } else {
+ /*
+ * src points to incomplete UTF-8 sequence
+ * Treat first byte as character and count it
+ */
+ src++;
+ }
+ i++;
+ }
+ }
+ return i;
+}
+#endif
+
/*
*---------------------------------------------------------------------------
*
diff --git a/tests/string.test b/tests/string.test
index 203d0c6..6863c23 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -422,7 +422,7 @@ test string-4.16.$noComp {string first, normal string vs pure unicode string} -b
# Representation checks are canaries
run {list [representationpoke $s] [representationpoke $m] \
[string first $m $s]}
-} -result {{string 1} {string 0} 2}
+} -match glob -result {{*string 1} {*string 0} 2}
test string-4.17.$noComp {string first, corner case} -body {
run {string first a aaa 4294967295}
} -result {-1}