summaryrefslogtreecommitdiffstats
path: root/generic
diff options
context:
space:
mode:
Diffstat (limited to 'generic')
-rw-r--r--generic/tcl.decls10
-rw-r--r--generic/tclDecls.h35
-rw-r--r--generic/tclInt.h4
-rw-r--r--generic/tclStringObj.c8
-rw-r--r--generic/tclStubInit.c7
-rw-r--r--generic/tclUtf.c91
6 files changed, 122 insertions, 33 deletions
diff --git a/generic/tcl.decls b/generic/tcl.decls
index 5a03bd2..b943edd 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -1162,7 +1162,7 @@ declare 311 {
const Tcl_Time *timePtr)
}
declare 312 {
- size_t Tcl_NumUtfChars(const char *src, size_t length)
+ size_t TclNumUtfChars(const char *src, size_t length)
}
declare 313 {
size_t Tcl_ReadChars(Tcl_Channel channel, Tcl_Obj *objPtr,
@@ -1206,7 +1206,7 @@ declare 324 {
int Tcl_UniCharToUtf(int ch, char *buf)
}
declare 325 {
- const char *Tcl_UtfAtIndex(const char *src, size_t index)
+ const char *TclUtfAtIndex(const char *src, size_t index)
}
declare 326 {
int TclUtfCharComplete(const char *src, size_t length)
@@ -2516,6 +2516,12 @@ declare 660 {
declare 668 {
size_t Tcl_UniCharLen(const int *uniStr)
}
+declare 669 {
+ size_t Tcl_NumUtfChars(const char *src, size_t length)
+}
+declare 671 {
+ const char *Tcl_UtfAtIndex(const char *src, size_t index)
+}
# ----- BASELINE -- FOR -- 8.7.0 ----- #
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index cc33cf8..3e2b8cb 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -828,7 +828,7 @@ EXTERN void Tcl_ConditionNotify(Tcl_Condition *condPtr);
EXTERN void Tcl_ConditionWait(Tcl_Condition *condPtr,
Tcl_Mutex *mutexPtr, const Tcl_Time *timePtr);
/* 312 */
-EXTERN size_t Tcl_NumUtfChars(const char *src, size_t length);
+EXTERN size_t TclNumUtfChars(const char *src, size_t length);
/* 313 */
EXTERN size_t Tcl_ReadChars(Tcl_Channel channel, Tcl_Obj *objPtr,
size_t charsToRead, int appendFlag);
@@ -857,7 +857,7 @@ EXTERN int Tcl_UniCharToUpper(int ch);
/* 324 */
EXTERN int Tcl_UniCharToUtf(int ch, char *buf);
/* 325 */
-EXTERN const char * Tcl_UtfAtIndex(const char *src, size_t index);
+EXTERN const char * TclUtfAtIndex(const char *src, size_t index);
/* 326 */
EXTERN int TclUtfCharComplete(const char *src, size_t length);
/* 327 */
@@ -1774,6 +1774,11 @@ EXTERN int Tcl_AsyncMarkFromSignal(Tcl_AsyncHandler async,
/* Slot 667 is reserved */
/* 668 */
EXTERN size_t Tcl_UniCharLen(const int *uniStr);
+/* 669 */
+EXTERN size_t Tcl_NumUtfChars(const char *src, size_t length);
+/* Slot 670 is reserved */
+/* 671 */
+EXTERN const char * Tcl_UtfAtIndex(const char *src, size_t index);
typedef struct {
const struct TclPlatStubs *tclPlatStubs;
@@ -2097,7 +2102,7 @@ typedef struct TclStubs {
void (*tcl_MutexUnlock) (Tcl_Mutex *mutexPtr); /* 309 */
void (*tcl_ConditionNotify) (Tcl_Condition *condPtr); /* 310 */
void (*tcl_ConditionWait) (Tcl_Condition *condPtr, Tcl_Mutex *mutexPtr, const Tcl_Time *timePtr); /* 311 */
- size_t (*tcl_NumUtfChars) (const char *src, size_t length); /* 312 */
+ size_t (*tclNumUtfChars) (const char *src, size_t length); /* 312 */
size_t (*tcl_ReadChars) (Tcl_Channel channel, Tcl_Obj *objPtr, size_t charsToRead, int appendFlag); /* 313 */
void (*reserved314)(void);
void (*reserved315)(void);
@@ -2110,7 +2115,7 @@ typedef struct TclStubs {
int (*tcl_UniCharToTitle) (int ch); /* 322 */
int (*tcl_UniCharToUpper) (int ch); /* 323 */
int (*tcl_UniCharToUtf) (int ch, char *buf); /* 324 */
- const char * (*tcl_UtfAtIndex) (const char *src, size_t index); /* 325 */
+ const char * (*tclUtfAtIndex) (const char *src, size_t index); /* 325 */
int (*tclUtfCharComplete) (const char *src, size_t length); /* 326 */
size_t (*tcl_UtfBackslash) (const char *src, int *readPtr, char *dst); /* 327 */
const char * (*tcl_UtfFindFirst) (const char *src, int ch); /* 328 */
@@ -2454,6 +2459,9 @@ typedef struct TclStubs {
void (*reserved666)(void);
void (*reserved667)(void);
size_t (*tcl_UniCharLen) (const int *uniStr); /* 668 */
+ size_t (*tcl_NumUtfChars) (const char *src, size_t length); /* 669 */
+ void (*reserved670)(void);
+ const char * (*tcl_UtfAtIndex) (const char *src, size_t index); /* 671 */
} TclStubs;
extern const TclStubs *tclStubsPtr;
@@ -3046,8 +3054,8 @@ extern const TclStubs *tclStubsPtr;
(tclStubsPtr->tcl_ConditionNotify) /* 310 */
#define Tcl_ConditionWait \
(tclStubsPtr->tcl_ConditionWait) /* 311 */
-#define Tcl_NumUtfChars \
- (tclStubsPtr->tcl_NumUtfChars) /* 312 */
+#define TclNumUtfChars \
+ (tclStubsPtr->tclNumUtfChars) /* 312 */
#define Tcl_ReadChars \
(tclStubsPtr->tcl_ReadChars) /* 313 */
/* Slot 314 is reserved */
@@ -3070,8 +3078,8 @@ extern const TclStubs *tclStubsPtr;
(tclStubsPtr->tcl_UniCharToUpper) /* 323 */
#define Tcl_UniCharToUtf \
(tclStubsPtr->tcl_UniCharToUtf) /* 324 */
-#define Tcl_UtfAtIndex \
- (tclStubsPtr->tcl_UtfAtIndex) /* 325 */
+#define TclUtfAtIndex \
+ (tclStubsPtr->tclUtfAtIndex) /* 325 */
#define TclUtfCharComplete \
(tclStubsPtr->tclUtfCharComplete) /* 326 */
#define Tcl_UtfBackslash \
@@ -3736,6 +3744,11 @@ extern const TclStubs *tclStubsPtr;
/* Slot 667 is reserved */
#define Tcl_UniCharLen \
(tclStubsPtr->tcl_UniCharLen) /* 668 */
+#define Tcl_NumUtfChars \
+ (tclStubsPtr->tcl_NumUtfChars) /* 669 */
+/* Slot 670 is reserved */
+#define Tcl_UtfAtIndex \
+ (tclStubsPtr->tcl_UtfAtIndex) /* 671 */
#endif /* defined(USE_TCL_STUBS) */
@@ -3937,6 +3950,12 @@ extern const TclStubs *tclStubsPtr;
# define Tcl_UtfToUniChar Tcl_UtfToChar16
# undef Tcl_UniCharLen
# define Tcl_UniCharLen Tcl_Char16Len
+#if !defined(BUILD_tcl)
+# undef Tcl_NumUtfChars
+# define Tcl_NumUtfChars TclNumUtfChars
+# undef Tcl_UtfAtIndex
+# define Tcl_UtfAtIndex TclUtfAtIndex
+#endif
#endif
#if defined(USE_TCL_STUBS)
# define Tcl_WCharToUtfDString (sizeof(wchar_t) != sizeof(short) \
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 1eb486e..edd0172 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -4666,12 +4666,12 @@ MODULE_SCOPE const TclFileAttrProcs tclpFileAttrProcs[];
* of counting along a string of all one-byte characters. The ANSI C
* "prototype" for this macro is:
*
- * MODULE_SCOPE void TclNumUtfChars(int numChars, const char *bytes,
+ * MODULE_SCOPE void TclNumUtfCharsM(int numChars, const char *bytes,
* size_t numBytes);
*----------------------------------------------------------------
*/
-#define TclNumUtfChars(numChars, bytes, numBytes) \
+#define TclNumUtfCharsM(numChars, bytes, numBytes) \
do { \
size_t _count, _i = (numBytes); \
unsigned char *_str = (unsigned char *) (bytes); \
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index c8d9df7..2755cf6 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -440,7 +440,7 @@ Tcl_GetCharLength(
*/
if (numChars == TCL_INDEX_NONE) {
- TclNumUtfChars(numChars, objPtr->bytes, objPtr->length);
+ TclNumUtfCharsM(numChars, objPtr->bytes, objPtr->length);
stringPtr->numChars = numChars;
}
return numChars;
@@ -543,7 +543,7 @@ Tcl_GetUniChar(
*/
if (stringPtr->numChars == TCL_INDEX_NONE) {
- TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length);
+ TclNumUtfCharsM(stringPtr->numChars, objPtr->bytes, objPtr->length);
}
if (stringPtr->numChars == objPtr->length) {
return (unsigned char) objPtr->bytes[index];
@@ -709,7 +709,7 @@ Tcl_GetRange(
*/
if (stringPtr->numChars == TCL_INDEX_NONE) {
- TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length);
+ TclNumUtfCharsM(stringPtr->numChars, objPtr->bytes, objPtr->length);
}
if (stringPtr->numChars == objPtr->length) {
if (last >= stringPtr->numChars) {
@@ -4045,7 +4045,7 @@ ExtendUnicodeRepWithString(
numOrigChars = stringPtr->numChars;
}
if (numAppendChars == TCL_INDEX_NONE) {
- TclNumUtfChars(numAppendChars, bytes, numBytes);
+ TclNumUtfCharsM(numAppendChars, bytes, numBytes);
}
needed = numOrigChars + numAppendChars;
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index ea7083f..59036ec 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -1005,7 +1005,7 @@ const TclStubs tclStubs = {
Tcl_MutexUnlock, /* 309 */
Tcl_ConditionNotify, /* 310 */
Tcl_ConditionWait, /* 311 */
- Tcl_NumUtfChars, /* 312 */
+ TclNumUtfChars, /* 312 */
Tcl_ReadChars, /* 313 */
0, /* 314 */
0, /* 315 */
@@ -1018,7 +1018,7 @@ const TclStubs tclStubs = {
Tcl_UniCharToTitle, /* 322 */
Tcl_UniCharToUpper, /* 323 */
Tcl_UniCharToUtf, /* 324 */
- Tcl_UtfAtIndex, /* 325 */
+ TclUtfAtIndex, /* 325 */
TclUtfCharComplete, /* 326 */
Tcl_UtfBackslash, /* 327 */
Tcl_UtfFindFirst, /* 328 */
@@ -1362,6 +1362,9 @@ const TclStubs tclStubs = {
0, /* 666 */
0, /* 667 */
Tcl_UniCharLen, /* 668 */
+ Tcl_NumUtfChars, /* 669 */
+ 0, /* 670 */
+ Tcl_UtfAtIndex, /* 671 */
};
/* !END!: Do not edit above this line. */
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index e353b7f..09e464f 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -799,6 +799,7 @@ Tcl_UtfCharComplete(
*---------------------------------------------------------------------------
*/
+#undef Tcl_NumUtfChars
size_t
Tcl_NumUtfChars(
const char *src, /* The UTF-8 string to measure. */
@@ -851,6 +852,58 @@ Tcl_NumUtfChars(
return i;
}
+size_t
+TclNumUtfChars(
+ const char *src, /* The UTF-8 string to measure. */
+ size_t length) /* The length of the string in bytes, or
+ * TCL_INDEX_NONE for strlen(src). */
+{
+ unsigned short ch = 0;
+ size_t i = 0;
+
+ if (length == TCL_INDEX_NONE) {
+ /* string is NUL-terminated, so TclUtfToUniChar calls are safe. */
+ while (*src != '\0') {
+ src += Tcl_UtfToChar16(src, &ch);
+ i++;
+ }
+ } else {
+ /* Will return value between 0 and length. No overflow checks. */
+
+ /* Pointer to the end of string. Never read endPtr[0] */
+ const char *endPtr = src + length;
+ /* Pointer to last byte where optimization still can be used */
+ const char *optPtr = endPtr - 4;
+
+ /*
+ * Optimize away the call in this loop. Justified because...
+ * when (src <= optPtr), (endPtr - src) >= (endPtr - optPtr)
+ * By initialization above (endPtr - optPtr) = TCL_UTF_MAX
+ * So (endPtr - src) >= TCL_UTF_MAX, and passing that to
+ * Tcl_UtfCharComplete we know will cause return of 1.
+ */
+ while (src <= optPtr
+ /* && Tcl_UtfCharComplete(src, endPtr - src) */ ) {
+ src += Tcl_UtfToChar16(src, &ch);
+ i++;
+ }
+ /* Loop over the remaining string where call must happen */
+ while (src < endPtr) {
+ if (Tcl_UtfCharComplete(src, endPtr - src)) {
+ src += Tcl_UtfToChar16(src, &ch);
+ } else {
+ /*
+ * src points to incomplete UTF-8 sequence
+ * Treat first byte as character and count it
+ */
+ src++;
+ }
+ i++;
+ }
+ }
+ return i;
+}
+
/*
*---------------------------------------------------------------------------
*
@@ -1167,34 +1220,42 @@ Tcl_UniCharAtIndex(
*---------------------------------------------------------------------------
*/
+#undef Tcl_UtfAtIndex
const char *
Tcl_UtfAtIndex(
const char *src, /* The UTF-8 string. */
size_t index) /* The position of the desired character. */
{
- Tcl_UniChar ch = 0;
-#if TCL_UTF_MAX < 4
- size_t len = 0;
-#endif
+ int ch = 0;
if (index != TCL_INDEX_NONE) {
while (index--) {
-#if TCL_UTF_MAX < 4
- src += (len = TclUtfToUniChar(src, &ch));
-#else
- src += TclUtfToUniChar(src, &ch);
-#endif
+ src += Tcl_UtfToUniChar(src, &ch);
}
-#if TCL_UTF_MAX < 4
- if ((ch >= 0xD800) && (len < 3)) {
- /* Index points at character following high Surrogate */
- src += TclUtfToUniChar(src, &ch);
- }
-#endif
}
return src;
}
+const char *
+TclUtfAtIndex(
+ const char *src, /* The UTF-8 string. */
+ size_t index) /* The position of the desired character. */
+{
+ unsigned short ch = 0;
+ size_t len = 0;
+
+ if (index != TCL_INDEX_NONE) {
+ while (index--) {
+ src += (len = Tcl_UtfToChar16(src, &ch));
+ }
+ if ((ch >= 0xD800) && (len < 3)) {
+ /* Index points at character following high Surrogate */
+ src += Tcl_UtfToChar16(src, &ch);
+ }
+ }
+ return src;
+}
+
/*
*---------------------------------------------------------------------------
*