summaryrefslogtreecommitdiffstats
path: root/generic
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2022-04-01 13:13:06 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2022-04-01 13:13:06 (GMT)
commit67c5964d062b5119e03cf1c3e2fdd9a2da5f2540 (patch)
treef1b908e807872e62c59d2d2079308eb5f1ecfa5e /generic
parent4c82bda7741adb7962582755a619a9768f0ee8fa (diff)
parentd0fec7532c33f0b3da8057e2e0fda10524f22905 (diff)
downloadtcl-67c5964d062b5119e03cf1c3e2fdd9a2da5f2540.zip
tcl-67c5964d062b5119e03cf1c3e2fdd9a2da5f2540.tar.gz
tcl-67c5964d062b5119e03cf1c3e2fdd9a2da5f2540.tar.bz2
Merge 9.0
Diffstat (limited to 'generic')
-rw-r--r--generic/tcl.decls15
-rw-r--r--generic/tclDecls.h49
-rw-r--r--generic/tclStringObj.c121
-rw-r--r--generic/tclStubInit.c10
-rw-r--r--generic/tclUtf.c3
5 files changed, 172 insertions, 26 deletions
diff --git a/generic/tcl.decls b/generic/tcl.decls
index 6b9ce8e..0784bee 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -1396,17 +1396,17 @@ declare 379 {
size_t numChars)
}
declare 380 {
- size_t Tcl_GetCharLength(Tcl_Obj *objPtr)
+ size_t TclGetCharLength(Tcl_Obj *objPtr)
}
declare 381 {
- int Tcl_GetUniChar(Tcl_Obj *objPtr, size_t index)
+ int TclGetUniChar(Tcl_Obj *objPtr, size_t index)
}
# Removed in 9.0, replaced by macro.
#declare 382 {
# Tcl_UniChar *Tcl_GetUnicode(Tcl_Obj *objPtr)
#}
declare 383 {
- Tcl_Obj *Tcl_GetRange(Tcl_Obj *objPtr, size_t first, size_t last)
+ Tcl_Obj *TclGetRange(Tcl_Obj *objPtr, size_t first, size_t last)
}
# Removed in 9.0
#declare 384 {
@@ -2546,9 +2546,18 @@ declare 668 {
declare 669 {
size_t Tcl_NumUtfChars(const char *src, size_t length)
}
+declare 670 {
+ size_t Tcl_GetCharLength(Tcl_Obj *objPtr)
+}
declare 671 {
const char *Tcl_UtfAtIndex(const char *src, size_t index)
}
+declare 672 {
+ Tcl_Obj *Tcl_GetRange(Tcl_Obj *objPtr, size_t first, size_t last)
+}
+declare 673 {
+ int Tcl_GetUniChar(Tcl_Obj *objPtr, size_t index)
+}
# ----- BASELINE -- FOR -- 8.7.0 ----- #
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index 429ef0e..f4d13c8 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -997,12 +997,12 @@ EXTERN Tcl_Obj * Tcl_NewUnicodeObj(const Tcl_UniChar *unicode,
EXTERN void Tcl_SetUnicodeObj(Tcl_Obj *objPtr,
const Tcl_UniChar *unicode, size_t numChars);
/* 380 */
-EXTERN size_t Tcl_GetCharLength(Tcl_Obj *objPtr);
+EXTERN size_t TclGetCharLength(Tcl_Obj *objPtr);
/* 381 */
-EXTERN int Tcl_GetUniChar(Tcl_Obj *objPtr, size_t index);
+EXTERN int TclGetUniChar(Tcl_Obj *objPtr, size_t index);
/* Slot 382 is reserved */
/* 383 */
-EXTERN Tcl_Obj * Tcl_GetRange(Tcl_Obj *objPtr, size_t first,
+EXTERN Tcl_Obj * TclGetRange(Tcl_Obj *objPtr, size_t first,
size_t last);
/* Slot 384 is reserved */
/* 385 */
@@ -1795,9 +1795,15 @@ EXTERN int Tcl_ParseArgsObjv(Tcl_Interp *interp,
EXTERN size_t Tcl_UniCharLen(const int *uniStr);
/* 669 */
EXTERN size_t Tcl_NumUtfChars(const char *src, size_t length);
-/* Slot 670 is reserved */
+/* 670 */
+EXTERN size_t Tcl_GetCharLength(Tcl_Obj *objPtr);
/* 671 */
EXTERN const char * Tcl_UtfAtIndex(const char *src, size_t index);
+/* 672 */
+EXTERN Tcl_Obj * Tcl_GetRange(Tcl_Obj *objPtr, size_t first,
+ size_t last);
+/* 673 */
+EXTERN int Tcl_GetUniChar(Tcl_Obj *objPtr, size_t index);
typedef struct {
const struct TclPlatStubs *tclPlatStubs;
@@ -2189,10 +2195,10 @@ typedef struct TclStubs {
void (*tcl_RegExpGetInfo) (Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr); /* 377 */
Tcl_Obj * (*tcl_NewUnicodeObj) (const Tcl_UniChar *unicode, size_t numChars); /* 378 */
void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, size_t numChars); /* 379 */
- size_t (*tcl_GetCharLength) (Tcl_Obj *objPtr); /* 380 */
- int (*tcl_GetUniChar) (Tcl_Obj *objPtr, size_t index); /* 381 */
+ size_t (*tclGetCharLength) (Tcl_Obj *objPtr); /* 380 */
+ int (*tclGetUniChar) (Tcl_Obj *objPtr, size_t index); /* 381 */
void (*reserved382)(void);
- Tcl_Obj * (*tcl_GetRange) (Tcl_Obj *objPtr, size_t first, size_t last); /* 383 */
+ Tcl_Obj * (*tclGetRange) (Tcl_Obj *objPtr, size_t first, size_t last); /* 383 */
void (*reserved384)(void);
int (*tcl_RegExpMatchObj) (Tcl_Interp *interp, Tcl_Obj *textObj, Tcl_Obj *patternObj); /* 385 */
void (*tcl_SetNotifier) (const Tcl_NotifierProcs *notifierProcPtr); /* 386 */
@@ -2479,8 +2485,10 @@ typedef struct TclStubs {
int (*tcl_ParseArgsObjv) (Tcl_Interp *interp, const Tcl_ArgvInfo *argTable, size_t *objcPtr, Tcl_Obj *const *objv, Tcl_Obj ***remObjv); /* 667 */
size_t (*tcl_UniCharLen) (const int *uniStr); /* 668 */
size_t (*tcl_NumUtfChars) (const char *src, size_t length); /* 669 */
- void (*reserved670)(void);
+ size_t (*tcl_GetCharLength) (Tcl_Obj *objPtr); /* 670 */
const char * (*tcl_UtfAtIndex) (const char *src, size_t index); /* 671 */
+ Tcl_Obj * (*tcl_GetRange) (Tcl_Obj *objPtr, size_t first, size_t last); /* 672 */
+ int (*tcl_GetUniChar) (Tcl_Obj *objPtr, size_t index); /* 673 */
} TclStubs;
extern const TclStubs *tclStubsPtr;
@@ -3203,13 +3211,13 @@ extern const TclStubs *tclStubsPtr;
(tclStubsPtr->tcl_NewUnicodeObj) /* 378 */
#define Tcl_SetUnicodeObj \
(tclStubsPtr->tcl_SetUnicodeObj) /* 379 */
-#define Tcl_GetCharLength \
- (tclStubsPtr->tcl_GetCharLength) /* 380 */
-#define Tcl_GetUniChar \
- (tclStubsPtr->tcl_GetUniChar) /* 381 */
+#define TclGetCharLength \
+ (tclStubsPtr->tclGetCharLength) /* 380 */
+#define TclGetUniChar \
+ (tclStubsPtr->tclGetUniChar) /* 381 */
/* Slot 382 is reserved */
-#define Tcl_GetRange \
- (tclStubsPtr->tcl_GetRange) /* 383 */
+#define TclGetRange \
+ (tclStubsPtr->tclGetRange) /* 383 */
/* Slot 384 is reserved */
#define Tcl_RegExpMatchObj \
(tclStubsPtr->tcl_RegExpMatchObj) /* 385 */
@@ -3772,9 +3780,14 @@ extern const TclStubs *tclStubsPtr;
(tclStubsPtr->tcl_UniCharLen) /* 668 */
#define Tcl_NumUtfChars \
(tclStubsPtr->tcl_NumUtfChars) /* 669 */
-/* Slot 670 is reserved */
+#define Tcl_GetCharLength \
+ (tclStubsPtr->tcl_GetCharLength) /* 670 */
#define Tcl_UtfAtIndex \
(tclStubsPtr->tcl_UtfAtIndex) /* 671 */
+#define Tcl_GetRange \
+ (tclStubsPtr->tcl_GetRange) /* 672 */
+#define Tcl_GetUniChar \
+ (tclStubsPtr->tcl_GetUniChar) /* 673 */
#endif /* defined(USE_TCL_STUBS) */
@@ -3979,8 +3992,14 @@ extern const TclStubs *tclStubsPtr;
#if !defined(BUILD_tcl)
# undef Tcl_NumUtfChars
# define Tcl_NumUtfChars TclNumUtfChars
+# undef Tcl_GetCharLength
+# define Tcl_GetCharLength TclGetCharLength
# undef Tcl_UtfAtIndex
# define Tcl_UtfAtIndex TclUtfAtIndex
+# undef Tcl_GetRange
+# define Tcl_GetRange TclGetRange
+# undef Tcl_GetUniChar
+# define Tcl_GetUniChar TclGetUniChar
#endif
#endif
#if defined(USE_TCL_STUBS)
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index a65d560..a1f3ada 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -446,6 +446,44 @@ Tcl_GetCharLength(
return numChars;
}
+size_t
+TclGetCharLength(
+ Tcl_Obj *objPtr) /* The String object to get the num chars
+ * of. */
+{
+ size_t numChars = 0;
+
+ /*
+ * Quick, no-shimmer return for short string reps.
+ */
+
+ if ((objPtr->bytes) && (objPtr->length < 2)) {
+ /* 0 bytes -> 0 chars; 1 byte -> 1 char */
+ return objPtr->length;
+ }
+
+ /*
+ * Optimize the case where we're really dealing with a bytearray object;
+ * we don't need to convert to a string to perform the get-length operation.
+ *
+ * Starting in Tcl 8.7, we check for a "pure" bytearray, because the
+ * machinery behind that test is using a proper bytearray ObjType. We
+ * could also compute length of an improper bytearray without shimmering
+ * but there's no value in that. We *want* to shimmer an improper bytearray
+ * because improper bytearrays have worthless internal reps.
+ */
+
+ if (TclIsPureByteArray(objPtr)) {
+ (void) Tcl_GetByteArrayFromObj(objPtr, &numChars);
+ } else {
+ Tcl_GetString(objPtr);
+ numChars = TclNumUtfChars(objPtr->bytes, objPtr->length);
+ }
+
+ return numChars;
+}
+
+
/*
*----------------------------------------------------------------------
*
@@ -574,6 +612,40 @@ Tcl_GetUniChar(
#endif
return ch;
}
+
+int
+TclGetUniChar(
+ Tcl_Obj *objPtr, /* The object to get the Unicode charater
+ * from. */
+ size_t index) /* Get the index'th Unicode character. */
+{
+ int ch = 0;
+
+ /*
+ * Optimize the case where we're really dealing with a bytearray object
+ * we don't need to convert to a string to perform the indexing operation.
+ */
+
+ if (TclIsPureByteArray(objPtr)) {
+ size_t length = 0;
+ unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length);
+ if (index >= length) {
+ return -1;
+ }
+
+ return bytes[index];
+ }
+
+ size_t numChars = TclNumUtfChars(objPtr->bytes, objPtr->length);
+
+ if (index >= numChars) {
+ return -1;
+ }
+ const char *begin = TclUtfAtIndex(objPtr->bytes, index);
+#undef Tcl_UtfToUniChar
+ Tcl_UtfToUniChar(begin, &ch);
+ return ch;
+}
/*
*----------------------------------------------------------------------
@@ -751,6 +823,51 @@ Tcl_GetRange(
#endif
return Tcl_NewUnicodeObj(stringPtr->unicode + first, last - first + 1);
}
+
+Tcl_Obj *
+TclGetRange(
+ Tcl_Obj *objPtr, /* The Tcl object to find the range of. */
+ size_t first, /* First index of the range. */
+ size_t last) /* Last index of the range. */
+{
+ Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */
+ size_t length = 0;
+
+ if (first == TCL_INDEX_NONE) {
+ first = TCL_INDEX_START;
+ }
+
+ /*
+ * Optimize the case where we're really dealing with a bytearray object
+ * we don't need to convert to a string to perform the substring operation.
+ */
+
+ if (TclIsPureByteArray(objPtr)) {
+ unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length);
+
+ if (last >= length) {
+ last = length - 1;
+ }
+ if (last + 1 < first + 1) {
+ TclNewObj(newObjPtr);
+ return newObjPtr;
+ }
+ return Tcl_NewByteArrayObj(bytes + first, last - first + 1);
+ }
+
+ size_t numChars = TclNumUtfChars(objPtr->bytes, objPtr->length);
+
+ if (last >= numChars) {
+ last = numChars - 1;
+ }
+ if (last + 1 < first + 1) {
+ TclNewObj(newObjPtr);
+ return newObjPtr;
+ }
+ const char *begin = TclUtfAtIndex(objPtr->bytes, first);
+ const char *end = TclUtfAtIndex(objPtr->bytes, last + 1);
+ return Tcl_NewStringObj(begin, end - begin);
+}
/*
*----------------------------------------------------------------------
@@ -1206,7 +1323,7 @@ Tcl_AppendToObj(
/*
*----------------------------------------------------------------------
*
- * TclAppendUnicodeToObj --
+ * Tcl_AppendUnicodeToObj --
*
* This function appends a Unicode string to an object in the most
* efficient manner possible. Length must be >= 0.
@@ -1230,7 +1347,7 @@ TclAppendUnicodeToObj(
String *stringPtr;
if (Tcl_IsShared(objPtr)) {
- Tcl_Panic("%s called with shared object", "TclAppendUnicodeToObj");
+ Tcl_Panic("%s called with shared object", "Tcl_AppendUnicodeToObj");
}
if (length == 0) {
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index f9604c7..0fa4ff0 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -1173,10 +1173,10 @@ const TclStubs tclStubs = {
Tcl_RegExpGetInfo, /* 377 */
Tcl_NewUnicodeObj, /* 378 */
Tcl_SetUnicodeObj, /* 379 */
- Tcl_GetCharLength, /* 380 */
- Tcl_GetUniChar, /* 381 */
+ TclGetCharLength, /* 380 */
+ TclGetUniChar, /* 381 */
0, /* 382 */
- Tcl_GetRange, /* 383 */
+ TclGetRange, /* 383 */
0, /* 384 */
Tcl_RegExpMatchObj, /* 385 */
Tcl_SetNotifier, /* 386 */
@@ -1463,8 +1463,10 @@ const TclStubs tclStubs = {
Tcl_ParseArgsObjv, /* 667 */
Tcl_UniCharLen, /* 668 */
Tcl_NumUtfChars, /* 669 */
- 0, /* 670 */
+ Tcl_GetCharLength, /* 670 */
Tcl_UtfAtIndex, /* 671 */
+ Tcl_GetRange, /* 672 */
+ Tcl_GetUniChar, /* 673 */
};
/* !END!: Do not edit above this line. */
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 09e464f..6f43dc4 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -799,7 +799,6 @@ Tcl_UtfCharComplete(
*---------------------------------------------------------------------------
*/
-#undef Tcl_NumUtfChars
size_t
Tcl_NumUtfChars(
const char *src, /* The UTF-8 string to measure. */
@@ -1220,7 +1219,6 @@ Tcl_UniCharAtIndex(
*---------------------------------------------------------------------------
*/
-#undef Tcl_UtfAtIndex
const char *
Tcl_UtfAtIndex(
const char *src, /* The UTF-8 string. */
@@ -1230,6 +1228,7 @@ Tcl_UtfAtIndex(
if (index != TCL_INDEX_NONE) {
while (index--) {
+ /* Make use of the #undef Tcl_UtfToUniChar above, which already handles UCS4. */
src += Tcl_UtfToUniChar(src, &ch);
}
}