diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2022-04-01 13:13:06 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2022-04-01 13:13:06 (GMT) |
commit | 67c5964d062b5119e03cf1c3e2fdd9a2da5f2540 (patch) | |
tree | f1b908e807872e62c59d2d2079308eb5f1ecfa5e /generic | |
parent | 4c82bda7741adb7962582755a619a9768f0ee8fa (diff) | |
parent | d0fec7532c33f0b3da8057e2e0fda10524f22905 (diff) | |
download | tcl-67c5964d062b5119e03cf1c3e2fdd9a2da5f2540.zip tcl-67c5964d062b5119e03cf1c3e2fdd9a2da5f2540.tar.gz tcl-67c5964d062b5119e03cf1c3e2fdd9a2da5f2540.tar.bz2 |
Merge 9.0
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tcl.decls | 15 | ||||
-rw-r--r-- | generic/tclDecls.h | 49 | ||||
-rw-r--r-- | generic/tclStringObj.c | 121 | ||||
-rw-r--r-- | generic/tclStubInit.c | 10 | ||||
-rw-r--r-- | generic/tclUtf.c | 3 |
5 files changed, 172 insertions, 26 deletions
diff --git a/generic/tcl.decls b/generic/tcl.decls index 6b9ce8e..0784bee 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -1396,17 +1396,17 @@ declare 379 { size_t numChars) } declare 380 { - size_t Tcl_GetCharLength(Tcl_Obj *objPtr) + size_t TclGetCharLength(Tcl_Obj *objPtr) } declare 381 { - int Tcl_GetUniChar(Tcl_Obj *objPtr, size_t index) + int TclGetUniChar(Tcl_Obj *objPtr, size_t index) } # Removed in 9.0, replaced by macro. #declare 382 { # Tcl_UniChar *Tcl_GetUnicode(Tcl_Obj *objPtr) #} declare 383 { - Tcl_Obj *Tcl_GetRange(Tcl_Obj *objPtr, size_t first, size_t last) + Tcl_Obj *TclGetRange(Tcl_Obj *objPtr, size_t first, size_t last) } # Removed in 9.0 #declare 384 { @@ -2546,9 +2546,18 @@ declare 668 { declare 669 { size_t Tcl_NumUtfChars(const char *src, size_t length) } +declare 670 { + size_t Tcl_GetCharLength(Tcl_Obj *objPtr) +} declare 671 { const char *Tcl_UtfAtIndex(const char *src, size_t index) } +declare 672 { + Tcl_Obj *Tcl_GetRange(Tcl_Obj *objPtr, size_t first, size_t last) +} +declare 673 { + int Tcl_GetUniChar(Tcl_Obj *objPtr, size_t index) +} # ----- BASELINE -- FOR -- 8.7.0 ----- # diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 429ef0e..f4d13c8 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -997,12 +997,12 @@ EXTERN Tcl_Obj * Tcl_NewUnicodeObj(const Tcl_UniChar *unicode, EXTERN void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, size_t numChars); /* 380 */ -EXTERN size_t Tcl_GetCharLength(Tcl_Obj *objPtr); +EXTERN size_t TclGetCharLength(Tcl_Obj *objPtr); /* 381 */ -EXTERN int Tcl_GetUniChar(Tcl_Obj *objPtr, size_t index); +EXTERN int TclGetUniChar(Tcl_Obj *objPtr, size_t index); /* Slot 382 is reserved */ /* 383 */ -EXTERN Tcl_Obj * Tcl_GetRange(Tcl_Obj *objPtr, size_t first, +EXTERN Tcl_Obj * TclGetRange(Tcl_Obj *objPtr, size_t first, size_t last); /* Slot 384 is reserved */ /* 385 */ @@ -1795,9 +1795,15 @@ EXTERN int Tcl_ParseArgsObjv(Tcl_Interp *interp, EXTERN size_t Tcl_UniCharLen(const int *uniStr); /* 669 */ EXTERN size_t Tcl_NumUtfChars(const char *src, size_t length); -/* Slot 670 is reserved */ +/* 670 */ +EXTERN size_t Tcl_GetCharLength(Tcl_Obj *objPtr); /* 671 */ EXTERN const char * Tcl_UtfAtIndex(const char *src, size_t index); +/* 672 */ +EXTERN Tcl_Obj * Tcl_GetRange(Tcl_Obj *objPtr, size_t first, + size_t last); +/* 673 */ +EXTERN int Tcl_GetUniChar(Tcl_Obj *objPtr, size_t index); typedef struct { const struct TclPlatStubs *tclPlatStubs; @@ -2189,10 +2195,10 @@ typedef struct TclStubs { void (*tcl_RegExpGetInfo) (Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr); /* 377 */ Tcl_Obj * (*tcl_NewUnicodeObj) (const Tcl_UniChar *unicode, size_t numChars); /* 378 */ void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, size_t numChars); /* 379 */ - size_t (*tcl_GetCharLength) (Tcl_Obj *objPtr); /* 380 */ - int (*tcl_GetUniChar) (Tcl_Obj *objPtr, size_t index); /* 381 */ + size_t (*tclGetCharLength) (Tcl_Obj *objPtr); /* 380 */ + int (*tclGetUniChar) (Tcl_Obj *objPtr, size_t index); /* 381 */ void (*reserved382)(void); - Tcl_Obj * (*tcl_GetRange) (Tcl_Obj *objPtr, size_t first, size_t last); /* 383 */ + Tcl_Obj * (*tclGetRange) (Tcl_Obj *objPtr, size_t first, size_t last); /* 383 */ void (*reserved384)(void); int (*tcl_RegExpMatchObj) (Tcl_Interp *interp, Tcl_Obj *textObj, Tcl_Obj *patternObj); /* 385 */ void (*tcl_SetNotifier) (const Tcl_NotifierProcs *notifierProcPtr); /* 386 */ @@ -2479,8 +2485,10 @@ typedef struct TclStubs { int (*tcl_ParseArgsObjv) (Tcl_Interp *interp, const Tcl_ArgvInfo *argTable, size_t *objcPtr, Tcl_Obj *const *objv, Tcl_Obj ***remObjv); /* 667 */ size_t (*tcl_UniCharLen) (const int *uniStr); /* 668 */ size_t (*tcl_NumUtfChars) (const char *src, size_t length); /* 669 */ - void (*reserved670)(void); + size_t (*tcl_GetCharLength) (Tcl_Obj *objPtr); /* 670 */ const char * (*tcl_UtfAtIndex) (const char *src, size_t index); /* 671 */ + Tcl_Obj * (*tcl_GetRange) (Tcl_Obj *objPtr, size_t first, size_t last); /* 672 */ + int (*tcl_GetUniChar) (Tcl_Obj *objPtr, size_t index); /* 673 */ } TclStubs; extern const TclStubs *tclStubsPtr; @@ -3203,13 +3211,13 @@ extern const TclStubs *tclStubsPtr; (tclStubsPtr->tcl_NewUnicodeObj) /* 378 */ #define Tcl_SetUnicodeObj \ (tclStubsPtr->tcl_SetUnicodeObj) /* 379 */ -#define Tcl_GetCharLength \ - (tclStubsPtr->tcl_GetCharLength) /* 380 */ -#define Tcl_GetUniChar \ - (tclStubsPtr->tcl_GetUniChar) /* 381 */ +#define TclGetCharLength \ + (tclStubsPtr->tclGetCharLength) /* 380 */ +#define TclGetUniChar \ + (tclStubsPtr->tclGetUniChar) /* 381 */ /* Slot 382 is reserved */ -#define Tcl_GetRange \ - (tclStubsPtr->tcl_GetRange) /* 383 */ +#define TclGetRange \ + (tclStubsPtr->tclGetRange) /* 383 */ /* Slot 384 is reserved */ #define Tcl_RegExpMatchObj \ (tclStubsPtr->tcl_RegExpMatchObj) /* 385 */ @@ -3772,9 +3780,14 @@ extern const TclStubs *tclStubsPtr; (tclStubsPtr->tcl_UniCharLen) /* 668 */ #define Tcl_NumUtfChars \ (tclStubsPtr->tcl_NumUtfChars) /* 669 */ -/* Slot 670 is reserved */ +#define Tcl_GetCharLength \ + (tclStubsPtr->tcl_GetCharLength) /* 670 */ #define Tcl_UtfAtIndex \ (tclStubsPtr->tcl_UtfAtIndex) /* 671 */ +#define Tcl_GetRange \ + (tclStubsPtr->tcl_GetRange) /* 672 */ +#define Tcl_GetUniChar \ + (tclStubsPtr->tcl_GetUniChar) /* 673 */ #endif /* defined(USE_TCL_STUBS) */ @@ -3979,8 +3992,14 @@ extern const TclStubs *tclStubsPtr; #if !defined(BUILD_tcl) # undef Tcl_NumUtfChars # define Tcl_NumUtfChars TclNumUtfChars +# undef Tcl_GetCharLength +# define Tcl_GetCharLength TclGetCharLength # undef Tcl_UtfAtIndex # define Tcl_UtfAtIndex TclUtfAtIndex +# undef Tcl_GetRange +# define Tcl_GetRange TclGetRange +# undef Tcl_GetUniChar +# define Tcl_GetUniChar TclGetUniChar #endif #endif #if defined(USE_TCL_STUBS) diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index a65d560..a1f3ada 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -446,6 +446,44 @@ Tcl_GetCharLength( return numChars; } +size_t +TclGetCharLength( + Tcl_Obj *objPtr) /* The String object to get the num chars + * of. */ +{ + size_t numChars = 0; + + /* + * Quick, no-shimmer return for short string reps. + */ + + if ((objPtr->bytes) && (objPtr->length < 2)) { + /* 0 bytes -> 0 chars; 1 byte -> 1 char */ + return objPtr->length; + } + + /* + * Optimize the case where we're really dealing with a bytearray object; + * we don't need to convert to a string to perform the get-length operation. + * + * Starting in Tcl 8.7, we check for a "pure" bytearray, because the + * machinery behind that test is using a proper bytearray ObjType. We + * could also compute length of an improper bytearray without shimmering + * but there's no value in that. We *want* to shimmer an improper bytearray + * because improper bytearrays have worthless internal reps. + */ + + if (TclIsPureByteArray(objPtr)) { + (void) Tcl_GetByteArrayFromObj(objPtr, &numChars); + } else { + Tcl_GetString(objPtr); + numChars = TclNumUtfChars(objPtr->bytes, objPtr->length); + } + + return numChars; +} + + /* *---------------------------------------------------------------------- * @@ -574,6 +612,40 @@ Tcl_GetUniChar( #endif return ch; } + +int +TclGetUniChar( + Tcl_Obj *objPtr, /* The object to get the Unicode charater + * from. */ + size_t index) /* Get the index'th Unicode character. */ +{ + int ch = 0; + + /* + * Optimize the case where we're really dealing with a bytearray object + * we don't need to convert to a string to perform the indexing operation. + */ + + if (TclIsPureByteArray(objPtr)) { + size_t length = 0; + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); + if (index >= length) { + return -1; + } + + return bytes[index]; + } + + size_t numChars = TclNumUtfChars(objPtr->bytes, objPtr->length); + + if (index >= numChars) { + return -1; + } + const char *begin = TclUtfAtIndex(objPtr->bytes, index); +#undef Tcl_UtfToUniChar + Tcl_UtfToUniChar(begin, &ch); + return ch; +} /* *---------------------------------------------------------------------- @@ -751,6 +823,51 @@ Tcl_GetRange( #endif return Tcl_NewUnicodeObj(stringPtr->unicode + first, last - first + 1); } + +Tcl_Obj * +TclGetRange( + Tcl_Obj *objPtr, /* The Tcl object to find the range of. */ + size_t first, /* First index of the range. */ + size_t last) /* Last index of the range. */ +{ + Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ + size_t length = 0; + + if (first == TCL_INDEX_NONE) { + first = TCL_INDEX_START; + } + + /* + * Optimize the case where we're really dealing with a bytearray object + * we don't need to convert to a string to perform the substring operation. + */ + + if (TclIsPureByteArray(objPtr)) { + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); + + if (last >= length) { + last = length - 1; + } + if (last + 1 < first + 1) { + TclNewObj(newObjPtr); + return newObjPtr; + } + return Tcl_NewByteArrayObj(bytes + first, last - first + 1); + } + + size_t numChars = TclNumUtfChars(objPtr->bytes, objPtr->length); + + if (last >= numChars) { + last = numChars - 1; + } + if (last + 1 < first + 1) { + TclNewObj(newObjPtr); + return newObjPtr; + } + const char *begin = TclUtfAtIndex(objPtr->bytes, first); + const char *end = TclUtfAtIndex(objPtr->bytes, last + 1); + return Tcl_NewStringObj(begin, end - begin); +} /* *---------------------------------------------------------------------- @@ -1206,7 +1323,7 @@ Tcl_AppendToObj( /* *---------------------------------------------------------------------- * - * TclAppendUnicodeToObj -- + * Tcl_AppendUnicodeToObj -- * * This function appends a Unicode string to an object in the most * efficient manner possible. Length must be >= 0. @@ -1230,7 +1347,7 @@ TclAppendUnicodeToObj( String *stringPtr; if (Tcl_IsShared(objPtr)) { - Tcl_Panic("%s called with shared object", "TclAppendUnicodeToObj"); + Tcl_Panic("%s called with shared object", "Tcl_AppendUnicodeToObj"); } if (length == 0) { diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index f9604c7..0fa4ff0 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -1173,10 +1173,10 @@ const TclStubs tclStubs = { Tcl_RegExpGetInfo, /* 377 */ Tcl_NewUnicodeObj, /* 378 */ Tcl_SetUnicodeObj, /* 379 */ - Tcl_GetCharLength, /* 380 */ - Tcl_GetUniChar, /* 381 */ + TclGetCharLength, /* 380 */ + TclGetUniChar, /* 381 */ 0, /* 382 */ - Tcl_GetRange, /* 383 */ + TclGetRange, /* 383 */ 0, /* 384 */ Tcl_RegExpMatchObj, /* 385 */ Tcl_SetNotifier, /* 386 */ @@ -1463,8 +1463,10 @@ const TclStubs tclStubs = { Tcl_ParseArgsObjv, /* 667 */ Tcl_UniCharLen, /* 668 */ Tcl_NumUtfChars, /* 669 */ - 0, /* 670 */ + Tcl_GetCharLength, /* 670 */ Tcl_UtfAtIndex, /* 671 */ + Tcl_GetRange, /* 672 */ + Tcl_GetUniChar, /* 673 */ }; /* !END!: Do not edit above this line. */ diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 09e464f..6f43dc4 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -799,7 +799,6 @@ Tcl_UtfCharComplete( *--------------------------------------------------------------------------- */ -#undef Tcl_NumUtfChars size_t Tcl_NumUtfChars( const char *src, /* The UTF-8 string to measure. */ @@ -1220,7 +1219,6 @@ Tcl_UniCharAtIndex( *--------------------------------------------------------------------------- */ -#undef Tcl_UtfAtIndex const char * Tcl_UtfAtIndex( const char *src, /* The UTF-8 string. */ @@ -1230,6 +1228,7 @@ Tcl_UtfAtIndex( if (index != TCL_INDEX_NONE) { while (index--) { + /* Make use of the #undef Tcl_UtfToUniChar above, which already handles UCS4. */ src += Tcl_UtfToUniChar(src, &ch); } } |