diff options
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r-- | generic/tclStringObj.c | 121 |
1 files changed, 119 insertions, 2 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index a65d560..a1f3ada 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -446,6 +446,44 @@ Tcl_GetCharLength( return numChars; } +size_t +TclGetCharLength( + Tcl_Obj *objPtr) /* The String object to get the num chars + * of. */ +{ + size_t numChars = 0; + + /* + * Quick, no-shimmer return for short string reps. + */ + + if ((objPtr->bytes) && (objPtr->length < 2)) { + /* 0 bytes -> 0 chars; 1 byte -> 1 char */ + return objPtr->length; + } + + /* + * Optimize the case where we're really dealing with a bytearray object; + * we don't need to convert to a string to perform the get-length operation. + * + * Starting in Tcl 8.7, we check for a "pure" bytearray, because the + * machinery behind that test is using a proper bytearray ObjType. We + * could also compute length of an improper bytearray without shimmering + * but there's no value in that. We *want* to shimmer an improper bytearray + * because improper bytearrays have worthless internal reps. + */ + + if (TclIsPureByteArray(objPtr)) { + (void) Tcl_GetByteArrayFromObj(objPtr, &numChars); + } else { + Tcl_GetString(objPtr); + numChars = TclNumUtfChars(objPtr->bytes, objPtr->length); + } + + return numChars; +} + + /* *---------------------------------------------------------------------- * @@ -574,6 +612,40 @@ Tcl_GetUniChar( #endif return ch; } + +int +TclGetUniChar( + Tcl_Obj *objPtr, /* The object to get the Unicode charater + * from. */ + size_t index) /* Get the index'th Unicode character. */ +{ + int ch = 0; + + /* + * Optimize the case where we're really dealing with a bytearray object + * we don't need to convert to a string to perform the indexing operation. + */ + + if (TclIsPureByteArray(objPtr)) { + size_t length = 0; + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); + if (index >= length) { + return -1; + } + + return bytes[index]; + } + + size_t numChars = TclNumUtfChars(objPtr->bytes, objPtr->length); + + if (index >= numChars) { + return -1; + } + const char *begin = TclUtfAtIndex(objPtr->bytes, index); +#undef Tcl_UtfToUniChar + Tcl_UtfToUniChar(begin, &ch); + return ch; +} /* *---------------------------------------------------------------------- @@ -751,6 +823,51 @@ Tcl_GetRange( #endif return Tcl_NewUnicodeObj(stringPtr->unicode + first, last - first + 1); } + +Tcl_Obj * +TclGetRange( + Tcl_Obj *objPtr, /* The Tcl object to find the range of. */ + size_t first, /* First index of the range. */ + size_t last) /* Last index of the range. */ +{ + Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ + size_t length = 0; + + if (first == TCL_INDEX_NONE) { + first = TCL_INDEX_START; + } + + /* + * Optimize the case where we're really dealing with a bytearray object + * we don't need to convert to a string to perform the substring operation. + */ + + if (TclIsPureByteArray(objPtr)) { + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); + + if (last >= length) { + last = length - 1; + } + if (last + 1 < first + 1) { + TclNewObj(newObjPtr); + return newObjPtr; + } + return Tcl_NewByteArrayObj(bytes + first, last - first + 1); + } + + size_t numChars = TclNumUtfChars(objPtr->bytes, objPtr->length); + + if (last >= numChars) { + last = numChars - 1; + } + if (last + 1 < first + 1) { + TclNewObj(newObjPtr); + return newObjPtr; + } + const char *begin = TclUtfAtIndex(objPtr->bytes, first); + const char *end = TclUtfAtIndex(objPtr->bytes, last + 1); + return Tcl_NewStringObj(begin, end - begin); +} /* *---------------------------------------------------------------------- @@ -1206,7 +1323,7 @@ Tcl_AppendToObj( /* *---------------------------------------------------------------------- * - * TclAppendUnicodeToObj -- + * Tcl_AppendUnicodeToObj -- * * This function appends a Unicode string to an object in the most * efficient manner possible. Length must be >= 0. @@ -1230,7 +1347,7 @@ TclAppendUnicodeToObj( String *stringPtr; if (Tcl_IsShared(objPtr)) { - Tcl_Panic("%s called with shared object", "TclAppendUnicodeToObj"); + Tcl_Panic("%s called with shared object", "Tcl_AppendUnicodeToObj"); } if (length == 0) { |