diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2022-04-01 13:12:08 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2022-04-01 13:12:08 (GMT) |
commit | 4717ea14fde569dbc9d0c0addb817fae41e30862 (patch) | |
tree | e3f2b61ed30ff5377355618ed9419d1297cdccf4 /generic/tclStringObj.c | |
parent | 05de893f6ff1e5b322d9579f183a83ad49be48df (diff) | |
parent | d0fec7532c33f0b3da8057e2e0fda10524f22905 (diff) | |
download | tcl-4717ea14fde569dbc9d0c0addb817fae41e30862.zip tcl-4717ea14fde569dbc9d0c0addb817fae41e30862.tar.gz tcl-4717ea14fde569dbc9d0c0addb817fae41e30862.tar.bz2 |
Merge 9.0
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r-- | generic/tclStringObj.c | 121 |
1 files changed, 119 insertions, 2 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 47b532d..fbb6312 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -451,6 +451,44 @@ Tcl_GetCharLength( return numChars; } +size_t +TclGetCharLength( + Tcl_Obj *objPtr) /* The String object to get the num chars + * of. */ +{ + size_t numChars = 0; + + /* + * Quick, no-shimmer return for short string reps. + */ + + if ((objPtr->bytes) && (objPtr->length < 2)) { + /* 0 bytes -> 0 chars; 1 byte -> 1 char */ + return objPtr->length; + } + + /* + * Optimize the case where we're really dealing with a bytearray object; + * we don't need to convert to a string to perform the get-length operation. + * + * Starting in Tcl 8.7, we check for a "pure" bytearray, because the + * machinery behind that test is using a proper bytearray ObjType. We + * could also compute length of an improper bytearray without shimmering + * but there's no value in that. We *want* to shimmer an improper bytearray + * because improper bytearrays have worthless internal reps. + */ + + if (TclIsPureByteArray(objPtr)) { + (void) Tcl_GetByteArrayFromObj(objPtr, &numChars); + } else { + Tcl_GetString(objPtr); + numChars = TclNumUtfChars(objPtr->bytes, objPtr->length); + } + + return numChars; +} + + /* *---------------------------------------------------------------------- * @@ -579,6 +617,40 @@ Tcl_GetUniChar( #endif return ch; } + +int +TclGetUniChar( + Tcl_Obj *objPtr, /* The object to get the Unicode charater + * from. */ + size_t index) /* Get the index'th Unicode character. */ +{ + int ch = 0; + + /* + * Optimize the case where we're really dealing with a bytearray object + * we don't need to convert to a string to perform the indexing operation. + */ + + if (TclIsPureByteArray(objPtr)) { + size_t length = 0; + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); + if (index >= length) { + return -1; + } + + return bytes[index]; + } + + size_t numChars = TclNumUtfChars(objPtr->bytes, objPtr->length); + + if (index >= numChars) { + return -1; + } + const char *begin = TclUtfAtIndex(objPtr->bytes, index); +#undef Tcl_UtfToUniChar + Tcl_UtfToUniChar(begin, &ch); + return ch; +} /* *---------------------------------------------------------------------- @@ -756,6 +828,51 @@ Tcl_GetRange( #endif return Tcl_NewUnicodeObj(stringPtr->unicode + first, last - first + 1); } + +Tcl_Obj * +TclGetRange( + Tcl_Obj *objPtr, /* The Tcl object to find the range of. */ + size_t first, /* First index of the range. */ + size_t last) /* Last index of the range. */ +{ + Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ + size_t length = 0; + + if (first == TCL_INDEX_NONE) { + first = TCL_INDEX_START; + } + + /* + * Optimize the case where we're really dealing with a bytearray object + * we don't need to convert to a string to perform the substring operation. + */ + + if (TclIsPureByteArray(objPtr)) { + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); + + if (last >= length) { + last = length - 1; + } + if (last + 1 < first + 1) { + TclNewObj(newObjPtr); + return newObjPtr; + } + return Tcl_NewByteArrayObj(bytes + first, last - first + 1); + } + + size_t numChars = TclNumUtfChars(objPtr->bytes, objPtr->length); + + if (last >= numChars) { + last = numChars - 1; + } + if (last + 1 < first + 1) { + TclNewObj(newObjPtr); + return newObjPtr; + } + const char *begin = TclUtfAtIndex(objPtr->bytes, first); + const char *end = TclUtfAtIndex(objPtr->bytes, last + 1); + return Tcl_NewStringObj(begin, end - begin); +} /* *---------------------------------------------------------------------- @@ -1211,7 +1328,7 @@ Tcl_AppendToObj( /* *---------------------------------------------------------------------- * - * TclAppendUnicodeToObj -- + * Tcl_AppendUnicodeToObj -- * * This function appends a Unicode string to an object in the most * efficient manner possible. Length must be >= 0. @@ -1235,7 +1352,7 @@ TclAppendUnicodeToObj( String *stringPtr; if (Tcl_IsShared(objPtr)) { - Tcl_Panic("%s called with shared object", "TclAppendUnicodeToObj"); + Tcl_Panic("%s called with shared object", "Tcl_AppendUnicodeToObj"); } if (length == 0) { |