summaryrefslogtreecommitdiffstats
path: root/generic/tclStringObj.c
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2022-04-01 13:12:08 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2022-04-01 13:12:08 (GMT)
commit4717ea14fde569dbc9d0c0addb817fae41e30862 (patch)
treee3f2b61ed30ff5377355618ed9419d1297cdccf4 /generic/tclStringObj.c
parent05de893f6ff1e5b322d9579f183a83ad49be48df (diff)
parentd0fec7532c33f0b3da8057e2e0fda10524f22905 (diff)
downloadtcl-4717ea14fde569dbc9d0c0addb817fae41e30862.zip
tcl-4717ea14fde569dbc9d0c0addb817fae41e30862.tar.gz
tcl-4717ea14fde569dbc9d0c0addb817fae41e30862.tar.bz2
Merge 9.0
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r--generic/tclStringObj.c121
1 files changed, 119 insertions, 2 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 47b532d..fbb6312 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -451,6 +451,44 @@ Tcl_GetCharLength(
return numChars;
}
+size_t
+TclGetCharLength(
+ Tcl_Obj *objPtr) /* The String object to get the num chars
+ * of. */
+{
+ size_t numChars = 0;
+
+ /*
+ * Quick, no-shimmer return for short string reps.
+ */
+
+ if ((objPtr->bytes) && (objPtr->length < 2)) {
+ /* 0 bytes -> 0 chars; 1 byte -> 1 char */
+ return objPtr->length;
+ }
+
+ /*
+ * Optimize the case where we're really dealing with a bytearray object;
+ * we don't need to convert to a string to perform the get-length operation.
+ *
+ * Starting in Tcl 8.7, we check for a "pure" bytearray, because the
+ * machinery behind that test is using a proper bytearray ObjType. We
+ * could also compute length of an improper bytearray without shimmering
+ * but there's no value in that. We *want* to shimmer an improper bytearray
+ * because improper bytearrays have worthless internal reps.
+ */
+
+ if (TclIsPureByteArray(objPtr)) {
+ (void) Tcl_GetByteArrayFromObj(objPtr, &numChars);
+ } else {
+ Tcl_GetString(objPtr);
+ numChars = TclNumUtfChars(objPtr->bytes, objPtr->length);
+ }
+
+ return numChars;
+}
+
+
/*
*----------------------------------------------------------------------
*
@@ -579,6 +617,40 @@ Tcl_GetUniChar(
#endif
return ch;
}
+
+int
+TclGetUniChar(
+ Tcl_Obj *objPtr, /* The object to get the Unicode charater
+ * from. */
+ size_t index) /* Get the index'th Unicode character. */
+{
+ int ch = 0;
+
+ /*
+ * Optimize the case where we're really dealing with a bytearray object
+ * we don't need to convert to a string to perform the indexing operation.
+ */
+
+ if (TclIsPureByteArray(objPtr)) {
+ size_t length = 0;
+ unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length);
+ if (index >= length) {
+ return -1;
+ }
+
+ return bytes[index];
+ }
+
+ size_t numChars = TclNumUtfChars(objPtr->bytes, objPtr->length);
+
+ if (index >= numChars) {
+ return -1;
+ }
+ const char *begin = TclUtfAtIndex(objPtr->bytes, index);
+#undef Tcl_UtfToUniChar
+ Tcl_UtfToUniChar(begin, &ch);
+ return ch;
+}
/*
*----------------------------------------------------------------------
@@ -756,6 +828,51 @@ Tcl_GetRange(
#endif
return Tcl_NewUnicodeObj(stringPtr->unicode + first, last - first + 1);
}
+
+Tcl_Obj *
+TclGetRange(
+ Tcl_Obj *objPtr, /* The Tcl object to find the range of. */
+ size_t first, /* First index of the range. */
+ size_t last) /* Last index of the range. */
+{
+ Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */
+ size_t length = 0;
+
+ if (first == TCL_INDEX_NONE) {
+ first = TCL_INDEX_START;
+ }
+
+ /*
+ * Optimize the case where we're really dealing with a bytearray object
+ * we don't need to convert to a string to perform the substring operation.
+ */
+
+ if (TclIsPureByteArray(objPtr)) {
+ unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length);
+
+ if (last >= length) {
+ last = length - 1;
+ }
+ if (last + 1 < first + 1) {
+ TclNewObj(newObjPtr);
+ return newObjPtr;
+ }
+ return Tcl_NewByteArrayObj(bytes + first, last - first + 1);
+ }
+
+ size_t numChars = TclNumUtfChars(objPtr->bytes, objPtr->length);
+
+ if (last >= numChars) {
+ last = numChars - 1;
+ }
+ if (last + 1 < first + 1) {
+ TclNewObj(newObjPtr);
+ return newObjPtr;
+ }
+ const char *begin = TclUtfAtIndex(objPtr->bytes, first);
+ const char *end = TclUtfAtIndex(objPtr->bytes, last + 1);
+ return Tcl_NewStringObj(begin, end - begin);
+}
/*
*----------------------------------------------------------------------
@@ -1211,7 +1328,7 @@ Tcl_AppendToObj(
/*
*----------------------------------------------------------------------
*
- * TclAppendUnicodeToObj --
+ * Tcl_AppendUnicodeToObj --
*
* This function appends a Unicode string to an object in the most
* efficient manner possible. Length must be >= 0.
@@ -1235,7 +1352,7 @@ TclAppendUnicodeToObj(
String *stringPtr;
if (Tcl_IsShared(objPtr)) {
- Tcl_Panic("%s called with shared object", "TclAppendUnicodeToObj");
+ Tcl_Panic("%s called with shared object", "Tcl_AppendUnicodeToObj");
}
if (length == 0) {