diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2022-03-24 14:28:10 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2022-03-24 14:28:10 (GMT) |
commit | db3553f6b2e985ce55fa6c42cb0bf268a06cdc70 (patch) | |
tree | c2bcc1dcc8b8f20aff0e0497a9786dafbc1c08bc /generic/tclStringObj.c | |
parent | 4fcff1f053c279076fb2bc1507dac8a26b3c562b (diff) | |
download | tcl-db3553f6b2e985ce55fa6c42cb0bf268a06cdc70.zip tcl-db3553f6b2e985ce55fa6c42cb0bf268a06cdc70.tar.gz tcl-db3553f6b2e985ce55fa6c42cb0bf268a06cdc70.tar.bz2 |
Add UTF-16 versions of Tcl_GetRange/Tcl_GetUniChar
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r-- | generic/tclStringObj.c | 83 |
1 files changed, 83 insertions, 0 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 7e65ef1..dc64fcd 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -612,6 +612,40 @@ Tcl_GetUniChar( #endif return ch; } + /* TclGetUniChar: for a pure bytearray, yields the byte at position index; otherwise yields the character Tcl_UtfToUniChar() decodes at the position TclUtfAtIndex() reports for index. Yields -1 when index is not below the length. */ +int +TclGetUniChar( + Tcl_Obj *objPtr, /* The object to get the Unicode character + * from. */ + size_t index) /* Position of the character to fetch; -1 is + * returned when it is not below the length. */ +{ + int ch = 0; + + /* + * Optimize the case where we're really dealing with a bytearray object: + * we don't need to convert to a string to perform the indexing operation. + */ + + if (TclIsPureByteArray(objPtr)) { + size_t length = 0; + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); + if (index >= length) { + return -1; + } + + return bytes[index]; + } + /* NOTE(review): objPtr->bytes and objPtr->length are read directly here; presumably callers guarantee the string representation is valid -- confirm. */ + size_t numChars = TclNumUtfChars(objPtr->bytes, objPtr->length); + + if (index >= numChars) { + return -1; + } + const char *begin = TclUtfAtIndex(objPtr->bytes, index); + Tcl_UtfToUniChar(begin, &ch); + return ch; +} + /* *---------------------------------------------------------------------- @@ -792,6 +826,55 @@ Tcl_GetRange( #endif return Tcl_NewUnicodeObj(stringPtr->unicode + first, last - first + 1); } + +Tcl_Obj * +TclGetRange( + Tcl_Obj *objPtr, /* The Tcl object to find the range of. */ + size_t first, /* First index of the range. */ + size_t last) /* Last index of the range (inclusive). */ +{ /* Builds a new object from positions first..last (inclusive) of objPtr: bytes for a pure bytearray, otherwise the text between the two TclUtfAtIndex() positions. last is clamped to the final position. */ + Tcl_Obj *newObjPtr; /* Receives the object created by TclNewObj() when the clamped range has last < first. */ + size_t length = 0; + + if (first == TCL_INDEX_NONE) { + first = TCL_INDEX_START; + } /* The +2/+1 offsets presumably keep the comparison meaningful when last is TCL_INDEX_NONE and the unsigned sum wraps -- TODO confirm against the TCL_INDEX_NONE definition. */ + if (last + 2 <= first + 1) { + return Tcl_NewObj(); + } + + /* + * Optimize the case where we're really dealing with a bytearray object: + * we don't need to convert to a string to perform the substring operation. 
+ * NOTE(review): length and numChars are size_t, so "length - 1" wraps when the value is 0 and the "last < first" test then never fires; verify callers cannot reach that with first > 0. + */ + + if (TclIsPureByteArray(objPtr)) { + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); + + if (last >= length) { + last = length - 1; + } + if (last < first) { + TclNewObj(newObjPtr); + return newObjPtr; + } + return Tcl_NewByteArrayObj(bytes + first, last - first + 1); + } + /* NOTE(review): as in the bytearray branch, objPtr->bytes is used directly; presumably a valid string representation is a precondition -- confirm. */ + size_t numChars = TclNumUtfChars(objPtr->bytes, objPtr->length); + + if (last >= numChars) { + last = numChars - 1; + } + if (last < first) { + TclNewObj(newObjPtr); + return newObjPtr; + } /* end is located at last + 1 so that the character at last is included. */ + const char *begin = TclUtfAtIndex(objPtr->bytes, first); + const char *end = TclUtfAtIndex(objPtr->bytes, last + 1); + return Tcl_NewStringObj(begin, end - begin); +} + /* *---------------------------------------------------------------------- |