summaryrefslogtreecommitdiffstats
path: root/generic/tclStringObj.c
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2020-05-04 12:31:51 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2020-05-04 12:31:51 (GMT)
commit7759441602f14b3772186ea0d87f61ffdafc8404 (patch)
treec8aac29d738caa3756201b63f7a4b9a81f27ad47 /generic/tclStringObj.c
parent66197cab8b6c43b474a6dceae32fc95f4eed37b9 (diff)
downloadtcl-7759441602f14b3772186ea0d87f61ffdafc8404.zip
tcl-7759441602f14b3772186ea0d87f61ffdafc8404.tar.gz
tcl-7759441602f14b3772186ea0d87f61ffdafc8404.tar.bz2
New internal function TclGetUCS4() only available when TCL_UTF_MAX=4. This fixes all "knownBug" testcases related to tip389.
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r--generic/tclStringObj.c78
1 files changed, 76 insertions, 2 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 3ce8281..656d6ce 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -519,10 +519,10 @@ TclCheckEmptyString (
/*
*----------------------------------------------------------------------
*
- * Tcl_GetUniChar --
+ * Tcl_GetUniChar/TclGetUCS4 --
*
* Get the index'th Unicode character from the String object. If index
- * is out of range, the result = 0xFFFD;
+ * is out of range, the result = 0xFFFD (Tcl_GetUniChar) resp. -1 (TclGetUCS4)
*
* Results:
* Returns the index'th Unicode character in the Object.
@@ -587,6 +587,80 @@ Tcl_GetUniChar(
}
return stringPtr->unicode[index];
}
+
+#if TCL_UTF_MAX == 4
+int
+TclGetUCS4(
+ Tcl_Obj *objPtr, /* The object to get the Unicode charater
+ * from. */
+ int index) /* Get the index'th Unicode character. */
+{
+ String *stringPtr;
+ int ch, length;
+
+ if (index < 0) {
+ return -1;
+ }
+
+ /*
+ * Optimize the case where we're really dealing with a bytearray object
+ * we don't need to convert to a string to perform the indexing operation.
+ */
+
+ if (TclIsPureByteArray(objPtr)) {
+ unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length);
+ if (index >= length) {
+ return -1;
+ }
+
+ return (int) bytes[index];
+ }
+
+ /*
+ * OK, need to work with the object as a string.
+ */
+
+ SetStringFromAny(NULL, objPtr);
+ stringPtr = GET_STRING(objPtr);
+
+ if (stringPtr->hasUnicode == 0) {
+ /*
+ * If numChars is unknown, compute it.
+ */
+
+ if (stringPtr->numChars == -1) {
+ TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length);
+ }
+ if (stringPtr->numChars == objPtr->length) {
+ return (Tcl_UniChar) objPtr->bytes[index];
+ }
+ FillUnicodeRep(objPtr);
+ stringPtr = GET_STRING(objPtr);
+ }
+
+ if (index >= stringPtr->numChars) {
+ return -1;
+ }
+ ch = stringPtr->unicode[index];
+#if TCL_UTF_MAX <= 4
+ /* See: bug [11ae2be95dac9417] */
+ if ((ch & 0xF800) == 0xD800) {
+ if (ch & 0x400) {
+ if ((index > 0)
+ && ((stringPtr->unicode[index-1] & 0xFC00) == 0xD800)) {
+ ch = -1; /* low surrogate preceded by high surrogate */
+ }
+ } else if ((++index < stringPtr->numChars)
+ && ((stringPtr->unicode[index] & 0xFC00) == 0xDC00)) {
+ /* high surrogate followed by low surrogate */
+ ch = (((ch & 0x3FF) << 10) |
+ (stringPtr->unicode[index] & 0x3FF)) + 0x10000;
+ }
+ }
+#endif
+ return ch;
+}
+#endif
/*
*----------------------------------------------------------------------