diff options
| author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-05-04 12:31:51 (GMT) |
|---|---|---|
| committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-05-04 12:31:51 (GMT) |
| commit | 7759441602f14b3772186ea0d87f61ffdafc8404 (patch) | |
| tree | c8aac29d738caa3756201b63f7a4b9a81f27ad47 /generic/tclStringObj.c | |
| parent | 66197cab8b6c43b474a6dceae32fc95f4eed37b9 (diff) | |
| download | tcl-7759441602f14b3772186ea0d87f61ffdafc8404.zip tcl-7759441602f14b3772186ea0d87f61ffdafc8404.tar.gz tcl-7759441602f14b3772186ea0d87f61ffdafc8404.tar.bz2 | |
New internal function TclGetUCS4() only available when TCL_UTF_MAX=4. This fixes all "knownBug" testcases related to tip389.
Diffstat (limited to 'generic/tclStringObj.c')
| -rw-r--r-- | generic/tclStringObj.c | 78 |
1 files changed, 76 insertions, 2 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 3ce8281..656d6ce 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -519,10 +519,10 @@ TclCheckEmptyString ( /* *---------------------------------------------------------------------- * - * Tcl_GetUniChar -- + * Tcl_GetUniChar/TclGetUCS4 -- * * Get the index'th Unicode character from the String object. If index - * is out of range, the result = 0xFFFD; + * is out of range, the result = 0xFFFD (Tcl_GetUniChar) resp. -1 (TclGetUCS4) * * Results: * Returns the index'th Unicode character in the Object. @@ -587,6 +587,80 @@ Tcl_GetUniChar( } return stringPtr->unicode[index]; } + +#if TCL_UTF_MAX == 4 +int +TclGetUCS4( + Tcl_Obj *objPtr, /* The object to get the Unicode charater + * from. */ + int index) /* Get the index'th Unicode character. */ +{ + String *stringPtr; + int ch, length; + + if (index < 0) { + return -1; + } + + /* + * Optimize the case where we're really dealing with a bytearray object + * we don't need to convert to a string to perform the indexing operation. + */ + + if (TclIsPureByteArray(objPtr)) { + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); + if (index >= length) { + return -1; + } + + return (int) bytes[index]; + } + + /* + * OK, need to work with the object as a string. + */ + + SetStringFromAny(NULL, objPtr); + stringPtr = GET_STRING(objPtr); + + if (stringPtr->hasUnicode == 0) { + /* + * If numChars is unknown, compute it. + */ + + if (stringPtr->numChars == -1) { + TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length); + } + if (stringPtr->numChars == objPtr->length) { + return (Tcl_UniChar) objPtr->bytes[index]; + } + FillUnicodeRep(objPtr); + stringPtr = GET_STRING(objPtr); + } + + if (index >= stringPtr->numChars) { + return -1; + } + ch = stringPtr->unicode[index]; +#if TCL_UTF_MAX <= 4 + /* See: bug [11ae2be95dac9417] */ + if ((ch & 0xF800) == 0xD800) { + if (ch & 0x400) { + if ((index > 0) + && ((stringPtr->unicode[index-1] & 0xFC00) == 0xD800)) { + ch = -1; /* low surrogate preceded by high surrogate */ + } + } else if ((++index < stringPtr->numChars) + && ((stringPtr->unicode[index] & 0xFC00) == 0xDC00)) { + /* high surrogate followed by low surrogate */ + ch = (((ch & 0x3FF) << 10) | + (stringPtr->unicode[index] & 0x3FF)) + 0x10000; + } + } +#endif + return ch; +} +#endif /* *---------------------------------------------------------------------- |
