diff options
author | dgp <dgp@users.sourceforge.net> | 2018-04-25 13:02:35 (GMT) |
---|---|---|
committer | dgp <dgp@users.sourceforge.net> | 2018-04-25 13:02:35 (GMT) |
commit | 4d9ba4a6528b96b85dd0037953e94a5f4aa623be (patch) | |
tree | deb4afdc84bf1eea1feac4dcf16d8c3828e30e61 /generic/tclStringObj.c | |
parent | 9ff5b7077a286a44e200859c71d641d9ea3bd60d (diff) | |
parent | 859bcc9f868c96444e50001dac785edb3e889156 (diff) | |
download | tcl-4d9ba4a6528b96b85dd0037953e94a5f4aa623be.zip tcl-4d9ba4a6528b96b85dd0037953e94a5f4aa623be.tar.gz tcl-4d9ba4a6528b96b85dd0037953e94a5f4aa623be.tar.bz2 |
merge 8.7
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r-- | generic/tclStringObj.c | 70 |
1 files changed, 59 insertions, 11 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 93691a3..2e44e0e 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -460,8 +460,9 @@ Tcl_GetCharLength( * * Tcl_GetUniChar -- * - * Get the index'th Unicode character from the String object. The index - * is assumed to be in the appropriate range. + * Get the index'th Unicode character from the String object. If index + * is out of range or it references a low surrogate preceded by a high + * surrogate, the result = -1; * * Results: * Returns the index'th Unicode character in the Object. @@ -472,13 +473,18 @@ Tcl_GetCharLength( *---------------------------------------------------------------------- */ -Tcl_UniChar +int Tcl_GetUniChar( Tcl_Obj *objPtr, /* The object to get the Unicode charater * from. */ int index) /* Get the index'th Unicode character. */ { String *stringPtr; + int ch, length; + + if (index < 0) { + return -1; + } /* * Optimize the case where we're really dealing with a bytearray object @@ -486,9 +492,12 @@ Tcl_GetUniChar( */ if (TclIsPureByteArray(objPtr)) { - unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, NULL); + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); + if (index >= length) { + return -1; + } - return (Tcl_UniChar) bytes[index]; + return (int) bytes[index]; } /* @@ -512,7 +521,26 @@ Tcl_GetUniChar( FillUnicodeRep(objPtr); stringPtr = GET_STRING(objPtr); } - return stringPtr->unicode[index]; + + if (index >= stringPtr->numChars) { + return -1; + } + ch = stringPtr->unicode[index]; +#if TCL_UTF_MAX <= 4 + /* See: bug [11ae2be95dac9417] */ + if ((ch&0xF800) == 0xD800) { + if (ch&0x400) { + if ((index > 0) && ((stringPtr->unicode[index-1]&0xFC00) == 0xD800)) { + ch = -1; /* low surrogate preceded by high surrogate */ + } + } else if ((++index < stringPtr->numChars) + && ((stringPtr->unicode[index]&0xFC00) == 0xDC00)) { + /* high surrogate followed by low surrogate */ + ch = (((ch & 0x3FF) << 10) | (stringPtr->unicode[index] & 0x3FF)) + 0x10000; + } + } +#endif + return ch; } /* @@ -612,15 +640,24 @@ Tcl_GetRange( { Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ String *stringPtr; + int length; + if (first < 0) { + first = 0; + } /* * Optimize the case where we're really dealing with a bytearray object * we don't need to convert to a string to perform the substring operation. */ if (TclIsPureByteArray(objPtr)) { - unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, NULL); - + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); + if (last >= length) { + last = length - 1; + } + if (last < first) { + return Tcl_NewObj(); + } return Tcl_NewByteArrayObj(bytes+first, last-first+1); } @@ -640,6 +677,12 @@ Tcl_GetRange( TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length); } if (stringPtr->numChars == objPtr->length) { + if (last >= stringPtr->numChars) { + last = stringPtr->numChars - 1; + } + if (last < first) { + return Tcl_NewObj(); + } newObjPtr = Tcl_NewStringObj(objPtr->bytes + first, last-first+1); /* @@ -654,8 +697,13 @@ Tcl_GetRange( FillUnicodeRep(objPtr); stringPtr = GET_STRING(objPtr); } - -#if TCL_UTF_MAX == 4 + if (last > stringPtr->numChars) { + last = stringPtr->numChars; + } + if (last < first) { + return Tcl_NewObj(); + } +#if TCL_UTF_MAX <= 4 /* See: bug [11ae2be95dac9417] */ if ((first>0) && ((stringPtr->unicode[first]&0xFC00) == 0xDC00) && ((stringPtr->unicode[first-1]&0xFC00) == 0xD800)) { @@ -1930,7 +1978,7 @@ Tcl_AppendFormatToObj( } break; case 'c': { - char buf[TCL_UTF_MAX]; + char buf[4]; int code, length; if (TclGetIntFromObj(interp, segment, &code) != TCL_OK) { |