summaryrefslogtreecommitdiffstats
path: root/generic/tclStringObj.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r--generic/tclStringObj.c70
1 files changed, 59 insertions, 11 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 93691a3..2e44e0e 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -460,8 +460,9 @@ Tcl_GetCharLength(
*
* Tcl_GetUniChar --
*
- * Get the index'th Unicode character from the String object. The index
- * is assumed to be in the appropriate range.
+ * Get the index'th Unicode character from the String object. If index
+ * is out of range or it references a low surrogate preceded by a high
+ * surrogate, the result = -1;
*
* Results:
* Returns the index'th Unicode character in the Object.
@@ -472,13 +473,18 @@ Tcl_GetCharLength(
*----------------------------------------------------------------------
*/
-Tcl_UniChar
+int
Tcl_GetUniChar(
Tcl_Obj *objPtr, /* The object to get the Unicode charater
* from. */
int index) /* Get the index'th Unicode character. */
{
String *stringPtr;
+ int ch, length;
+
+ if (index < 0) {
+ return -1;
+ }
/*
* Optimize the case where we're really dealing with a bytearray object
@@ -486,9 +492,12 @@ Tcl_GetUniChar(
*/
if (TclIsPureByteArray(objPtr)) {
- unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, NULL);
+ unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length);
+ if (index >= length) {
+ return -1;
+ }
- return (Tcl_UniChar) bytes[index];
+ return (int) bytes[index];
}
/*
@@ -512,7 +521,26 @@ Tcl_GetUniChar(
FillUnicodeRep(objPtr);
stringPtr = GET_STRING(objPtr);
}
- return stringPtr->unicode[index];
+
+ if (index >= stringPtr->numChars) {
+ return -1;
+ }
+ ch = stringPtr->unicode[index];
+#if TCL_UTF_MAX <= 4
+ /* See: bug [11ae2be95dac9417] */
+ if ((ch&0xF800) == 0xD800) {
+ if (ch&0x400) {
+ if ((index > 0) && ((stringPtr->unicode[index-1]&0xFC00) == 0xD800)) {
+ ch = -1; /* low surrogate preceded by high surrogate */
+ }
+ } else if ((++index < stringPtr->numChars)
+ && ((stringPtr->unicode[index]&0xFC00) == 0xDC00)) {
+ /* high surrogate followed by low surrogate */
+ ch = (((ch & 0x3FF) << 10) | (stringPtr->unicode[index] & 0x3FF)) + 0x10000;
+ }
+ }
+#endif
+ return ch;
}
/*
@@ -612,15 +640,24 @@ Tcl_GetRange(
{
Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */
String *stringPtr;
+ int length;
+ if (first < 0) {
+ first = 0;
+ }
/*
* Optimize the case where we're really dealing with a bytearray object
* we don't need to convert to a string to perform the substring operation.
*/
if (TclIsPureByteArray(objPtr)) {
- unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, NULL);
-
+ unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length);
+ if (last >= length) {
+ last = length - 1;
+ }
+ if (last < first) {
+ return Tcl_NewObj();
+ }
return Tcl_NewByteArrayObj(bytes+first, last-first+1);
}
@@ -640,6 +677,12 @@ Tcl_GetRange(
TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length);
}
if (stringPtr->numChars == objPtr->length) {
+ if (last >= stringPtr->numChars) {
+ last = stringPtr->numChars - 1;
+ }
+ if (last < first) {
+ return Tcl_NewObj();
+ }
newObjPtr = Tcl_NewStringObj(objPtr->bytes + first, last-first+1);
/*
@@ -654,8 +697,13 @@ Tcl_GetRange(
FillUnicodeRep(objPtr);
stringPtr = GET_STRING(objPtr);
}
-
-#if TCL_UTF_MAX == 4
+ if (last > stringPtr->numChars) {
+ last = stringPtr->numChars;
+ }
+ if (last < first) {
+ return Tcl_NewObj();
+ }
+#if TCL_UTF_MAX <= 4
/* See: bug [11ae2be95dac9417] */
if ((first>0) && ((stringPtr->unicode[first]&0xFC00) == 0xDC00)
&& ((stringPtr->unicode[first-1]&0xFC00) == 0xD800)) {
@@ -1930,7 +1978,7 @@ Tcl_AppendFormatToObj(
}
break;
case 'c': {
- char buf[TCL_UTF_MAX];
+ char buf[4];
int code, length;
if (TclGetIntFromObj(interp, segment, &code) != TCL_OK) {