summaryrefslogtreecommitdiffstats
path: root/generic/tclStringObj.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r--generic/tclStringObj.c279
1 files changed, 157 insertions, 122 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 42c4514..f9ac8d7 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -436,7 +436,6 @@ Tcl_GetCharLength(
return length;
}
-
/*
* OK, need to work with the object as a string.
*/
@@ -455,8 +454,6 @@ Tcl_GetCharLength(
}
return numChars;
}
-
-
/*
*----------------------------------------------------------------------
@@ -475,9 +472,9 @@ Tcl_GetCharLength(
*----------------------------------------------------------------------
*/
int
-TclCheckEmptyString (
- Tcl_Obj *objPtr
-) {
+TclCheckEmptyString(
+ Tcl_Obj *objPtr)
+{
int length = -1;
if (objPtr->bytes == &tclEmptyString) {
@@ -570,20 +567,22 @@ Tcl_GetUniChar(
if (index >= stringPtr->numChars) {
return -1;
}
- ch = stringPtr->unicode[index];
+ ch = stringPtr->unicode[index];
#if TCL_UTF_MAX <= 4
- /* See: bug [11ae2be95dac9417] */
- if ((ch&0xF800) == 0xD800) {
- if (ch&0x400) {
- if ((index > 0) && ((stringPtr->unicode[index-1]&0xFC00) == 0xD800)) {
- ch = -1; /* low surrogate preceded by high surrogate */
- }
- } else if ((++index < stringPtr->numChars)
- && ((stringPtr->unicode[index]&0xFC00) == 0xDC00)) {
- /* high surrogate followed by low surrogate */
- ch = (((ch & 0x3FF) << 10) | (stringPtr->unicode[index] & 0x3FF)) + 0x10000;
+ /* See: bug [11ae2be95dac9417] */
+ if ((ch & 0xF800) == 0xD800) {
+ if (ch & 0x400) {
+ if ((index > 0)
+ && ((stringPtr->unicode[index-1] & 0xFC00) == 0xD800)) {
+ ch = -1; /* low surrogate preceded by high surrogate */
}
+ } else if ((++index < stringPtr->numChars)
+ && ((stringPtr->unicode[index] & 0xFC00) == 0xDC00)) {
+ /* high surrogate followed by low surrogate */
+ ch = (((ch & 0x3FF) << 10) |
+ (stringPtr->unicode[index] & 0x3FF)) + 0x10000;
}
+ }
#endif
return ch;
}
@@ -689,7 +688,8 @@ Tcl_GetRange(
if (first < 0) {
first = 0;
- }
+ }
+
/*
* Optimize the case where we're really dealing with a bytearray object
* we don't need to convert to a string to perform the substring operation.
@@ -697,13 +697,14 @@ Tcl_GetRange(
if (TclIsPureByteArray(objPtr)) {
unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length);
+
if (last >= length) {
last = length - 1;
}
if (last < first) {
return Tcl_NewObj();
}
- return Tcl_NewByteArrayObj(bytes+first, last-first+1);
+ return Tcl_NewByteArrayObj(bytes + first, last - first + 1);
}
/*
@@ -742,24 +743,25 @@ Tcl_GetRange(
FillUnicodeRep(objPtr);
stringPtr = GET_STRING(objPtr);
}
- if (last > stringPtr->numChars) {
- last = stringPtr->numChars;
- }
- if (last < first) {
- return Tcl_NewObj();
- }
+ if (last > stringPtr->numChars) {
+ last = stringPtr->numChars;
+ }
+ if (last < first) {
+ return Tcl_NewObj();
+ }
#if TCL_UTF_MAX <= 4
- /* See: bug [11ae2be95dac9417] */
- if ((first>0) && ((stringPtr->unicode[first]&0xFC00) == 0xDC00)
- && ((stringPtr->unicode[first-1]&0xFC00) == 0xD800)) {
- ++first;
- }
- if ((last+1<stringPtr->numChars) && ((stringPtr->unicode[last+1]&0xFC00) == 0xDC00)
- && ((stringPtr->unicode[last]&0xFC00) == 0xD800)) {
- ++last;
- }
+ /* See: bug [11ae2be95dac9417] */
+ if ((first > 0) && ((stringPtr->unicode[first] & 0xFC00) == 0xDC00)
+ && ((stringPtr->unicode[first-1] & 0xFC00) == 0xD800)) {
+ ++first;
+ }
+ if ((last + 1 < stringPtr->numChars)
+ && ((stringPtr->unicode[last+1] & 0xFC00) == 0xDC00)
+ && ((stringPtr->unicode[last] & 0xFC00) == 0xD800)) {
+ ++last;
+ }
#endif
- return Tcl_NewUnicodeObj(stringPtr->unicode + first, last-first+1);
+ return Tcl_NewUnicodeObj(stringPtr->unicode + first, last - first + 1);
}
/*
@@ -1317,45 +1319,51 @@ Tcl_AppendObjToObj(
/*
* Handle append of one bytearray object to another as a special case.
* Note that we only do this when the objects are pure so that the
- * bytearray faithfully represent the true value; Otherwise
- * appending the byte arrays together could lose information;
+ * bytearray faithfully represent the true value; Otherwise appending the
+ * byte arrays together could lose information;
*/
if ((TclIsPureByteArray(objPtr) || objPtr->bytes == &tclEmptyString)
&& TclIsPureByteArray(appendObjPtr)) {
-
/*
* You might expect the code here to be
*
* bytes = Tcl_GetByteArrayFromObj(appendObjPtr, &length);
* TclAppendBytesToByteArray(objPtr, bytes, length);
*
- * and essentially all of the time that would be fine. However,
- * it would run into trouble in the case where objPtr and
- * appendObjPtr point to the same thing. That may never be a
- * good idea. It seems to violate Copy On Write, and we don't
- * have any tests for the situation, since making any Tcl commands
- * that call Tcl_AppendObjToObj() do that appears impossible
- * (They honor Copy On Write!). For the sake of extensions that
- * go off into that realm, though, here's a more complex approach
- * that can handle all the cases.
+ * and essentially all of the time that would be fine. However, it
+ * would run into trouble in the case where objPtr and appendObjPtr
+ * point to the same thing. That may never be a good idea. It seems to
+ * violate Copy On Write, and we don't have any tests for the
+ * situation, since making any Tcl commands that call
+ * Tcl_AppendObjToObj() do that appears impossible (They honor Copy On
+ * Write!). For the sake of extensions that go off into that realm,
+ * though, here's a more complex approach that can handle all the
+ * cases.
+ *
+ * First, get the lengths.
*/
- /* Get lengths */
int lengthSrc;
(void) Tcl_GetByteArrayFromObj(objPtr, &length);
(void) Tcl_GetByteArrayFromObj(appendObjPtr, &lengthSrc);
- /* Grow buffer enough for the append */
+ /*
+ * Grow buffer enough for the append.
+ */
+
TclAppendBytesToByteArray(objPtr, NULL, lengthSrc);
- /* Reset objPtr back to the original value */
+ /*
+ * Reset objPtr back to the original value.
+ */
+
Tcl_SetByteArrayLength(objPtr, length);
/*
- * Now do the append knowing that buffer growth cannot cause
- * any trouble.
+ * Now do the append knowing that buffer growth cannot cause any
+ * trouble.
*/
TclAppendBytesToByteArray(objPtr,
@@ -1403,6 +1411,7 @@ Tcl_AppendObjToObj(
numChars = stringPtr->numChars;
if ((numChars >= 0) && (appendObjPtr->typePtr == &tclStringType)) {
String *appendStringPtr = GET_STRING(appendObjPtr);
+
appendNumChars = appendStringPtr->numChars;
}
@@ -1988,7 +1997,8 @@ Tcl_AppendFormatToObj(
format += step;
step = TclUtfToUniChar(format, &ch);
}
- } else if ((ch == 't') || (ch == 'z') || (ch == 'q') || (ch == 'j') || (ch == 'L')) {
+ } else if ((ch == 't') || (ch == 'z') || (ch == 'q') || (ch == 'j')
+ || (ch == 'L')) {
format += step;
step = TclUtfToUniChar(format, &ch);
useBig = 1;
@@ -2513,7 +2523,7 @@ Tcl_AppendFormatToObj(
/*
*---------------------------------------------------------------------------
*
- * Tcl_Format--
+ * Tcl_Format --
*
* Results:
* A refcount zero Tcl_Obj.
@@ -2904,7 +2914,10 @@ TclStringRepeat(
Tcl_GetByteArrayFromObj(objResultPtr, NULL),
(count - done) * length);
} else if (unichar) {
- /* Efficiently produce a pure Tcl_UniChar array result */
+ /*
+ * Efficiently produce a pure Tcl_UniChar array result.
+ */
+
if (!inPlace || Tcl_IsShared(objPtr)) {
objResultPtr = Tcl_NewUnicodeObj(Tcl_GetUnicode(objPtr), length);
} else {
@@ -2930,7 +2943,10 @@ TclStringRepeat(
Tcl_AppendUnicodeToObj(objResultPtr, Tcl_GetUnicode(objResultPtr),
(count - done) * length);
} else {
- /* Efficiently concatenate string reps */
+ /*
+ * Efficiently concatenate string reps.
+ */
+
if (!inPlace || Tcl_IsShared(objPtr)) {
objResultPtr = Tcl_NewStringObj(Tcl_GetString(objPtr), length);
} else {
@@ -3014,9 +3030,10 @@ TclStringCat(
/* Value has a string rep. */
if (objPtr->length) {
/*
- * Non-empty string rep. Not a pure bytearray, so we
- * won't create a pure bytearray
+ * Non-empty string rep. Not a pure bytearray, so we won't
+ * create a pure bytearray.
*/
+
binary = 0;
if ((objPtr->typePtr) && (objPtr->typePtr != &tclStringType)) {
/* Prevent shimmer of non-string types. */
@@ -3037,8 +3054,12 @@ TclStringCat(
} while (--oc && (binary || allowUniChar));
if (binary) {
- /* Result will be pure byte array. Pre-size it */
- ov = objv; oc = objc;
+ /*
+ * Result will be pure byte array. Pre-size it
+ */
+
+ ov = objv;
+ oc = objc;
do {
Tcl_Obj *objPtr = *ov++;
@@ -3058,8 +3079,12 @@ TclStringCat(
}
} while (--oc);
} else if (allowUniChar && requestUniChar) {
- /* Result will be pure Tcl_UniChar array. Pre-size it. */
- ov = objv; oc = objc;
+ /*
+ * Result will be pure Tcl_UniChar array. Pre-size it.
+ */
+
+ ov = objv;
+ oc = objc;
do {
Tcl_Obj *objPtr = *ov++;
@@ -3104,9 +3129,9 @@ TclStringCat(
} while (--oc && (length == 0) && (pendingPtr == NULL));
/*
- * Either we found a possibly non-empty value, and we
- * remember this index as the first and last such value so
- * far seen, or (oc == 0) and all values are known empty,
+ * Either we found a possibly non-empty value, and we remember
+ * this index as the first and last such value so far seen,
+ * or (oc == 0) and all values are known empty,
* so first = last = objc - 1 signals the right quick return.
*/
@@ -3118,10 +3143,9 @@ TclStringCat(
/* assert ( pendingPtr != NULL ) */
/*
- * There's a pending value followed by more values.
- * Loop over remaining values generating strings until
- * a non-empty value is found, or the pending value gets
- * its string generated.
+ * There's a pending value followed by more values. Loop over
+ * remaining values generating strings until a non-empty value
+ * is found, or the pending value gets its string generated.
*/
do {
@@ -3177,10 +3201,10 @@ TclStringCat(
unsigned char *dst;
/*
- * Broken interface! Byte array value routines offer no way
- * to handle failure to allocate enough space. Following
- * stanza may panic.
+ * Broken interface! Byte array value routines offer no way to handle
+ * failure to allocate enough space. Following stanza may panic.
*/
+
if (inPlace && !Tcl_IsShared(*objv)) {
int start;
@@ -3295,6 +3319,7 @@ TclStringCat(
if ((objPtr->bytes == NULL) || (objPtr->length)) {
int more;
char *src = Tcl_GetStringFromObj(objPtr, &more);
+
memcpy(dst, src, (size_t) more);
dst += more;
}
@@ -3320,7 +3345,7 @@ TclStringCat(
* Compare two Tcl_Obj values as strings.
*
* Results:
- * Like memcmp, return -1, 0, or 1.
+ * Like memcmp, return -1, 0, or 1.
*
* Side effects:
* String representations may be generated. Internal representation may
@@ -3329,13 +3354,13 @@ TclStringCat(
*---------------------------------------------------------------------------
*/
-int TclStringCmp (
- Tcl_Obj *value1Ptr,
- Tcl_Obj *value2Ptr,
- int checkEq, /* comparison is only for equality */
- int nocase, /* comparison is not case sensitive */
- int reqlength /* requested length */
-) {
+int TclStringCmp(
+ Tcl_Obj *value1Ptr,
+ Tcl_Obj *value2Ptr,
+ int checkEq, /* comparison is only for equality */
+ int nocase, /* comparison is not case sensitive */
+ int reqlength) /* requested length */
+{
char *s1, *s2;
int empty, length, match, s1len, s2len;
memCmpFn_t memCmpFn;
@@ -3355,6 +3380,7 @@ int TclStringCmp (
* case-sensitive (which is all that really makes sense with byte
* arrays anyway, and we have no memcasecmp() for some reason... :^)
*/
+
s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len);
s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len);
memCmpFn = memcmp;
@@ -3402,31 +3428,31 @@ int TclStringCmp (
} else {
if ((empty = TclCheckEmptyString(value1Ptr)) > 0) {
switch (TclCheckEmptyString(value2Ptr)) {
- case -1:
+ case -1:
s1 = 0;
s1len = 0;
s2 = TclGetStringFromObj(value2Ptr, &s2len);
break;
- case 0:
+ case 0:
match = -1;
goto matchdone;
- case 1:
- default: /* avoid warn: `s2` may be used uninitialized */
+ case 1:
+ default: /* avoid warn: `s2` may be used uninitialized */
match = 0;
goto matchdone;
}
} else if (TclCheckEmptyString(value2Ptr) > 0) {
switch (empty) {
- case -1:
+ case -1:
s2 = 0;
s2len = 0;
s1 = TclGetStringFromObj(value1Ptr, &s1len);
break;
- case 0:
+ case 0:
match = 1;
goto matchdone;
- case 1:
- default: /* avoid warn: `s1` may be used uninitialized */
+ case 1:
+ default: /* avoid warn: `s1` may be used uninitialized */
match = 0;
goto matchdone;
}
@@ -3436,18 +3462,19 @@ int TclStringCmp (
}
if (!nocase && checkEq) {
/*
- * When we have equal-length we can check only for (in)equality.
- * We can use memcmp in all (n)eq cases because we
- * don't need to worry about lexical LE/BE variance.
+ * When we have equal-length we can check only for
+ * (in)equality. We can use memcmp in all (n)eq cases because
+ * we don't need to worry about lexical LE/BE variance.
*/
+
memCmpFn = memcmp;
} else {
-
/*
- * As a catch-all we will work with UTF-8. We cannot use memcmp() as
- * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's
- * utf rep). We can use the more efficient TclpUtfNcmp2 if we are
- * case-sensitive and no specific length was requested.
+ * As a catch-all we will work with UTF-8. We cannot use
+ * memcmp() as that is unsafe with any string containing NUL
+ * (\xC0\x80 in Tcl's utf rep). We can use the more efficient
+ * TclpUtfNcmp2 if we are case-sensitive and no specific
+ * length was requested.
*/
if ((reqlength < 0) && !nocase) {
@@ -3455,7 +3482,8 @@ int TclStringCmp (
} else {
s1len = Tcl_NumUtfChars(s1, s1len);
s2len = Tcl_NumUtfChars(s2, s2len);
- memCmpFn = (memCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp);
+ memCmpFn = (memCmpFn_t)
+ (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp);
}
}
}
@@ -3465,8 +3493,8 @@ int TclStringCmp (
length = reqlength;
} else if (reqlength < 0) {
/*
- * The requested length is negative, so we ignore it by setting it to
- * length + 1 so we correct the match var.
+ * The requested length is negative, so we ignore it by setting it
+ * to length + 1 so we correct the match var.
*/
reqlength = length + 1;
@@ -3474,11 +3502,12 @@ int TclStringCmp (
if (checkEq && (s1len != s2len)) {
match = 1; /* This will be reversed below. */
- } else {
+ } else {
/*
- * The comparison function should compare up to the minimum
- * byte length only.
+ * The comparison function should compare up to the minimum byte
+ * length only.
*/
+
match = memCmpFn(s1, s2, (size_t) length);
}
if ((match == 0) && (reqlength > length)) {
@@ -3486,7 +3515,7 @@ int TclStringCmp (
}
match = (match > 0) ? 1 : (match < 0) ? -1 : 0;
}
- matchdone:
+ matchdone:
return match;
}
@@ -3684,18 +3713,20 @@ static void
ReverseBytes(
unsigned char *to, /* Copy bytes into here... */
unsigned char *from, /* ...from here... */
- int count) /* Until this many are copied, */
+ int count) /* Until this many are copied, */
/* reversing as you go. */
{
unsigned char *src = from + count;
+
if (to == from) {
/* Reversing in place */
while (--src > to) {
unsigned char c = *src;
+
*src = *to;
*to++ = c;
}
- } else {
+ } else {
while (--src >= from) {
*to++ = *src;
}
@@ -3744,7 +3775,10 @@ TclStringReverse(
*to++ = *src;
}
} else {
- /* Reversing in place */
+ /*
+ * Reversing in place.
+ */
+
while (--src > from) {
ch = *src;
*src = *from;
@@ -3768,20 +3802,22 @@ TclStringReverse(
/*
* Either numChars == -1 and we don't know how many chars are
* represented by objPtr->bytes and we need Pass 1 just in case,
- * or numChars >= 0 and we know we have fewer chars than bytes,
- * so we know there's a multibyte character needing Pass 1.
+ * or numChars >= 0 and we know we have fewer chars than bytes, so
+ * we know there's a multibyte character needing Pass 1.
*
* Pass 1. Reverse the bytes of each multi-byte character.
*/
+
int charCount = 0;
int bytesLeft = numBytes;
while (bytesLeft) {
/*
- * NOTE: We know that the from buffer is NUL-terminated.
- * It's part of the contract for objPtr->bytes values.
- * Thus, we can skip calling Tcl_UtfCharComplete() here.
+ * NOTE: We know that the from buffer is NUL-terminated. It's
+ * part of the contract for objPtr->bytes values. Thus, we can
+ * skip calling Tcl_UtfCharComplete() here.
*/
+
int bytesInChar = TclUtfToUniChar(from, &ch);
ReverseBytes((unsigned char *)to, (unsigned char *)from,
@@ -3811,19 +3847,18 @@ TclStringReverse(
*
* The result is a concatenation of a prefix from objPtr, characters
* 0 through first-1, the insertPtr string value, and a suffix from
- * objPtr, characters from first + count to the end. The effect is
- * as if the inner substring of characters first through first+count-1
- * are removed and replaced with insertPtr.
- * If insertPtr is NULL, it is treated as an empty string.
- * When passed the flag TCL_STRING_IN_PLACE, this routine will try
- * to do the work within objPtr, so long as no sharing forbids it.
- * Without that request, or as needed, a new Tcl value will be allocated
- * to be the result.
+ * objPtr, characters from first + count to the end. The effect is as if
+ * the inner substring of characters first through first+count-1 are
+ * removed and replaced with insertPtr. If insertPtr is NULL, it is
+ * treated as an empty string. When passed the flag TCL_STRING_IN_PLACE,
+ * this routine will try to do the work within objPtr, so long as no
+ * sharing forbids it. Without that request, or as needed, a new Tcl
+ * value will be allocated to be the result.
*
* Results:
- * A Tcl value that is the result of the substring replacement.
- * May return NULL in case of an error. When NULL is returned and
- * interp is non-NULL, error information is left in interp
+ * A Tcl value that is the result of the substring replacement. May
+ * return NULL in case of an error. When NULL is returned and interp is
+ * non-NULL, error information is left in interp
*
*---------------------------------------------------------------------------
*/