summaryrefslogtreecommitdiffstats
path: root/generic/tclStringObj.c
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2024-05-23 11:52:04 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2024-05-23 11:52:04 (GMT)
commit6bce591e1e7185c259f56514222025f0ac271dbe (patch)
tree1a28b831e72535079d3814d5b5a6400e1487f7cb /generic/tclStringObj.c
parent2e1531179ae7fd22d1b1d7eedeb50c2c95a4ee3d (diff)
downloadtcl-6bce591e1e7185c259f56514222025f0ac271dbe.zip
tcl-6bce591e1e7185c259f56514222025f0ac271dbe.tar.gz
tcl-6bce591e1e7185c259f56514222025f0ac271dbe.tar.bz2
Fix 2 warnings on Win32 (Thanks, Harald). Some more code cleanup, backported from 8.7)
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r--generic/tclStringObj.c125
1 files changed, 60 insertions, 65 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index dcff811..55315f2 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -1,34 +1,32 @@
/*
* tclStringObj.c --
*
- * This file contains functions that implement string operations on Tcl
- * objects. Some string operations work with UTF strings and others
- * require Unicode format. Functions that require knowledge of the width
- * of each character, such as indexing, operate on Unicode data.
- *
- * A Unicode string is an internationalized string. Conceptually, a
- * Unicode string is an array of 16-bit quantities organized as a
- * sequence of properly formed UTF-8 characters. There is a one-to-one
- * map between Unicode and UTF characters. Because Unicode characters
- * have a fixed width, operations such as indexing operate on Unicode
- * data. The String object is optimized for the case where each UTF char
+ * This file contains functions that implement string operations on Tcl
+ * objects. Some string operations work with UTF-8 encoding forms.
+ * Functions that require knowledge of the width of each character,
+ * such as indexing, operate on fixed width encoding forms such as UTF-16.
+ *
+ * Conceptually, a string is a sequence of Unicode code points. Internally
+ * it may be stored in an encoding form such as a modified version of
+ * UTF-8 or UTF-16.
+ *
+ * The String object is optimized for the case where each UTF char
* in a string is only one byte. In this case, we store the value of
- * numChars, but we don't store the Unicode data (unless Tcl_GetUnicode
- * is explicitly called).
+ * numChars, but we don't store the fixed form encoding (unless
+ * Tcl_GetUnicode is explicitly called).
*
- * The String object type stores one or both formats. The default
- * behavior is to store UTF. Once Unicode is calculated by a function, it
- * is stored in the internal rep for future access (without an additional
- * O(n) cost).
+ * The String object type stores one or both formats. The default
+ * behavior is to store UTF-8. Once UTF-16/UTF32 is calculated, it is
+ * stored in the internal rep for future access (without an additional
+ * O(n) cost).
*
* To allow many appends to be done to an object without constantly
- * reallocating the space for the string or Unicode representation, we
- * allocate double the space for the string or Unicode and use the
+ * reallocating space, we allocate double the space and use the
* internal representation to keep track of how much space is used vs.
* allocated.
*
* Copyright (c) 1995-1997 Sun Microsystems, Inc.
- * Copyright (c) 1999 by Scriptics Corporation.
+ * Copyright (c) 1999 Scriptics Corporation.
*
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
@@ -121,8 +119,8 @@ const Tcl_ObjType tclStringType = {
static void
GrowStringBuffer(
Tcl_Obj *objPtr,
- int needed,
- int flag)
+ int needed, /* Not including terminating nul */
+ int flag) /* If 0, try to overallocate */
{
/*
* Preconditions:
@@ -238,7 +236,7 @@ GrowUnicodeBuffer(
*
* Side effects:
* The new object's internal string representation will be set to a copy
- * of the length bytes starting at "bytes". If "length" is negative, use
+ * of the length bytes starting at "bytes". If "length" is -1, use
* bytes up to the first NUL byte; i.e., assume "bytes" points to a
* C-style NUL-terminated string. The object's type is set to NULL. An
* extra NUL is added to the end of the new object's byte array.
@@ -252,9 +250,9 @@ Tcl_Obj *
Tcl_NewStringObj(
const char *bytes, /* Points to the first of the length bytes
* used to initialize the new object. */
- int length) /* The number of bytes to copy from "bytes"
+ int length) /* The number of bytes to copy from "bytes"
* when initializing the new object. If
- * negative, use bytes up to the first NUL
+ * -1, use bytes up to the first NUL
* byte. */
{
return Tcl_DbNewStringObj(bytes, length, "unknown", 0);
@@ -265,7 +263,7 @@ Tcl_NewStringObj(
const char *bytes, /* Points to the first of the length bytes
* used to initialize the new object. */
int length) /* The number of bytes to copy from "bytes"
- * when initializing the new object. If negative,
+ * when initializing the new object. If -1,
* use bytes up to the first NUL byte. */
{
Tcl_Obj *objPtr;
@@ -299,7 +297,7 @@ Tcl_NewStringObj(
*
* Side effects:
* The new object's internal string representation will be set to a copy
- * of the length bytes starting at "bytes". If "length" is negative, use
+ * of the length bytes starting at "bytes". If "length" is -1, use
* bytes up to the first NUL byte; i.e., assume "bytes" points to a
* C-style NUL-terminated string. The object's type is set to NULL. An
* extra NUL is added to the end of the new object's byte array.
@@ -313,7 +311,7 @@ Tcl_DbNewStringObj(
const char *bytes, /* Points to the first of the length bytes
* used to initialize the new object. */
int length, /* The number of bytes to copy from "bytes"
- * when initializing the new object. If negative,
+ * when initializing the new object. If -1,
* use bytes up to the first NUL byte. */
const char *file, /* The name of the source file calling this
* function; used for debugging. */
@@ -334,10 +332,9 @@ Tcl_Obj *
Tcl_DbNewStringObj(
const char *bytes, /* Points to the first of the length bytes
* used to initialize the new object. */
- int length, /* The number of bytes to copy from "bytes"
- * when initializing the new object. If
- * negative, use bytes up to the first NUL
- * byte. */
+ int length, /* The number of bytes to copy from "bytes"
+ * when initializing the new object. If -1,
+ * use bytes up to the first NUL byte. */
const char *file, /* The name of the source file calling this
* function; used for debugging. */
int line) /* Line number in the source file; used for
@@ -415,7 +412,7 @@ Tcl_GetCharLength(
}
/*
- * Optimize the case where we're really dealing with a ByteArray object;
+ * Optimize the case where we're really dealing with a bytearray object;
* we don't need to convert to a string to perform the get-length operation.
*
* NOTE that we do not need the ByteArray to be "pure". A ByteArray value
@@ -468,7 +465,7 @@ Tcl_GetCharLength(
*----------------------------------------------------------------------
*/
int
-TclCheckEmptyString (
+TclCheckEmptyString(
Tcl_Obj *objPtr)
{
int length = -1;
@@ -723,9 +720,9 @@ Tcl_GetUnicodeFromObj(
*
* Create a Tcl Object that contains the chars between first and last of
* the object indicated by "objPtr". If the object is not already a
- * String object, convert it to one. If first is negative, the
+ * String object, convert it to one. If first is -1, the
* returned string start at the beginning of objPtr. If last is
- * negative, the returned string ends at the end of objPtr.
+ * -1, the returned string ends at the end of objPtr.
*
* Results:
* Returns a new Tcl Object of the String type.
@@ -751,7 +748,7 @@ Tcl_GetRange(
}
/*
- * Optimize the case where we're really dealing with a ByteArray object
+ * Optimize the case where we're really dealing with a bytearray object
* we don't need to convert to a string to perform the substring operation.
*/
@@ -805,7 +802,6 @@ Tcl_GetRange(
FillUnicodeRep(objPtr);
stringPtr = GET_STRING(objPtr);
}
-
if (last < 0 || last >= stringPtr->numChars) {
last = stringPtr->numChars - 1;
}
@@ -841,7 +837,7 @@ Tcl_GetRange(
*
* Side effects:
* The object's string representation will be set to a copy of the
- * "length" bytes starting at "bytes". If "length" is negative, use bytes
+ * "length" bytes starting at "bytes". If "length" is -1, use bytes
* up to the first NUL byte; i.e., assume "bytes" points to a C-style
* NUL-terminated string. The object's old string and internal
* representations are freed and the object's type is set NULL.
@@ -854,8 +850,8 @@ Tcl_SetStringObj(
Tcl_Obj *objPtr, /* Object whose internal rep to init. */
const char *bytes, /* Points to the first of the length bytes
* used to initialize the object. */
- int length) /* The number of bytes to copy from "bytes"
- * when initializing the object. If negative,
+ int length) /* The number of bytes to copy from "bytes"
+ * when initializing the object. If -1,
* use bytes up to the first NUL byte.*/
{
if (Tcl_IsShared(objPtr)) {
@@ -891,12 +887,11 @@ Tcl_SetStringObj(
* None.
*
* Side effects:
- * If the size of objPtr's string representation is greater than length,
- * then it is reduced to length and a new terminating null byte is stored
- * in the strength. If the length of the string representation is greater
- * than length, the storage space is reallocated to the given length; a
- * null byte is stored at the end, but other bytes past the end of the
- * original string representation are undefined.
+ * If the size of objPtr's string representation is greater than length, a
+ * new terminating null byte is stored in objPtr->bytes at length, and
+ * bytes at positions past length have no meaning. If the length of the
+ * string representation is greater than length, the storage space is
+ * reallocated to length+1.
*
* The object's internal representation is changed to &tclStringType.
*
@@ -907,7 +902,7 @@ void
Tcl_SetObjLength(
Tcl_Obj *objPtr, /* Pointer to object. This object must not
* currently be shared. */
- int length) /* Number of bytes desired for string
+ int length) /* Number of bytes desired for string
* representation of object, not including
* terminating null byte. */
{
@@ -1007,7 +1002,7 @@ int
Tcl_AttemptSetObjLength(
Tcl_Obj *objPtr, /* Pointer to object. This object must not
* currently be shared. */
- int length) /* Number of bytes desired for string
+ int length) /* Number of bytes desired for string
* representation of object, not including
* terminating null byte. */
{
@@ -1195,10 +1190,10 @@ Tcl_AppendLimitedToObj(
Tcl_Obj *objPtr, /* Points to the object to append to. */
const char *bytes, /* Points to the bytes to append to the
* object. */
- int length, /* The number of bytes available to be
- * appended from "bytes". If < 0, then all
- * bytes up to a NUL byte are available. */
- int limit, /* The maximum number of bytes to append to
+ int length, /* The number of bytes available to be
+ * appended from "bytes". If -1, then
+ * all bytes up to a NUL byte are available. */
+ int limit, /* The maximum number of bytes to append to
* the object. */
const char *ellipsis) /* Ellipsis marker string, appended to the
* object to indicate not all available bytes
@@ -1507,7 +1502,7 @@ static void
AppendUnicodeToUnicodeRep(
Tcl_Obj *objPtr, /* Points to the object to append to. */
const Tcl_UniChar *unicode, /* String to append. */
- int appendNumChars) /* Number of chars of "unicode" to append. */
+ int appendNumChars) /* Number of chars of "unicode" to append. */
{
String *stringPtr;
int numChars;
@@ -1596,7 +1591,7 @@ static void
AppendUnicodeToUtfRep(
Tcl_Obj *objPtr, /* Points to the object to append to. */
const Tcl_UniChar *unicode, /* String to convert to UTF. */
- int numChars) /* Number of chars of unicode to convert. */
+ int numChars) /* Number of chars of Unicode to convert. */
{
String *stringPtr = GET_STRING(objPtr);
@@ -1876,7 +1871,7 @@ Tcl_AppendFormatToObj(
if (Tcl_IsShared(appendObj)) {
Tcl_Panic("%s called with shared object", "Tcl_AppendFormatToObj");
}
- TclGetStringFromObj(appendObj, &originalLength);
+ (void)TclGetStringFromObj(appendObj, &originalLength);
limit = INT_MAX - originalLength;
/*
@@ -2347,7 +2342,7 @@ Tcl_AppendFormatToObj(
uw /= base;
}
#endif
- } else if (useBig && big.used) {
+ } else if (useBig && !mp_iszero(&big)) {
int leftover = (big.used * MP_DIGIT_BIT) % numBits;
mp_digit mask = (~(mp_digit)0) << (MP_DIGIT_BIT-leftover);
@@ -2380,13 +2375,13 @@ Tcl_AppendFormatToObj(
numDigits = 1;
}
TclNewObj(pure);
- Tcl_SetObjLength(pure, numDigits);
+ Tcl_SetObjLength(pure, (int)numDigits);
bytes = TclGetString(pure);
toAppend = length = numDigits;
while (numDigits--) {
int digitOffset;
- if (useBig && big.used) {
+ if (useBig && !mp_iszero(&big)) {
if (index < big.used && (size_t) shift <
CHAR_BIT*sizeof(Tcl_WideUInt) - MP_DIGIT_BIT) {
bits |= ((Tcl_WideUInt) big.dp[index++]) << shift;
@@ -2535,7 +2530,7 @@ Tcl_AppendFormatToObj(
}
}
- TclGetStringFromObj(segment, &segmentNumBytes);
+ (void)TclGetStringFromObj(segment, &segmentNumBytes);
if (segmentNumBytes > limit) {
if (allocSegment) {
Tcl_DecrRefCount(segment);
@@ -2878,9 +2873,9 @@ TclGetStringStorage(
* Implements the [string reverse] operation.
*
* Results:
- * An unshared Tcl value which is the [string reverse] of the argument
- * supplied. When sharing rules permit, the returned value might be the
- * argument with modifications done in place.
+ * A Tcl value which is the [string reverse] of the argument supplied.
+ * When sharing rules permit and the caller requests, the returned value
+ * might be the argument with modifications done in place.
*
* Side effects:
* May allocate a new Tcl_Obj.
@@ -2892,7 +2887,7 @@ static void
ReverseBytes(
unsigned char *to, /* Copy bytes into here... */
unsigned char *from, /* ...from here... */
- int count) /* Until this many are copied, */
+ int count) /* Until this many are copied, */
/* reversing as you go. */
{
unsigned char *src = from + count;
@@ -3109,7 +3104,7 @@ ExtendUnicodeRepWithString(
} else {
numAppendChars = 0;
}
- for (dst=stringPtr->unicode + numOrigChars; numAppendChars-- > 0; dst++) {
+ for (dst = stringPtr->unicode + numOrigChars; numAppendChars-- > 0; dst++) {
bytes += TclUtfToUniChar(bytes, &unichar);
*dst = unichar;
}