From 694c4505cd26d54934847d0e5442ae3d5dca36fd Mon Sep 17 00:00:00 2001
From: apnadkarni <apnmbx-wits@yahoo.com>
Date: Tue, 2 May 2023 05:38:04 +0000
Subject: Change realloc strategy on fail, optimize for empty string

---
 generic/tclListObj.c | 88 +++++++++++++++++++++++++---------------------------
 1 file changed, 43 insertions(+), 45 deletions(-)

diff --git a/generic/tclListObj.c b/generic/tclListObj.c
index 87530be..2475c8e 100644
--- a/generic/tclListObj.c
+++ b/generic/tclListObj.c
@@ -40,7 +40,7 @@
 
 #ifdef ENABLE_LIST_ASSERTS
 
-#define LIST_ASSERT(cond_) assert(cond_) /* TODO - is there a Tcl-specific one? */
+#define LIST_ASSERT(cond_) assert(cond_)
 /*
  * LIST_INDEX_ASSERT is to catch errors with negative indices and counts
  * being passed AFTER validation. On Tcl9 length types are unsigned hence
@@ -68,9 +68,7 @@
 #endif
 
 /* Checks for when caller should have already converted to internal list type */
-#define LIST_ASSERT_TYPE(listObj_) \
-    LIST_ASSERT((listObj_)->typePtr == &tclListType.objType);
-
+#define LIST_ASSERT_TYPE(listObj_) LIST_ASSERT(INTREP_IS_LISTREP(listObj_))
 
 /*
  * If ENABLE_LIST_INVARIANTS is enabled (-DENABLE_LIST_INVARIANTS from the
@@ -305,12 +303,12 @@ ListSpanMerited(
     Tcl_Size allocatedStorageLength) /* Length of the currently allocation */
 {
     /*
-     TODO
-     - heuristics thresholds need to be determined
-     - currently, information about the sharing (ref count) of existing
-       storage is not passed. Perhaps it should be. For example if the
-       existing storage has a "large" ref count, then it might make sense
-       to do even a small span.
+     * Possible optimizations for future consideration 
+     * - heuristic LIST_SPAN_THRESHOLD 
+     * - currently, information about the sharing (ref count) of existing
+     * storage is not passed. Perhaps it should be. For example if the
+     * existing storage has a "large" ref count, then it might make sense
+     * to do even a small span.
      */
 
     if (length < LIST_SPAN_THRESHOLD) {
@@ -771,14 +769,16 @@ ListStoreNew(
     }
 
     if (flags & LISTREP_SPACE_FLAGS) {
+	/* Caller requests extra space front, back or both */
 	capacity = ListStoreUpSize(objc);
     } else {
 	capacity = objc;
     }
 
     storePtr = (ListStore *)Tcl_AttemptAlloc(LIST_SIZE(capacity));
-    if (storePtr == NULL && capacity != objc) {
-	capacity = objc; /* Try allocating exact size */
+    while (storePtr == NULL && (capacity > (objc+1))) {
+	/* Because of loop condition capacity won't overflow */
+	capacity = objc + ((capacity - objc) / 2);
 	storePtr = (ListStore *)Tcl_AttemptAlloc(LIST_SIZE(capacity));
     }
     if (storePtr == NULL) {
@@ -827,7 +827,8 @@ ListStoreNew(
  *
  * ListStoreReallocate --
  *
- *    Reallocates the memory for a ListStore.
+ *    Reallocates the memory for a ListStore allocating extra for 
+ *    possible future growth.
  *
  * Results:
  *    Pointer to the ListStore which may be the same as storePtr or pointer
@@ -856,7 +857,7 @@ ListStoreReallocate (ListStore *storePtr, Tcl_Size numSlots)
      * by half every time.
      */
     while (newStorePtr == NULL && (newCapacity > (numSlots+1))) {
-	/* Because of loop condition newCapacity can't overflow */
+	/* Because of loop condition newCapacity won't overflow */
 	newCapacity = numSlots + ((newCapacity - numSlots) / 2);
 	newStorePtr =
 	    (ListStore *)Tcl_AttemptRealloc(storePtr, LIST_SIZE(newCapacity));
@@ -1960,19 +1961,18 @@ int
 Tcl_ListObjIndex(
     Tcl_Interp *interp,  /* Used to report errors if not NULL. */
     Tcl_Obj *listObj,    /* List object to index into. */
-    Tcl_Size index,           /* Index of element to return. */
+    Tcl_Size index,      /* Index of element to return. */
     Tcl_Obj **objPtrPtr) /* The resulting Tcl_Obj* is stored here. */
 {
     Tcl_Obj **elemObjs;
     Tcl_Size numElems;
 
-    /*
-     * TODO
-     * Unlike the original list code, this does not optimize for lindex'ing
-     * an empty string when the internal rep is not already a list. On the
-     * other hand, this code will be faster for the case where the object
-     * is currently a dict. Benchmark the two cases.
-     */
+    /* Empty string => empty list. Avoid unnecessary shimmering */
+    if (listObj->bytes == &tclEmptyString) {
+	*objPtrPtr = NULL;
+	return TCL_OK;
+    }
+
     if (TclListObjGetElementsM(interp, listObj, &numElems, &elemObjs)
 	!= TCL_OK) {
 	return TCL_ERROR;
@@ -2017,19 +2017,19 @@ Tcl_ListObjLength(
 {
     ListRep listRep;
 
+    /* Empty string => empty list. Avoid unnecessary shimmering */
+    if (listObj->bytes == &tclEmptyString) {
+	*lenPtr = 0;
+	return TCL_OK;
+    }
+
     Tcl_Size (*lengthProc)(Tcl_Obj *obj) =  ABSTRACTLIST_PROC(listObj, lengthProc);
     if (lengthProc) {
 	*lenPtr = lengthProc(listObj);
 	return TCL_OK;
     }
 
-    /*
-     * TODO
-     * Unlike the original list code, this does not optimize for lindex'ing
-     * an empty string when the internal rep is not already a list. On the
-     * other hand, this code will be faster for the case where the object
-     * is currently a dict. Benchmark the two cases.
-     */
+
     if (TclListObjGetRep(interp, listObj, &listRep) != TCL_OK) {
 	return TCL_ERROR;
     }
@@ -2094,12 +2094,12 @@ Tcl_ListObjReplace(
 {
     ListRep listRep;
     Tcl_Size origListLen;
-    ptrdiff_t lenChange;
-    ptrdiff_t leadSegmentLen;
-    ptrdiff_t tailSegmentLen;
+    Tcl_Size lenChange;
+    Tcl_Size leadSegmentLen;
+    Tcl_Size tailSegmentLen;
     Tcl_Size numFreeSlots;
-    ptrdiff_t leadShift;
-    ptrdiff_t tailShift;
+    Tcl_Size leadShift;
+    Tcl_Size tailShift;
     Tcl_Obj **listObjs;
     int favor;
 
@@ -2110,8 +2110,6 @@ Tcl_ListObjReplace(
     if (TclListObjGetRep(interp, listObj, &listRep) != TCL_OK)
 	return TCL_ERROR; /* Cannot be converted to a list */
 
-    /* TODO - will need modification if Tcl9 sticks to unsigned indices */
-
     /* Make limits sane */
     origListLen = ListRepLength(&listRep);
     if (first < 0) {
@@ -2260,7 +2258,7 @@ Tcl_ListObjReplace(
      * be an explicit alloc and memmove which would let us redistribute
      * free space.
      */
-    if ((ptrdiff_t)numFreeSlots < lenChange && !ListRepIsShared(&listRep)) {
+    if (numFreeSlots < lenChange && !ListRepIsShared(&listRep)) {
 	/* T:listrep-1.{1,3,14,18,21},3.{3,10,11,14,27,32,41} */
 	ListStore *newStorePtr =
 	    ListStoreReallocate(listRep.storePtr, origListLen + lenChange);
@@ -2287,7 +2285,7 @@ Tcl_ListObjReplace(
      * TODO - for unshared case ONLY, consider a "move" based implementation
      */
     if (ListRepIsShared(&listRep) || /* 3a */
-	(ptrdiff_t)numFreeSlots < lenChange ||  /* 3b */
+	numFreeSlots < lenChange ||  /* 3b */
 	(origListLen + lenChange) < (listRep.storePtr->numAllocated / 4) /* 3c */
     ) {
 	ListRep newRep;
@@ -2402,9 +2400,9 @@ Tcl_ListObjReplace(
 	 * or need to shift both. In the former case, favor shifting the
 	 * smaller segment.
 	 */
-	ptrdiff_t leadSpace = ListRepNumFreeHead(&listRep);
-	ptrdiff_t tailSpace = ListRepNumFreeTail(&listRep);
-	ptrdiff_t finalFreeSpace = leadSpace + tailSpace - lenChange;
+	Tcl_Size leadSpace = ListRepNumFreeHead(&listRep);
+	Tcl_Size tailSpace = ListRepNumFreeTail(&listRep);
+	Tcl_Size finalFreeSpace = leadSpace + tailSpace - lenChange;
 
 	LIST_ASSERT((leadSpace + tailSpace) >= lenChange);
 	if (leadSpace >= lenChange
@@ -2421,7 +2419,7 @@ Tcl_ListObjReplace(
 	     * insertions.
 	     */
 	    if (finalFreeSpace > 1 && (tailSpace == 0 || tailSegmentLen == 0)) {
-		ptrdiff_t postShiftLeadSpace = leadSpace - lenChange;
+		Tcl_Size postShiftLeadSpace = leadSpace - lenChange;
 		if (postShiftLeadSpace > (finalFreeSpace/2)) {
 		    Tcl_Size extraShift = postShiftLeadSpace - (finalFreeSpace / 2);
 		    leadShift -= extraShift;
@@ -2438,7 +2436,7 @@ Tcl_ListObjReplace(
 	     * See comments above. This is analogous.
 	     */
 	    if (finalFreeSpace > 1 && (leadSpace == 0 || leadSegmentLen == 0)) {
-		ptrdiff_t postShiftTailSpace = tailSpace - lenChange;
+		Tcl_Size postShiftTailSpace = tailSpace - lenChange;
 		if (postShiftTailSpace > (finalFreeSpace/2)) {
 		    /* T:listrep-1.{1,3,14,18,21},3.{2,3,26,27} */
 		    Tcl_Size extraShift = postShiftTailSpace - (finalFreeSpace / 2);
@@ -2613,7 +2611,7 @@ TclLindexList(
 	/*
 	 * The argument is neither an index nor a well-formed list.
 	 * Report the error via TclLindexFlat.
-	 * TODO - This is as original. why not directly return an error?
+	 * TODO - This is as original code. why not directly return an error?
 	 */
 	return TclLindexFlat(interp, listObj, 1, &argObj);
     }
@@ -3557,7 +3555,7 @@ TclListTestObj(size_t length, size_t leadingSpace, size_t endSpace)
 	return NULL;
     }
 
-    ListRepInit(capacity, NULL, 0, &listRep);
+    ListRepInit(capacity, NULL, LISTREP_PANIC_ON_FAIL, &listRep);
 
     ListStore *storePtr = listRep.storePtr;
     size_t i;
-- 
cgit v0.12


From 1f5d09b497ec683bca02b1b78b8ada732b43dda9 Mon Sep 17 00:00:00 2001
From: apnadkarni <apnmbx-wits@yahoo.com>
Date: Tue, 2 May 2023 11:50:03 +0000
Subject: Fix LISTREP_ASSERT

---
 generic/tclListObj.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/generic/tclListObj.c b/generic/tclListObj.c
index 2475c8e..726b8dd 100644
--- a/generic/tclListObj.c
+++ b/generic/tclListObj.c
@@ -68,7 +68,8 @@
 #endif
 
 /* Checks for when caller should have already converted to internal list type */
-#define LIST_ASSERT_TYPE(listObj_) LIST_ASSERT(INTREP_IS_LISTREP(listObj_))
+#define LIST_ASSERT_TYPE(listObj_) \
+    LIST_ASSERT(TclHasInternalRep((listObj_), &tclListType.objType))
 
 /*
  * If ENABLE_LIST_INVARIANTS is enabled (-DENABLE_LIST_INVARIANTS from the
-- 
cgit v0.12


From 88760a8a81789306d3f2b9986b8f9824a4e6d08d Mon Sep 17 00:00:00 2001
From: pooryorick <com.digitalsmarties@pooryorick.com>
Date: Wed, 3 May 2023 09:31:08 +0000
Subject: Use Tcl_Size instead of ptrdiff_t in Tcl_ObjCmdProc2(),
 Tcl_CmdObjTraceProc2, and Tcl_MethodCallProc2().

---
 doc/CrtObjCmd.3    | 2 +-
 doc/CrtTrace.3     | 4 ++--
 generic/tcl.h      | 4 ++--
 generic/tclOO.h    | 2 +-
 unix/dltest/pkgt.c | 8 ++++----
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/doc/CrtObjCmd.3 b/doc/CrtObjCmd.3
index 7ba71eb..4bdde44 100644
--- a/doc/CrtObjCmd.3
+++ b/doc/CrtObjCmd.3
@@ -187,7 +187,7 @@ except its \fIproc2\fR argument is of type \fBTcl_ObjCmdProc2\fR.
 typedef int \fBTcl_ObjCmdProc2\fR(
         void *\fIclientData\fR,
         Tcl_Interp *\fIinterp\fR,
-        ptrdiff_t \fIobjc\fR,
+        Tcl_Size \fIobjc\fR,
         Tcl_Obj *const \fIobjv\fR[]);
 .CE
 .PP
diff --git a/doc/CrtTrace.3 b/doc/CrtTrace.3
index cfd3303..9f74cbf 100644
--- a/doc/CrtTrace.3
+++ b/doc/CrtTrace.3
@@ -88,10 +88,10 @@ typedef int \fBTcl_CmdObjTraceProc\fR(
 typedef int \fBTcl_CmdObjTraceProc2\fR(
         \fBvoid *\fR \fIclientData\fR,
         \fBTcl_Interp\fR* \fIinterp\fR,
-        ptrdiff_t \fIlevel\fR,
+        Tcl_Size \fIlevel\fR,
         const char *\fIcommand\fR,
         \fBTcl_Command\fR \fIcommandToken\fR,
-        ptrdiff_t \fIobjc\fR,
+        Tcl_Size \fIobjc\fR,
         \fBTcl_Obj\fR *const \fIobjv\fR[]);
 .CE
 .PP
diff --git a/generic/tcl.h b/generic/tcl.h
index 0e647db..d75da06 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -561,7 +561,7 @@ typedef int (Tcl_CmdObjTraceProc) (void *clientData, Tcl_Interp *interp,
 	int level, const char *command, Tcl_Command commandInfo, int objc,
 	struct Tcl_Obj *const *objv);
 typedef int (Tcl_CmdObjTraceProc2) (void *clientData, Tcl_Interp *interp,
-	ptrdiff_t level, const char *command, Tcl_Command commandInfo, ptrdiff_t objc,
+	Tcl_Size level, const char *command, Tcl_Command commandInfo, Tcl_Size objc,
 	struct Tcl_Obj *const *objv);
 typedef void (Tcl_CmdObjTraceDeleteProc) (void *clientData);
 typedef void (Tcl_DupInternalRepProc) (struct Tcl_Obj *srcPtr,
@@ -586,7 +586,7 @@ typedef void (Tcl_NamespaceDeleteProc) (void *clientData);
 typedef int (Tcl_ObjCmdProc) (void *clientData, Tcl_Interp *interp,
 	int objc, struct Tcl_Obj *const *objv);
 typedef int (Tcl_ObjCmdProc2) (void *clientData, Tcl_Interp *interp,
-	ptrdiff_t objc, struct Tcl_Obj *const *objv);
+	Tcl_Size objc, struct Tcl_Obj *const *objv);
 typedef int (Tcl_LibraryInitProc) (Tcl_Interp *interp);
 typedef int (Tcl_LibraryUnloadProc) (Tcl_Interp *interp, int flags);
 typedef void (Tcl_PanicProc) (const char *format, ...);
diff --git a/generic/tclOO.h b/generic/tclOO.h
index 775bd32..524acb9 100644
--- a/generic/tclOO.h
+++ b/generic/tclOO.h
@@ -63,7 +63,7 @@ typedef struct Tcl_ObjectContext_ *Tcl_ObjectContext;
 typedef int (Tcl_MethodCallProc)(void *clientData, Tcl_Interp *interp,
 	Tcl_ObjectContext objectContext, int objc, Tcl_Obj *const *objv);
 typedef int (Tcl_MethodCallProc2)(void *clientData, Tcl_Interp *interp,
-	Tcl_ObjectContext objectContext, ptrdiff_t objc, Tcl_Obj *const *objv);
+	Tcl_ObjectContext objectContext, Tcl_Size objc, Tcl_Obj *const *objv);
 typedef void (Tcl_MethodDeleteProc)(void *clientData);
 typedef int (Tcl_CloneProc)(Tcl_Interp *interp, void *oldClientData,
 	void **newClientData);
diff --git a/unix/dltest/pkgt.c b/unix/dltest/pkgt.c
index 77e21ac..1f326f5 100644
--- a/unix/dltest/pkgt.c
+++ b/unix/dltest/pkgt.c
@@ -16,10 +16,10 @@
 static int TraceProc2 (
     void *clientData,
     Tcl_Interp *interp,
-    ptrdiff_t level,
+    Tcl_Size level,
     const char *command,
     Tcl_Command commandInfo,
-    ptrdiff_t objc,
+    Tcl_Size objc,
     struct Tcl_Obj *const *objv)
 {
     (void)clientData;
@@ -55,12 +55,12 @@ static int
 Pkgt_EqObjCmd2(
     void *dummy,		/* Not used. */
     Tcl_Interp *interp,		/* Current interpreter. */
-    ptrdiff_t objc,			/* Number of arguments. */
+    Tcl_Size objc,			/* Number of arguments. */
     Tcl_Obj *const objv[])	/* Argument objects. */
 {
     Tcl_WideInt result;
     const char *str1, *str2;
-    ptrdiff_t len1, len2;
+    Tcl_Size len1, len2;
     (void)dummy;
 
     if (objc != 3) {
-- 
cgit v0.12


From 06cc962945bd148c0fb2a63ab23e6e3577945929 Mon Sep 17 00:00:00 2001
From: apnadkarni <apnmbx-wits@yahoo.com>
Date: Wed, 3 May 2023 10:01:50 +0000
Subject: Comments only. Fix blatantly obsolete ones

---
 generic/tclStringObj.c | 36 +++++++++++++++++-------------------
 generic/tclStringRep.h | 48 +++++++++++++-----------------------------------
 2 files changed, 30 insertions(+), 54 deletions(-)

diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 63e38bb..d2bc1b2 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -1,29 +1,27 @@
 /*
  * tclStringObj.c --
  *
- *	This file contains functions that implement string operations on Tcl
- *	objects. Some string operations work with UTF strings and others
- *	require Unicode format. Functions that require knowledge of the width
- *	of each character, such as indexing, operate on Unicode data.
- *
- *	A Unicode string is an internationalized string. Conceptually, a
- *	Unicode string is an array of 16-bit quantities organized as a
- *	sequence of properly formed UTF-8 characters. There is a one-to-one
- *	map between Unicode and UTF characters. Because Unicode characters
- *	have a fixed width, operations such as indexing operate on Unicode
- *	data. The String object is optimized for the case where each UTF char
+ *      This file contains functions that implement string operations on Tcl
+ *      objects. Some string operations work with UTF-8 encoding forms.
+ *      Functions that require knowledge of the width of each character,
+ * 	such as indexing, operate on fixed width encoding forms such as UTF-32.
+ *
+ * 	Conceptually, a string is a sequence of Unicode code points. Internally
+ * 	it may be stored in an encoding form such as a modified version of
+ * 	UTF-8 or UTF-16 (when TCL_UTF_MAX=3) or UTF-32.
+ *
+ *	The String object is optimized for the case where each UTF char
  *	in a string is only one byte. In this case, we store the value of
- *	numChars, but we don't store the Unicode data (unless Tcl_GetUnicode
- *	is explicitly called).
+ *	numChars, but we don't store the fixed form encoding (unless
+ * 	Tcl_GetUnicode is explicitly called).
  *
- *	The String object type stores one or both formats. The default
- *	behavior is to store UTF. Once Unicode is calculated by a function, it
- *	is stored in the internal rep for future access (without an additional
- *	O(n) cost).
+ *      The String object type stores one or both formats. The default
+ *      behavior is to store UTF-8. Once UTF-16/UTF32 is calculated, it is
+ *      stored in the internal rep for future access (without an additional
+ *      O(n) cost).
  *
  *	To allow many appends to be done to an object without constantly
- *	reallocating the space for the string or Unicode representation, we
- *	allocate double the space for the string or Unicode and use the
+ *	reallocating space, we allocate double the space and use the
  *	internal representation to keep track of how much space is used vs.
  *	allocated.
  *
diff --git a/generic/tclStringRep.h b/generic/tclStringRep.h
index d0c76cb..7f72b04 100644
--- a/generic/tclStringRep.h
+++ b/generic/tclStringRep.h
@@ -1,29 +1,12 @@
 /*
  * tclStringRep.h --
  *
- *	This file contains the definition of the Unicode string internal
- *	representation and macros to access it.
+ *  This file contains the definition of internal representations of a string
+ *  and macros to access it.
  *
- *	A Unicode string is an internationalized string. Conceptually, a
- *	Unicode string is an array of 16-bit quantities organized as a
- *	sequence of properly formed UTF-8 characters. There is a one-to-one
- *	map between Unicode and UTF characters. Because Unicode characters
- *	have a fixed width, operations such as indexing operate on Unicode
- *	data. The String object is optimized for the case where each UTF char
- *	in a string is only one byte. In this case, we store the value of
- *	numChars, but we don't store the Unicode data (unless Tcl_GetUnicode
- *	is explicitly called).
- *
- *	The String object type stores one or both formats. The default
- *	behavior is to store UTF. Once Unicode is calculated by a function, it
- *	is stored in the internal rep for future access (without an additional
- *	O(n) cost).
- *
- *	To allow many appends to be done to an object without constantly
- *	reallocating the space for the string or Unicode representation, we
- *	allocate double the space for the string or Unicode and use the
- *	internal representation to keep track of how much space is used vs.
- *	allocated.
+ *  Conceptually, a string is a sequence of Unicode code points. Internally
+ *  it may be stored in an encoding form such as a modified version of UTF-8
+ *  or UTF-16 (when TCL_UTF_MAX=3) or UTF-32.
  *
  * Copyright (c) 1995-1997 Sun Microsystems, Inc.
  * Copyright (c) 1999 by Scriptics Corporation.
@@ -39,15 +22,10 @@
 /*
  * The following structure is the internal rep for a String object. It keeps
  * track of how much memory has been used and how much has been allocated for
- * the Unicode and UTF string to enable growing and shrinking of the UTF and
- * Unicode reps of the String object with fewer mallocs. To optimize string
+ * the various representations to enable growing and shrinking of 
+ * the String object with fewer mallocs. To optimize string
  * length and indexing operations, this structure also stores the number of
- * characters (same of UTF and Unicode!) once that value has been computed.
- *
- * Under normal configurations, what Tcl calls "Unicode" is actually UTF-16
- * restricted to the Basic Multilingual Plane (i.e. U+00000 to U+0FFFF). This
- * can be officially modified by altering the definition of Tcl_UniChar in
- * tcl.h, but do not do that unless you are sure what you're doing!
+ * code points (independent of encoding form) once that value has been computed.
  */
 
 typedef struct {
@@ -57,15 +35,15 @@ typedef struct {
 				 * Unicode rep, or that the number of UTF bytes ==
 				 * the number of chars. */
     Tcl_Size allocated;		/* The amount of space actually allocated for
-				 * the UTF string (minus 1 byte for the
+				 * the UTF-8 string (minus 1 byte for the
 				 * termination char). */
     Tcl_Size maxChars;		/* Max number of chars that can fit in the
 				 * space allocated for the Unicode array. */
     int hasUnicode;		/* Boolean determining whether the string has
-				 * a Unicode representation. */
-    Tcl_UniChar unicode[TCLFLEXARRAY];	/* The array of Unicode chars. The actual size
-				 * of this field depends on the 'maxChars'
-				 * field above. */
+				 * a Tcl_UniChar representation. */
+    Tcl_UniChar unicode[TCLFLEXARRAY];	/* The array of Tcl_UniChar units.
+				 * The actual size of this field depends on
+				 * the maxChars field above. */
 } String;
 
 /* Limit on string lengths. The -1 because limit does not include the nul */
-- 
cgit v0.12


From d40bb9d13a5a6e2607f313df4bd986886f1ef8d7 Mon Sep 17 00:00:00 2001
From: pointsman <rolf@pointsman.de>
Date: Wed, 3 May 2023 15:18:17 +0000
Subject: Removed the bug specific test constraint from the tests related to 
 [0306a5563c] because the bug is fixed.

---
 tests/bigdata.test | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/bigdata.test b/tests/bigdata.test
index 08556dd..0f72be7 100644
--- a/tests/bigdata.test
+++ b/tests/bigdata.test
@@ -1114,7 +1114,6 @@ bigtestRO concat-bigdata-1 "concat" {4294967296 {0 1 2 3 4} {6 7 0 1 2} {3 4 5 6
 test puts-bigdata-1 "puts" -setup {
     set fpath [makeFile {} bug-0306a5563.data]
 } -constraints {
-    bug0306a5563
     bigdata
 } -body {
     set fd [open $fpath w]    
@@ -1130,7 +1129,6 @@ test puts-bigdata-1 "puts" -setup {
 test puts-bigdata-2 "puts" -setup {
     set fpath [tcltest::makeFile {} bug-0306a5563.data]
 } -constraints {
-    bug0306a5563
     bigdata
 } -body {
     set fd [open $fpath w]
-- 
cgit v0.12


From b583ea7360808cb502d1ea65954ab0387ebdd823 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Wed, 3 May 2023 15:51:10 +0000
Subject: Backport "Comments only. Fix blatantly obsolete ones". And a few more
 improvements from the same files.

---
 generic/tclStringObj.c | 95 +++++++++++++++++++++++++++++---------------------
 generic/tclStringRep.h | 44 ++++++++---------------
 2 files changed, 70 insertions(+), 69 deletions(-)

diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index d3a17d1..7fbf77a 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -1,29 +1,27 @@
 /*
  * tclStringObj.c --
  *
- *	This file contains functions that implement string operations on Tcl
- *	objects. Some string operations work with UTF strings and others
- *	require Unicode format. Functions that require knowledge of the width
- *	of each character, such as indexing, operate on Unicode data.
- *
- *	A Unicode string is an internationalized string. Conceptually, a
- *	Unicode string is an array of 16-bit quantities organized as a
- *	sequence of properly formed UTF-8 characters. There is a one-to-one
- *	map between Unicode and UTF characters. Because Unicode characters
- *	have a fixed width, operations such as indexing operate on Unicode
- *	data. The String object is optimized for the case where each UTF char
+ *      This file contains functions that implement string operations on Tcl
+ *      objects. Some string operations work with UTF-8 encoding forms.
+ *      Functions that require knowledge of the width of each character,
+ * 	such as indexing, operate on fixed width encoding forms such as UTF-32.
+ *
+ * 	Conceptually, a string is a sequence of Unicode code points. Internally
+ * 	it may be stored in an encoding form such as a modified version of
+ * 	UTF-8 or UTF-16 (when TCL_UTF_MAX=3) or UTF-32.
+ *
+ *	The String object is optimized for the case where each UTF char
  *	in a string is only one byte. In this case, we store the value of
- *	numChars, but we don't store the Unicode data (unless Tcl_GetUnicode
- *	is explicitly called).
+ *	numChars, but we don't store the fixed form encoding (unless
+ * 	Tcl_GetUnicode is explicitly called).
  *
- *	The String object type stores one or both formats. The default
- *	behavior is to store UTF. Once Unicode is calculated by a function, it
- *	is stored in the internal rep for future access (without an additional
- *	O(n) cost).
+ *      The String object type stores one or both formats. The default
+ *      behavior is to store UTF-8. Once UTF-16/UTF32 is calculated, it is
+ *      stored in the internal rep for future access (without an additional
+ *      O(n) cost).
  *
  *	To allow many appends to be done to an object without constantly
- *	reallocating the space for the string or Unicode representation, we
- *	allocate double the space for the string or Unicode and use the
+ *	reallocating space, we allocate double the space and use the
  *	internal representation to keep track of how much space is used vs.
  *	allocated.
  *
@@ -37,7 +35,6 @@
 #include "tclInt.h"
 #include "tclTomMath.h"
 #include "tclStringRep.h"
-
 #include "assert.h"
 /*
  * Prototypes for functions defined later in this file:
@@ -631,10 +628,8 @@ TclGetCharLength(
      */
 
     if (TclIsPureByteArray(objPtr)) {
-	int length;
-
-	(void) Tcl_GetByteArrayFromObj(objPtr, &length);
-	return length;
+	(void) Tcl_GetByteArrayFromObj(objPtr, &numChars);
+	return numChars;
     }
 
     /*
@@ -675,10 +670,10 @@ Tcl_GetCharLength(
     }
 
     /*
-     * Optimize BytArray case: No need to convert to a string to perform the
-     * get-length operation.
+     * Optimize the case where we're really dealing with a bytearray object;
+     * we don't need to convert to a string to perform the get-length operation.
      *
-     * Starting in Tcl 8.7, check for a "pure" bytearray, because the
+     * Starting in Tcl 8.7, we check for a "pure" bytearray, because the
      * machinery behind that test is using a proper bytearray ObjType.  We
      * could also compute length of an improper bytearray without shimmering
      * but there's no value in that. We *want* to shimmer an improper bytearray
@@ -686,16 +681,17 @@ Tcl_GetCharLength(
      */
 
     if (TclIsPureByteArray(objPtr)) {
-
 	(void) Tcl_GetByteArrayFromObj(objPtr, &numChars);
     } else {
 	Tcl_GetString(objPtr);
 	numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length);
     }
+
     return numChars;
 }
 #endif
 
+
 /*
  *----------------------------------------------------------------------
  *
@@ -722,6 +718,11 @@ TclCheckEmptyString(
 	return TCL_EMPTYSTRING_YES;
     }
 
+    if (TclIsPureByteArray(objPtr)
+	&& Tcl_GetCharLength(objPtr) == 0) {
+	return TCL_EMPTYSTRING_YES;
+    }
+
     if (TclListObjIsCanonical(objPtr)) {
 	TclListObjLengthM(NULL, objPtr, &length);
 	return length == 0;
@@ -2383,12 +2384,16 @@ Tcl_AppendFormatToObj(
 
 	width = 0;
 	if (isdigit(UCHAR(ch))) {
-	    width = strtoul(format, &end, 10);
-	    if (width < 0) {
+	    /* Note ull will be >= 0 because of isdigit check above */
+	    unsigned long long ull;
+	    ull = strtoull(format, &end, 10);
+	    /* Comparison is >=, not >, to leave room for nul */
+	    if (ull >= WIDE_MAX) {
 		msg = overflow;
 		errCode = "OVERFLOW";
 		goto errorMsg;
 	    }
+	    width = (Tcl_WideInt)ull;
 	    format = end;
 	    step = TclUtfToUniChar(format, &ch);
 	} else if (ch == '*') {
@@ -2425,7 +2430,16 @@ Tcl_AppendFormatToObj(
 	    step = TclUtfToUniChar(format, &ch);
 	}
 	if (isdigit(UCHAR(ch))) {
-	    precision = strtoul(format, &end, 10);
+	    /* Note ull will be >= 0 because of isdigit check above */
+	    unsigned long long ull;
+	    ull = strtoull(format, &end, 10);
+	    /* Comparison is >=, not >, to leave room for nul */
+	    if (ull >= WIDE_MAX) {
+		msg = overflow;
+		errCode = "OVERFLOW";
+		goto errorMsg;
+	    }
+	    precision = (Tcl_WideInt)ull;
 	    format = end;
 	    step = TclUtfToUniChar(format, &ch);
 	} else if (ch == '*') {
@@ -2531,6 +2545,9 @@ Tcl_AppendFormatToObj(
 	    if (TclGetIntFromObj(interp, segment, &code) != TCL_OK) {
 		goto error;
 	    }
+	    if ((unsigned)code > 0x10FFFF) {
+	    	code = 0xFFFD;
+	    }
 	    length = Tcl_UniCharToUtf(code, buf);
 	    if ((code >= 0xD800) && (length < 3)) {
 		/* Special case for handling high surrogates. */
@@ -3875,6 +3892,7 @@ TclStringCmp(
     if ((reqlength == 0) || (value1Ptr == value2Ptr)) {
 	/*
 	 * Always match at 0 chars of if it is the same obj.
+	 * Note: as documented reqlength negative means it is ignored
 	 */
 	match = 0;
     } else {
@@ -4006,15 +4024,15 @@ TclStringCmp(
 	 * comparison function.
 	 */
 	length = (s1len < s2len) ? s1len : s2len;
-	if (reqlength > 0 && reqlength < length) {
-	    length = reqlength;
-	} else if (reqlength < 0) {
+	if (reqlength < 0) {
 	    /*
 	     * The requested length is negative, so ignore it by setting it
 	     * to length + 1 to correct the match var.
 	     */
 
 	    reqlength = length + 1;
+	} else if (reqlength > 0 && reqlength < length) {
+	    length = reqlength;
 	}
 
 	if (checkEq && reqlength < 0 && (s1len != s2len)) {
@@ -4452,18 +4470,17 @@ TclStringReplace(
     int inPlace = flags & TCL_STRING_IN_PLACE;
     Tcl_Obj *result;
 
-    /* Caller is expected to pass sensible arguments */
-    assert ( count >= 0 ) ;
-    assert ( first >= 0 ) ;
-
     /* Replace nothing with nothing */
-    if ((insertPtr == NULL) && (count == 0)) {
+    if ((insertPtr == NULL) && (count <= 0)) {
 	if (inPlace) {
 	    return objPtr;
 	} else {
 	    return Tcl_DuplicateObj(objPtr);
 	}
     }
+    if (first < 0) {
+	first = 0;
+    }
 
     /*
      * The caller very likely had to call Tcl_GetCharLength() or similar
diff --git a/generic/tclStringRep.h b/generic/tclStringRep.h
index 0219a00..ef64d6c 100644
--- a/generic/tclStringRep.h
+++ b/generic/tclStringRep.h
@@ -1,29 +1,12 @@
 /*
  * tclStringRep.h --
  *
- *	This file contains the definition of the Unicode string internal
- *	representation and macros to access it.
+ *  This file contains the definition of internal representations of a string
+ *  and macros to access it.
  *
- *	A Unicode string is an internationalized string. Conceptually, a
- *	Unicode string is an array of 16-bit quantities organized as a
- *	sequence of properly formed UTF-8 characters. There is a one-to-one
- *	map between Unicode and UTF characters. Because Unicode characters
- *	have a fixed width, operations such as indexing operate on Unicode
- *	data. The String object is optimized for the case where each UTF char
- *	in a string is only one byte. In this case, we store the value of
- *	numChars, but we don't store the Unicode data (unless Tcl_GetUnicode
- *	is explicitly called).
- *
- *	The String object type stores one or both formats. The default
- *	behavior is to store UTF. Once Unicode is calculated by a function, it
- *	is stored in the internal rep for future access (without an additional
- *	O(n) cost).
- *
- *	To allow many appends to be done to an object without constantly
- *	reallocating the space for the string or Unicode representation, we
- *	allocate double the space for the string or Unicode and use the
- *	internal representation to keep track of how much space is used vs.
- *	allocated.
+ *  Conceptually, a string is a sequence of Unicode code points. Internally
+ *  it may be stored in an encoding form such as a modified version of UTF-8
+ *  or UTF-16 (when TCL_UTF_MAX=3) or UTF-32.
  *
  * Copyright (c) 1995-1997 Sun Microsystems, Inc.
  * Copyright (c) 1999 by Scriptics Corporation.
@@ -39,10 +22,10 @@
 /*
  * The following structure is the internal rep for a String object. It keeps
  * track of how much memory has been used and how much has been allocated for
- * the Unicode and UTF string to enable growing and shrinking of the UTF and
- * Unicode reps of the String object with fewer mallocs. To optimize string
+ * the various representations to enable growing and shrinking of 
+ * the String object with fewer mallocs. To optimize string
  * length and indexing operations, this structure also stores the number of
- * characters (same of UTF and Unicode!) once that value has been computed.
+ * code points (independent of encoding form) once that value has been computed.
  */
 
 typedef struct {
@@ -52,17 +35,18 @@ typedef struct {
 				 * Unicode rep, or that the number of UTF bytes ==
 				 * the number of chars. */
     Tcl_Size allocated;		/* The amount of space actually allocated for
-				 * the UTF string (minus 1 byte for the
+				 * the UTF-8 string (minus 1 byte for the
 				 * termination char). */
     Tcl_Size maxChars;		/* Max number of chars that can fit in the
 				 * space allocated for the Unicode array. */
     int hasUnicode;		/* Boolean determining whether the string has
-				 * a Unicode representation. */
-    unsigned short unicode[TCLFLEXARRAY];	/* The array of Unicode chars. The actual size
-				 * of this field depends on the 'maxChars'
-				 * field above. */
+				 * a Tcl_UniChar representation. */
+    unsigned short unicode[TCLFLEXARRAY];	/* The array of Tcl_UniChar units.
+				 * The actual size of this field depends on
+				 * the maxChars field above. */
 } String;
 
+/* Limit on string lengths. The -1 because limit does not include the nul */
 #define STRING_MAXCHARS \
     (int)(((size_t)UINT_MAX - offsetof(String, unicode))/sizeof(unsigned short) - 1)
 #define STRING_SIZE(numChars) \
-- 
cgit v0.12


From e2f55284cf4bfdca0a8c1b971e9ca8895ce415e9 Mon Sep 17 00:00:00 2001
From: dkf <donal.k.fellows@manchester.ac.uk>
Date: Thu, 4 May 2023 08:24:01 +0000
Subject: Do not build every branch. Too expensive.

---
 .github/workflows/linux-build.yml | 9 ++++++++-
 .github/workflows/mac-build.yml   | 9 ++++++++-
 .github/workflows/onefiledist.yml | 9 ++++++++-
 .github/workflows/win-build.yml   | 9 ++++++++-
 4 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/linux-build.yml b/.github/workflows/linux-build.yml
index 7ba9e89..f8c30da 100644
--- a/.github/workflows/linux-build.yml
+++ b/.github/workflows/linux-build.yml
@@ -1,5 +1,12 @@
 name: Linux
-on: [push]
+on: 
+  push:
+    branches:
+    - "main"
+    - "trunk"
+    - "core-8-branch"
+    tags:
+    - "core-**"
 permissions:
   contents: read
 jobs:
diff --git a/.github/workflows/mac-build.yml b/.github/workflows/mac-build.yml
index a9345a1..b5a586e 100644
--- a/.github/workflows/mac-build.yml
+++ b/.github/workflows/mac-build.yml
@@ -1,5 +1,12 @@
 name: macOS
-on: [push]
+on: 
+  push:
+    branches:
+    - "main"
+    - "trunk"
+    - "core-8-branch"
+    tags:
+    - "core-**"
 permissions:
   contents: read
 jobs:
diff --git a/.github/workflows/onefiledist.yml b/.github/workflows/onefiledist.yml
index 1f75762..c6277af 100644
--- a/.github/workflows/onefiledist.yml
+++ b/.github/workflows/onefiledist.yml
@@ -1,5 +1,12 @@
 name: Build Binaries
-on: [push]
+on: 
+  push:
+    branches:
+    - "main"
+    - "trunk"
+    - "core-8-branch"
+    tags:
+    - "core-**"
 permissions:
   contents: read
 jobs:
diff --git a/.github/workflows/win-build.yml b/.github/workflows/win-build.yml
index ba4e5ba..9773c2a 100644
--- a/.github/workflows/win-build.yml
+++ b/.github/workflows/win-build.yml
@@ -1,5 +1,12 @@
 name: Windows
-on: [push]
+on: 
+  push:
+    branches:
+    - "main"
+    - "trunk"
+    - "core-8-branch"
+    tags:
+    - "core-**"
 permissions:
   contents: read
 env:
-- 
cgit v0.12


From af320fb41e62980c0c988a377c138ac14757ad81 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Thu, 4 May 2023 12:53:45 +0000
Subject: Fix [9c397da4a9]: AppendPrintfToObjVA(): support 64-bit pointers

---
 generic/tcl.h          | 2 +-
 generic/tclStringObj.c | 8 ++++++--
 generic/tclStringRep.h | 2 +-
 tests/tailcall.test    | 2 +-
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/generic/tcl.h b/generic/tcl.h
index 318c7a1..2b6c947 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -2176,7 +2176,7 @@ typedef struct Tcl_EncodingType {
 #define TCL_ENCODING_CHAR_LIMIT		0x10
 /* Internal use bits, do not define bits in this space. See above comment */
 #define TCL_ENCODING_INTERNAL_USE_MASK  0xFF00
-/* 
+/*
  * Reserve top byte for profile values (disjoint, not a mask). In case of
  * changes, ensure ENCODING_PROFILE_* macros in tclInt.h are modified if
  * necessary.
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 7fbf77a..0e47487 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -249,7 +249,7 @@ UpdateStringOfUTF16String(
 #endif
 
 #endif
-
+
 /*
  * TCL STRING GROWTH ALGORITHM
  *
@@ -3130,12 +3130,16 @@ AppendPrintfToObjVA(
 
 		break;
 	    }
+	    case 'p':
+		if (sizeof(size_t) == sizeof(Tcl_WideInt)) {
+		    size = 2;
+		}
+		/* FALLTHRU */
 	    case 'c':
 	    case 'i':
 	    case 'u':
 	    case 'd':
 	    case 'o':
-	    case 'p':
 	    case 'x':
 	    case 'X':
 		seekingConversion = 0;
diff --git a/generic/tclStringRep.h b/generic/tclStringRep.h
index ef64d6c..d1863fb 100644
--- a/generic/tclStringRep.h
+++ b/generic/tclStringRep.h
@@ -22,7 +22,7 @@
 /*
  * The following structure is the internal rep for a String object. It keeps
  * track of how much memory has been used and how much has been allocated for
- * the various representations to enable growing and shrinking of 
+ * the various representations to enable growing and shrinking of
  * the String object with fewer mallocs. To optimize string
  * length and indexing operations, this structure also stores the number of
  * code points (independent of encoding form) once that value has been computed.
diff --git a/tests/tailcall.test b/tests/tailcall.test
index c9ec674..0016845 100644
--- a/tests/tailcall.test
+++ b/tests/tailcall.test
@@ -709,7 +709,7 @@ test tailcall-14.1-bc {{in a deleted namespace} {byte compiled}} -body {
 } -returnCodes 1 -result {namespace "::ns" not found}
 
 test tailcall-bug-784befb0ba {tailcall crash with 254 args} -body {
-    proc tccrash args {llength $args}	
+    proc tccrash args {llength $args}
     # Must be EXACTLY 254 for crash
     proc p {} [list tailcall tccrash {*}[lrepeat 254 x]]
     p
-- 
cgit v0.12


From e51f7da2ee3eeb2de5ed29bffef7c4fd20bc2791 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Thu, 4 May 2023 12:58:41 +0000
Subject: Backport: Do not build every branch. Too expensive.

---
 .github/workflows/linux-build.yml | 9 ++++++++-
 .github/workflows/mac-build.yml   | 9 ++++++++-
 .github/workflows/onefiledist.yml | 9 ++++++++-
 .github/workflows/win-build.yml   | 9 ++++++++-
 4 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/linux-build.yml b/.github/workflows/linux-build.yml
index 7ba9e89..f8c30da 100644
--- a/.github/workflows/linux-build.yml
+++ b/.github/workflows/linux-build.yml
@@ -1,5 +1,12 @@
 name: Linux
-on: [push]
+on: 
+  push:
+    branches:
+    - "main"
+    - "trunk"
+    - "core-8-branch"
+    tags:
+    - "core-**"
 permissions:
   contents: read
 jobs:
diff --git a/.github/workflows/mac-build.yml b/.github/workflows/mac-build.yml
index a9345a1..b5a586e 100644
--- a/.github/workflows/mac-build.yml
+++ b/.github/workflows/mac-build.yml
@@ -1,5 +1,12 @@
 name: macOS
-on: [push]
+on: 
+  push:
+    branches:
+    - "main"
+    - "trunk"
+    - "core-8-branch"
+    tags:
+    - "core-**"
 permissions:
   contents: read
 jobs:
diff --git a/.github/workflows/onefiledist.yml b/.github/workflows/onefiledist.yml
index 1f75762..c6277af 100644
--- a/.github/workflows/onefiledist.yml
+++ b/.github/workflows/onefiledist.yml
@@ -1,5 +1,12 @@
 name: Build Binaries
-on: [push]
+on: 
+  push:
+    branches:
+    - "main"
+    - "trunk"
+    - "core-8-branch"
+    tags:
+    - "core-**"
 permissions:
   contents: read
 jobs:
diff --git a/.github/workflows/win-build.yml b/.github/workflows/win-build.yml
index ba4e5ba..9773c2a 100644
--- a/.github/workflows/win-build.yml
+++ b/.github/workflows/win-build.yml
@@ -1,5 +1,12 @@
 name: Windows
-on: [push]
+on: 
+  push:
+    branches:
+    - "main"
+    - "trunk"
+    - "core-8-branch"
+    tags:
+    - "core-**"
 permissions:
   contents: read
 env:
-- 
cgit v0.12