From 9a007031fac7f6b4f05a6b7c2bfcb123d823decf Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Fri, 27 Mar 2020 10:33:08 +0000
Subject: Fix [767e070d35]: Tcl_GetRange and Tcl_GetUniChar do not validate
 index inputs

---
 doc/StringObj.3        |   3 +-
 generic/tclCmdMZ.c     |   2 +-
 generic/tclInt.h       |   2 +-
 generic/tclStringObj.c | 177 ++++++++++++++++++++++++++++++++-----------------
 4 files changed, 119 insertions(+), 65 deletions(-)

diff --git a/doc/StringObj.3 b/doc/StringObj.3
index c23706f..7870b21 100644
--- a/doc/StringObj.3
+++ b/doc/StringObj.3
@@ -204,7 +204,8 @@ where the caller does not need the length of the unicode string
 representation.
 .PP
 \fBTcl_GetUniChar\fR returns the \fIindex\fR'th character in the
-value's Unicode representation.
+value's Unicode representation. The index is assumed to be in the
+appropriate range.
 .PP
 \fBTcl_GetRange\fR returns a newly created value comprised of the
 characters between \fIfirst\fR and \fIlast\fR (inclusive) in the
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 75989c7..d344678 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -2446,7 +2446,7 @@ StringRevCmd(
 	return TCL_ERROR;
     }
 
-    Tcl_SetObjResult(interp, TclStringObjReverse(objv[1]));
+    Tcl_SetObjResult(interp, TclStringReverse(objv[1]));
     return TCL_OK;
 }
 
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 39fb740..69cebe5 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3168,7 +3168,7 @@ MODULE_SCOPE int	TclStringMatch(const char *str, int strLen,
 			    const char *pattern, int ptnLen, int flags);
 MODULE_SCOPE int	TclStringMatchObj(Tcl_Obj *stringObj,
 			    Tcl_Obj *patternObj, int flags);
-MODULE_SCOPE Tcl_Obj *	TclStringObjReverse(Tcl_Obj *objPtr);
+MODULE_SCOPE Tcl_Obj *	TclStringReverse(Tcl_Obj *objPtr);
 MODULE_SCOPE void	TclSubstCompile(Tcl_Interp *interp, const char *bytes,
 			    int numBytes, int flags, int line,
 			    struct CompileEnv *envPtr);
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 1534a8b..d4f45d7 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -149,9 +149,9 @@ GrowStringBuffer(
 	objPtr->bytes = NULL;
     }
     if (flag == 0 || stringPtr->allocated > 0) {
-	attempt = 2 * needed;
-	if (attempt >= 0) {
-	    ptr = attemptckrealloc(objPtr->bytes, attempt + 1);
+	if (needed <= INT_MAX / 2) {
+	    attempt = 2 * needed;
+	    ptr = (char *)attemptckrealloc(objPtr->bytes, attempt + 1);
 	}
 	if (ptr == NULL) {
 	    /*
@@ -164,7 +164,7 @@ GrowStringBuffer(
 	    int growth = (int) ((extra > limit) ? limit : extra);
 
 	    attempt = needed + growth;
-	    ptr = attemptckrealloc(objPtr->bytes, attempt + 1);
+	    ptr = (char *)attemptckrealloc(objPtr->bytes, attempt + 1);
 	}
     }
     if (ptr == NULL) {
@@ -173,7 +173,7 @@ GrowStringBuffer(
 	 */
 
 	attempt = needed;
-	ptr = ckrealloc(objPtr->bytes, attempt + 1);
+	ptr = (char *)ckrealloc(objPtr->bytes, attempt + 1);
     }
     objPtr->bytes = ptr;
     stringPtr->allocated = attempt;
@@ -199,8 +199,8 @@ GrowUnicodeBuffer(
 	 * Subsequent appends - apply the growth algorithm.
 	 */
 
-	attempt = 2 * needed;
-	if (attempt >= 0 && attempt <= STRING_MAXCHARS) {
+	if (needed <= STRING_MAXCHARS / 2) {
+	    attempt = 2 * needed;
 	    ptr = stringAttemptRealloc(stringPtr, attempt);
 	}
 	if (ptr == NULL) {
@@ -418,6 +418,15 @@ Tcl_GetCharLength(
     int numChars;
 
     /*
+     * Quick, no-shimmer return for short string reps.
+     */
+
+    if ((objPtr->bytes) && (objPtr->length < 2)) {
+	/* 0 bytes -> 0 chars; 1 byte -> 1 char */
+	return objPtr->length;
+    }
+
+    /*
      * Optimize the case where we're really dealing with a bytearray object;
      * we don't need to convert to a string to perform the get-length operation.
      *
@@ -434,7 +443,6 @@ Tcl_GetCharLength(
 	return length;
     }
 
-
     /*
      * OK, need to work with the object as a string.
      */
@@ -465,8 +473,6 @@ Tcl_GetCharLength(
     }
     return numChars;
 }
-
-
 
 /*
  *----------------------------------------------------------------------
@@ -486,8 +492,8 @@ Tcl_GetCharLength(
  */
 int
 TclCheckEmptyString (
-    Tcl_Obj *objPtr
-) {
+    Tcl_Obj *objPtr)
+{
     int length = -1;
 
     if (objPtr->bytes == tclEmptyStringRep) {
@@ -515,8 +521,8 @@ TclCheckEmptyString (
  *
  * Tcl_GetUniChar --
  *
- *	Get the index'th Unicode character from the String object. The index
- *	is assumed to be in the appropriate range.
+ *	Get the index'th Unicode character from the String object. If index
+ *	is out of range, the result = 0xFFFD;
  *
  * Results:
  *	Returns the index'th Unicode character in the Object.
@@ -534,15 +540,22 @@ Tcl_GetUniChar(
     int index)			/* Get the index'th Unicode character. */
 {
     String *stringPtr;
+    int length;
+
+    if (index < 0) {
+	return 0xFFFD;
+    }
 
     /*
      * Optimize the case where we're really dealing with a bytearray object
-     * without string representation; we don't need to convert to a string to
-     * perform the indexing operation.
+     * we don't need to convert to a string to perform the indexing operation.
      */
 
     if (TclIsPureByteArray(objPtr)) {
-	unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, NULL);
+	unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length);
+	if (index >= length) {
+		return 0xFFFD;
+	}
 
 	return (Tcl_UniChar) bytes[index];
     }
@@ -568,6 +581,10 @@ Tcl_GetUniChar(
 	FillUnicodeRep(objPtr);
 	stringPtr = GET_STRING(objPtr);
     }
+
+    if (index >= stringPtr->numChars) {
+	return 0xFFFD;
+    }
     return stringPtr->unicode[index];
 }
 
@@ -668,17 +685,27 @@ Tcl_GetRange(
 {
     Tcl_Obj *newObjPtr;		/* The Tcl object to find the range of. */
     String *stringPtr;
+    int length;
+
+    if (first < 0) {
+	first = 0;
+    }
 
     /*
      * Optimize the case where we're really dealing with a bytearray object
-     * without string representation; we don't need to convert to a string to
-     * perform the substring operation.
+     * we don't need to convert to a string to perform the substring operation.
      */
 
     if (TclIsPureByteArray(objPtr)) {
-	unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, NULL);
+	unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length);
 
-	return Tcl_NewByteArrayObj(bytes+first, last-first+1);
+	if (last >= length) {
+	    last = length - 1;
+	}
+	if (last < first) {
+	    return Tcl_NewObj();
+	}
+	return Tcl_NewByteArrayObj(bytes + first, last - first + 1);
     }
 
     /*
@@ -697,6 +724,12 @@ Tcl_GetRange(
 	    TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length);
 	}
 	if (stringPtr->numChars == objPtr->length) {
+	    if (last >= stringPtr->numChars) {
+		last = stringPtr->numChars - 1;
+	    }
+	    if (last < first) {
+		return Tcl_NewObj();
+	    }
 	    newObjPtr = Tcl_NewStringObj(objPtr->bytes + first, last-first+1);
 
 	    /*
@@ -711,19 +744,25 @@ Tcl_GetRange(
 	FillUnicodeRep(objPtr);
 	stringPtr = GET_STRING(objPtr);
     }
-
+    if (last > stringPtr->numChars) {
+	last = stringPtr->numChars;
+    }
+    if (last < first) {
+	return Tcl_NewObj();
+    }
 #if TCL_UTF_MAX == 4
-	/* See: bug [11ae2be95dac9417] */
-	if ((first>0) && ((stringPtr->unicode[first]&0xFC00) == 0xDC00)
-		&& ((stringPtr->unicode[first-1]&0xFC00) == 0xD800)) {
-	    ++first;
-	}
-	if ((last+1<stringPtr->numChars) && ((stringPtr->unicode[last+1]&0xFC00) == 0xDC00)
-		&& ((stringPtr->unicode[last]&0xFC00) == 0xD800)) {
-	    ++last;
-	}
+    /* See: bug [11ae2be95dac9417] */
+    if ((first > 0) && ((stringPtr->unicode[first] & 0xFC00) == 0xDC00)
+		&& ((stringPtr->unicode[first-1] & 0xFC00) == 0xD800)) {
+	++first;
+    }
+    if ((last + 1 < stringPtr->numChars)
+		&& ((stringPtr->unicode[last+1] & 0xFC00) == 0xDC00)
+		&& ((stringPtr->unicode[last] & 0xFC00) == 0xD800)) {
+	++last;
+    }
 #endif
-    return Tcl_NewUnicodeObj(stringPtr->unicode + first, last-first+1);
+    return Tcl_NewUnicodeObj(stringPtr->unicode + first, last - first + 1);
 }
 
 /*
@@ -840,9 +879,9 @@ Tcl_SetObjLength(
 	     * Need to enlarge the buffer.
 	     */
 	    if (objPtr->bytes == tclEmptyStringRep) {
-		objPtr->bytes = ckalloc(length + 1);
+		objPtr->bytes = (char *)ckalloc(length + 1);
 	    } else {
-		objPtr->bytes = ckrealloc(objPtr->bytes, length + 1);
+		objPtr->bytes = (char *)ckrealloc(objPtr->bytes, length + 1);
 	    }
 	    stringPtr->allocated = length;
 	}
@@ -946,9 +985,9 @@ Tcl_AttemptSetObjLength(
 	    char *newBytes;
 
 	    if (objPtr->bytes == tclEmptyStringRep) {
-		newBytes = attemptckalloc(length + 1);
+		newBytes = (char *)attemptckalloc(length + 1);
 	    } else {
-		newBytes = attemptckrealloc(objPtr->bytes, length + 1);
+		newBytes = (char *)attemptckrealloc(objPtr->bytes, length + 1);
 	    }
 	    if (newBytes == NULL) {
 		return 0;
@@ -1291,39 +1330,45 @@ Tcl_AppendObjToObj(
 
     if ((TclIsPureByteArray(objPtr) || objPtr->bytes == tclEmptyStringRep)
 	    && TclIsPureByteArray(appendObjPtr)) {
-
 	/*
 	 * You might expect the code here to be
 	 *
 	 *  bytes = Tcl_GetByteArrayFromObj(appendObjPtr, &length);
 	 *  TclAppendBytesToByteArray(objPtr, bytes, length);
 	 *
-	 * and essentially all of the time that would be fine.  However,
-	 * it would run into trouble in the case where objPtr and
-	 * appendObjPtr point to the same thing.  That may never be a
-	 * good idea.  It seems to violate Copy On Write, and we don't
-	 * have any tests for the situation, since making any Tcl commands
-	 * that call Tcl_AppendObjToObj() do that appears impossible
-	 * (They honor Copy On Write!).  For the sake of extensions that
-	 * go off into that realm, though, here's a more complex approach
-	 * that can handle all the cases.
+	 * and essentially all of the time that would be fine. However, it
+	 * would run into trouble in the case where objPtr and appendObjPtr
+	 * point to the same thing. That may never be a good idea. It seems to
+	 * violate Copy On Write, and we don't have any tests for the
+	 * situation, since making any Tcl commands that call
+	 * Tcl_AppendObjToObj() do that appears impossible (They honor Copy On
+	 * Write!). For the sake of extensions that go off into that realm,
+	 * though, here's a more complex approach that can handle all the
+	 * cases.
+	 *
+	 * First, get the lengths.
 	 */
 
-	/* Get lengths */
 	int lengthSrc;
 
 	(void) Tcl_GetByteArrayFromObj(objPtr, &length);
 	(void) Tcl_GetByteArrayFromObj(appendObjPtr, &lengthSrc);
 
-	/* Grow buffer enough for the append */
+	/*
+	 * Grow buffer enough for the append.
+	 */
+
 	TclAppendBytesToByteArray(objPtr, NULL, lengthSrc);
 
-	/* Reset objPtr back to the original value */
+	/*
+	 * Reset objPtr back to the original value.
+	 */
+
 	Tcl_SetByteArrayLength(objPtr, length);
 
 	/*
-	 * Now do the append knowing that buffer growth cannot cause
-	 * any trouble.
+	 * Now do the append knowing that buffer growth cannot cause any
+	 * trouble.
 	 */
 
 	TclAppendBytesToByteArray(objPtr,
@@ -1375,6 +1420,7 @@ Tcl_AppendObjToObj(
     numChars = stringPtr->numChars;
     if ((numChars >= 0) && (appendObjPtr->typePtr == &tclStringType)) {
 	String *appendStringPtr = GET_STRING(appendObjPtr);
+
 	appendNumChars = appendStringPtr->numChars;
     }
 
@@ -2458,7 +2504,7 @@ Tcl_AppendFormatToObj(
 /*
  *---------------------------------------------------------------------------
  *
- * Tcl_Format--
+ * Tcl_Format --
  *
  * Results:
  *	A refcount zero Tcl_Obj.
@@ -2723,7 +2769,7 @@ TclGetStringStorage(
 /*
  *---------------------------------------------------------------------------
  *
- * TclStringObjReverse --
+ * TclStringReverse --
  *
  *	Implements the [string reverse] operation.
  *
@@ -2742,18 +2788,20 @@ static void
 ReverseBytes(
     unsigned char *to,		/* Copy bytes into here... */
     unsigned char *from,	/* ...from here... */
-    int count)		/* Until this many are copied, */
+    int count)			/* Until this many are copied, */
 				/* reversing as you go. */
 {
     unsigned char *src = from + count;
+
     if (to == from) {
 	/* Reversing in place */
 	while (--src > to) {
 	    unsigned char c = *src;
+
 	    *src = *to;
 	    *to++ = c;
 	}
-    }  else {
+    } else {
 	while (--src >= from) {
 	    *to++ = *src;
 	}
@@ -2761,7 +2809,7 @@ ReverseBytes(
 }
 
 Tcl_Obj *
-TclStringObjReverse(
+TclStringReverse(
     Tcl_Obj *objPtr)
 {
     String *stringPtr;
@@ -2800,7 +2848,10 @@ TclStringObjReverse(
 		*to++ = *src;
 	    }
 	} else {
-	    /* Reversing in place */
+	    /*
+	     * Reversing in place.
+	     */
+
 	    while (--src > from) {
 		ch = *src;
 		*src = *from;
@@ -2824,20 +2875,22 @@ TclStringObjReverse(
 	    /*
 	     * Either numChars == -1 and we don't know how many chars are
 	     * represented by objPtr->bytes and we need Pass 1 just in case,
-	     * or numChars >= 0 and we know we have fewer chars than bytes,
-	     * so we know there's a multibyte character needing Pass 1.
+	     * or numChars >= 0 and we know we have fewer chars than bytes, so
+	     * we know there's a multibyte character needing Pass 1.
 	     *
 	     * Pass 1. Reverse the bytes of each multi-byte character.
 	     */
+
 	    int charCount = 0;
 	    int bytesLeft = numBytes;
 
 	    while (bytesLeft) {
 		/*
-		 * NOTE: We know that the from buffer is NUL-terminated.
-		 * It's part of the contract for objPtr->bytes values.
-		 * Thus, we can skip calling Tcl_UtfCharComplete() here.
+		 * NOTE: We know that the from buffer is NUL-terminated. It's
+		 * part of the contract for objPtr->bytes values. Thus, we can
+		 * skip calling Tcl_UtfCharComplete() here.
 		 */
+
 		int bytesInChar = TclUtfToUniChar(from, &ch);
 
 		ReverseBytes((unsigned char *)to, (unsigned char *)from,
-- 
cgit v0.12


From 5c2d281edc798baa53966b4b5a9eadcb6a2e9ac9 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Fri, 27 Mar 2020 12:05:39 +0000
Subject: Fix harmless gcc warning, when compiling on Linux

---
 unix/tclUnixPort.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/unix/tclUnixPort.h b/unix/tclUnixPort.h
index 9961613..20e1ddc 100644
--- a/unix/tclUnixPort.h
+++ b/unix/tclUnixPort.h
@@ -118,11 +118,11 @@ typedef off_t		Tcl_SeekOffset;
     extern int TclOSstat(const char *name, void *statBuf);
     extern int TclOSlstat(const char *name, void *statBuf);
 #elif defined(HAVE_STRUCT_STAT64) && !defined(__APPLE__)
-#   define TclOSstat		stat64
-#   define TclOSlstat		lstat64
+#   define TclOSstat(name, buf) stat64(name, (struct stat64 *)buf)
+#   define TclOSlstat(name,buf) lstat64(name, (struct stat64 *)buf)
 #else
-#   define TclOSstat		stat
-#   define TclOSlstat		lstat
+#   define TclOSstat(name, buf) stat(name, (struct stat *)buf)
+#   define TclOSlstat(name, buf) lstat(name, (struct stat *)buf)
 #endif
 
 /*
-- 
cgit v0.12


From 0a00f85f84c7117557104cdd16c161c409a6c8d5 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Fri, 27 Mar 2020 16:24:30 +0000
Subject: Looks like this little hack is no longer necessary on current 32-bit
 Cygwin.

---
 unix/tclUnixPort.h | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/unix/tclUnixPort.h b/unix/tclUnixPort.h
index 20e1ddc..0ae4f25 100644
--- a/unix/tclUnixPort.h
+++ b/unix/tclUnixPort.h
@@ -86,7 +86,6 @@ typedef off_t		Tcl_SeekOffset;
 #endif
 
 #ifdef __CYGWIN__
-
     /* Make some symbols available without including <windows.h> */
 #   define DWORD unsigned int
 #   define CP_UTF8 65001
@@ -107,13 +106,7 @@ typedef off_t		Tcl_SeekOffset;
     __declspec(dllimport) extern __stdcall int GetLastError(void);
     __declspec(dllimport) extern __stdcall int GetFileAttributesW(const WCHAR *);
     __declspec(dllimport) extern __stdcall int SetFileAttributesW(const WCHAR *, int);
-
     __declspec(dllimport) extern int cygwin_conv_path(int, const void *, void *, int);
-/* On Cygwin, the environment is imported from the Cygwin DLL. */
-#ifndef __x86_64__
-#   define environ __cygwin_environ
-    extern char **__cygwin_environ;
-#endif
 #   define timezone _timezone
     extern int TclOSstat(const char *name, void *statBuf);
     extern int TclOSlstat(const char *name, void *statBuf);
-- 
cgit v0.12


From 2b68efb386e03de81ea2f0c97f7233ad8e0f183d Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Fri, 27 Mar 2020 18:02:02 +0000
Subject: Repair bad test labels.

---
 tests/binary.test | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/binary.test b/tests/binary.test
index b872a30..6eb8d87 100644
--- a/tests/binary.test
+++ b/tests/binary.test
@@ -2637,19 +2637,19 @@ test binary-73.3 {binary decode base64} -body {
 test binary-73.4 {binary decode base64} -body {
     binary decode base64 [string repeat YWJj 20]
 } -result [string repeat abc 20]
-test binary-73.5 {binary encode base64} -body {
+test binary-73.5 {binary decode base64} -body {
     binary decode base64 AAECAwQAAQID
 } -result "\0\1\2\3\4\0\1\2\3"
-test binary-73.6 {binary encode base64} -body {
+test binary-73.6 {binary decode base64} -body {
     binary decode base64 AA==
 } -result "\0"
-test binary-73.7 {binary encode base64} -body {
+test binary-73.7 {binary decode base64} -body {
     binary decode base64 AAA=
 } -result "\0\0"
-test binary-73.8 {binary encode base64} -body {
+test binary-73.8 {binary decode base64} -body {
     binary decode base64 AAAA
 } -result "\0\0\0"
-test binary-73.9 {binary encode base64} -body {
+test binary-73.9 {binary decode base64} -body {
     binary decode base64 AAAAAA==
 } -result "\0\0\0\0"
 test binary-73.10 {binary decode base64} -body {
-- 
cgit v0.12


From 74657995c7bd5b067100a26e387306888f5f6134 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Fri, 27 Mar 2020 21:21:28 +0000
Subject: Improve error reporting. If codepoint looks negative, bad char is
 reported as REPLACEMENT CHARACTER, which is wrong, therefore not helpful.

---
 generic/tclBinary.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/generic/tclBinary.c b/generic/tclBinary.c
index 81c56e7..1dd1081 100644
--- a/generic/tclBinary.c
+++ b/generic/tclBinary.c
@@ -3009,7 +3009,7 @@ BinaryDecode64(
   bad64:
     Tcl_SetObjResult(interp, Tcl_ObjPrintf(
 	    "invalid base64 character \"%c\" at position %d",
-	    (char) c, (int) (data - datastart - 1)));
+	    c, (int) (data - datastart - 1)));
     TclDecrRefCount(resultObj);
     return TCL_ERROR;
 }
-- 
cgit v0.12


From 3cd192c81cef20472a54a3c9afefc28361a0e316 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Fri, 27 Mar 2020 21:52:42 +0000
Subject: Further improvement. Report invalid multi-byte characters accurately.

---
 generic/tclBinary.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/generic/tclBinary.c b/generic/tclBinary.c
index 1dd1081..3c47843 100644
--- a/generic/tclBinary.c
+++ b/generic/tclBinary.c
@@ -3007,11 +3007,21 @@ BinaryDecode64(
     return TCL_OK;
 
   bad64:
-    Tcl_SetObjResult(interp, Tcl_ObjPrintf(
-	    "invalid base64 character \"%c\" at position %d",
-	    c, (int) (data - datastart - 1)));
-    TclDecrRefCount(resultObj);
-    return TCL_ERROR;
+    {
+	/* The decoder is byte-oriented. If we saw a byte that's not a
+	 * valid member of the base64 alphabet, it could be the lead byte
+	 * of a multi-byte character. */
+	Tcl_UniChar ch;
+
+	/* Safe because we know data is NUL-terminated */
+	TclUtfToUniChar((const char *)(data - 1), &ch);
+
+	Tcl_SetObjResult(interp, Tcl_ObjPrintf(
+		"invalid base64 character \"%c\" at position %d", ch,
+		(int) (data - datastart - 1)));
+	TclDecrRefCount(resultObj);
+	return TCL_ERROR;
+    }
 }
 
 /*
-- 
cgit v0.12


From c619f65f15cd0877658ae9a0c0e3748b8fc0896b Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Sat, 28 Mar 2020 15:35:12 +0000
Subject: [ffeb2097af] Restore the standard and original practice of ignoring
 invalid characters when decoding base64. Error only in -strict mode. See RFC
 2045.

---
 doc/binary.n        | 11 +++++++----
 generic/tclBinary.c |  9 ++-------
 tests/binary.test   |  5 ++++-
 3 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/doc/binary.n b/doc/binary.n
index 261b765..1f93c03 100644
--- a/doc/binary.n
+++ b/doc/binary.n
@@ -78,7 +78,9 @@ During decoding, the following options are supported:
 .TP
 \fB\-strict\fR
 .
-Instructs the decoder to throw an error if it encounters whitespace characters. Otherwise it ignores them.
+Instructs the decoder to throw an error if it encounters any characters
+that are not strictly part of the encoding itself. Otherwise it ignores them.
+RFC 2045 calls for base64 decoders to be non-strict.
 .RE
 .TP
 \fBhex\fR
@@ -92,7 +94,8 @@ options are supported:
 .TP
 \fB\-strict\fR
 .
-Instructs the decoder to throw an error if it encounters whitespace characters. Otherwise it ignores them.
+Instructs the decoder to throw an error if it encounters whitespace characters.
+Otherwise it ignores them.
 .RE
 .TP
 \fBuuencode\fR
@@ -122,8 +125,8 @@ During decoding, the following options are supported:
 .TP
 \fB\-strict\fR
 .
-Instructs the decoder to throw an error if it encounters unexpected whitespace
-characters. Otherwise it ignores them.
+Instructs the decoder to throw an error if it encounters unexpected
+whitespace characters.  Otherwise it ignores them.
 .PP
 Note that neither the encoder nor the decoder handle the header and footer of
 the uuencode format.
diff --git a/generic/tclBinary.c b/generic/tclBinary.c
index 3c47843..7db4f9e 100644
--- a/generic/tclBinary.c
+++ b/generic/tclBinary.c
@@ -2951,7 +2951,7 @@ BinaryDecode64(
 		if (c == '=' && i > 1) {
 		    value <<= 6;
 		    cut++;
-		} else if (!strict && TclIsSpaceProc(c)) {
+		} else if (!strict) {
 		    i--;
 		} else {
 		    goto bad64;
@@ -2975,7 +2975,7 @@ BinaryDecode64(
 		if (i) {
 		    cut++;
 		}
-	    } else if (strict || !TclIsSpaceProc(c)) {
+	    } else if (strict) {
 		goto bad64;
 	    } else {
 		i--;
@@ -2995,11 +2995,6 @@ BinaryDecode64(
 	    if (strict) {
 		goto bad64;
 	    }
-	    for (; data < dataend; data++) {
-		if (!TclIsSpaceProc(*data)) {
-		    goto bad64;
-		}
-	    }
 	}
     }
     Tcl_SetByteArrayLength(resultObj, cursor - begin - cut);
diff --git a/tests/binary.test b/tests/binary.test
index 6eb8d87..a2a9144 100644
--- a/tests/binary.test
+++ b/tests/binary.test
@@ -2709,7 +2709,7 @@ test binary-73.30 {binary decode base64} -body {
     list [string length [set r [binary decode base64 -strict WFla\n]]] $r
 } -returnCodes error -match glob -result {invalid base64 character *}
 test binary-73.31 {binary decode base64} -body {
-    list [string length [set r [binary decode base64 WA==WFla]]] $r
+    list [string length [set r [binary decode base64 -strict WA==WFla]]] $r
 } -returnCodes error -match glob -result {invalid base64 character *}
 test binary-73.32 {binary decode base64, bug [00d04c4f12]} -body {
     list \
@@ -2751,6 +2751,9 @@ test binary-73.36 {binary decode base64: check encoded & decoded equals original
     }
     join $r \n
 } -result {}
+test binary-73.37 {binary decode base64: Bug ffeb2097af} {
+    binary decode base64 [binary encode base64 -maxlen 3 -wrapchar : abc]
+} abc
 
 test binary-74.1 {binary encode uuencode} -body {
     binary encode uuencode
-- 
cgit v0.12


From 29cb05c855a633d5df7fba523faa62f2a3d027e4 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Sat, 28 Mar 2020 17:35:23 +0000
Subject: Optimize base64 decoder to work on bytearrays without string
 generation.

---
 generic/tclBinary.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/generic/tclBinary.c b/generic/tclBinary.c
index 7db4f9e..33c8382 100644
--- a/generic/tclBinary.c
+++ b/generic/tclBinary.c
@@ -2878,8 +2878,9 @@ BinaryDecode64(
     unsigned char *data, *datastart, *dataend, c = '\0';
     unsigned char *begin = NULL;
     unsigned char *cursor = NULL;
-    int strict = 0;
+    int pure, strict = 0;
     int i, index, size, cut = 0, count = 0;
+    Tcl_UniChar ch;
     enum { OPT_STRICT };
     static const char *const optStrings[] = { "-strict", NULL };
 
@@ -2900,8 +2901,9 @@ BinaryDecode64(
     }
 
     TclNewObj(resultObj);
-    datastart = data = (unsigned char *)
-	    TclGetStringFromObj(objv[objc - 1], &count);
+    pure = TclIsPureByteArray(objv[objc - 1]);
+    datastart = data = pure ? Tcl_GetByteArrayFromObj(objv[objc - 1], &count)
+	    : (unsigned char *) TclGetStringFromObj(objv[objc - 1], &count);
     dataend = data + count;
     size = ((count + 3) & ~3) * 3 / 4;
     begin = cursor = Tcl_SetByteArrayLength(resultObj, size);
@@ -3002,21 +3004,22 @@ BinaryDecode64(
     return TCL_OK;
 
   bad64:
-    {
+    if (pure) {
+	ch = c;
+    } else {
 	/* The decoder is byte-oriented. If we saw a byte that's not a
 	 * valid member of the base64 alphabet, it could be the lead byte
 	 * of a multi-byte character. */
-	Tcl_UniChar ch;
 
 	/* Safe because we know data is NUL-terminated */
 	TclUtfToUniChar((const char *)(data - 1), &ch);
-
-	Tcl_SetObjResult(interp, Tcl_ObjPrintf(
-		"invalid base64 character \"%c\" at position %d", ch,
-		(int) (data - datastart - 1)));
-	TclDecrRefCount(resultObj);
-	return TCL_ERROR;
     }
+
+    Tcl_SetObjResult(interp, Tcl_ObjPrintf(
+	    "invalid base64 character \"%c\" at position %d", ch,
+	    (int) (data - datastart - 1)));
+    TclDecrRefCount(resultObj);
+    return TCL_ERROR;
 }
 
 /*
-- 
cgit v0.12


From 65c59cb146dd2ee5d7336f95cd8e3323661b7c21 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Sat, 28 Mar 2020 18:23:43 +0000
Subject: Missing error codes from decoder routines.

---
 generic/tclBinary.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/generic/tclBinary.c b/generic/tclBinary.c
index 33c8382..a85c045 100644
--- a/generic/tclBinary.c
+++ b/generic/tclBinary.c
@@ -2443,6 +2443,7 @@ BinaryDecodeHex(
     Tcl_SetObjResult(interp, Tcl_ObjPrintf(
 	    "invalid hexadecimal digit \"%c\" at position %d",
 	    c, (int) (data - datastart - 1)));
+    Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "INVALID", NULL);
     return TCL_ERROR;
 }
 
@@ -3018,6 +3019,7 @@ BinaryDecode64(
     Tcl_SetObjResult(interp, Tcl_ObjPrintf(
 	    "invalid base64 character \"%c\" at position %d", ch,
 	    (int) (data - datastart - 1)));
+    Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "INVALID", NULL);
     TclDecrRefCount(resultObj);
     return TCL_ERROR;
 }
-- 
cgit v0.12


From beae292420f249c522dbbf9526de96f5f31bd71b Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Sat, 28 Mar 2020 18:28:42 +0000
Subject: Revise comment that was a plain lie.

---
 generic/tclBinary.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/generic/tclBinary.c b/generic/tclBinary.c
index a85c045..ecd3c6b 100644
--- a/generic/tclBinary.c
+++ b/generic/tclBinary.c
@@ -2452,16 +2452,10 @@ BinaryDecodeHex(
  *
  * BinaryEncode64 --
  *
- *	This implements a generic 6 bit binary encoding. Input is broken into
- *	6 bit chunks and a lookup table passed in via clientData is used to
- *	turn these values into output characters. This is used to implement
- *	base64 binary encodings.
+ *	This procedure implements the "binary encode base64" Tcl command.
  *
  * Results:
- *	Interp result set to an encoded byte array object
- *
- * Side effects:
- *	None
+ *	The base64 encoded value prescribed by the input arguments.
  *
  *----------------------------------------------------------------------
  */
-- 
cgit v0.12


From 65c57e8170c646d921d1400c3dcbc04cbd8a3372 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Sat, 28 Mar 2020 18:52:16 +0000
Subject: Make sure maxlen value does not rely on ordering of options.

---
 generic/tclBinary.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/generic/tclBinary.c b/generic/tclBinary.c
index ecd3c6b..93aeee3 100644
--- a/generic/tclBinary.c
+++ b/generic/tclBinary.c
@@ -2517,12 +2517,12 @@ BinaryEncode64(
 	    break;
 	case OPT_WRAPCHAR:
 	    wrapchar = Tcl_GetStringFromObj(objv[i + 1], &wrapcharlen);
-	    if (wrapcharlen == 0) {
-		maxlen = 0;
-	    }
 	    break;
 	}
     }
+    if (wrapcharlen == 0) {
+	maxlen = 0;
+    }
 
     resultObj = Tcl_NewObj();
     data = Tcl_GetByteArrayFromObj(objv[objc - 1], &count);
-- 
cgit v0.12


From a32dffe303445fed997b22794752d372bd997399 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Sat, 28 Mar 2020 19:21:56 +0000
Subject: [8edfcedfa0] [binary encode base64] build a string instead of a
 bytearray whenever it might be required to get the right result.

---
 generic/tclBinary.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/generic/tclBinary.c b/generic/tclBinary.c
index 93aeee3..e4d7365 100644
--- a/generic/tclBinary.c
+++ b/generic/tclBinary.c
@@ -2484,11 +2484,11 @@ BinaryEncode64(
     Tcl_Obj *const objv[])
 {
     Tcl_Obj *resultObj;
-    unsigned char *data, *cursor, *limit;
+    unsigned char *data, *limit;
     int maxlen = 0;
     const char *wrapchar = "\n";
     int wrapcharlen = 1;
-    int offset, i, index, size, outindex = 0, count = 0;
+    int offset, i, index, size, outindex = 0, count = 0, purewrap = 1;
     enum { OPT_MAXLEN, OPT_WRAPCHAR };
     static const char *const optStrings[] = { "-maxlen", "-wrapchar", NULL };
 
@@ -2516,7 +2516,13 @@ BinaryEncode64(
 	    }
 	    break;
 	case OPT_WRAPCHAR:
-	    wrapchar = Tcl_GetStringFromObj(objv[i + 1], &wrapcharlen);
+	    purewrap = TclIsPureByteArray(objv[i + 1]);
+	    if (purewrap) {
+		wrapchar = (const char *) Tcl_GetByteArrayFromObj(
+			objv[i + 1], &wrapcharlen);
+	    } else {
+		wrapchar = Tcl_GetStringFromObj(objv[i + 1], &wrapcharlen);
+	    }
 	    break;
 	}
     }
@@ -2527,6 +2533,8 @@ BinaryEncode64(
     resultObj = Tcl_NewObj();
     data = Tcl_GetByteArrayFromObj(objv[objc - 1], &count);
     if (count > 0) {
+	unsigned char *cursor = NULL;
+
 	size = (((count * 4) / 3) + 3) & ~3;	/* ensure 4 byte chunks */
 	if (maxlen > 0 && size > maxlen) {
 	    int adjusted = size + (wrapcharlen * (size / maxlen));
@@ -2535,8 +2543,17 @@ BinaryEncode64(
 		adjusted -= wrapcharlen;
 	    }
 	    size = adjusted;
+
+	    if (purewrap == 0) {
+		/* Wrapchar is (possibly) non-byte, so build result as
+		 * general string, not bytearray */
+		Tcl_SetObjLength(resultObj, size);
+		cursor = (unsigned char *) TclGetString(resultObj);
+	    }
+	}
+	if (cursor == NULL) {
+	    cursor = Tcl_SetByteArrayLength(resultObj, size);
 	}
-	cursor = Tcl_SetByteArrayLength(resultObj, size);
 	limit = cursor + size;
 	for (offset = 0; offset < count; offset += 3) {
 	    unsigned char d[3] = {0, 0, 0};
-- 
cgit v0.12


From e7e6720ba42b35b01294a33db00b2d80a3ca1fab Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Sat, 28 Mar 2020 19:25:54 +0000
Subject: Add a test for fixed bug.

---
 tests/binary.test | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/binary.test b/tests/binary.test
index a2a9144..399a07c 100644
--- a/tests/binary.test
+++ b/tests/binary.test
@@ -2624,6 +2624,9 @@ test binary-72.27 {binary encode base64} -body {
 test binary-72.28 {binary encode base64} -body {
     binary encode base64 -maxlen 6 -wrapchar 0123456789 abcabcabc
 } -result {YWJjYW0123456789JjYWJj}
+test binary-72.29 {binary encode base64} {
+    string length [binary encode base64 -maxlen 3 -wrapchar \xca abc]
+} 5
 
 test binary-73.1 {binary decode base64} -body {
     binary decode base64
-- 
cgit v0.12