From 9a007031fac7f6b4f05a6b7c2bfcb123d823decf Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 27 Mar 2020 10:33:08 +0000 Subject: Fix [767e070d35]: Tcl_GetRange and Tcl_GetUniChar do not validate index inputs --- doc/StringObj.3 | 3 +- generic/tclCmdMZ.c | 2 +- generic/tclInt.h | 2 +- generic/tclStringObj.c | 177 ++++++++++++++++++++++++++++++++----------------- 4 files changed, 119 insertions(+), 65 deletions(-) diff --git a/doc/StringObj.3 b/doc/StringObj.3 index c23706f..7870b21 100644 --- a/doc/StringObj.3 +++ b/doc/StringObj.3 @@ -204,7 +204,8 @@ where the caller does not need the length of the unicode string representation. .PP \fBTcl_GetUniChar\fR returns the \fIindex\fR'th character in the -value's Unicode representation. +value's Unicode representation. The index is assumed to be in the +appropriate range. .PP \fBTcl_GetRange\fR returns a newly created value comprised of the characters between \fIfirst\fR and \fIlast\fR (inclusive) in the diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 75989c7..d344678 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -2446,7 +2446,7 @@ StringRevCmd( return TCL_ERROR; } - Tcl_SetObjResult(interp, TclStringObjReverse(objv[1])); + Tcl_SetObjResult(interp, TclStringReverse(objv[1])); return TCL_OK; } diff --git a/generic/tclInt.h b/generic/tclInt.h index 39fb740..69cebe5 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3168,7 +3168,7 @@ MODULE_SCOPE int TclStringMatch(const char *str, int strLen, const char *pattern, int ptnLen, int flags); MODULE_SCOPE int TclStringMatchObj(Tcl_Obj *stringObj, Tcl_Obj *patternObj, int flags); -MODULE_SCOPE Tcl_Obj * TclStringObjReverse(Tcl_Obj *objPtr); +MODULE_SCOPE Tcl_Obj * TclStringReverse(Tcl_Obj *objPtr); MODULE_SCOPE void TclSubstCompile(Tcl_Interp *interp, const char *bytes, int numBytes, int flags, int line, struct CompileEnv *envPtr); diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 1534a8b..d4f45d7 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -149,9 +149,9 @@ GrowStringBuffer( objPtr->bytes = NULL; } if (flag == 0 || stringPtr->allocated > 0) { - attempt = 2 * needed; - if (attempt >= 0) { - ptr = attemptckrealloc(objPtr->bytes, attempt + 1); + if (needed <= INT_MAX / 2) { + attempt = 2 * needed; + ptr = (char *)attemptckrealloc(objPtr->bytes, attempt + 1); } if (ptr == NULL) { /* @@ -164,7 +164,7 @@ GrowStringBuffer( int growth = (int) ((extra > limit) ? limit : extra); attempt = needed + growth; - ptr = attemptckrealloc(objPtr->bytes, attempt + 1); + ptr = (char *)attemptckrealloc(objPtr->bytes, attempt + 1); } } if (ptr == NULL) { @@ -173,7 +173,7 @@ GrowStringBuffer( */ attempt = needed; - ptr = ckrealloc(objPtr->bytes, attempt + 1); + ptr = (char *)ckrealloc(objPtr->bytes, attempt + 1); } objPtr->bytes = ptr; stringPtr->allocated = attempt; @@ -199,8 +199,8 @@ GrowUnicodeBuffer( * Subsequent appends - apply the growth algorithm. */ - attempt = 2 * needed; - if (attempt >= 0 && attempt <= STRING_MAXCHARS) { + if (needed <= STRING_MAXCHARS / 2) { + attempt = 2 * needed; ptr = stringAttemptRealloc(stringPtr, attempt); } if (ptr == NULL) { @@ -418,6 +418,15 @@ Tcl_GetCharLength( int numChars; /* + * Quick, no-shimmer return for short string reps. + */ + + if ((objPtr->bytes) && (objPtr->length < 2)) { + /* 0 bytes -> 0 chars; 1 byte -> 1 char */ + return objPtr->length; + } + + /* * Optimize the case where we're really dealing with a bytearray object; * we don't need to convert to a string to perform the get-length operation. * @@ -434,7 +443,6 @@ Tcl_GetCharLength( return length; } - /* * OK, need to work with the object as a string. */ @@ -465,8 +473,6 @@ Tcl_GetCharLength( } return numChars; } - - /* *---------------------------------------------------------------------- @@ -486,8 +492,8 @@ Tcl_GetCharLength( */ int TclCheckEmptyString ( - Tcl_Obj *objPtr -) { + Tcl_Obj *objPtr) +{ int length = -1; if (objPtr->bytes == tclEmptyStringRep) { @@ -515,8 +521,8 @@ TclCheckEmptyString ( * * Tcl_GetUniChar -- * - * Get the index'th Unicode character from the String object. The index - * is assumed to be in the appropriate range. + * Get the index'th Unicode character from the String object. If index + * is out of range, the result = 0xFFFD; * * Results: * Returns the index'th Unicode character in the Object. @@ -534,15 +540,22 @@ Tcl_GetUniChar( int index) /* Get the index'th Unicode character. */ { String *stringPtr; + int length; + + if (index < 0) { + return 0xFFFD; + } /* * Optimize the case where we're really dealing with a bytearray object - * without string representation; we don't need to convert to a string to - * perform the indexing operation. + * we don't need to convert to a string to perform the indexing operation. */ if (TclIsPureByteArray(objPtr)) { - unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, NULL); + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); + if (index >= length) { + return 0xFFFD; + } return (Tcl_UniChar) bytes[index]; } @@ -568,6 +581,10 @@ Tcl_GetUniChar( FillUnicodeRep(objPtr); stringPtr = GET_STRING(objPtr); } + + if (index >= stringPtr->numChars) { + return 0xFFFD; + } return stringPtr->unicode[index]; } @@ -668,17 +685,27 @@ Tcl_GetRange( { Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ String *stringPtr; + int length; + + if (first < 0) { + first = 0; + } /* * Optimize the case where we're really dealing with a bytearray object - * without string representation; we don't need to convert to a string to - * perform the substring operation. + * we don't need to convert to a string to perform the substring operation. */ if (TclIsPureByteArray(objPtr)) { - unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, NULL); + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); - return Tcl_NewByteArrayObj(bytes+first, last-first+1); + if (last >= length) { + last = length - 1; + } + if (last < first) { + return Tcl_NewObj(); + } + return Tcl_NewByteArrayObj(bytes + first, last - first + 1); } /* @@ -697,6 +724,12 @@ Tcl_GetRange( TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length); } if (stringPtr->numChars == objPtr->length) { + if (last >= stringPtr->numChars) { + last = stringPtr->numChars - 1; + } + if (last < first) { + return Tcl_NewObj(); + } newObjPtr = Tcl_NewStringObj(objPtr->bytes + first, last-first+1); /* @@ -711,19 +744,25 @@ Tcl_GetRange( FillUnicodeRep(objPtr); stringPtr = GET_STRING(objPtr); } - + if (last > stringPtr->numChars) { + last = stringPtr->numChars; + } + if (last < first) { + return Tcl_NewObj(); + } #if TCL_UTF_MAX == 4 - /* See: bug [11ae2be95dac9417] */ - if ((first>0) && ((stringPtr->unicode[first]&0xFC00) == 0xDC00) - && ((stringPtr->unicode[first-1]&0xFC00) == 0xD800)) { - ++first; - } - if ((last+1numChars) && ((stringPtr->unicode[last+1]&0xFC00) == 0xDC00) - && ((stringPtr->unicode[last]&0xFC00) == 0xD800)) { - ++last; - } + /* See: bug [11ae2be95dac9417] */ + if ((first > 0) && ((stringPtr->unicode[first] & 0xFC00) == 0xDC00) + && ((stringPtr->unicode[first-1] & 0xFC00) == 0xD800)) { + ++first; + } + if ((last + 1 < stringPtr->numChars) + && ((stringPtr->unicode[last+1] & 0xFC00) == 0xDC00) + && ((stringPtr->unicode[last] & 0xFC00) == 0xD800)) { + ++last; + } #endif - return Tcl_NewUnicodeObj(stringPtr->unicode + first, last-first+1); + return Tcl_NewUnicodeObj(stringPtr->unicode + first, last - first + 1); } /* @@ -840,9 +879,9 @@ Tcl_SetObjLength( * Need to enlarge the buffer. */ if (objPtr->bytes == tclEmptyStringRep) { - objPtr->bytes = ckalloc(length + 1); + objPtr->bytes = (char *)ckalloc(length + 1); } else { - objPtr->bytes = ckrealloc(objPtr->bytes, length + 1); + objPtr->bytes = (char *)ckrealloc(objPtr->bytes, length + 1); } stringPtr->allocated = length; } @@ -946,9 +985,9 @@ Tcl_AttemptSetObjLength( char *newBytes; if (objPtr->bytes == tclEmptyStringRep) { - newBytes = attemptckalloc(length + 1); + newBytes = (char *)attemptckalloc(length + 1); } else { - newBytes = attemptckrealloc(objPtr->bytes, length + 1); + newBytes = (char *)attemptckrealloc(objPtr->bytes, length + 1); } if (newBytes == NULL) { return 0; @@ -1291,39 +1330,45 @@ Tcl_AppendObjToObj( if ((TclIsPureByteArray(objPtr) || objPtr->bytes == tclEmptyStringRep) && TclIsPureByteArray(appendObjPtr)) { - /* * You might expect the code here to be * * bytes = Tcl_GetByteArrayFromObj(appendObjPtr, &length); * TclAppendBytesToByteArray(objPtr, bytes, length); * - * and essentially all of the time that would be fine. However, - * it would run into trouble in the case where objPtr and - * appendObjPtr point to the same thing. That may never be a - * good idea. It seems to violate Copy On Write, and we don't - * have any tests for the situation, since making any Tcl commands - * that call Tcl_AppendObjToObj() do that appears impossible - * (They honor Copy On Write!). For the sake of extensions that - * go off into that realm, though, here's a more complex approach - * that can handle all the cases. + * and essentially all of the time that would be fine. However, it + * would run into trouble in the case where objPtr and appendObjPtr + * point to the same thing. That may never be a good idea. It seems to + * violate Copy On Write, and we don't have any tests for the + * situation, since making any Tcl commands that call + * Tcl_AppendObjToObj() do that appears impossible (They honor Copy On + * Write!). For the sake of extensions that go off into that realm, + * though, here's a more complex approach that can handle all the + * cases. + * + * First, get the lengths. */ - /* Get lengths */ int lengthSrc; (void) Tcl_GetByteArrayFromObj(objPtr, &length); (void) Tcl_GetByteArrayFromObj(appendObjPtr, &lengthSrc); - /* Grow buffer enough for the append */ + /* + * Grow buffer enough for the append. + */ + TclAppendBytesToByteArray(objPtr, NULL, lengthSrc); - /* Reset objPtr back to the original value */ + /* + * Reset objPtr back to the original value. + */ + Tcl_SetByteArrayLength(objPtr, length); /* - * Now do the append knowing that buffer growth cannot cause - * any trouble. + * Now do the append knowing that buffer growth cannot cause any + * trouble. */ TclAppendBytesToByteArray(objPtr, @@ -1375,6 +1420,7 @@ Tcl_AppendObjToObj( numChars = stringPtr->numChars; if ((numChars >= 0) && (appendObjPtr->typePtr == &tclStringType)) { String *appendStringPtr = GET_STRING(appendObjPtr); + appendNumChars = appendStringPtr->numChars; } @@ -2458,7 +2504,7 @@ Tcl_AppendFormatToObj( /* *--------------------------------------------------------------------------- * - * Tcl_Format-- + * Tcl_Format -- * * Results: * A refcount zero Tcl_Obj. @@ -2723,7 +2769,7 @@ TclGetStringStorage( /* *--------------------------------------------------------------------------- * - * TclStringObjReverse -- + * TclStringReverse -- * * Implements the [string reverse] operation. * @@ -2742,18 +2788,20 @@ static void ReverseBytes( unsigned char *to, /* Copy bytes into here... */ unsigned char *from, /* ...from here... */ - int count) /* Until this many are copied, */ + int count) /* Until this many are copied, */ /* reversing as you go. */ { unsigned char *src = from + count; + if (to == from) { /* Reversing in place */ while (--src > to) { unsigned char c = *src; + *src = *to; *to++ = c; } - } else { + } else { while (--src >= from) { *to++ = *src; } @@ -2761,7 +2809,7 @@ ReverseBytes( } Tcl_Obj * -TclStringObjReverse( +TclStringReverse( Tcl_Obj *objPtr) { String *stringPtr; @@ -2800,7 +2848,10 @@ TclStringObjReverse( *to++ = *src; } } else { - /* Reversing in place */ + /* + * Reversing in place. + */ + while (--src > from) { ch = *src; *src = *from; @@ -2824,20 +2875,22 @@ TclStringObjReverse( /* * Either numChars == -1 and we don't know how many chars are * represented by objPtr->bytes and we need Pass 1 just in case, - * or numChars >= 0 and we know we have fewer chars than bytes, - * so we know there's a multibyte character needing Pass 1. + * or numChars >= 0 and we know we have fewer chars than bytes, so + * we know there's a multibyte character needing Pass 1. * * Pass 1. Reverse the bytes of each multi-byte character. */ + int charCount = 0; int bytesLeft = numBytes; while (bytesLeft) { /* - * NOTE: We know that the from buffer is NUL-terminated. - * It's part of the contract for objPtr->bytes values. - * Thus, we can skip calling Tcl_UtfCharComplete() here. + * NOTE: We know that the from buffer is NUL-terminated. It's + * part of the contract for objPtr->bytes values. Thus, we can + * skip calling Tcl_UtfCharComplete() here. */ + int bytesInChar = TclUtfToUniChar(from, &ch); ReverseBytes((unsigned char *)to, (unsigned char *)from, -- cgit v0.12 From 5c2d281edc798baa53966b4b5a9eadcb6a2e9ac9 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 27 Mar 2020 12:05:39 +0000 Subject: Fix harmless gcc warning, when compiling on Linux --- unix/tclUnixPort.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/unix/tclUnixPort.h b/unix/tclUnixPort.h index 9961613..20e1ddc 100644 --- a/unix/tclUnixPort.h +++ b/unix/tclUnixPort.h @@ -118,11 +118,11 @@ typedef off_t Tcl_SeekOffset; extern int TclOSstat(const char *name, void *statBuf); extern int TclOSlstat(const char *name, void *statBuf); #elif defined(HAVE_STRUCT_STAT64) && !defined(__APPLE__) -# define TclOSstat stat64 -# define TclOSlstat lstat64 +# define TclOSstat(name, buf) stat64(name, (struct stat64 *)buf) +# define TclOSlstat(name,buf) lstat64(name, (struct stat64 *)buf) #else -# define TclOSstat stat -# define TclOSlstat lstat +# define TclOSstat(name, buf) stat(name, (struct stat *)buf) +# define TclOSlstat(name, buf) lstat(name, (struct stat *)buf) #endif /* -- cgit v0.12 From 0a00f85f84c7117557104cdd16c161c409a6c8d5 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 27 Mar 2020 16:24:30 +0000 Subject: Looks like this little hack is no longer necessary on current 32-bit Cygwin. --- unix/tclUnixPort.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/unix/tclUnixPort.h b/unix/tclUnixPort.h index 20e1ddc..0ae4f25 100644 --- a/unix/tclUnixPort.h +++ b/unix/tclUnixPort.h @@ -86,7 +86,6 @@ typedef off_t Tcl_SeekOffset; #endif #ifdef __CYGWIN__ - /* Make some symbols available without including */ # define DWORD unsigned int # define CP_UTF8 65001 @@ -107,13 +106,7 @@ typedef off_t Tcl_SeekOffset; __declspec(dllimport) extern __stdcall int GetLastError(void); __declspec(dllimport) extern __stdcall int GetFileAttributesW(const WCHAR *); __declspec(dllimport) extern __stdcall int SetFileAttributesW(const WCHAR *, int); - __declspec(dllimport) extern int cygwin_conv_path(int, const void *, void *, int); -/* On Cygwin, the environment is imported from the Cygwin DLL. */ -#ifndef __x86_64__ -# define environ __cygwin_environ - extern char **__cygwin_environ; -#endif # define timezone _timezone extern int TclOSstat(const char *name, void *statBuf); extern int TclOSlstat(const char *name, void *statBuf); -- cgit v0.12 From 2b68efb386e03de81ea2f0c97f7233ad8e0f183d Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 27 Mar 2020 18:02:02 +0000 Subject: Repair bad test labels. --- tests/binary.test | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/binary.test b/tests/binary.test index b872a30..6eb8d87 100644 --- a/tests/binary.test +++ b/tests/binary.test @@ -2637,19 +2637,19 @@ test binary-73.3 {binary decode base64} -body { test binary-73.4 {binary decode base64} -body { binary decode base64 [string repeat YWJj 20] } -result [string repeat abc 20] -test binary-73.5 {binary encode base64} -body { +test binary-73.5 {binary decode base64} -body { binary decode base64 AAECAwQAAQID } -result "\0\1\2\3\4\0\1\2\3" -test binary-73.6 {binary encode base64} -body { +test binary-73.6 {binary decode base64} -body { binary decode base64 AA== } -result "\0" -test binary-73.7 {binary encode base64} -body { +test binary-73.7 {binary decode base64} -body { binary decode base64 AAA= } -result "\0\0" -test binary-73.8 {binary encode base64} -body { +test binary-73.8 {binary decode base64} -body { binary decode base64 AAAA } -result "\0\0\0" -test binary-73.9 {binary encode base64} -body { +test binary-73.9 {binary decode base64} -body { binary decode base64 AAAAAA== } -result "\0\0\0\0" test binary-73.10 {binary decode base64} -body { -- cgit v0.12 From 74657995c7bd5b067100a26e387306888f5f6134 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 27 Mar 2020 21:21:28 +0000 Subject: Improve error reporting. If codepoint looks negative, bad char is reported as REPLACEMENT CHARACTER, which is wrong, therefore not helpful. --- generic/tclBinary.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 81c56e7..1dd1081 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -3009,7 +3009,7 @@ BinaryDecode64( bad64: Tcl_SetObjResult(interp, Tcl_ObjPrintf( "invalid base64 character \"%c\" at position %d", - (char) c, (int) (data - datastart - 1))); + c, (int) (data - datastart - 1))); TclDecrRefCount(resultObj); return TCL_ERROR; } -- cgit v0.12 From 3cd192c81cef20472a54a3c9afefc28361a0e316 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 27 Mar 2020 21:52:42 +0000 Subject: Further improvement. Report invalid multi-byte characters accurately. --- generic/tclBinary.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 1dd1081..3c47843 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -3007,11 +3007,21 @@ BinaryDecode64( return TCL_OK; bad64: - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "invalid base64 character \"%c\" at position %d", - c, (int) (data - datastart - 1))); - TclDecrRefCount(resultObj); - return TCL_ERROR; + { + /* The decoder is byte-oriented. If we saw a byte that's not a + * valid member of the base64 alphabet, it could be the lead byte + * of a multi-byte character. */ + Tcl_UniChar ch; + + /* Safe because we know data is NUL-terminated */ + TclUtfToUniChar((const char *)(data - 1), &ch); + + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "invalid base64 character \"%c\" at position %d", ch, + (int) (data - datastart - 1))); + TclDecrRefCount(resultObj); + return TCL_ERROR; + } } /* -- cgit v0.12 From c619f65f15cd0877658ae9a0c0e3748b8fc0896b Mon Sep 17 00:00:00 2001 From: dgp Date: Sat, 28 Mar 2020 15:35:12 +0000 Subject: [ffeb2097af] Restore the standard and original practice of ignoring invalid characters when decoding base64. Error only in -strict mode. See RFC 2045. --- doc/binary.n | 11 +++++++---- generic/tclBinary.c | 9 ++------- tests/binary.test | 5 ++++- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/doc/binary.n b/doc/binary.n index 261b765..1f93c03 100644 --- a/doc/binary.n +++ b/doc/binary.n @@ -78,7 +78,9 @@ During decoding, the following options are supported: .TP \fB\-strict\fR . -Instructs the decoder to throw an error if it encounters whitespace characters. Otherwise it ignores them. +Instructs the decoder to throw an error if it encounters any characters +that are not strictly part of the encoding itself. Otherwise it ignores them. +RFC 2045 calls for base64 decoders to be non-strict. .RE .TP \fBhex\fR @@ -92,7 +94,8 @@ options are supported: .TP \fB\-strict\fR . -Instructs the decoder to throw an error if it encounters whitespace characters. Otherwise it ignores them. +Instructs the decoder to throw an error if it encounters whitespace characters. +Otherwise it ignores them. .RE .TP \fBuuencode\fR @@ -122,8 +125,8 @@ During decoding, the following options are supported: .TP \fB\-strict\fR . -Instructs the decoder to throw an error if it encounters unexpected whitespace -characters. Otherwise it ignores them. +Instructs the decoder to throw an error if it encounters unexpected +whitespace characters. Otherwise it ignores them. .PP Note that neither the encoder nor the decoder handle the header and footer of the uuencode format. diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 3c47843..7db4f9e 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -2951,7 +2951,7 @@ BinaryDecode64( if (c == '=' && i > 1) { value <<= 6; cut++; - } else if (!strict && TclIsSpaceProc(c)) { + } else if (!strict) { i--; } else { goto bad64; @@ -2975,7 +2975,7 @@ BinaryDecode64( if (i) { cut++; } - } else if (strict || !TclIsSpaceProc(c)) { + } else if (strict) { goto bad64; } else { i--; @@ -2995,11 +2995,6 @@ BinaryDecode64( if (strict) { goto bad64; } - for (; data < dataend; data++) { - if (!TclIsSpaceProc(*data)) { - goto bad64; - } - } } } Tcl_SetByteArrayLength(resultObj, cursor - begin - cut); diff --git a/tests/binary.test b/tests/binary.test index 6eb8d87..a2a9144 100644 --- a/tests/binary.test +++ b/tests/binary.test @@ -2709,7 +2709,7 @@ test binary-73.30 {binary decode base64} -body { list [string length [set r [binary decode base64 -strict WFla\n]]] $r } -returnCodes error -match glob -result {invalid base64 character *} test binary-73.31 {binary decode base64} -body { - list [string length [set r [binary decode base64 WA==WFla]]] $r + list [string length [set r [binary decode base64 -strict WA==WFla]]] $r } -returnCodes error -match glob -result {invalid base64 character *} test binary-73.32 {binary decode base64, bug [00d04c4f12]} -body { list \ @@ -2751,6 +2751,9 @@ test binary-73.36 {binary decode base64: check encoded & decoded equals original } join $r \n } -result {} +test binary-73.37 {binary decode base64: Bug ffeb2097af} { + binary decode base64 [binary encode base64 -maxlen 3 -wrapchar : abc] +} abc test binary-74.1 {binary encode uuencode} -body { binary encode uuencode -- cgit v0.12 From 29cb05c855a633d5df7fba523faa62f2a3d027e4 Mon Sep 17 00:00:00 2001 From: dgp Date: Sat, 28 Mar 2020 17:35:23 +0000 Subject: Optimize base64 decoder to work on bytearrays without string generation. --- generic/tclBinary.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 7db4f9e..33c8382 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -2878,8 +2878,9 @@ BinaryDecode64( unsigned char *data, *datastart, *dataend, c = '\0'; unsigned char *begin = NULL; unsigned char *cursor = NULL; - int strict = 0; + int pure, strict = 0; int i, index, size, cut = 0, count = 0; + Tcl_UniChar ch; enum { OPT_STRICT }; static const char *const optStrings[] = { "-strict", NULL }; @@ -2900,8 +2901,9 @@ BinaryDecode64( } TclNewObj(resultObj); - datastart = data = (unsigned char *) - TclGetStringFromObj(objv[objc - 1], &count); + pure = TclIsPureByteArray(objv[objc - 1]); + datastart = data = pure ? Tcl_GetByteArrayFromObj(objv[objc - 1], &count) + : (unsigned char *) TclGetStringFromObj(objv[objc - 1], &count); dataend = data + count; size = ((count + 3) & ~3) * 3 / 4; begin = cursor = Tcl_SetByteArrayLength(resultObj, size); @@ -3002,21 +3004,22 @@ BinaryDecode64( return TCL_OK; bad64: - { + if (pure) { + ch = c; + } else { /* The decoder is byte-oriented. If we saw a byte that's not a * valid member of the base64 alphabet, it could be the lead byte * of a multi-byte character. */ - Tcl_UniChar ch; /* Safe because we know data is NUL-terminated */ TclUtfToUniChar((const char *)(data - 1), &ch); - - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "invalid base64 character \"%c\" at position %d", ch, - (int) (data - datastart - 1))); - TclDecrRefCount(resultObj); - return TCL_ERROR; } + + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "invalid base64 character \"%c\" at position %d", ch, + (int) (data - datastart - 1))); + TclDecrRefCount(resultObj); + return TCL_ERROR; } /* -- cgit v0.12 From 65c59cb146dd2ee5d7336f95cd8e3323661b7c21 Mon Sep 17 00:00:00 2001 From: dgp Date: Sat, 28 Mar 2020 18:23:43 +0000 Subject: Missing error codes from decoder routines. --- generic/tclBinary.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 33c8382..a85c045 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -2443,6 +2443,7 @@ BinaryDecodeHex( Tcl_SetObjResult(interp, Tcl_ObjPrintf( "invalid hexadecimal digit \"%c\" at position %d", c, (int) (data - datastart - 1))); + Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "INVALID", NULL); return TCL_ERROR; } @@ -3018,6 +3019,7 @@ BinaryDecode64( Tcl_SetObjResult(interp, Tcl_ObjPrintf( "invalid base64 character \"%c\" at position %d", ch, (int) (data - datastart - 1))); + Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "INVALID", NULL); TclDecrRefCount(resultObj); return TCL_ERROR; } -- cgit v0.12 From beae292420f249c522dbbf9526de96f5f31bd71b Mon Sep 17 00:00:00 2001 From: dgp Date: Sat, 28 Mar 2020 18:28:42 +0000 Subject: Revise comment that was a plain lie. --- generic/tclBinary.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/generic/tclBinary.c b/generic/tclBinary.c index a85c045..ecd3c6b 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -2452,16 +2452,10 @@ BinaryDecodeHex( * * BinaryEncode64 -- * - * This implements a generic 6 bit binary encoding. Input is broken into - * 6 bit chunks and a lookup table passed in via clientData is used to - * turn these values into output characters. This is used to implement - * base64 binary encodings. + * This procedure implements the "binary encode base64" Tcl command. * * Results: - * Interp result set to an encoded byte array object - * - * Side effects: - * None + * The base64 encoded value prescribed by the input arguments. * *---------------------------------------------------------------------- */ -- cgit v0.12 From 65c57e8170c646d921d1400c3dcbc04cbd8a3372 Mon Sep 17 00:00:00 2001 From: dgp Date: Sat, 28 Mar 2020 18:52:16 +0000 Subject: Make sure maxlen value does not rely on ordering of options. --- generic/tclBinary.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/generic/tclBinary.c b/generic/tclBinary.c index ecd3c6b..93aeee3 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -2517,12 +2517,12 @@ BinaryEncode64( break; case OPT_WRAPCHAR: wrapchar = Tcl_GetStringFromObj(objv[i + 1], &wrapcharlen); - if (wrapcharlen == 0) { - maxlen = 0; - } break; } } + if (wrapcharlen == 0) { + maxlen = 0; + } resultObj = Tcl_NewObj(); data = Tcl_GetByteArrayFromObj(objv[objc - 1], &count); -- cgit v0.12 From a32dffe303445fed997b22794752d372bd997399 Mon Sep 17 00:00:00 2001 From: dgp Date: Sat, 28 Mar 2020 19:21:56 +0000 Subject: [8edfcedfa0] [binary encode base64] build a string instead of a bytearray whenever it might be required to get the right result. --- generic/tclBinary.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 93aeee3..e4d7365 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -2484,11 +2484,11 @@ BinaryEncode64( Tcl_Obj *const objv[]) { Tcl_Obj *resultObj; - unsigned char *data, *cursor, *limit; + unsigned char *data, *limit; int maxlen = 0; const char *wrapchar = "\n"; int wrapcharlen = 1; - int offset, i, index, size, outindex = 0, count = 0; + int offset, i, index, size, outindex = 0, count = 0, purewrap = 1; enum { OPT_MAXLEN, OPT_WRAPCHAR }; static const char *const optStrings[] = { "-maxlen", "-wrapchar", NULL }; @@ -2516,7 +2516,13 @@ BinaryEncode64( } break; case OPT_WRAPCHAR: - wrapchar = Tcl_GetStringFromObj(objv[i + 1], &wrapcharlen); + purewrap = TclIsPureByteArray(objv[i + 1]); + if (purewrap) { + wrapchar = (const char *) Tcl_GetByteArrayFromObj( + objv[i + 1], &wrapcharlen); + } else { + wrapchar = Tcl_GetStringFromObj(objv[i + 1], &wrapcharlen); + } break; } } @@ -2527,6 +2533,8 @@ BinaryEncode64( resultObj = Tcl_NewObj(); data = Tcl_GetByteArrayFromObj(objv[objc - 1], &count); if (count > 0) { + unsigned char *cursor = NULL; + size = (((count * 4) / 3) + 3) & ~3; /* ensure 4 byte chunks */ if (maxlen > 0 && size > maxlen) { int adjusted = size + (wrapcharlen * (size / maxlen)); @@ -2535,8 +2543,17 @@ BinaryEncode64( adjusted -= wrapcharlen; } size = adjusted; + + if (purewrap == 0) { + /* Wrapchar is (possibly) non-byte, so build result as + * general string, not bytearray */ + Tcl_SetObjLength(resultObj, size); + cursor = (unsigned char *) TclGetString(resultObj); + } + } + if (cursor == NULL) { + cursor = Tcl_SetByteArrayLength(resultObj, size); } - cursor = Tcl_SetByteArrayLength(resultObj, size); limit = cursor + size; for (offset = 0; offset < count; offset += 3) { unsigned char d[3] = {0, 0, 0}; -- cgit v0.12 From e7e6720ba42b35b01294a33db00b2d80a3ca1fab Mon Sep 17 00:00:00 2001 From: dgp Date: Sat, 28 Mar 2020 19:25:54 +0000 Subject: Add a test for fixed bug. --- tests/binary.test | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/binary.test b/tests/binary.test index a2a9144..399a07c 100644 --- a/tests/binary.test +++ b/tests/binary.test @@ -2624,6 +2624,9 @@ test binary-72.27 {binary encode base64} -body { test binary-72.28 {binary encode base64} -body { binary encode base64 -maxlen 6 -wrapchar 0123456789 abcabcabc } -result {YWJjYW0123456789JjYWJj} +test binary-72.29 {binary encode base64} { + string length [binary encode base64 -maxlen 3 -wrapchar \xca abc] +} 5 test binary-73.1 {binary decode base64} -body { binary decode base64 -- cgit v0.12