From 74657995c7bd5b067100a26e387306888f5f6134 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 27 Mar 2020 21:21:28 +0000 Subject: Improve error reporting. If codepoint looks negative, bad char is reported as REPLACEMENT CHARACTER, which is wrong, therefore not helpful. --- generic/tclBinary.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 81c56e7..1dd1081 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -3009,7 +3009,7 @@ BinaryDecode64( bad64: Tcl_SetObjResult(interp, Tcl_ObjPrintf( "invalid base64 character \"%c\" at position %d", - (char) c, (int) (data - datastart - 1))); + c, (int) (data - datastart - 1))); TclDecrRefCount(resultObj); return TCL_ERROR; } -- cgit v0.12 From 3cd192c81cef20472a54a3c9afefc28361a0e316 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 27 Mar 2020 21:52:42 +0000 Subject: Further improvement. Report invalid multi-byte characters accurately. --- generic/tclBinary.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 1dd1081..3c47843 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -3007,11 +3007,21 @@ BinaryDecode64( return TCL_OK; bad64: - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "invalid base64 character \"%c\" at position %d", - c, (int) (data - datastart - 1))); - TclDecrRefCount(resultObj); - return TCL_ERROR; + { + /* The decoder is byte-oriented. If we saw a byte that's not a + * valid member of the base64 alphabet, it could be the lead byte + * of a multi-byte character. */ + Tcl_UniChar ch; + + /* Safe because we know data is NUL-terminated */ + TclUtfToUniChar((const char *)(data - 1), &ch); + + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "invalid base64 character \"%c\" at position %d", ch, + (int) (data - datastart - 1))); + TclDecrRefCount(resultObj); + return TCL_ERROR; + } } /* -- cgit v0.12 From c619f65f15cd0877658ae9a0c0e3748b8fc0896b Mon Sep 17 00:00:00 2001 From: dgp Date: Sat, 28 Mar 2020 15:35:12 +0000 Subject: [ffeb2097af] Restore the standard and original practice of ignoring invalid characters when decoding base64. Error only in -strict mode. See RFC 2045. --- doc/binary.n | 11 +++++++---- generic/tclBinary.c | 9 ++------- tests/binary.test | 5 ++++- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/doc/binary.n b/doc/binary.n index 261b765..1f93c03 100644 --- a/doc/binary.n +++ b/doc/binary.n @@ -78,7 +78,9 @@ During decoding, the following options are supported: .TP \fB\-strict\fR . -Instructs the decoder to throw an error if it encounters whitespace characters. Otherwise it ignores them. +Instructs the decoder to throw an error if it encounters any characters +that are not strictly part of the encoding itself. Otherwise it ignores them. +RFC 2045 calls for base64 decoders to be non-strict. .RE .TP \fBhex\fR @@ -92,7 +94,8 @@ options are supported: .TP \fB\-strict\fR . -Instructs the decoder to throw an error if it encounters whitespace characters. Otherwise it ignores them. +Instructs the decoder to throw an error if it encounters whitespace characters. +Otherwise it ignores them. .RE .TP \fBuuencode\fR @@ -122,8 +125,8 @@ During decoding, the following options are supported: .TP \fB\-strict\fR . -Instructs the decoder to throw an error if it encounters unexpected whitespace -characters. Otherwise it ignores them. +Instructs the decoder to throw an error if it encounters unexpected +whitespace characters. Otherwise it ignores them. .PP Note that neither the encoder nor the decoder handle the header and footer of the uuencode format. diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 3c47843..7db4f9e 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -2951,7 +2951,7 @@ BinaryDecode64( if (c == '=' && i > 1) { value <<= 6; cut++; - } else if (!strict && TclIsSpaceProc(c)) { + } else if (!strict) { i--; } else { goto bad64; @@ -2975,7 +2975,7 @@ BinaryDecode64( if (i) { cut++; } - } else if (strict || !TclIsSpaceProc(c)) { + } else if (strict) { goto bad64; } else { i--; @@ -2995,11 +2995,6 @@ BinaryDecode64( if (strict) { goto bad64; } - for (; data < dataend; data++) { - if (!TclIsSpaceProc(*data)) { - goto bad64; - } - } } } Tcl_SetByteArrayLength(resultObj, cursor - begin - cut); diff --git a/tests/binary.test b/tests/binary.test index 6eb8d87..a2a9144 100644 --- a/tests/binary.test +++ b/tests/binary.test @@ -2709,7 +2709,7 @@ test binary-73.30 {binary decode base64} -body { list [string length [set r [binary decode base64 -strict WFla\n]]] $r } -returnCodes error -match glob -result {invalid base64 character *} test binary-73.31 {binary decode base64} -body { - list [string length [set r [binary decode base64 WA==WFla]]] $r + list [string length [set r [binary decode base64 -strict WA==WFla]]] $r } -returnCodes error -match glob -result {invalid base64 character *} test binary-73.32 {binary decode base64, bug [00d04c4f12]} -body { list \ @@ -2751,6 +2751,9 @@ test binary-73.36 {binary decode base64: check encoded & decoded equals original } join $r \n } -result {} +test binary-73.37 {binary decode base64: Bug ffeb2097af} { + binary decode base64 [binary encode base64 -maxlen 3 -wrapchar : abc] +} abc test binary-74.1 {binary encode uuencode} -body { binary encode uuencode -- cgit v0.12