From 29cb05c855a633d5df7fba523faa62f2a3d027e4 Mon Sep 17 00:00:00 2001 From: dgp Date: Sat, 28 Mar 2020 17:35:23 +0000 Subject: Optimize base64 decoder to work on bytearrays without string generation. --- generic/tclBinary.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 7db4f9e..33c8382 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -2878,8 +2878,9 @@ BinaryDecode64( unsigned char *data, *datastart, *dataend, c = '\0'; unsigned char *begin = NULL; unsigned char *cursor = NULL; - int strict = 0; + int pure, strict = 0; int i, index, size, cut = 0, count = 0; + Tcl_UniChar ch; enum { OPT_STRICT }; static const char *const optStrings[] = { "-strict", NULL }; @@ -2900,8 +2901,9 @@ BinaryDecode64( } TclNewObj(resultObj); - datastart = data = (unsigned char *) - TclGetStringFromObj(objv[objc - 1], &count); + pure = TclIsPureByteArray(objv[objc - 1]); + datastart = data = pure ? Tcl_GetByteArrayFromObj(objv[objc - 1], &count) + : (unsigned char *) TclGetStringFromObj(objv[objc - 1], &count); dataend = data + count; size = ((count + 3) & ~3) * 3 / 4; begin = cursor = Tcl_SetByteArrayLength(resultObj, size); @@ -3002,21 +3004,22 @@ BinaryDecode64( return TCL_OK; bad64: - { + if (pure) { + ch = c; + } else { /* The decoder is byte-oriented. If we saw a byte that's not a * valid member of the base64 alphabet, it could be the lead byte * of a multi-byte character. */ - Tcl_UniChar ch; /* Safe because we know data is NUL-terminated */ TclUtfToUniChar((const char *)(data - 1), &ch); - - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "invalid base64 character \"%c\" at position %d", ch, - (int) (data - datastart - 1))); - TclDecrRefCount(resultObj); - return TCL_ERROR; } + + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "invalid base64 character \"%c\" at position %d", ch, + (int) (data - datastart - 1))); + TclDecrRefCount(resultObj); + return TCL_ERROR; } /* -- cgit v0.12 From 65c59cb146dd2ee5d7336f95cd8e3323661b7c21 Mon Sep 17 00:00:00 2001 From: dgp Date: Sat, 28 Mar 2020 18:23:43 +0000 Subject: Missing error codes from decoder routines. --- generic/tclBinary.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 33c8382..a85c045 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -2443,6 +2443,7 @@ BinaryDecodeHex( Tcl_SetObjResult(interp, Tcl_ObjPrintf( "invalid hexadecimal digit \"%c\" at position %d", c, (int) (data - datastart - 1))); + Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "INVALID", NULL); return TCL_ERROR; } @@ -3018,6 +3019,7 @@ BinaryDecode64( Tcl_SetObjResult(interp, Tcl_ObjPrintf( "invalid base64 character \"%c\" at position %d", ch, (int) (data - datastart - 1))); + Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "INVALID", NULL); TclDecrRefCount(resultObj); return TCL_ERROR; } -- cgit v0.12 From beae292420f249c522dbbf9526de96f5f31bd71b Mon Sep 17 00:00:00 2001 From: dgp Date: Sat, 28 Mar 2020 18:28:42 +0000 Subject: Revise comment that was a plain lie. --- generic/tclBinary.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/generic/tclBinary.c b/generic/tclBinary.c index a85c045..ecd3c6b 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -2452,16 +2452,10 @@ BinaryDecodeHex( * * BinaryEncode64 -- * - * This implements a generic 6 bit binary encoding. Input is broken into - * 6 bit chunks and a lookup table passed in via clientData is used to - * turn these values into output characters. This is used to implement - * base64 binary encodings. + * This procedure implements the "binary encode base64" Tcl command. * * Results: - * Interp result set to an encoded byte array object - * - * Side effects: - * None + * The base64 encoded value prescribed by the input arguments. * *---------------------------------------------------------------------- */ -- cgit v0.12 From 65c57e8170c646d921d1400c3dcbc04cbd8a3372 Mon Sep 17 00:00:00 2001 From: dgp Date: Sat, 28 Mar 2020 18:52:16 +0000 Subject: Make sure maxlen value does not rely on ordering of options. --- generic/tclBinary.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/generic/tclBinary.c b/generic/tclBinary.c index ecd3c6b..93aeee3 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -2517,12 +2517,12 @@ BinaryEncode64( break; case OPT_WRAPCHAR: wrapchar = Tcl_GetStringFromObj(objv[i + 1], &wrapcharlen); - if (wrapcharlen == 0) { - maxlen = 0; - } break; } } + if (wrapcharlen == 0) { + maxlen = 0; + } resultObj = Tcl_NewObj(); data = Tcl_GetByteArrayFromObj(objv[objc - 1], &count); -- cgit v0.12 From a32dffe303445fed997b22794752d372bd997399 Mon Sep 17 00:00:00 2001 From: dgp Date: Sat, 28 Mar 2020 19:21:56 +0000 Subject: [8edfcedfa0] [binary encode base64] build a string instead of a bytearray whenever it might be required to get the right result. --- generic/tclBinary.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 93aeee3..e4d7365 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -2484,11 +2484,11 @@ BinaryEncode64( Tcl_Obj *const objv[]) { Tcl_Obj *resultObj; - unsigned char *data, *cursor, *limit; + unsigned char *data, *limit; int maxlen = 0; const char *wrapchar = "\n"; int wrapcharlen = 1; - int offset, i, index, size, outindex = 0, count = 0; + int offset, i, index, size, outindex = 0, count = 0, purewrap = 1; enum { OPT_MAXLEN, OPT_WRAPCHAR }; static const char *const optStrings[] = { "-maxlen", "-wrapchar", NULL }; @@ -2516,7 +2516,13 @@ BinaryEncode64( } break; case OPT_WRAPCHAR: - wrapchar = Tcl_GetStringFromObj(objv[i + 1], &wrapcharlen); + purewrap = TclIsPureByteArray(objv[i + 1]); + if (purewrap) { + wrapchar = (const char *) Tcl_GetByteArrayFromObj( + objv[i + 1], &wrapcharlen); + } else { + wrapchar = Tcl_GetStringFromObj(objv[i + 1], &wrapcharlen); + } break; } } @@ -2527,6 +2533,8 @@ BinaryEncode64( resultObj = Tcl_NewObj(); data = Tcl_GetByteArrayFromObj(objv[objc - 1], &count); if (count > 0) { + unsigned char *cursor = NULL; + size = (((count * 4) / 3) + 3) & ~3; /* ensure 4 byte chunks */ if (maxlen > 0 && size > maxlen) { int adjusted = size + (wrapcharlen * (size / maxlen)); @@ -2535,8 +2543,17 @@ BinaryEncode64( adjusted -= wrapcharlen; } size = adjusted; + + if (purewrap == 0) { + /* Wrapchar is (possibly) non-byte, so build result as + * general string, not bytearray */ + Tcl_SetObjLength(resultObj, size); + cursor = (unsigned char *) TclGetString(resultObj); + } + } + if (cursor == NULL) { + cursor = Tcl_SetByteArrayLength(resultObj, size); } - cursor = Tcl_SetByteArrayLength(resultObj, size); limit = cursor + size; for (offset = 0; offset < count; offset += 3) { unsigned char d[3] = {0, 0, 0}; -- cgit v0.12 From e7e6720ba42b35b01294a33db00b2d80a3ca1fab Mon Sep 17 00:00:00 2001 From: dgp Date: Sat, 28 Mar 2020 19:25:54 +0000 Subject: Add a test for fixed bug. --- tests/binary.test | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/binary.test b/tests/binary.test index a2a9144..399a07c 100644 --- a/tests/binary.test +++ b/tests/binary.test @@ -2624,6 +2624,9 @@ test binary-72.27 {binary encode base64} -body { test binary-72.28 {binary encode base64} -body { binary encode base64 -maxlen 6 -wrapchar 0123456789 abcabcabc } -result {YWJjYW0123456789JjYWJj} +test binary-72.29 {binary encode base64} { + string length [binary encode base64 -maxlen 3 -wrapchar \xca abc] +} 5 test binary-73.1 {binary decode base64} -body { binary decode base64 -- cgit v0.12