diff options
author | dkf <donal.k.fellows@manchester.ac.uk> | 2012-11-20 12:11:57 (GMT) |
---|---|---|
committer | dkf <donal.k.fellows@manchester.ac.uk> | 2012-11-20 12:11:57 (GMT) |
commit | ce8b0d55e3f2faa869ba2727c5b6362ead7d5472 (patch) | |
tree | 22785c6b7a87eb10e84af4007289ea5016c23818 | |
parent | 094f23c172acca8f32b0888cd536f01fc1daab1b (diff) | |
parent | 2742530e9d84e0347a415ae41cc5057a80a23d35 (diff) | |
download | tcl-ce8b0d55e3f2faa869ba2727c5b6362ead7d5472.zip tcl-ce8b0d55e3f2faa869ba2727c5b6362ead7d5472.tar.gz tcl-ce8b0d55e3f2faa869ba2727c5b6362ead7d5472.tar.bz2 |
[Bug 3033307]: fix [binary decode base64] whitespace handling with '=' suffixes
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | generic/tclBinary.c | 96 | ||||
-rw-r--r-- | tests/binary.test | 21 |
3 files changed, 96 insertions, 27 deletions
@@ -1,3 +1,9 @@ +2012-11-20 Donal K. Fellows <dkf@users.sf.net> + + * generic/tclBinary.c (BinaryDecode64): [Bug 3033307]: Corrected + handling of trailing whitespace when decoding base64. Thanks to Anton + Kovalenko for reporting, and Andy Goth for the fix and tests. + 2012-11-19 Donal K. Fellows <dkf@users.sf.net> * generic/tclExecute.c (INST_STR_RANGE_IMM): [Bug 3588366]: Corrected diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 3d8b24c..5c33308 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -2658,12 +2658,12 @@ BinaryDecode64( Tcl_Obj *const objv[]) { Tcl_Obj *resultObj = NULL; - unsigned char *data, *datastart, *dataend, c; + unsigned char *data, *datastart, *dataend, c = '\0'; unsigned char *begin = NULL; unsigned char *cursor = NULL; int strict = 0; int i, index, size, cut = 0, count = 0; - enum {OPT_STRICT }; + enum { OPT_STRICT }; static const char *const optStrings[] = { "-strict", NULL }; if (objc < 2 || objc > 3) { @@ -2691,43 +2691,85 @@ BinaryDecode64( while (data < dataend) { unsigned long value = 0; - for (i=0 ; i<4 ; i++) { + /* + * Decode the current block. Each base64 block consists of four input + * characters A-Z, a-z, 0-9, +, or /. Each character supplies six bits + * of output data, so each block's output is 24 bits (three bytes) in + * length. The final block can be shorter by one or two bytes, denoted + * by the input ending with one or two ='s, respectively. + */ + + for (i = 0; i < 4; i++) { + /* + * Get the next input character. At end of input, pad with at most + * two ='s. If more than two ='s would be needed, instead discard + * the block read thus far. + */ + if (data < dataend) { c = *data++; + } else if (i > 1) { + c = '='; + } else { + cut += 3; + break; + } - if (c >= 'A' && c <= 'Z') { - value = (value << 6) | ((c - 'A') & 0x3f); - } else if (c >= 'a' && c <= 'z') { - value = (value << 6) | ((c - 'a' + 26) & 0x3f); - } else if (c >= '0' && c <= '9') { - value = (value << 6) | ((c - '0' + 52) & 0x3f); - } else if (c == '+') { - value = (value << 6) | 0x3e; - } else if (c == '/') { - value = (value << 6) | 0x3f; - } else if (c == '=') { - value <<= 6; - if (cut < 2) { - cut++; - } + /* + * Load the character into the block value. Handle ='s specially + * because they're only valid as the last character or two of the + * final block of input. Unless strict mode is enabled, skip any + * input whitespace characters. + */ + + if (cut) { + if (c == '=' && i > 1) { + value <<= 6; + cut++; + } else if (!strict && isspace(c)) { + i--; } else { - if (strict || !isspace(c)) { - goto bad64; - } - i--; - continue; + goto bad64; } - } else { + } else if (c >= 'A' && c <= 'Z') { + value = (value << 6) | ((c - 'A') & 0x3f); + } else if (c >= 'a' && c <= 'z') { + value = (value << 6) | ((c - 'a' + 26) & 0x3f); + } else if (c >= '0' && c <= '9') { + value = (value << 6) | ((c - '0' + 52) & 0x3f); + } else if (c == '+') { + value = (value << 6) | 0x3e; + } else if (c == '/') { + value = (value << 6) | 0x3f; + } else if (c == '=') { value <<= 6; cut++; + } else if (strict || !isspace(c)) { + goto bad64; + } else { + i--; } } *cursor++ = UCHAR((value >> 16) & 0xff); *cursor++ = UCHAR((value >> 8) & 0xff); *cursor++ = UCHAR(value & 0xff); - } - if (cut > size) { - cut = size; + + /* + * Since = is only valid within the final block, if it was encountered + * but there are still more input characters, confirm that strict mode + * is off and all subsequent characters are whitespace. + */ + + if (cut && data < dataend) { + if (strict) { + goto bad64; + } + for (; data < dataend; data++) { + if (!isspace(*data)) { + goto bad64; + } + } + } } Tcl_SetByteArrayLength(resultObj, cursor - begin - cut); Tcl_SetObjResult(interp, resultObj); diff --git a/tests/binary.test b/tests/binary.test index 6c00508..ccd0f29 100644 --- a/tests/binary.test +++ b/tests/binary.test @@ -2642,6 +2642,27 @@ test binary-73.23 {binary decode base64} -body { test binary-73.24 {binary decode base64} -body { string length [binary decode base64 " "] } -result 0 +test binary-73.25 {binary decode base64} -body { + list [string length [set r [binary decode base64 WA==\n]]] $r +} -result {1 X} +test binary-73.26 {binary decode base64} -body { + list [string length [set r [binary decode base64 WFk=\n]]] $r +} -result {2 XY} +test binary-73.27 {binary decode base64} -body { + list [string length [set r [binary decode base64 WFla\n]]] $r +} -result {3 XYZ} +test binary-73.28 {binary decode base64} -body { + list [string length [set r [binary decode base64 -strict WA==\n]]] $r +} -returnCodes error -match glob -result {invalid base64 character *} +test binary-73.29 {binary decode base64} -body { + list [string length [set r [binary decode base64 -strict WFk=\n]]] $r +} -returnCodes error -match glob -result {invalid base64 character *} +test binary-73.30 {binary decode base64} -body { + list [string length [set r [binary decode base64 -strict WFla\n]]] $r +} -returnCodes error -match glob -result {invalid base64 character *} +test binary-73.31 {binary decode base64} -body { + list [string length [set r [binary decode base64 WA==WFla]]] $r +} -returnCodes error -match glob -result {invalid base64 character *} test binary-74.1 {binary encode uuencode} -body { binary encode uuencode |