[Bug 3033307]: fix [binary decode base64] whitespace handling with '=' suffixes

author: dkf <donal.k.fellows@manchester.ac.uk> 2012-11-20 12:11:57 (GMT)
committer: dkf <donal.k.fellows@manchester.ac.uk> 2012-11-20 12:11:57 (GMT)
commit: ce8b0d55e3f2faa869ba2727c5b6362ead7d5472 (patch)
tree: 22785c6b7a87eb10e84af4007289ea5016c23818
parent: 094f23c172acca8f32b0888cd536f01fc1daab1b (diff)
parent: 2742530e9d84e0347a415ae41cc5057a80a23d35 (diff)
download: tcl-ce8b0d55e3f2faa869ba2727c5b6362ead7d5472.zip
tcl-ce8b0d55e3f2faa869ba2727c5b6362ead7d5472.tar.gz
tcl-ce8b0d55e3f2faa869ba2727c5b6362ead7d5472.tar.bz2
3 files changed, 96 insertions, 27 deletions
diff --git a/ChangeLog b/ChangeLog
index 70234e4..9b4772c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2012-11-20  Donal K. Fellows  <dkf@users.sf.net>
+
+	* generic/tclBinary.c (BinaryDecode64): [Bug 3033307]: Corrected
+	handling of trailing whitespace when decoding base64. Thanks to Anton
+	Kovalenko for reporting, and Andy Goth for the fix and tests.
+
 2012-11-19  Donal K. Fellows  <dkf@users.sf.net>
 
 	* generic/tclExecute.c (INST_STR_RANGE_IMM): [Bug 3588366]: Corrected
diff --git a/generic/tclBinary.c b/generic/tclBinary.c
index 3d8b24c..5c33308 100644
--- a/generic/tclBinary.c
+++ b/generic/tclBinary.c
@@ -2658,12 +2658,12 @@ BinaryDecode64(
     Tcl_Obj *const objv[])
 {
     Tcl_Obj *resultObj = NULL;
-    unsigned char *data, *datastart, *dataend, c;
+    unsigned char *data, *datastart, *dataend, c = '\0';
     unsigned char *begin = NULL;
     unsigned char *cursor = NULL;
     int strict = 0;
     int i, index, size, cut = 0, count = 0;
-    enum {OPT_STRICT };
+    enum { OPT_STRICT };
     static const char *const optStrings[] = { "-strict", NULL };
 
     if (objc < 2 || objc > 3) {
@@ -2691,43 +2691,85 @@ BinaryDecode64(
     while (data < dataend) {
 	unsigned long value = 0;
 
-	for (i=0 ; i<4 ; i++) {
+	/*
+	 * Decode the current block. Each base64 block consists of four input
+	 * characters A-Z, a-z, 0-9, +, or /. Each character supplies six bits
+	 * of output data, so each block's output is 24 bits (three bytes) in
+	 * length. The final block can be shorter by one or two bytes, denoted
+	 * by the input ending with one or two ='s, respectively.
+	 */
+
+	for (i = 0; i < 4; i++) {
+	    /*
+	     * Get the next input character. At end of input, pad with at most
+	     * two ='s. If more than two ='s would be needed, instead discard
+	     * the block read thus far.
+	     */
+
 	    if (data < dataend) {
 		c = *data++;
+	    } else if (i > 1) {
+		c = '=';
+	    } else {
+		cut += 3;
+		break;
+	    }
 
-		if (c >= 'A' && c <= 'Z') {
-		    value = (value << 6) | ((c - 'A') & 0x3f);
-		} else if (c >= 'a' && c <= 'z') {
-		    value = (value << 6) | ((c - 'a' + 26) & 0x3f);
-		} else if (c >= '0' && c <= '9') {
-		    value = (value << 6) | ((c - '0' + 52) & 0x3f);
-		} else if (c == '+') {
-		    value = (value << 6) | 0x3e;
-		} else if (c == '/') {
-		    value = (value << 6) | 0x3f;
-		} else if (c == '=') {
-		    value <<= 6;
-		    if (cut < 2) {
-			cut++;
-		    }
+	    /*
+	     * Load the character into the block value. Handle ='s specially
+	     * because they're only valid as the last character or two of the
+	     * final block of input. Unless strict mode is enabled, skip any
+	     * input whitespace characters.
+	     */
+
+	    if (cut) {
+		if (c == '=' && i > 1) {
+		     value <<= 6;
+		     cut++;
+		} else if (!strict && isspace(c)) {
+		     i--;
 		} else {
-		    if (strict || !isspace(c)) {
-			goto bad64;
-		    }
-		    i--;
-		    continue;
+		    goto bad64;
 		}
-	    } else {
+	    } else if (c >= 'A' && c <= 'Z') {
+		value = (value << 6) | ((c - 'A') & 0x3f);
+	    } else if (c >= 'a' && c <= 'z') {
+		value = (value << 6) | ((c - 'a' + 26) & 0x3f);
+	    } else if (c >= '0' && c <= '9') {
+		value = (value << 6) | ((c - '0' + 52) & 0x3f);
+	    } else if (c == '+') {
+		value = (value << 6) | 0x3e;
+	    } else if (c == '/') {
+		value = (value << 6) | 0x3f;
+	    } else if (c == '=') {
 		value <<= 6;
 		cut++;
+	    } else if (strict || !isspace(c)) {
+		goto bad64;
+	    } else {
+		i--;
 	    }
 	}
 	*cursor++ = UCHAR((value >> 16) & 0xff);
 	*cursor++ = UCHAR((value >> 8) & 0xff);
 	*cursor++ = UCHAR(value & 0xff);
-    }
-    if (cut > size) {
-	cut = size;
+
+	/*
+	 * Since = is only valid within the final block, if it was encountered
+	 * but there are still more input characters, confirm that strict mode
+	 * is off and all subsequent characters are whitespace.
+	 */
+
+	if (cut && data < dataend) {
+	    if (strict) {
+		goto bad64;
+	    }
+	    for (; data < dataend; data++) {
+		if (!isspace(*data)) {
+		    goto bad64;
+		}
+	    }
+	}
     }
     Tcl_SetByteArrayLength(resultObj, cursor - begin - cut);
     Tcl_SetObjResult(interp, resultObj);
diff --git a/tests/binary.test b/tests/binary.test
index 6c00508..ccd0f29 100644
--- a/tests/binary.test
+++ b/tests/binary.test
@@ -2642,6 +2642,27 @@ test binary-73.23 {binary decode base64} -body {
 test binary-73.24 {binary decode base64} -body {
     string length [binary decode base64 " "]
 } -result 0
+test binary-73.25 {binary decode base64} -body {
+    list [string length [set r [binary decode base64 WA==\n]]] $r
+} -result {1 X}
+test binary-73.26 {binary decode base64} -body {
+    list [string length [set r [binary decode base64 WFk=\n]]] $r
+} -result {2 XY}
+test binary-73.27 {binary decode base64} -body {
+    list [string length [set r [binary decode base64 WFla\n]]] $r
+} -result {3 XYZ}
+test binary-73.28 {binary decode base64} -body {
+    list [string length [set r [binary decode base64 -strict WA==\n]]] $r
+} -returnCodes error -match glob -result {invalid base64 character *}
+test binary-73.29 {binary decode base64} -body {
+    list [string length [set r [binary decode base64 -strict WFk=\n]]] $r
+} -returnCodes error -match glob -result {invalid base64 character *}
+test binary-73.30 {binary decode base64} -body {
+    list [string length [set r [binary decode base64 -strict WFla\n]]] $r
+} -returnCodes error -match glob -result {invalid base64 character *}
+test binary-73.31 {binary decode base64} -body {
+    list [string length [set r [binary decode base64 WA==WFla]]] $r
+} -returnCodes error -match glob -result {invalid base64 character *}
 
 test binary-74.1 {binary encode uuencode} -body {
     binary encode uuencode
author	dkf <donal.k.fellows@manchester.ac.uk>	2012-11-20 12:11:57 (GMT)
committer	dkf <donal.k.fellows@manchester.ac.uk>	2012-11-20 12:11:57 (GMT)
commit	ce8b0d55e3f2faa869ba2727c5b6362ead7d5472 (patch)
tree	22785c6b7a87eb10e84af4007289ea5016c23818
parent	094f23c172acca8f32b0888cd536f01fc1daab1b (diff)
parent	2742530e9d84e0347a415ae41cc5057a80a23d35 (diff)
download	tcl-ce8b0d55e3f2faa869ba2727c5b6362ead7d5472.zip tcl-ce8b0d55e3f2faa869ba2727c5b6362ead7d5472.tar.gz tcl-ce8b0d55e3f2faa869ba2727c5b6362ead7d5472.tar.bz2