summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author dkf <donal.k.fellows@manchester.ac.uk> 2012-11-20 12:11:57 (GMT)
committer dkf <donal.k.fellows@manchester.ac.uk> 2012-11-20 12:11:57 (GMT)
commit ce8b0d55e3f2faa869ba2727c5b6362ead7d5472 (patch)
tree 22785c6b7a87eb10e84af4007289ea5016c23818
parent 094f23c172acca8f32b0888cd536f01fc1daab1b (diff)
parent 2742530e9d84e0347a415ae41cc5057a80a23d35 (diff)
download tcl-ce8b0d55e3f2faa869ba2727c5b6362ead7d5472.zip
tcl-ce8b0d55e3f2faa869ba2727c5b6362ead7d5472.tar.gz
tcl-ce8b0d55e3f2faa869ba2727c5b6362ead7d5472.tar.bz2
[Bug 3033307]: fix [binary decode base64] whitespace handling with '=' suffixes
-rw-r--r-- ChangeLog 6
-rw-r--r-- generic/tclBinary.c 96
-rw-r--r-- tests/binary.test 21
3 files changed, 96 insertions, 27 deletions
diff --git a/ChangeLog b/ChangeLog
index 70234e4..9b4772c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2012-11-20 Donal K. Fellows <dkf@users.sf.net>
+
+ * generic/tclBinary.c (BinaryDecode64): [Bug 3033307]: Corrected
+ handling of trailing whitespace when decoding base64. Thanks to Anton
+ Kovalenko for reporting, and Andy Goth for the fix and tests.
+
2012-11-19 Donal K. Fellows <dkf@users.sf.net>
* generic/tclExecute.c (INST_STR_RANGE_IMM): [Bug 3588366]: Corrected
diff --git a/generic/tclBinary.c b/generic/tclBinary.c
index 3d8b24c..5c33308 100644
--- a/generic/tclBinary.c
+++ b/generic/tclBinary.c
@@ -2658,12 +2658,12 @@ BinaryDecode64(
Tcl_Obj *const objv[])
{
Tcl_Obj *resultObj = NULL;
- unsigned char *data, *datastart, *dataend, c;
+ unsigned char *data, *datastart, *dataend, c = '\0';
unsigned char *begin = NULL;
unsigned char *cursor = NULL;
int strict = 0;
int i, index, size, cut = 0, count = 0;
- enum {OPT_STRICT };
+ enum { OPT_STRICT };
static const char *const optStrings[] = { "-strict", NULL };
if (objc < 2 || objc > 3) {
@@ -2691,43 +2691,85 @@ BinaryDecode64(
while (data < dataend) {
unsigned long value = 0;
- for (i=0 ; i<4 ; i++) {
+ /*
+ * Decode the current block. Each base64 block consists of four input
+ * characters A-Z, a-z, 0-9, +, or /. Each character supplies six bits
+ * of output data, so each block's output is 24 bits (three bytes) in
+ * length. The final block can be shorter by one or two bytes, denoted
+ * by the input ending with one or two ='s, respectively.
+ */
+
+ for (i = 0; i < 4; i++) {
+ /*
+ * Get the next input character. At end of input, pad with at most
+ * two ='s. If more than two ='s would be needed, instead discard
+ * the block read thus far.
+ */
+
if (data < dataend) {
c = *data++;
+ } else if (i > 1) {
+ c = '=';
+ } else {
+ cut += 3;
+ break;
+ }
- if (c >= 'A' && c <= 'Z') {
- value = (value << 6) | ((c - 'A') & 0x3f);
- } else if (c >= 'a' && c <= 'z') {
- value = (value << 6) | ((c - 'a' + 26) & 0x3f);
- } else if (c >= '0' && c <= '9') {
- value = (value << 6) | ((c - '0' + 52) & 0x3f);
- } else if (c == '+') {
- value = (value << 6) | 0x3e;
- } else if (c == '/') {
- value = (value << 6) | 0x3f;
- } else if (c == '=') {
- value <<= 6;
- if (cut < 2) {
- cut++;
- }
+ /*
+ * Load the character into the block value. Handle ='s specially
+ * because they're only valid as the last character or two of the
+ * final block of input. Unless strict mode is enabled, skip any
+ * input whitespace characters.
+ */
+
+ if (cut) {
+ if (c == '=' && i > 1) {
+ value <<= 6;
+ cut++;
+ } else if (!strict && isspace(c)) {
+ i--;
} else {
- if (strict || !isspace(c)) {
- goto bad64;
- }
- i--;
- continue;
+ goto bad64;
}
- } else {
+ } else if (c >= 'A' && c <= 'Z') {
+ value = (value << 6) | ((c - 'A') & 0x3f);
+ } else if (c >= 'a' && c <= 'z') {
+ value = (value << 6) | ((c - 'a' + 26) & 0x3f);
+ } else if (c >= '0' && c <= '9') {
+ value = (value << 6) | ((c - '0' + 52) & 0x3f);
+ } else if (c == '+') {
+ value = (value << 6) | 0x3e;
+ } else if (c == '/') {
+ value = (value << 6) | 0x3f;
+ } else if (c == '=') {
value <<= 6;
cut++;
+ } else if (strict || !isspace(c)) {
+ goto bad64;
+ } else {
+ i--;
}
}
*cursor++ = UCHAR((value >> 16) & 0xff);
*cursor++ = UCHAR((value >> 8) & 0xff);
*cursor++ = UCHAR(value & 0xff);
- }
- if (cut > size) {
- cut = size;
+
+ /*
+ * Since = is only valid within the final block, if it was encountered
+ * but there are still more input characters, confirm that strict mode
+ * is off and all subsequent characters are whitespace.
+ */
+
+ if (cut && data < dataend) {
+ if (strict) {
+ goto bad64;
+ }
+ for (; data < dataend; data++) {
+ if (!isspace(*data)) {
+ goto bad64;
+ }
+ }
+ }
}
Tcl_SetByteArrayLength(resultObj, cursor - begin - cut);
Tcl_SetObjResult(interp, resultObj);
diff --git a/tests/binary.test b/tests/binary.test
index 6c00508..ccd0f29 100644
--- a/tests/binary.test
+++ b/tests/binary.test
@@ -2642,6 +2642,27 @@ test binary-73.23 {binary decode base64} -body {
test binary-73.24 {binary decode base64} -body {
string length [binary decode base64 " "]
} -result 0
+test binary-73.25 {binary decode base64} -body {
+ list [string length [set r [binary decode base64 WA==\n]]] $r
+} -result {1 X}
+test binary-73.26 {binary decode base64} -body {
+ list [string length [set r [binary decode base64 WFk=\n]]] $r
+} -result {2 XY}
+test binary-73.27 {binary decode base64} -body {
+ list [string length [set r [binary decode base64 WFla\n]]] $r
+} -result {3 XYZ}
+test binary-73.28 {binary decode base64} -body {
+ list [string length [set r [binary decode base64 -strict WA==\n]]] $r
+} -returnCodes error -match glob -result {invalid base64 character *}
+test binary-73.29 {binary decode base64} -body {
+ list [string length [set r [binary decode base64 -strict WFk=\n]]] $r
+} -returnCodes error -match glob -result {invalid base64 character *}
+test binary-73.30 {binary decode base64} -body {
+ list [string length [set r [binary decode base64 -strict WFla\n]]] $r
+} -returnCodes error -match glob -result {invalid base64 character *}
+test binary-73.31 {binary decode base64} -body {
+ list [string length [set r [binary decode base64 WA==WFla]]] $r
+} -returnCodes error -match glob -result {invalid base64 character *}
test binary-74.1 {binary encode uuencode} -body {
binary encode uuencode