summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2013-09-14 18:25:45 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2013-09-14 18:25:45 (GMT)
commit0f1f636a50239c2409172a34da5fa1b1f45bf9c4 (patch)
tree409912fdc7eb0211ad6a6d3ae95417a0cf357006
parent92eea63a7df5071fddef9b89b9122a5761670908 (diff)
downloadtcl-0f1f636a50239c2409172a34da5fa1b1f45bf9c4.zip
tcl-0f1f636a50239c2409172a34da5fa1b1f45bf9c4.tar.gz
tcl-0f1f636a50239c2409172a34da5fa1b1f45bf9c4.tar.bz2
And the decoder too.
-rw-r--r--doc/binary.n2
-rw-r--r--generic/tclBinary.c104
-rw-r--r--tests/binary.test41
3 files changed, 109 insertions, 38 deletions
diff --git a/doc/binary.n b/doc/binary.n
index 95be36e..b301f4d 100644
--- a/doc/binary.n
+++ b/doc/binary.n
@@ -107,7 +107,7 @@ During decoding, the following options are supported:
.TP
\fB\-strict\fR
.
-Instructs the decoder to throw an error if it encounters whitespace
+Instructs the decoder to throw an error if it encounters unexpected whitespace
characters. Otherwise it ignores them.
.PP
Note that neither the encoder nor the decoder handle the header and footer of
diff --git a/generic/tclBinary.c b/generic/tclBinary.c
index 71da309..c0dd926 100644
--- a/generic/tclBinary.c
+++ b/generic/tclBinary.c
@@ -2690,8 +2690,8 @@ BinaryDecodeUu(
Tcl_Obj *resultObj = NULL;
unsigned char *data, *datastart, *dataend;
unsigned char *begin, *cursor;
- int i, index, size, count = 0, cut = 0, strict = 0;
- char c;
+ int i, index, size, count = 0, strict = 0, lineLen;
+ unsigned char c;
enum {OPT_STRICT };
static const char *const optStrings[] = { "-strict", NULL };
@@ -2717,44 +2717,112 @@ BinaryDecodeUu(
dataend = data + count;
size = ((count + 3) & ~3) * 3 / 4;
begin = cursor = Tcl_SetByteArrayLength(resultObj, size);
+ lineLen = -1;
+
+ /*
+ * The decoding loop. First, we get the length of line (strictly, the
+ * number of data bytes we expect to generate from the line) we're
+ * processing this time round if it is not already known (i.e., when the
+ * lineLen variable is set to the magic value, -1).
+ */
+
while (data < dataend) {
char d[4] = {0, 0, 0, 0};
+ if (lineLen < 0) {
+ c = *data++;
+ if (c < 32 || c > 96) {
+ if (strict || !isspace(c)) {
+ goto badUu;
+ }
+ i--;
+ continue;
+ }
+ lineLen = (c - 32) & 0x3f;
+ }
+
+ /*
+ * Now we read a four-character grouping.
+ */
+
for (i=0 ; i<4 ; i++) {
if (data < dataend) {
d[i] = c = *data++;
- if (c < 33 || c > 96) {
- if (strict || !isspace(UCHAR(c))) {
- goto badUu;
+ if (c < 32 || c > 96) {
+ if (strict) {
+ if (!isspace(c)) {
+ goto badUu;
+ } else if (c == '\n') {
+ goto shortUu;
+ }
}
i--;
continue;
}
- } else {
- cut++;
}
}
- if (cut > 3) {
- cut = 3;
+
+ /*
+ * Translate that grouping into (up to) three binary bytes output.
+ */
+
+ if (lineLen > 0) {
+ *cursor++ = (((d[0] - 0x20) & 0x3f) << 2)
+ | (((d[1] - 0x20) & 0x3f) >> 4);
+ if (--lineLen > 0) {
+ *cursor++ = (((d[1] - 0x20) & 0x3f) << 4)
+ | (((d[2] - 0x20) & 0x3f) >> 2);
+ if (--lineLen > 0) {
+ *cursor++ = (((d[2] - 0x20) & 0x3f) << 6)
+ | (((d[3] - 0x20) & 0x3f));
+ lineLen--;
+ }
+ }
+ }
+
+ /*
+ * If we've reached the end of the line, skip until we process a
+ * newline.
+ */
+
+ if (lineLen == 0 && data < dataend) {
+ lineLen = -1;
+ do {
+ c = *data++;
+ if (c == '\n') {
+ break;
+ } else if (c >= 32 && c <= 96) {
+ data--;
+ break;
+ } else if (strict || !isspace(c)) {
+ goto badUu;
+ }
+ } while (data < dataend);
}
- *cursor++ = (((d[0] - 0x20) & 0x3f) << 2)
- | (((d[1] - 0x20) & 0x3f) >> 4);
- *cursor++ = (((d[1] - 0x20) & 0x3f) << 4)
- | (((d[2] - 0x20) & 0x3f) >> 2);
- *cursor++ = (((d[2] - 0x20) & 0x3f) << 6)
- | (((d[3] - 0x20) & 0x3f));
}
- if (cut > size) {
- cut = size;
+
+ /*
+ * Sanity check, clean up and finish.
+ */
+
+ if (lineLen > 0 && strict) {
+ goto shortUu;
}
- Tcl_SetByteArrayLength(resultObj, cursor - begin - cut);
+ Tcl_SetByteArrayLength(resultObj, cursor - begin);
Tcl_SetObjResult(interp, resultObj);
return TCL_OK;
+ shortUu:
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf("short uuencode data"));
+ Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "SHORT", NULL);
+ TclDecrRefCount(resultObj);
+ return TCL_ERROR;
+
badUu:
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
"invalid uuencode character \"%c\" at position %d",
c, (int) (data - datastart - 1)));
+ Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "INVALID", NULL);
TclDecrRefCount(resultObj);
return TCL_ERROR;
}
diff --git a/tests/binary.test b/tests/binary.test
index 4b71b77..607a070 100644
--- a/tests/binary.test
+++ b/tests/binary.test
@@ -2755,72 +2755,75 @@ test binary-75.1 {binary decode uuencode} -body {
binary decode uuencode
} -returnCodes error -match glob -result "wrong # args: *"
test binary-75.2 {binary decode uuencode} -body {
- binary decode uuencode 86)C
+ binary decode uuencode "#86)C\n"
} -result {abc}
test binary-75.3 {binary decode uuencode} -body {
binary decode uuencode {}
} -result {}
+test binary-75.3.1 {binary decode uuencode} -body {
+ binary decode uuencode `\n
+} -result {}
test binary-75.4 {binary decode uuencode} -body {
- binary decode uuencode [string repeat "86)C" 20]
+ binary decode uuencode "M[string repeat 86)C 15]\n/[string repeat 86)C 5]\n"
} -result [string repeat abc 20]
test binary-75.5 {binary decode uuencode} -body {
- binary decode uuencode "``\$\"`P0``0(#"
+ binary decode uuencode ")``\$\"`P0``0(#"
} -result "\0\1\2\3\4\0\1\2\3"
test binary-75.6 {binary decode uuencode} -body {
- string length [binary decode uuencode {`}]
+ string length [binary decode uuencode "`\n"]
} -result 0
test binary-75.7 {binary decode uuencode} -body {
- string length [binary decode uuencode {``}]
+ string length [binary decode uuencode "!`\n"]
} -result 1
test binary-75.8 {binary decode uuencode} -body {
- string length [binary decode uuencode {```}]
+ string length [binary decode uuencode "\"``\n"]
} -result 2
test binary-75.9 {binary decode uuencode} -body {
- string length [binary decode uuencode {````}]
+ string length [binary decode uuencode "#```\n"]
} -result 3
test binary-75.10 {binary decode uuencode} -body {
- set s "[string repeat 86)C 10]\n[string repeat 86)C 10]"
+ set s ">[string repeat 86)C 10]\n>[string repeat 86)C 10]"
binary decode uuencode $s
} -result [string repeat abc 20]
test binary-75.11 {binary decode uuencode} -body {
- set s "[string repeat 86)C 10]\n [string repeat 86)C 10]"
+ set s ">[string repeat 86)C 10]\n\t>\t[string repeat 86)C 10]\r"
binary decode uuencode $s
} -result [string repeat abc 20]
test binary-75.12 {binary decode uuencode} -body {
binary decode uuencode -strict "|86)C"
} -returnCodes error -match glob -result {invalid uuencode character "|" at position 0}
test binary-75.13 {binary decode uuencode} -body {
- set s "[string repeat 86)C 10]|[string repeat 86)C 10]"
+ set s ">[string repeat 86)C 10]|[string repeat 86)C 10]"
binary decode uuencode -strict $s
-} -returnCodes error -match glob -result {invalid uuencode character "|" at position 40}
+} -returnCodes error -match glob -result {invalid uuencode character "|" at position 41}
test binary-75.14 {binary decode uuencode} -body {
- set s "[string repeat 86)C 10]\n [string repeat 86)C 10]"
+ set s ">[string repeat 86)C 10]\na[string repeat 86)C 10]"
binary decode uuencode -strict $s
} -returnCodes error -match glob -result {invalid uuencode character *}
test binary-75.20 {binary decode uuencode} -body {
- set r [binary decode uuencode 8]
+ set r [binary decode uuencode " 8"]
list [string length $r] $r
} -result {0 {}}
test binary-75.21 {binary decode uuencode} -body {
- set r [binary decode uuencode 86]
+ set r [binary decode uuencode "!86"]
list [string length $r] $r
} -result {1 a}
test binary-75.22 {binary decode uuencode} -body {
- set r [binary decode uuencode 86)]
+ set r [binary decode uuencode "\"86)"]
list [string length $r] $r
} -result {2 ab}
test binary-75.23 {binary decode uuencode} -body {
- set r [binary decode uuencode 86)C]
+ set r [binary decode uuencode "#86)C"]
list [string length $r] $r
} -result {3 abc}
test binary-75.24 {binary decode uuencode} -body {
- set s "04)\# "
+ set s "#04)\# "
binary decode uuencode $s
} -result ABC
test binary-75.25 {binary decode uuencode} -body {
- set s "04)\#z"
+ set s "#04)\#z"
binary decode uuencode $s
-} -returnCodes error -match glob -result {invalid uuencode character "z" at position 4}
+} -returnCodes error -match glob -result {invalid uuencode character "z" at position 5}
test binary-75.26 {binary decode uuencode} -body {
string length [binary decode uuencode " "]
} -result 0