From 0f1f636a50239c2409172a34da5fa1b1f45bf9c4 Mon Sep 17 00:00:00 2001 From: dkf Date: Sat, 14 Sep 2013 18:25:45 +0000 Subject: And the decoder too. --- doc/binary.n | 2 +- generic/tclBinary.c | 104 +++++++++++++++++++++++++++++++++++++++++++--------- tests/binary.test | 41 +++++++++++---------- 3 files changed, 109 insertions(+), 38 deletions(-) diff --git a/doc/binary.n b/doc/binary.n index 95be36e..b301f4d 100644 --- a/doc/binary.n +++ b/doc/binary.n @@ -107,7 +107,7 @@ During decoding, the following options are supported: .TP \fB\-strict\fR . -Instructs the decoder to throw an error if it encounters whitespace +Instructs the decoder to throw an error if it encounters unexpected whitespace characters. Otherwise it ignores them. .PP Note that neither the encoder nor the decoder handle the header and footer of diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 71da309..c0dd926 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -2690,8 +2690,8 @@ BinaryDecodeUu( Tcl_Obj *resultObj = NULL; unsigned char *data, *datastart, *dataend; unsigned char *begin, *cursor; - int i, index, size, count = 0, cut = 0, strict = 0; - char c; + int i, index, size, count = 0, strict = 0, lineLen; + unsigned char c; enum {OPT_STRICT }; static const char *const optStrings[] = { "-strict", NULL }; @@ -2717,44 +2717,112 @@ BinaryDecodeUu( dataend = data + count; size = ((count + 3) & ~3) * 3 / 4; begin = cursor = Tcl_SetByteArrayLength(resultObj, size); + lineLen = -1; + + /* + * The decoding loop. First, we get the length of line (strictly, the + * number of data bytes we expect to generate from the line) we're + * processing this time round if it is not already known (i.e., when the + * lineLen variable is set to the magic value, -1). + */ + while (data < dataend) { char d[4] = {0, 0, 0, 0}; + if (lineLen < 0) { + c = *data++; + if (c < 32 || c > 96) { + if (strict || !isspace(c)) { + goto badUu; + } + i--; + continue; + } + lineLen = (c - 32) & 0x3f; + } + + /* + * Now we read a four-character grouping. + */ + for (i=0 ; i<4 ; i++) { if (data < dataend) { d[i] = c = *data++; - if (c < 33 || c > 96) { - if (strict || !isspace(UCHAR(c))) { - goto badUu; + if (c < 32 || c > 96) { + if (strict) { + if (!isspace(c)) { + goto badUu; + } else if (c == '\n') { + goto shortUu; + } } i--; continue; } - } else { - cut++; } } - if (cut > 3) { - cut = 3; + + /* + * Translate that grouping into (up to) three binary bytes output. + */ + + if (lineLen > 0) { + *cursor++ = (((d[0] - 0x20) & 0x3f) << 2) + | (((d[1] - 0x20) & 0x3f) >> 4); + if (--lineLen > 0) { + *cursor++ = (((d[1] - 0x20) & 0x3f) << 4) + | (((d[2] - 0x20) & 0x3f) >> 2); + if (--lineLen > 0) { + *cursor++ = (((d[2] - 0x20) & 0x3f) << 6) + | (((d[3] - 0x20) & 0x3f)); + lineLen--; + } + } + } + + /* + * If we've reached the end of the line, skip until we process a + * newline. + */ + + if (lineLen == 0 && data < dataend) { + lineLen = -1; + do { + c = *data++; + if (c == '\n') { + break; + } else if (c >= 32 && c <= 96) { + data--; + break; + } else if (strict || !isspace(c)) { + goto badUu; + } + } while (data < dataend); } - *cursor++ = (((d[0] - 0x20) & 0x3f) << 2) - | (((d[1] - 0x20) & 0x3f) >> 4); - *cursor++ = (((d[1] - 0x20) & 0x3f) << 4) - | (((d[2] - 0x20) & 0x3f) >> 2); - *cursor++ = (((d[2] - 0x20) & 0x3f) << 6) - | (((d[3] - 0x20) & 0x3f)); } - if (cut > size) { - cut = size; + + /* + * Sanity check, clean up and finish. + */ + + if (lineLen > 0 && strict) { + goto shortUu; } - Tcl_SetByteArrayLength(resultObj, cursor - begin - cut); + Tcl_SetByteArrayLength(resultObj, cursor - begin); Tcl_SetObjResult(interp, resultObj); return TCL_OK; + shortUu: + Tcl_SetObjResult(interp, Tcl_ObjPrintf("short uuencode data")); + Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "SHORT", NULL); + TclDecrRefCount(resultObj); + return TCL_ERROR; + badUu: Tcl_SetObjResult(interp, Tcl_ObjPrintf( "invalid uuencode character \"%c\" at position %d", c, (int) (data - datastart - 1))); + Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "INVALID", NULL); TclDecrRefCount(resultObj); return TCL_ERROR; } diff --git a/tests/binary.test b/tests/binary.test index 4b71b77..607a070 100644 --- a/tests/binary.test +++ b/tests/binary.test @@ -2755,72 +2755,75 @@ test binary-75.1 {binary decode uuencode} -body { binary decode uuencode } -returnCodes error -match glob -result "wrong # args: *" test binary-75.2 {binary decode uuencode} -body { - binary decode uuencode 86)C + binary decode uuencode "#86)C\n" } -result {abc} test binary-75.3 {binary decode uuencode} -body { binary decode uuencode {} } -result {} +test binary-75.3.1 {binary decode uuencode} -body { + binary decode uuencode `\n +} -result {} test binary-75.4 {binary decode uuencode} -body { - binary decode uuencode [string repeat "86)C" 20] + binary decode uuencode "M[string repeat 86)C 15]\n/[string repeat 86)C 5]\n" } -result [string repeat abc 20] test binary-75.5 {binary decode uuencode} -body { - binary decode uuencode "``\$\"`P0``0(#" + binary decode uuencode ")``\$\"`P0``0(#" } -result "\0\1\2\3\4\0\1\2\3" test binary-75.6 {binary decode uuencode} -body { - string length [binary decode uuencode {`}] + string length [binary decode uuencode "`\n"] } -result 0 test binary-75.7 {binary decode uuencode} -body { - string length [binary decode uuencode {``}] + string length [binary decode uuencode "!`\n"] } -result 1 test binary-75.8 {binary decode uuencode} -body { - string length [binary decode uuencode {```}] + string length [binary decode uuencode "\"``\n"] } -result 2 test binary-75.9 {binary decode uuencode} -body { - string length [binary decode uuencode {````}] + string length [binary decode uuencode "#```\n"] } -result 3 test binary-75.10 {binary decode uuencode} -body { - set s "[string repeat 86)C 10]\n[string repeat 86)C 10]" + set s ">[string repeat 86)C 10]\n>[string repeat 86)C 10]" binary decode uuencode $s } -result [string repeat abc 20] test binary-75.11 {binary decode uuencode} -body { - set s "[string repeat 86)C 10]\n [string repeat 86)C 10]" + set s ">[string repeat 86)C 10]\n\t>\t[string repeat 86)C 10]\r" binary decode uuencode $s } -result [string repeat abc 20] test binary-75.12 {binary decode uuencode} -body { binary decode uuencode -strict "|86)C" } -returnCodes error -match glob -result {invalid uuencode character "|" at position 0} test binary-75.13 {binary decode uuencode} -body { - set s "[string repeat 86)C 10]|[string repeat 86)C 10]" + set s ">[string repeat 86)C 10]|[string repeat 86)C 10]" binary decode uuencode -strict $s -} -returnCodes error -match glob -result {invalid uuencode character "|" at position 40} +} -returnCodes error -match glob -result {invalid uuencode character "|" at position 41} test binary-75.14 {binary decode uuencode} -body { - set s "[string repeat 86)C 10]\n [string repeat 86)C 10]" + set s ">[string repeat 86)C 10]\na[string repeat 86)C 10]" binary decode uuencode -strict $s } -returnCodes error -match glob -result {invalid uuencode character *} test binary-75.20 {binary decode uuencode} -body { - set r [binary decode uuencode 8] + set r [binary decode uuencode " 8"] list [string length $r] $r } -result {0 {}} test binary-75.21 {binary decode uuencode} -body { - set r [binary decode uuencode 86] + set r [binary decode uuencode "!86"] list [string length $r] $r } -result {1 a} test binary-75.22 {binary decode uuencode} -body { - set r [binary decode uuencode 86)] + set r [binary decode uuencode "\"86)"] list [string length $r] $r } -result {2 ab} test binary-75.23 {binary decode uuencode} -body { - set r [binary decode uuencode 86)C] + set r [binary decode uuencode "#86)C"] list [string length $r] $r } -result {3 abc} test binary-75.24 {binary decode uuencode} -body { - set s "04)\# " + set s "#04)\# " binary decode uuencode $s } -result ABC test binary-75.25 {binary decode uuencode} -body { - set s "04)\#z" + set s "#04)\#z" binary decode uuencode $s -} -returnCodes error -match glob -result {invalid uuencode character "z" at position 4} +} -returnCodes error -match glob -result {invalid uuencode character "z" at position 5} test binary-75.26 {binary decode uuencode} -body { string length [binary decode uuencode " "] } -result 0 -- cgit v0.12