diff options
author | dgp <dgp@users.sourceforge.net> | 2020-04-02 22:46:39 (GMT) |
---|---|---|
committer | dgp <dgp@users.sourceforge.net> | 2020-04-02 22:46:39 (GMT) |
commit | 8cc9a5a6c472395264ef81e6a3c3994a42442280 (patch) | |
tree | 69ac8b72218b7920997029ef40f28ee1716fccca | |
parent | f426afea86185f404c7193538a853bafbad1e7c9 (diff) | |
download | tcl-8cc9a5a6c472395264ef81e6a3c3994a42442280.zip tcl-8cc9a5a6c472395264ef81e6a3c3994a42442280.tar.gz tcl-8cc9a5a6c472395264ef81e6a3c3994a42442280.tar.bz2 |
Use the new utility routine TclUtfToUCS4 so that offending characters that
require surrogates are reported correctly in error messages.
-rw-r--r-- | generic/tclBinary.c | 36 |
-rw-r--r-- | tests/binary.test | 14 |
2 files changed, 25 insertions, 25 deletions
diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 6b70fde..2119043 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -470,16 +470,16 @@ TclGetBytesFromObj( if (irPtr == NULL) { if (interp) { const char *nonbyte; - Tcl_UniChar ch; + int ucs4; irPtr = TclFetchIntRep(objPtr, &tclByteArrayType); baPtr = GET_BYTEARRAY(irPtr); nonbyte = Tcl_UtfAtIndex(Tcl_GetString(objPtr), baPtr->bad); - Tcl_UtfToUniChar(nonbyte, &ch); + TclUtfToUCS4(nonbyte, &ucs4); Tcl_SetObjResult(interp, Tcl_ObjPrintf( "expected byte sequence but character %d " - "was '%1s' (U+%04X)", baPtr->bad, nonbyte, ch)); + "was '%1s' (U+%06X)", baPtr->bad, nonbyte, ucs4)); Tcl_SetErrorCode(interp, "TCL", "VALUE", "BYTES", NULL); } return NULL; @@ -2595,7 +2595,7 @@ BinaryDecodeHex( unsigned char *data, *datastart, *dataend; unsigned char *begin, *cursor, c; int i, index, value, size, pure = 1, count = 0, cut = 0, strict = 0; - Tcl_UniChar ch = 0; + int ucs4; enum {OPT_STRICT }; static const char *const optStrings[] = { "-strict", NULL }; @@ -2667,14 +2667,14 @@ BinaryDecodeHex( badChar: if (pure) { - ch = c; + ucs4 = c; } else { - TclUtfToUniChar((const char *)(data - 1), &ch); + TclUtfToUCS4((const char *)(data - 1), &ucs4); } TclDecrRefCount(resultObj); Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "invalid hexadecimal digit \"%c\" at position %d", - ch, (int) (data - datastart - 1))); + "invalid hexadecimal digit \"%c\" (U+%06X) at position %d", + ucs4, ucs4, (int) (data - datastart - 1))); Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "INVALID", NULL); return TCL_ERROR; } @@ -2990,7 +2990,7 @@ BinaryDecodeUu( unsigned char *begin, *cursor; int i, index, size, pure = 1, count = 0, strict = 0, lineLen; unsigned char c; - Tcl_UniChar ch = 0; + int ucs4; enum { OPT_STRICT }; static const char *const optStrings[] = { "-strict", NULL }; @@ -3123,13 +3123,13 @@ BinaryDecodeUu( badUu: if (pure) { - ch = c; + ucs4 = c; } else { - TclUtfToUniChar((const char *)(data - 1), &ch); + 
TclUtfToUCS4((const char *)(data - 1), &ucs4); } Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "invalid uuencode character \"%c\" at position %d", - ch, (int) (data - datastart - 1))); + "invalid uuencode character \"%c\" (U+%06X) at position %d", + ucs4, ucs4, (int) (data - datastart - 1))); Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "INVALID", NULL); TclDecrRefCount(resultObj); return TCL_ERROR; @@ -3164,7 +3164,7 @@ BinaryDecode64( unsigned char *cursor = NULL; int pure = 1, strict = 0; int i, index, size, cut = 0, count = 0; - Tcl_UniChar ch = 0; + int ucs4; enum { OPT_STRICT }; static const char *const optStrings[] = { "-strict", NULL }; @@ -3292,19 +3292,19 @@ BinaryDecode64( bad64: if (pure) { - ch = c; + ucs4 = c; } else { /* The decoder is byte-oriented. If we saw a byte that's not a * valid member of the base64 alphabet, it could be the lead byte * of a multi-byte character. */ /* Safe because we know data is NUL-terminated */ - TclUtfToUniChar((const char *)(data - 1), &ch); + TclUtfToUCS4((const char *)(data - 1), &ucs4); } Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "invalid base64 character \"%c\" at position %d", ch, - (int) (data - datastart - 1))); + "invalid base64 character \"%c\" (U+%06X) at position %d", + ucs4, ucs4, (int) (data - datastart - 1))); Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "INVALID", NULL); TclDecrRefCount(resultObj); return TCL_ERROR; diff --git a/tests/binary.test b/tests/binary.test index cd3f642..a777b2a 100644 --- a/tests/binary.test +++ b/tests/binary.test @@ -2510,9 +2510,9 @@ test binary-71.6 {binary decode hex} -body { test binary-71.7 {binary decode hex} -body { binary decode hex "61\n\n\n61" } -result {aa} -test binary-71.8 {binary decode hex} -body { +test binary-71.8 {binary decode hex} -match glob -body { binary decode hex -strict "61 61" -} -returnCodes error -result {invalid hexadecimal digit " " at position 2} +} -returnCodes error -result {invalid hexadecimal digit " " * at position 2} test 
binary-71.9 {binary decode hex} -body { set r [binary decode hex "6"] list [string length $r] $r @@ -2674,11 +2674,11 @@ test binary-73.11 {binary decode base64} -body { } -result [string repeat abc 20] test binary-73.12 {binary decode base64} -body { binary decode base64 -strict ":YWJj" -} -returnCodes error -match glob -result {invalid base64 character ":" at position 0} +} -returnCodes error -match glob -result {invalid base64 character ":" * at position 0} test binary-73.13 {binary decode base64} -body { set s "[string repeat YWJj 10]:[string repeat YWJj 10]" binary decode base64 -strict $s -} -returnCodes error -match glob -result {invalid base64 character ":" at position 40} +} -returnCodes error -match glob -result {invalid base64 character ":" * at position 40} test binary-73.14 {binary decode base64} -body { set s "[string repeat YWJj 10]\n [string repeat YWJj 10]" binary decode base64 -strict $s @@ -2855,11 +2855,11 @@ test binary-75.11 {binary decode uuencode} -body { } -result [string repeat abc 20] test binary-75.12 {binary decode uuencode} -body { binary decode uuencode -strict "|86)C" -} -returnCodes error -match glob -result {invalid uuencode character "|" at position 0} +} -returnCodes error -match glob -result {invalid uuencode character "|" * at position 0} test binary-75.13 {binary decode uuencode} -body { set s ">[string repeat 86)C 10]|[string repeat 86)C 10]" binary decode uuencode -strict $s -} -returnCodes error -match glob -result {invalid uuencode character "|" at position 41} +} -returnCodes error -match glob -result {invalid uuencode character "|" * at position 41} test binary-75.14 {binary decode uuencode} -body { set s ">[string repeat 86)C 10]\na[string repeat 86)C 10]" binary decode uuencode -strict $s @@ -2887,7 +2887,7 @@ test binary-75.24 {binary decode uuencode} -body { test binary-75.25 {binary decode uuencode} -body { set s "#04)\#z" binary decode uuencode $s -} -returnCodes error -match glob -result {invalid uuencode 
character "z" at position 5} +} -returnCodes error -match glob -result {invalid uuencode character "z" * at position 5} test binary-75.26 {binary decode uuencode} -body { string length [binary decode uuencode " "] } -result 0 |