From c188371e379c3b139e3ddc924121ad3c64860c7e Mon Sep 17 00:00:00 2001 From: dkf Date: Fri, 11 Jul 2003 21:18:55 +0000 Subject: Documented and tested for the current behaviour of [binary format a] and [binary scan ? a]. This is what they've been doing all along. [Bug 735364] --- ChangeLog | 8 ++++++++ doc/binary.n | 14 ++++++++++++-- tests/binary.test | 24 +++++++++++++++++++++++- 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 77ff66d..e6e44ae 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2003-07-11 Donal K. Fellows + + * tests/binary.test (binary-46.*): Tests to help enforce the + current behaviour. + * doc/binary.n: Documented that [binary format a] and [binary scan a] + do encoding conversion by dropping high bytes, unlike the rest of + the core. [Bug 735364] + 2003-07-11 Don Porter * library/package.tcl: Corrected [pkg_mkIndex] bug reported on diff --git a/doc/binary.n b/doc/binary.n index 997963a..80a6460 100644 --- a/doc/binary.n +++ b/doc/binary.n @@ -4,7 +4,7 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" RCS: @(#) $Id: binary.n,v 1.11.2.2 2003/04/10 08:28:15 dkf Exp $ +'\" RCS: @(#) $Id: binary.n,v 1.11.2.3 2003/07/11 21:18:55 dkf Exp $ '\" .so man.macros .TH binary n 8.0 Tcl "Tcl Built-In Commands" @@ -65,6 +65,12 @@ position 0 at the beginning of the data. The type may be any one of the following characters: .IP \fBa\fR 5 Stores a character string of length \fIcount\fR in the output string. +Every character is taken as modulo 256 (i.e. the low byte of every +character is used, and the high byte discarded) so when storing +character strings not wholly expressible using the characters +\eu0000-\eu00ff, the \fBencoding convertto\fR command should be used +first if this truncation is not desired (i.e. if the characters are +not part of the ISO 8859\-1 character set.) 
If \fIarg\fR has fewer than \fIcount\fR bytes, then additional zero bytes are used to pad out the field. If \fIarg\fR is longer than the specified length, the extra characters will be ignored. If @@ -383,7 +389,11 @@ the following characters: The data is a character string of length \fIcount\fR. If \fIcount\fR is \fB*\fR, then all of the remaining bytes in \fIstring\fR will be scanned into the variable. If \fIcount\fR is omitted, then one -character will be scanned. For example, +character will be scanned. +All characters scanned will be interpreted as being in the range +\eu0000-\eu00ff so the \fBencoding convertfrom\fR command might be +needed if the string is not an ISO 8859\-1 string. +For example, .RS .CS \fBbinary scan abcde\\000fghi a6a10 var1 var2\fR diff --git a/tests/binary.test b/tests/binary.test index 967fcbc..fcc6df6 100644 --- a/tests/binary.test +++ b/tests/binary.test @@ -10,7 +10,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. 
# -# RCS: @(#) $Id: binary.test,v 1.11 2003/02/21 21:54:11 dkf Exp $ +# RCS: @(#) $Id: binary.test,v 1.11.2.1 2003/07/11 21:18:55 dkf Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest @@ -1494,6 +1494,28 @@ test binary-45.2 {Tcl_BinaryObjCmd: combined wide int handling} { set x } {66 64 0 0 0 0 127 -1 -1 -1 65 76} +test binary-46.1 {Tcl_BinaryObjCmd: handling of non-ISO8859-1 chars} { + binary format a* \u20ac +} \u00ac +test binary-46.2 {Tcl_BinaryObjCmd: handling of non-ISO8859-1 chars} { + list [binary scan [binary format a* \u20ac\u20bd] s x] $x +} {1 -16980} +test binary-46.3 {Tcl_BinaryObjCmd: handling of non-ISO8859-1 chars} { + set x {} + set y {} + set z {} + list [binary scan [binary format a* \u20ac\u20bd] aaa x y z] $x $y $z +} "2 \u00ac \u00bd {}" +test binary-46.4 {Tcl_BinaryObjCmd: handling of non-ISO8859-1 chars} { + set x [encoding convertto iso8859-15 \u20ac] + set y [binary format a* $x] + list $x $y +} "\u00a4 \u00a4" +test binary-46.5 {Tcl_BinaryObjCmd: handling of non-ISO8859-1 chars} { + set x [binary scan \u00a4 a* y] + list $x $y [encoding convertfrom iso8859-15 $y] +} "1 \u00a4 \u20ac" + # cleanup ::tcltest::cleanupTests return -- cgit v0.12