summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 8
-rw-r--r-- doc/binary.n 14
-rw-r--r-- tests/binary.test 24
3 files changed, 43 insertions, 3 deletions
diff --git a/ChangeLog b/ChangeLog
index 77ff66d..e6e44ae 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2003-07-11 Donal K. Fellows <fellowsd@cs.man.ac.uk>
+
+ * tests/binary.test (binary-46.*): Tests to help enforce the
+ current behaviour.
+ * doc/binary.n: Documented that [binary format a] and [binary scan a]
+ do encoding conversion by dropping high bytes, unlike the rest of
+ the core. [Bug 735364]
+
2003-07-11 Don Porter <dgp@users.sourceforge.net>
* library/package.tcl: Corrected [pkg_mkIndex] bug reported on
diff --git a/doc/binary.n b/doc/binary.n
index 997963a..80a6460 100644
--- a/doc/binary.n
+++ b/doc/binary.n
@@ -4,7 +4,7 @@
'\" See the file "license.terms" for information on usage and redistribution
'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
'\"
-'\" RCS: @(#) $Id: binary.n,v 1.11.2.2 2003/04/10 08:28:15 dkf Exp $
+'\" RCS: @(#) $Id: binary.n,v 1.11.2.3 2003/07/11 21:18:55 dkf Exp $
'\"
.so man.macros
.TH binary n 8.0 Tcl "Tcl Built-In Commands"
@@ -65,6 +65,12 @@ position 0 at the beginning of the data. The type may be any one of
the following characters:
.IP \fBa\fR 5
Stores a character string of length \fIcount\fR in the output string.
+Every character is taken modulo 256 (i.e. the low byte of every
+character is used, and the high byte discarded), so when storing
+character strings not wholly expressible using the characters
+\eu0000-\eu00ff, the \fBencoding convertto\fR command should be used
+first if this truncation is not desired (i.e. if the characters are
+not part of the ISO 8859\-1 character set).
If \fIarg\fR has fewer than \fIcount\fR bytes, then additional zero
bytes are used to pad out the field. If \fIarg\fR is longer than the
specified length, the extra characters will be ignored. If
@@ -383,7 +389,11 @@ the following characters:
The data is a character string of length \fIcount\fR. If \fIcount\fR
is \fB*\fR, then all of the remaining bytes in \fIstring\fR will be
scanned into the variable. If \fIcount\fR is omitted, then one
-character will be scanned. For example,
+character will be scanned.
+All characters scanned will be interpreted as being in the range
+\eu0000-\eu00ff, so the \fBencoding convertfrom\fR command might be
+needed if the string is not an ISO 8859\-1 string.
+For example,
.RS
.CS
\fBbinary scan abcde\\000fghi a6a10 var1 var2\fR
diff --git a/tests/binary.test b/tests/binary.test
index 967fcbc..fcc6df6 100644
--- a/tests/binary.test
+++ b/tests/binary.test
@@ -10,7 +10,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: binary.test,v 1.11 2003/02/21 21:54:11 dkf Exp $
+# RCS: @(#) $Id: binary.test,v 1.11.2.1 2003/07/11 21:18:55 dkf Exp $
if {[lsearch [namespace children] ::tcltest] == -1} {
package require tcltest
@@ -1494,6 +1494,28 @@ test binary-45.2 {Tcl_BinaryObjCmd: combined wide int handling} {
set x
} {66 64 0 0 0 0 127 -1 -1 -1 65 76}
+test binary-46.1 {Tcl_BinaryObjCmd: handling of non-ISO8859-1 chars} {
+ binary format a* \u20ac
+} \u00ac
+test binary-46.2 {Tcl_BinaryObjCmd: handling of non-ISO8859-1 chars} {
+ list [binary scan [binary format a* \u20ac\u20bd] s x] $x
+} {1 -16980}
+test binary-46.3 {Tcl_BinaryObjCmd: handling of non-ISO8859-1 chars} {
+ set x {}
+ set y {}
+ set z {}
+ list [binary scan [binary format a* \u20ac\u20bd] aaa x y z] $x $y $z
+} "2 \u00ac \u00bd {}"
+test binary-46.4 {Tcl_BinaryObjCmd: handling of non-ISO8859-1 chars} {
+ set x [encoding convertto iso8859-15 \u20ac]
+ set y [binary format a* $x]
+ list $x $y
+} "\u00a4 \u00a4"
+test binary-46.5 {Tcl_BinaryObjCmd: handling of non-ISO8859-1 chars} {
+ set x [binary scan \u00a4 a* y]
+ list $x $y [encoding convertfrom iso8859-15 $y]
+} "1 \u00a4 \u20ac"
+
# cleanup
::tcltest::cleanupTests
return