summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 17
-rw-r--r-- generic/tclIO.c 15
-rw-r--r-- tests/encoding.test 92
3 files changed, 100 insertions, 24 deletions
diff --git a/ChangeLog b/ChangeLog
index b357462..be0ce13 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,10 +1,25 @@
+2002-03-01 Jeff Hobbs <jeffh@ActiveState.com>
+
+ * library/encoding/iso2022-jp.enc:
+ * library/encoding/iso2022.enc:
+ * tools/encoding/iso2022-jp.esc:
+ * tools/encoding/iso2022.esc: gave <ESC>$B precedence over <ESC>$@,
+ based on comments (point 1) in [Bug #219283] (rfc 1468)
+
+ * tests/encoding.test: added encoding-23.* tests
+ * generic/tclIO.c (FilterInputBytes): clear the TCL_ENCODING_START
+ flag in the ChannelState when using 'gets'. [Bug #523988]
+ Also reduced the value of ENCODING_LINESIZE from 30 to 20 as this
+ seems to improve the performance of 'gets' according to tclbench.
+
2002-02-28 Jeff Hobbs <jeffh@ActiveState.com>
* generic/tclCmdMZ.c (TraceCommandProc): ensure that TraceCommandInfo
structure was also deleted when a command was deleted to prevent a
mem leak.
- * generic/tclBasic.c (Tcl_CreateObjTrace): set tracePtr->flags correctly.
+ * generic/tclBasic.c (Tcl_CreateObjTrace): set tracePtr->flags
+ correctly.
* generic/tclTimer.c (TimerExitProc): remove remaining events in
tls on thread exit.
diff --git a/generic/tclIO.c b/generic/tclIO.c
index df8a8c2..cefab6c 100644
--- a/generic/tclIO.c
+++ b/generic/tclIO.c
@@ -10,7 +10,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclIO.c,v 1.52 2002/02/15 14:28:49 dkf Exp $
+ * RCS: @(#) $Id: tclIO.c,v 1.53 2002/03/02 04:55:31 hobbs Exp $
*/
#include "tclInt.h"
@@ -3777,7 +3777,7 @@ FilterInputBytes(chanPtr, gsPtr)
char *dst;
int offset, toRead, dstNeeded, spaceLeft, result, rawLen, length;
Tcl_Obj *objPtr;
-#define ENCODING_LINESIZE 30 /* Lower bound on how many bytes to convert
+#define ENCODING_LINESIZE 20 /* Lower bound on how many bytes to convert
* at a time. Since we don't know a priori
* how many bytes of storage this many source
* bytes will use, we actually need at least
@@ -3806,7 +3806,7 @@ FilterInputBytes(chanPtr, gsPtr)
* seen EOL. Need to read more bytes from the channel device.
* Side effect is to allocate another channel buffer.
*/
-
+
read:
if (statePtr->flags & CHANNEL_BLOCKED) {
if (statePtr->flags & CHANNEL_NONBLOCKING) {
@@ -3859,7 +3859,14 @@ FilterInputBytes(chanPtr, gsPtr)
result = Tcl_ExternalToUtf(NULL, gsPtr->encoding, raw, rawLen,
statePtr->inputEncodingFlags, &statePtr->inputEncodingState,
dst, spaceLeft, &gsPtr->rawRead, &gsPtr->bytesWrote,
- &gsPtr->charsWrote);
+ &gsPtr->charsWrote);
+
+ /*
+ * Make sure that when we go through 'gets' we still clear the
+ * TCL_ENCODING_START flag after converting. [Bug #523988]
+ */
+ statePtr->inputEncodingFlags &= ~TCL_ENCODING_START;
+
if (result == TCL_CONVERT_MULTIBYTE) {
/*
* The last few bytes in this channel buffer were the start of a
diff --git a/tests/encoding.test b/tests/encoding.test
index 21c53b0..bfe7e3c 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -8,7 +8,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: encoding.test,v 1.8 2000/04/10 17:18:58 ericm Exp $
+# RCS: @(#) $Id: encoding.test,v 1.9 2002/03/02 04:55:31 hobbs Exp $
if {[lsearch [namespace children] ::tcltest] == -1} {
package require tcltest
@@ -196,6 +196,18 @@ test encoding-10.1 {Tcl_UtfToExternal} {
set x
} "ab\x8c\xc1g"
+proc viewable {str} {
+ set res ""
+ foreach c [split $str {}] {
+ if {[string is print $c]} {
+ append res $c
+ } else {
+ append res "\\u[format %4.4x [scan $c %c]]"
+ }
+ }
+ return "$str ($res)"
+}
+
test encoding-11.1 {LoadEncodingFile: unknown encoding} {testencoding} {
set system [encoding system]
set path [testencoding path]
@@ -216,8 +228,11 @@ test encoding-11.4 {LoadEncodingFile: multi-byte} {
encoding convertfrom shiftjis \x8c\xc1
} "\u4e4e"
test encoding-11.5 {LoadEncodingFile: escape file} {
- encoding convertto iso2022 \u4e4e
-} "\x1b(B\x1b$@8C"
+ viewable [encoding convertto iso2022 \u4e4e]
+} [viewable "\x1b(B\x1b\$B8C"]
+test encoding-11.5.1 {LoadEncodingFile: escape file} {
+ viewable [encoding convertto iso2022-jp \u4e4e]
+} [viewable "\x1b(B\x1b\$B8C"]
test encoding-11.6 {LoadEncodingFile: invalid file} {testencoding} {
set system [encoding system]
set path [testencoding path]
@@ -262,8 +277,8 @@ test encoding-12.5 {LoadTableEncoding: symbol encoding} {
} "\x67\x67\u3b3"
test encoding-13.1 {LoadEscapeTable} {
- set x [encoding convertto iso2022 ab\u4e4e\u68d9g]
-} "\x1b(Bab\x1b$@8C\x1b$\(DD%\x1b(Bg"
+ viewable [set x [encoding convertto iso2022 ab\u4e4e\u68d9g]]
+} [viewable "\x1b(Bab\x1b\$B8C\x1b\$\(DD%\x1b(Bg"]
test encoding-14.1 {BinaryProc} {
encoding convertto identity \x12\x34\x56\xff\x69
@@ -295,23 +310,62 @@ test encoding-21.1 {EscapeToUtfProc} {
test encoding-22.1 {EscapeFromUtfProc} {
} {}
+set ::iso2022encData "\u001b\$B;d\$I\$b\$G\$O!\"%A%C%W\$49XF~;~\$K\$4EPO?\$\$\$?\$@\$\$\$?\$4=;=j\$r%-%c%C%7%e%\"%&%H\$N:]\$N\u001b(B
+\u001b\$B>.@Z<jAwIU@h\$H\$7\$F;HMQ\$7\$F\$*\$j\$^\$9!#62\$lF~\$j\$^\$9\$,!\"@5\$7\$\$=;=j\$r\$4EPO?\$7\$J\$*\u001b(B
+\u001b\$B\$*4j\$\$\$\$\$?\$7\$^\$9!#\$^\$?!\"BgJQ62=L\$G\$9\$,!\"=;=jJQ99\$N\$\"\$H!\"F|K\\8l%5!<%S%9It!J\u001b(B
+casino_japanese@___.com \u001b\$B!K\$^\$G\$4=;=jJQ99:Q\$NO\"Mm\$r\$\$\$?\$@\$1\$J\$\$\$G\u001b(B
+\u001b\$B\$7\$g\$&\$+!)\u001b(B"
+
+set ::iso2022uniData [encoding convertfrom iso2022-jp $::iso2022encData]
+set ::iso2022uniData2 "\u79c1\u3069\u3082\u3067\u306f\u3001\u30c1\u30c3\u30d7\u3054\u8cfc\u5165\u6642\u306b\u3054\u767b\u9332\u3044\u305f\u3060\u3044\u305f\u3054\u4f4f\u6240\u3092\u30ad\u30e3\u30c3\u30b7\u30e5\u30a2\u30a6\u30c8\u306e\u969b\u306e
+\u5c0f\u5207\u624b\u9001\u4ed8\u5148\u3068\u3057\u3066\u4f7f\u7528\u3057\u3066\u304a\u308a\u307e\u3059\u3002\u6050\u308c\u5165\u308a\u307e\u3059\u304c\u3001\u6b63\u3057\u3044\u4f4f\u6240\u3092\u3054\u767b\u9332\u3057\u306a\u304a
+\u304a\u9858\u3044\u3044\u305f\u3057\u307e\u3059\u3002\u307e\u305f\u3001\u5927\u5909\u6050\u7e2e\u3067\u3059\u304c\u3001\u4f4f\u6240\u5909\u66f4\u306e\u3042\u3068\u3001\u65e5\u672c\u8a9e\u30b5\u30fc\u30d3\u30b9\u90e8\uff08
+\u0063\u0061\u0073\u0069\u006e\u006f\u005f\u006a\u0061\u0070\u0061\u006e\u0065\u0073\u0065\u0040\u005f\u005f\u005f\u002e\u0063\u006f\u006d\u0020\uff09\u307e\u3067\u3054\u4f4f\u6240\u5909\u66f4\u6e08\u306e\u9023\u7d61\u3092\u3044\u305f\u3060\u3051\u306a\u3044\u3067
+\u3057\u3087\u3046\u304b\uff1f"
+
+set fid [open iso2022.txt w]
+fconfigure $fid -encoding binary
+puts -nonewline $fid $::iso2022encData
+close $fid
+
+test encoding-23.1 {iso2022-jp escape encoding test} {
+ string equal $::iso2022uniData $::iso2022uniData2
+} 1
+test encoding-23.2 {iso2022-jp escape encoding test} {
+ # This checks that 'gets' isn't resetting the encoding inappropriately.
+ # [Bug #523988]
+ set fid [open iso2022.txt r]
+ fconfigure $fid -encoding iso2022-jp
+ set out ""
+ set count 0
+ while {[set num [gets $fid line]] >= 0} {
+ if {$count} {
+ incr count 1 ; # account for newline
+ append out \n
+ }
+ append out $line
+ incr count $num
+ }
+ close $fid
+ if {[string compare $::iso2022uniData $out]} {
+ return -code error "iso2022-jp read in doesn't match original"
+ }
+ list $count $out
+} [list [string length $::iso2022uniData] $::iso2022uniData]
+test encoding-23.3 {iso2022-jp escape encoding test} {
+ # read $fid <size> reads size in chars, not raw bytes.
+ set fid [open iso2022.txt r]
+ fconfigure $fid -encoding iso2022-jp
+ set data [read $fid 50]
+ close $fid
+ set data
+} [string range $::iso2022uniData 0 49] ; # 0 .. 49 inclusive == 50
+
+::tcltest::removeFile iso2022.txt
+
# EscapeFreeProc, GetTableEncoding, unilen
# are fully tested by the rest of this file
# cleanup
::tcltest::cleanupTests
return
-
-
-
-
-
-
-
-
-
-
-
-
-
-