summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: hobbs <hobbs> 2003-02-21 02:40:57 (GMT)
committer: hobbs <hobbs> 2003-02-21 02:40:57 (GMT)
commit: cc0e6436fbea0546291e1b2cbed4d3f8cecda514 (patch)
tree: f82ec344f5ec77eeefc3b3366748601a8ac9ee6b
parent: 2fad92d8ea334f30a6fbe1b10359e70d6370f626 (diff)
download: tcl-cc0e6436fbea0546291e1b2cbed4d3f8cecda514.zip
tcl-cc0e6436fbea0546291e1b2cbed4d3f8cecda514.tar.gz
tcl-cc0e6436fbea0546291e1b2cbed4d3f8cecda514.tar.bz2
* generic/tclEncoding.c (LoadTableEncoding):
* library/encoding/cp932.enc: Correct jis round-trip encoding
* library/encoding/euc-jp.enc: by adding 'R' type to .enc files.
* library/encoding/iso2022-jp.enc: [Patch #689341] (koboyasi, taguchi)
* library/encoding/jis0208.enc:
* library/encoding/shiftjis.enc:
* tests/encoding.test:
-rw-r--r-- ChangeLog 15
-rw-r--r-- generic/tclEncoding.c 47
-rw-r--r-- library/encoding/cp932.enc 16
-rw-r--r-- library/encoding/euc-jp.enc 7
-rw-r--r-- library/encoding/iso2022-jp.enc 2
-rw-r--r-- library/encoding/jis0208.enc 7
-rw-r--r-- library/encoding/shiftjis.enc 7
-rw-r--r-- tests/encoding.test 132
8 files changed, 230 insertions, 3 deletions
diff --git a/ChangeLog b/ChangeLog
index 90532e7..f9612aa 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2003-02-20 Jeff Hobbs <jeffh@ActiveState.com>
+
+ * generic/tclEncoding.c (LoadTableEncoding):
+ * library/encoding/cp932.enc: Correct jis round-trip encoding
+ * library/encoding/euc-jp.enc: by adding 'R' type to .enc files.
+ * library/encoding/iso2022-jp.enc: [Patch #689341] (koboyasi, taguchi)
+ * library/encoding/jis0208.enc:
+ * library/encoding/shiftjis.enc:
+ * tests/encoding.test:
+
+ * unix/tclUnixChan.c (Tcl_MakeTcpClientChannel): add
+ MakeTcpClientChannelMode that takes actual mode flags to avoid
+ hang on OS X (may be OS X bug, but patch works x-plat).
+ [Bug #689835] (steffen)
+
2003-02-20 Donal K. Fellows <fellowsd@cs.man.ac.uk>
* doc/regsub.n: Typo fix [Bug #688943]
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 93505c3..576a479 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclEncoding.c,v 1.15 2002/11/27 02:53:40 hobbs Exp $
+ * RCS: @(#) $Id: tclEncoding.c,v 1.16 2003/02/21 02:40:58 hobbs Exp $
*/
#include "tclInt.h"
@@ -1535,6 +1535,48 @@ LoadTableEncoding(interp, name, type, chan)
dataPtr->fromUnicode[hi] = emptyPage;
}
}
+ /*
+ * For trailing 'R'everse encoding, see [Patch #689341]
+ */
+ Tcl_DStringInit(&lineString);
+ do {
+ int len;
+ /* skip leading empty lines */
+ while ((len = Tcl_Gets(chan, &lineString)) == 0)
+ ;
+ if (len < 0) {
+ break;
+ }
+ line = Tcl_DStringValue(&lineString);
+ if (line[0] != 'R') {
+ break;
+ }
+ for (Tcl_DStringSetLength(&lineString, 0);
+ (len = Tcl_Gets(chan, &lineString)) >= 0;
+ Tcl_DStringSetLength(&lineString, 0)) {
+ unsigned char* p;
+ int to, from;
+ if (len < 5) {
+ continue;
+ }
+ p = (unsigned char*) Tcl_DStringValue(&lineString);
+ to = (staticHex[p[0]] << 12) + (staticHex[p[1]] << 8)
+ + (staticHex[p[2]] << 4) + staticHex[p[3]];
+ if (to == 0) {
+ continue;
+ }
+ for (p += 5, len -= 5; len >= 0 && *p; p += 5, len -= 5) {
+ from = (staticHex[p[0]] << 12) + (staticHex[p[1]] << 8)
+ + (staticHex[p[2]] << 4) + staticHex[p[3]];
+ if (from == 0) {
+ continue;
+ }
+ dataPtr->fromUnicode[from >> 8][from & 0xff] = to;
+ }
+ }
+ } while (0);
+ Tcl_DStringFree(&lineString);
+
encType.encodingName = name;
encType.toUtfProc = TableToUtfProc;
encType.fromUtfProc = TableFromUtfProc;
@@ -1615,6 +1657,9 @@ LoadEscapeEncoding(name, chan)
strncpy(est.name, argv[0], sizeof(est.name));
est.name[sizeof(est.name) - 1] = '\0';
+ /* To avoid infinite recursion in [encoding system iso2022-*]*/
+ Tcl_GetEncoding(NULL, est.name);
+
est.encodingPtr = NULL;
Tcl_DStringAppend(&escapeData, (char *) &est, sizeof(est));
}
diff --git a/library/encoding/cp932.enc b/library/encoding/cp932.enc
index 027f7d8..8da8cd6 100644
--- a/library/encoding/cp932.enc
+++ b/library/encoding/cp932.enc
@@ -783,3 +783,19 @@ FC
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
+R
+8160 301C FF5E
+8161 2016 2225
+817C 2212 FF0D
+8191 00A2 FFE0
+8192 00A3 FFE1
+81CA 00AC FFE2
+81BE 222a
+81BF 2229
+81DA 2220
+81DB 22a5
+81DF 2261
+81E0 2252
+81E3 221a
+81E6 2235
+81E7 222b
diff --git a/library/encoding/euc-jp.enc b/library/encoding/euc-jp.enc
index 9b7abb1..db56c88 100644
--- a/library/encoding/euc-jp.enc
+++ b/library/encoding/euc-jp.enc
@@ -1344,3 +1344,10 @@ F4
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
+R
+A1C1 301C FF5E
+A1C2 2016 2225
+A1DD 2212 FF0D
+A1F1 00A2 FFE0
+A1F2 00A3 FFE1
+A2CC 00AC FFE2
diff --git a/library/encoding/iso2022-jp.enc b/library/encoding/iso2022-jp.enc
index 6f43d7c..f6dabe5 100644
--- a/library/encoding/iso2022-jp.enc
+++ b/library/encoding/iso2022-jp.enc
@@ -3,7 +3,7 @@ E
name iso2022-jp
init {}
final {}
-iso8859-1 \x1b(B
+ascii \x1b(B
jis0201 \x1b(J
jis0208 \x1b$B
jis0208 \x1b$@
diff --git a/library/encoding/jis0208.enc b/library/encoding/jis0208.enc
index 7102e88..8460b69 100644
--- a/library/encoding/jis0208.enc
+++ b/library/encoding/jis0208.enc
@@ -1310,3 +1310,10 @@ FF50FF51FF52FF53FF54FF55FF56FF57FF58FF59FF5A00000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
+R
+2141 301C FF5E
+2142 2016 2225
+215D 2212 FF0D
+2171 00A2 FFE0
+2172 00A3 FFE1
+224C 00AC FFE2
diff --git a/library/encoding/shiftjis.enc b/library/encoding/shiftjis.enc
index c8d2504..140aec4 100644
--- a/library/encoding/shiftjis.enc
+++ b/library/encoding/shiftjis.enc
@@ -681,3 +681,10 @@ EA
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
+R
+8160 301C FF5E
+8161 2016 2225
+817C 2212 FF0D
+8191 00A2 FFE0
+8192 00A3 FFE1
+81CA 00AC FFE2
diff --git a/tests/encoding.test b/tests/encoding.test
index a0a76ce..2ea4463 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -8,7 +8,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: encoding.test,v 1.15 2002/07/10 11:56:44 dgp Exp $
+# RCS: @(#) $Id: encoding.test,v 1.16 2003/02/21 02:40:58 hobbs Exp $
package require tcltest 2
namespace import -force ::tcltest::*
@@ -414,6 +414,136 @@ test encoding-24.3 {EscapeFreeProc on open channels} {stdio} {
file delete [file join [temporaryDirectory] iso2022.txt]
+#
+# Begin jajp encoding round-trip conformity tests
+#
+proc foreach-jisx0208 {varName command} {
+ upvar 1 $varName code
+ foreach range {
+ {2121 217E}
+ {2221 222E}
+ {223A 2241}
+ {224A 2250}
+ {225C 226A}
+ {2272 2279}
+ {227E 227E}
+ {2330 2339}
+ {2421 2473}
+ {2521 2576}
+ {2821 2821}
+ {282C 282C}
+ {2837 2837}
+
+ {30 21 4E 7E}
+ {4F21 4F53}
+
+ {50 21 73 7E}
+ {7421 7426}
+ } {
+ if {[llength $range] == 2} {
+ # for adhoc range. simple {first last}. inclusive.
+ set first [scan [lindex $range 0] %x]
+ set last [scan [lindex $range 1] %x]
+ for {set i $first} {$i <= $last} {incr i} {
+ set code $i
+ uplevel 1 $command
+ }
+ } elseif {[llength $range] == 4} {
+ # for uniform range.
+ set h0 [scan [lindex $range 0] %x]
+ set l0 [scan [lindex $range 1] %x]
+ set hend [scan [lindex $range 2] %x]
+ set lend [scan [lindex $range 3] %x]
+ for {set hi $h0} {$hi <= $hend} {incr hi} {
+ for {set lo $l0} {$lo <= $lend} {incr lo} {
+ set code [expr {$hi << 8 | ($lo & 0xff)}]
+ uplevel 1 $command
+ }
+ }
+ } else {
+ error "really?"
+ }
+ }
+}
+proc gen-jisx0208-euc-jp {code} {
+ binary format cc \
+ [expr {($code >> 8) | 0x80}] [expr {($code & 0xff) | 0x80}]
+}
+proc gen-jisx0208-iso2022-jp {code} {
+ binary format a3cca3 \
+ "\x1b\$B" [expr {$code >> 8}] [expr {$code & 0xff}] "\x1b(B"
+}
+proc gen-jisx0208-cp932 {code} {
+ set c1 [expr {($code >> 8) | 0x80}]
+ set c2 [expr {($code & 0xff)| 0x80}]
+ if {$c1 % 2} {
+ set c1 [expr {($c1 >> 1) + ($c1 < 0xdf ? 0x31 : 0x71)}]
+ incr c2 [expr {- (0x60 + ($c2 < 0xe0))}]
+ } else {
+ set c1 [expr {($c1 >> 1) + ($c1 < 0xdf ? 0x30 : 0x70)}]
+ incr c2 -2
+ }
+ binary format cc $c1 $c2
+}
+proc channel-diff {fa fb} {
+ set diff {}
+ while {[gets $fa la] >= 0 && [gets $fb lb] >= 0} {
+ if {[string compare $la $lb] == 0} continue
+ # lappend diff $la $lb
+
+ # For more readable (easy to analyze) output.
+ set code [lindex $la 0]
+ binary scan [lindex $la 1] H* expected
+ binary scan [lindex $lb 1] H* got
+ lappend diff [list $code $expected $got]
+ }
+ set diff
+}
+
+# Create char tables.
+cd [temporaryDirectory]
+foreach enc {cp932 euc-jp iso2022-jp} {
+ set f [open $enc.chars w]
+ fconfigure $f -encoding binary
+ foreach-jisx0208 code {
+ puts $f [format "%04X %s" $code [gen-jisx0208-$enc $code]]
+ }
+ close $f
+}
+# shiftjis == cp932 for jisx0208.
+file copy -force cp932.chars shiftjis.chars
+
+set NUM 0
+foreach from {cp932 shiftjis euc-jp iso2022-jp} {
+ foreach to {cp932 shiftjis euc-jp iso2022-jp} {
+ test encoding-25.[incr NUM] "jisx0208 $from => $to" {
+ cd [temporaryDirectory]
+ set f [open $from.chars]
+ fconfigure $f -encoding $from
+ set out [open $from.$to.out w]
+ fconfigure $out -encoding $to
+ puts -nonewline $out [read $f]
+ close $out
+ close $f
+
+ # then compare $to.chars <=> $from.to.out as binary.
+ set fa [open $to.chars]
+ fconfigure $fa -encoding binary
+ set fb [open $from.$to.out]
+ fconfigure $fb -encoding binary
+ set diff [channel-diff $fa $fb]
+ close $fa
+ close $fb
+
+ # Difference should be empty.
+ set diff
+ } {}
+ }
+}
+
+eval [list file delete] [glob -directory [temporaryDirectory] *.chars *.out]
+# ===> Cut here <===
+
# EscapeFreeProc, GetTableEncoding, unilen
# are fully tested by the rest of this file