diff options
author | hobbs <hobbs> | 2003-02-21 02:40:57 (GMT) |
---|---|---|
committer | hobbs <hobbs> | 2003-02-21 02:40:57 (GMT) |
commit | cc0e6436fbea0546291e1b2cbed4d3f8cecda514 (patch) | |
tree | f82ec344f5ec77eeefc3b3366748601a8ac9ee6b | |
parent | 2fad92d8ea334f30a6fbe1b10359e70d6370f626 (diff) | |
download | tcl-cc0e6436fbea0546291e1b2cbed4d3f8cecda514.zip tcl-cc0e6436fbea0546291e1b2cbed4d3f8cecda514.tar.gz tcl-cc0e6436fbea0546291e1b2cbed4d3f8cecda514.tar.bz2 |
* generic/tclEncoding.c (LoadTableEncoding):
* library/encoding/cp932.enc: Correct jis round-trip encoding
* library/encoding/euc-jp.enc: by adding 'R' type to .enc files.
* library/encoding/iso2022-jp.enc: [Patch #689341] (koboyasi, taguchi)
* library/encoding/jis0208.enc:
* library/encoding/shiftjis.enc:
* tests/encoding.test:
-rw-r--r-- | ChangeLog | 15 | ||||
-rw-r--r-- | generic/tclEncoding.c | 47 | ||||
-rw-r--r-- | library/encoding/cp932.enc | 16 | ||||
-rw-r--r-- | library/encoding/euc-jp.enc | 7 | ||||
-rw-r--r-- | library/encoding/iso2022-jp.enc | 2 | ||||
-rw-r--r-- | library/encoding/jis0208.enc | 7 | ||||
-rw-r--r-- | library/encoding/shiftjis.enc | 7 | ||||
-rw-r--r-- | tests/encoding.test | 132 |
8 files changed, 230 insertions, 3 deletions
@@ -1,3 +1,18 @@ +2003-02-20 Jeff Hobbs <jeffh@ActiveState.com> + + * generic/tclEncoding.c (LoadTableEncoding): + * library/encoding/cp932.enc: Correct jis round-trip encoding + * library/encoding/euc-jp.enc: by adding 'R' type to .enc files. + * library/encoding/iso2022-jp.enc: [Patch #689341] (koboyasi, taguchi) + * library/encoding/jis0208.enc: + * library/encoding/shiftjis.enc: + * tests/encoding.test: + + * unix/tclUnixChan.c (Tcl_MakeTcpClientChannel): add + MakeTcpClientChannelMode that takes actual mode flags to avoid + hang on OS X (may be OS X bug, but patch works x-plat). + [Bug #689835] (steffen) + 2003-02-20 Donal K. Fellows <fellowsd@cs.man.ac.uk> * doc/regsub.n: Typo fix [Bug #688943] diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 93505c3..576a479 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclEncoding.c,v 1.15 2002/11/27 02:53:40 hobbs Exp $ + * RCS: @(#) $Id: tclEncoding.c,v 1.16 2003/02/21 02:40:58 hobbs Exp $ */ #include "tclInt.h" @@ -1535,6 +1535,48 @@ LoadTableEncoding(interp, name, type, chan) dataPtr->fromUnicode[hi] = emptyPage; } } + /* + * For trailing 'R'everse encoding, see [Patch #689341] + */ + Tcl_DStringInit(&lineString); + do { + int len; + /* skip leading empty lines */ + while ((len = Tcl_Gets(chan, &lineString)) == 0) + ; + if (len < 0) { + break; + } + line = Tcl_DStringValue(&lineString); + if (line[0] != 'R') { + break; + } + for (Tcl_DStringSetLength(&lineString, 0); + (len = Tcl_Gets(chan, &lineString)) >= 0; + Tcl_DStringSetLength(&lineString, 0)) { + unsigned char* p; + int to, from; + if (len < 5) { + continue; + } + p = (unsigned char*) Tcl_DStringValue(&lineString); + to = (staticHex[p[0]] << 12) + (staticHex[p[1]] << 8) + + (staticHex[p[2]] << 4) + staticHex[p[3]]; + if (to == 0) { + continue; + } + for (p += 5, len -= 5; len >= 0 && *p; p += 5, len -= 5) { + from = (staticHex[p[0]] << 12) + (staticHex[p[1]] << 8) + + (staticHex[p[2]] << 4) + staticHex[p[3]]; + if (from == 0) { + continue; + } + dataPtr->fromUnicode[from >> 8][from & 0xff] = to; + } + } + } while (0); + Tcl_DStringFree(&lineString); + encType.encodingName = name; encType.toUtfProc = TableToUtfProc; encType.fromUtfProc = TableFromUtfProc; @@ -1615,6 +1657,9 @@ LoadEscapeEncoding(name, chan) strncpy(est.name, argv[0], sizeof(est.name)); est.name[sizeof(est.name) - 1] = '\0'; + /* To avoid infinite recursion in [encoding system iso2022-*]*/ + Tcl_GetEncoding(NULL, est.name); + est.encodingPtr = NULL; Tcl_DStringAppend(&escapeData, (char *) &est, sizeof(est)); } diff --git a/library/encoding/cp932.enc b/library/encoding/cp932.enc index 027f7d8..8da8cd6 100644 --- a/library/encoding/cp932.enc +++ b/library/encoding/cp932.enc @@ -783,3 +783,19 @@ FC 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 +R +8160 301C FF5E +8161 2016 2225 +817C 2212 FF0D +8191 00A2 FFE0 +8192 00A3 FFE1 +81CA 00AC FFE2 +81BE 222a +81BF 2229 +81DA 2220 +81DB 22a5 +81DF 2261 +81E0 2252 +81E3 221a +81E6 2235 +81E7 222b diff --git a/library/encoding/euc-jp.enc b/library/encoding/euc-jp.enc index 9b7abb1..db56c88 100644 --- a/library/encoding/euc-jp.enc +++ b/library/encoding/euc-jp.enc @@ -1344,3 +1344,10 @@ F4 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 +R +A1C1 301C FF5E +A1C2 2016 2225 +A1DD 2212 FF0D +A1F1 00A2 FFE0 +A1F2 00A3 FFE1 +A2CC 00AC FFE2 diff --git a/library/encoding/iso2022-jp.enc b/library/encoding/iso2022-jp.enc index 6f43d7c..f6dabe5 100644 --- a/library/encoding/iso2022-jp.enc +++ b/library/encoding/iso2022-jp.enc @@ -3,7 +3,7 @@ E name iso2022-jp init {} final {} -iso8859-1 \x1b(B +ascii \x1b(B jis0201 \x1b(J jis0208 \x1b$B jis0208 \x1b$@ diff --git a/library/encoding/jis0208.enc b/library/encoding/jis0208.enc index 7102e88..8460b69 100644 --- a/library/encoding/jis0208.enc +++ b/library/encoding/jis0208.enc @@ -1310,3 +1310,10 @@ FF50FF51FF52FF53FF54FF55FF56FF57FF58FF59FF5A00000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 +R +2141 301C FF5E +2142 2016 2225 +215D 2212 FF0D +2171 00A2 FFE0 +2172 00A3 FFE1 +224C 00AC FFE2 diff --git a/library/encoding/shiftjis.enc b/library/encoding/shiftjis.enc index c8d2504..140aec4 100644 --- a/library/encoding/shiftjis.enc +++ b/library/encoding/shiftjis.enc @@ -681,3 +681,10 @@ EA 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 +R +8160 301C FF5E +8161 2016 2225 +817C 2212 FF0D +8191 00A2 FFE0 +8192 00A3 FFE1 +81CA 00AC FFE2 diff --git a/tests/encoding.test b/tests/encoding.test index a0a76ce..2ea4463 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -8,7 +8,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: encoding.test,v 1.15 2002/07/10 11:56:44 dgp Exp $ +# RCS: @(#) $Id: encoding.test,v 1.16 2003/02/21 02:40:58 hobbs Exp $ package require tcltest 2 namespace import -force ::tcltest::* @@ -414,6 +414,136 @@ test encoding-24.3 {EscapeFreeProc on open channels} {stdio} { file delete [file join [temporaryDirectory] iso2022.txt] +# +# Begin jajp encoding round-trip conformity tests +# +proc foreach-jisx0208 {varName command} { + upvar 1 $varName code + foreach range { + {2121 217E} + {2221 222E} + {223A 2241} + {224A 2250} + {225C 226A} + {2272 2279} + {227E 227E} + {2330 2339} + {2421 2473} + {2521 2576} + {2821 2821} + {282C 282C} + {2837 2837} + + {30 21 4E 7E} + {4F21 4F53} + + {50 21 73 7E} + {7421 7426} + } { + if {[llength $range] == 2} { + # for adhoc range. simple {first last}. inclusive. + set first [scan [lindex $range 0] %x] + set last [scan [lindex $range 1] %x] + for {set i $first} {$i <= $last} {incr i} { + set code $i + uplevel 1 $command + } + } elseif {[llength $range] == 4} { + # for uniform range. + set h0 [scan [lindex $range 0] %x] + set l0 [scan [lindex $range 1] %x] + set hend [scan [lindex $range 2] %x] + set lend [scan [lindex $range 3] %x] + for {set hi $h0} {$hi <= $hend} {incr hi} { + for {set lo $l0} {$lo <= $lend} {incr lo} { + set code [expr {$hi << 8 | ($lo & 0xff)}] + uplevel 1 $command + } + } + } else { + error "really?" + } + } +} +proc gen-jisx0208-euc-jp {code} { + binary format cc \ + [expr {($code >> 8) | 0x80}] [expr {($code & 0xff) | 0x80}] +} +proc gen-jisx0208-iso2022-jp {code} { + binary format a3cca3 \ + "\x1b\$B" [expr {$code >> 8}] [expr {$code & 0xff}] "\x1b(B" +} +proc gen-jisx0208-cp932 {code} { + set c1 [expr {($code >> 8) | 0x80}] + set c2 [expr {($code & 0xff)| 0x80}] + if {$c1 % 2} { + set c1 [expr {($c1 >> 1) + ($c1 < 0xdf ? 0x31 : 0x71)}] + incr c2 [expr {- (0x60 + ($c2 < 0xe0))}] + } else { + set c1 [expr {($c1 >> 1) + ($c1 < 0xdf ? 0x30 : 0x70)}] + incr c2 -2 + } + binary format cc $c1 $c2 +} +proc channel-diff {fa fb} { + set diff {} + while {[gets $fa la] >= 0 && [gets $fb lb] >= 0} { + if {[string compare $la $lb] == 0} continue + # lappend diff $la $lb + + # For more readable (easy to analyze) output. + set code [lindex $la 0] + binary scan [lindex $la 1] H* expected + binary scan [lindex $lb 1] H* got + lappend diff [list $code $expected $got] + } + set diff +} + +# Create char tables. +cd [temporaryDirectory] +foreach enc {cp932 euc-jp iso2022-jp} { + set f [open $enc.chars w] + fconfigure $f -encoding binary + foreach-jisx0208 code { + puts $f [format "%04X %s" $code [gen-jisx0208-$enc $code]] + } + close $f +} +# shiftjis == cp932 for jisx0208. +file copy -force cp932.chars shiftjis.chars + +set NUM 0 +foreach from {cp932 shiftjis euc-jp iso2022-jp} { + foreach to {cp932 shiftjis euc-jp iso2022-jp} { + test encoding-25.[incr NUM] "jisx0208 $from => $to" { + cd [temporaryDirectory] + set f [open $from.chars] + fconfigure $f -encoding $from + set out [open $from.$to.out w] + fconfigure $out -encoding $to + puts -nonewline $out [read $f] + close $out + close $f + + # then compare $to.chars <=> $from.to.out as binary. + set fa [open $to.chars] + fconfigure $fa -encoding binary + set fb [open $from.$to.out] + fconfigure $fb -encoding binary + set diff [channel-diff $fa $fb] + close $fa + close $fb + + # Difference should be empty. + set diff + } {} + } +} + +eval [list file delete] [glob -directory [temporaryDirectory] *.chars *.out] +# ===> Cut here <=== + # EscapeFreeProc, GetTableEncoding, unilen # are fully tested by the rest of this file |