From f95599f4d4b6e502a92971909286a8ec6533c8c2 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 21 Feb 2023 23:16:14 +0000 Subject: More encoding fixes, e.g. range 0x80-0x9F for dingbats and symbol. Remove "-m" option from txt2enc.c tool, since the same is already handled in the table encoding code in Tcl itself. This was wat prevent Tcl to handle throwing exceptions correctly --- library/encoding/dingbats.enc | 4 ++-- library/encoding/ebcdic.enc | 1 + library/encoding/symbol.enc | 4 ++-- tools/encoding/Makefile | 2 +- tools/encoding/dingbats.txt | 1 + tools/encoding/gb1988.txt | 1 + tools/encoding/macTurkish.txt | 1 + tools/encoding/macUkraine.txt | 1 + tools/encoding/symbol.txt | 1 + tools/encoding/txt2enc.c | 14 +------------- 10 files changed, 12 insertions(+), 18 deletions(-) diff --git a/library/encoding/dingbats.enc b/library/encoding/dingbats.enc index 9729487..bd466b2 100644 --- a/library/encoding/dingbats.enc +++ b/library/encoding/dingbats.enc @@ -10,8 +10,8 @@ S 2730273127322733273427352736273727382739273A273B273C273D273E273F 2740274127422743274427452746274727482749274A274B25CF274D25A0274F 27502751275225B225BC25C6275625D727582759275A275B275C275D275E007F -0080008100820083008400850086008700880089008A008B008C008D008E008F -0090009100920093009400950096009700980099009A009B009C009D009E009F +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 0000276127622763276427652766276726632666266526602460246124622463 2464246524662467246824692776277727782779277A277B277C277D277E277F 2780278127822783278427852786278727882789278A278B278C278D278E278F diff --git a/library/encoding/ebcdic.enc b/library/encoding/ebcdic.enc index f451de5..f83ce7d 100644 --- a/library/encoding/ebcdic.enc +++ b/library/encoding/ebcdic.enc @@ -1,3 +1,4 @@ +# Encoding file: ebcdic, single-byte S 006F 0 1 00 diff --git a/library/encoding/symbol.enc b/library/encoding/symbol.enc index ffda9e3..ebd2f49 100644 --- a/library/encoding/symbol.enc +++ b/library/encoding/symbol.enc @@ -10,8 +10,8 @@ S 03A0039803A103A303A403A503C203A9039E03A80396005B2234005D22A5005F F8E503B103B203C703B403B503C603B303B703B903D503BA03BB03BC03BD03BF 03C003B803C103C303C403C503D603C903BE03C803B6007B007C007D223C007F -0080008100820083008400850086008700880089008A008B008C008D008E008F -0090009100920093009400950096009700980099009A009B009C009D009E009F +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 000003D2203222642044221E0192266326662665266021942190219121922193 00B000B12033226500D7221D2202202200F72260226122482026F8E6F8E721B5 21352111211C21182297229522052229222A2283228722842282228622082209 diff --git a/tools/encoding/Makefile b/tools/encoding/Makefile index 7235b47..a2122d5 100644 --- a/tools/encoding/Makefile +++ b/tools/encoding/Makefile @@ -67,7 +67,7 @@ encodings: clean txt2enc $(EUC_ENCODINGS) @for p in *.txt; do \ enc=`echo $$p | sed 's/\..*$$/\.enc/'`; \ echo $$enc; \ - ./txt2enc -m $$p > $$enc; \ + ./txt2enc $$p > $$enc; \ done @echo @echo Compiling special versions of encoding files. diff --git a/tools/encoding/dingbats.txt b/tools/encoding/dingbats.txt index 334f8d6..93a6081 100644 --- a/tools/encoding/dingbats.txt +++ b/tools/encoding/dingbats.txt @@ -155,6 +155,7 @@ 0x7C 0x275C # HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT 0x7D 0x275D # HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT 0x7E 0x275E # HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT +0x7F 0x275E # DELETE 0xA1 0x2761 # CURVED STEM PARAGRAPH SIGN ORNAMENT 0xA2 0x2762 # HEAVY EXCLAMATION MARK ORNAMENT 0xA3 0x2763 # HEAVY HEART EXCLAMATION MARK ORNAMENT diff --git a/tools/encoding/gb1988.txt b/tools/encoding/gb1988.txt index 800cd68..b9197e5 100644 --- a/tools/encoding/gb1988.txt +++ b/tools/encoding/gb1988.txt @@ -93,6 +93,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x203E # OVERLINE +0x7F 0x007F # DELETE 0xA1 0xFF61 # HALFWIDTH IDEOGRAPHIC FULL STOP 0xA2 0xFF62 # HALFWIDTH LEFT CORNER BRACKET 0xA3 0xFF63 # HALFWIDTH RIGHT CORNER BRACKET diff --git a/tools/encoding/macTurkish.txt b/tools/encoding/macTurkish.txt index 4a1ddab..ca3cda3 100644 --- a/tools/encoding/macTurkish.txt +++ b/tools/encoding/macTurkish.txt @@ -203,6 +203,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macUkraine.txt b/tools/encoding/macUkraine.txt index dba4e10..dc07cdc 100644 --- a/tools/encoding/macUkraine.txt +++ b/tools/encoding/macUkraine.txt @@ -148,6 +148,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE # 0x80 0x0410 # CYRILLIC CAPITAL LETTER A 0x81 0x0411 # CYRILLIC CAPITAL LETTER BE diff --git a/tools/encoding/symbol.txt b/tools/encoding/symbol.txt index 12dcae6..13a3ed8 100644 --- a/tools/encoding/symbol.txt +++ b/tools/encoding/symbol.txt @@ -169,6 +169,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x223C # TILDE OPERATOR +0x7F 0x007F # DELETE 0xA1 0x03D2 # GREEK UPSILON WITH HOOK SYMBOL 0xA2 0x2032 # PRIME 0xA3 0x2264 # LESS-THAN OR EQUAL TO diff --git a/tools/encoding/txt2enc.c b/tools/encoding/txt2enc.c index 7ee797b..80b44b9 100644 --- a/tools/encoding/txt2enc.c +++ b/tools/encoding/txt2enc.c @@ -26,7 +26,7 @@ main(int argc, char **argv) { FILE *fp; Rune *toUnicode[256]; - int i, multiByte, enc, uni, hi, lo, fixmissing, used, maxEnc; + int i, multiByte, enc, uni, hi, lo, used, maxEnc; int ch, encColumn, uniColumn, fallbackKnown, width; char *fallbackString, *str, *rest, *dot; unsigned int magic, type, symbol, fallbackChar; @@ -43,7 +43,6 @@ main(int argc, char **argv) fallbackKnown = 0; type = -1; symbol = 0; - fixmissing = 1; opterr = 0; while (1) { @@ -89,10 +88,6 @@ main(int argc, char **argv) symbol = 1; break; - case 'm': - fixmissing = 0; - break; - default: goto usage; } @@ -207,13 +202,6 @@ main(int argc, char **argv) for (i = 0; i < 0x20; i++) { toUnicode[0][i] = i; } - if (fixmissing) { - for (i = 0x7F; i < 0xA0; i++) { - if (toUnicode[i] == NULL && toUnicode[0][i] == 0) { - toUnicode[0][i] = i; - } - } - } } printf("# Encoding file: %s, %s-byte\n", argv[argc - 1], typeString[type]); -- cgit v0.12