diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2023-02-22 13:31:41 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2023-02-22 13:31:41 (GMT) |
commit | 78db448fff66d55223a88f8225976f4324de1b95 (patch) | |
tree | 5aa5a4b16e5dd9feaec6e7d4dc29968a7edc08eb | |
parent | 58f1209a70d5690e4615b1335f079595eaf542d3 (diff) | |
download | tcl-78db448fff66d55223a88f8225976f4324de1b95.zip tcl-78db448fff66d55223a88f8225976f4324de1b95.tar.gz tcl-78db448fff66d55223a88f8225976f4324de1b95.tar.bz2 |
Make txt2enc smarter, so that we no longer have to add DELETE (0x7F) to each of the original tables; it is better to keep them as-is.
-rw-r--r-- | tools/encoding/Makefile | 2 | ||||
-rw-r--r-- | tools/encoding/ascii.txt | 1 | ||||
-rw-r--r-- | tools/encoding/big5.txt | 1 | ||||
-rw-r--r-- | tools/encoding/dingbats.txt | 1 | ||||
-rw-r--r-- | tools/encoding/gb1988.txt | 1 | ||||
-rw-r--r-- | tools/encoding/macCentEuro.txt | 1 | ||||
-rw-r--r-- | tools/encoding/macCroatian.txt | 1 | ||||
-rw-r--r-- | tools/encoding/macCyrillic.txt | 1 | ||||
-rw-r--r-- | tools/encoding/macDingbats.txt | 1 | ||||
-rw-r--r-- | tools/encoding/macGreek.txt | 1 | ||||
-rw-r--r-- | tools/encoding/macIceland.txt | 1 | ||||
-rw-r--r-- | tools/encoding/macJapan.txt | 1 | ||||
-rw-r--r-- | tools/encoding/macRoman.txt | 1 | ||||
-rw-r--r-- | tools/encoding/macRomania.txt | 1 | ||||
-rw-r--r-- | tools/encoding/macThai.txt | 1 | ||||
-rw-r--r-- | tools/encoding/macTurkish.txt | 1 | ||||
-rw-r--r-- | tools/encoding/macUkraine.txt | 1 | ||||
-rw-r--r-- | tools/encoding/symbol.txt | 1 | ||||
-rw-r--r-- | tools/encoding/txt2enc.c | 14 |
19 files changed, 13 insertions, 20 deletions
diff --git a/tools/encoding/Makefile b/tools/encoding/Makefile index a2122d5..ff19492 100644 --- a/tools/encoding/Makefile +++ b/tools/encoding/Makefile @@ -67,7 +67,7 @@ encodings: clean txt2enc $(EUC_ENCODINGS) @for p in *.txt; do \ enc=`echo $$p | sed 's/\..*$$/\.enc/'`; \ echo $$enc; \ - ./txt2enc $$p > $$enc; \ + ./txt2enc -e 0 -u 1 $$p > $$enc; \ done @echo @echo Compiling special versions of encoding files. diff --git a/tools/encoding/ascii.txt b/tools/encoding/ascii.txt index 2afbaab..66ba6f3 100644 --- a/tools/encoding/ascii.txt +++ b/tools/encoding/ascii.txt @@ -93,4 +93,3 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE
\ No newline at end of file diff --git a/tools/encoding/big5.txt b/tools/encoding/big5.txt index 06b0fac..58cdfe2 100644 --- a/tools/encoding/big5.txt +++ b/tools/encoding/big5.txt @@ -185,7 +185,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE 0xA140 0x3000 # IDEOGRAPHIC SPACE 0xA141 0xFF0C # FULLWIDTH COMMA 0xA142 0x3001 # IDEOGRAPHIC COMMA diff --git a/tools/encoding/dingbats.txt b/tools/encoding/dingbats.txt index 93a6081..334f8d6 100644 --- a/tools/encoding/dingbats.txt +++ b/tools/encoding/dingbats.txt @@ -155,7 +155,6 @@ 0x7C 0x275C # HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT 0x7D 0x275D # HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT 0x7E 0x275E # HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT -0x7F 0x275E # DELETE 0xA1 0x2761 # CURVED STEM PARAGRAPH SIGN ORNAMENT 0xA2 0x2762 # HEAVY EXCLAMATION MARK ORNAMENT 0xA3 0x2763 # HEAVY HEART EXCLAMATION MARK ORNAMENT diff --git a/tools/encoding/gb1988.txt b/tools/encoding/gb1988.txt index b9197e5..800cd68 100644 --- a/tools/encoding/gb1988.txt +++ b/tools/encoding/gb1988.txt @@ -93,7 +93,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x203E # OVERLINE -0x7F 0x007F # DELETE 0xA1 0xFF61 # HALFWIDTH IDEOGRAPHIC FULL STOP 0xA2 0xFF62 # HALFWIDTH LEFT CORNER BRACKET 0xA3 0xFF63 # HALFWIDTH RIGHT CORNER BRACKET diff --git a/tools/encoding/macCentEuro.txt b/tools/encoding/macCentEuro.txt index aa92908..bf424c1 100644 --- a/tools/encoding/macCentEuro.txt +++ b/tools/encoding/macCentEuro.txt @@ -188,7 +188,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x0100 # LATIN CAPITAL LETTER A WITH MACRON diff --git a/tools/encoding/macCroatian.txt b/tools/encoding/macCroatian.txt index 2eef246..538eda3 100644 --- a/tools/encoding/macCroatian.txt +++ b/tools/encoding/macCroatian.txt @@ -216,7 +216,6 @@ 0x7C 0x007C # VERTICAL LINE 
0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macCyrillic.txt b/tools/encoding/macCyrillic.txt index 2e9f8e2..695dade 100644 --- a/tools/encoding/macCyrillic.txt +++ b/tools/encoding/macCyrillic.txt @@ -213,7 +213,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x0410 # CYRILLIC CAPITAL LETTER A 0x81 0x0411 # CYRILLIC CAPITAL LETTER BE diff --git a/tools/encoding/macDingbats.txt b/tools/encoding/macDingbats.txt index 4b815f4..273d526 100644 --- a/tools/encoding/macDingbats.txt +++ b/tools/encoding/macDingbats.txt @@ -151,7 +151,6 @@ 0x7C 0x275C # HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT 0x7D 0x275D # HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT 0x7E 0x275E # HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT -0x7F 0x007F # DELETE 0x80 0xF8D7 # medium left parenthesis ornament 0x81 0xF8D8 # medium right parenthesis ornament 0x82 0xF8D9 # medium flattened left parenthesis ornament diff --git a/tools/encoding/macGreek.txt b/tools/encoding/macGreek.txt index b960d68..9783259 100644 --- a/tools/encoding/macGreek.txt +++ b/tools/encoding/macGreek.txt @@ -207,7 +207,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00B9 # SUPERSCRIPT ONE diff --git a/tools/encoding/macIceland.txt b/tools/encoding/macIceland.txt index c60b8d2..0a0b27b 100644 --- a/tools/encoding/macIceland.txt +++ b/tools/encoding/macIceland.txt @@ -234,7 +234,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macJapan.txt b/tools/encoding/macJapan.txt index 
3c48c4a..7121b3b 100644 --- a/tools/encoding/macJapan.txt +++ b/tools/encoding/macJapan.txt @@ -318,7 +318,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE # Apple change -0x7F 0x007F # DELETE # 0x8140 0x3000 # IDEOGRAPHIC SPACE 0x8141 0x3001 # IDEOGRAPHIC COMMA diff --git a/tools/encoding/macRoman.txt b/tools/encoding/macRoman.txt index 43ad44b..7ddcf8d 100644 --- a/tools/encoding/macRoman.txt +++ b/tools/encoding/macRoman.txt @@ -233,7 +233,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macRomania.txt b/tools/encoding/macRomania.txt index 36a0b68..2a84adc 100644 --- a/tools/encoding/macRomania.txt +++ b/tools/encoding/macRomania.txt @@ -154,7 +154,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macThai.txt b/tools/encoding/macThai.txt index 2043621..b991833 100644 --- a/tools/encoding/macThai.txt +++ b/tools/encoding/macThai.txt @@ -168,7 +168,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x81 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK diff --git a/tools/encoding/macTurkish.txt b/tools/encoding/macTurkish.txt index ca3cda3..4a1ddab 100644 --- a/tools/encoding/macTurkish.txt +++ b/tools/encoding/macTurkish.txt @@ -203,7 +203,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macUkraine.txt b/tools/encoding/macUkraine.txt 
index dc07cdc..dba4e10 100644 --- a/tools/encoding/macUkraine.txt +++ b/tools/encoding/macUkraine.txt @@ -148,7 +148,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x0410 # CYRILLIC CAPITAL LETTER A 0x81 0x0411 # CYRILLIC CAPITAL LETTER BE diff --git a/tools/encoding/symbol.txt b/tools/encoding/symbol.txt index 13a3ed8..12dcae6 100644 --- a/tools/encoding/symbol.txt +++ b/tools/encoding/symbol.txt @@ -169,7 +169,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x223C # TILDE OPERATOR -0x7F 0x007F # DELETE 0xA1 0x03D2 # GREEK UPSILON WITH HOOK SYMBOL 0xA2 0x2032 # PRIME 0xA3 0x2264 # LESS-THAN OR EQUAL TO diff --git a/tools/encoding/txt2enc.c b/tools/encoding/txt2enc.c index 80b44b9..32c7344 100644 --- a/tools/encoding/txt2enc.c +++ b/tools/encoding/txt2enc.c @@ -26,7 +26,7 @@ main(int argc, char **argv) { FILE *fp; Rune *toUnicode[256]; - int i, multiByte, enc, uni, hi, lo, used, maxEnc; + int i, multiByte, enc, uni, hi, lo, fixmissing, used, maxEnc; int ch, encColumn, uniColumn, fallbackKnown, width; char *fallbackString, *str, *rest, *dot; unsigned int magic, type, symbol, fallbackChar; @@ -43,6 +43,7 @@ main(int argc, char **argv) fallbackKnown = 0; type = -1; symbol = 0; + fixmissing = 1; opterr = 0; while (1) { @@ -88,6 +89,10 @@ main(int argc, char **argv) symbol = 1; break; + case 'm': + fixmissing = 0; + break; + default: goto usage; } @@ -101,7 +106,7 @@ main(int argc, char **argv) fputs(" -f\tfallback character (default: QUESTION MARK)\n", stderr); fputs(" -t\toverride implicit type with single, double, or multi\n", stderr); fputs(" -s\tsymbol+ascii encoding\n", stderr); - fputs(" -m\tdon't implicitly include range 0080 to 00FF\n", stderr); + fputs(" -m\tdon't implicitly include 007F\n", stderr); return 1; } @@ -202,6 +207,11 @@ main(int argc, char **argv) for (i = 0; i < 0x20; i++) { toUnicode[0][i] = i; } + if (fixmissing) { + if (toUnicode[0x7F] == NULL && 
toUnicode[0][0x7F] == 0) { + toUnicode[0][0x7F] = 0x7F; + } + } } printf("# Encoding file: %s, %s-byte\n", argv[argc - 1], typeString[type]); |