From 280034d2ab7356da4aadf99bcade5d106a3da1b2 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 20 Feb 2023 20:41:00 +0000 Subject: Proposed fix for [4bea02e811]: encoding convertfrom -strict ascii \x7f generates exception --- library/encoding/ascii.enc | 2 +- library/encoding/cp1250.enc | 4 ++-- library/encoding/cp1251.enc | 2 +- library/encoding/cp1252.enc | 4 ++-- library/encoding/cp1253.enc | 4 ++-- library/encoding/cp1257.enc | 4 ++-- library/encoding/cp1258.enc | 4 ++-- library/encoding/cp864.enc | 2 +- library/encoding/cp869.enc | 4 ++-- library/encoding/cp874.enc | 4 ++-- library/encoding/cp932.enc | 2 +- library/encoding/cp949.enc | 2 +- library/encoding/cp950.enc | 4 ++-- library/encoding/tis-620.enc | 2 +- tools/encoding/Makefile | 7 +------ tools/encoding/ascii.txt | 1 + 16 files changed, 24 insertions(+), 28 deletions(-) diff --git a/library/encoding/ascii.enc b/library/encoding/ascii.enc index e0320b8..284a9f5 100644 --- a/library/encoding/ascii.enc +++ b/library/encoding/ascii.enc @@ -9,7 +9,7 @@ S 0040004100420043004400450046004700480049004A004B004C004D004E004F 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F -0070007100720073007400750076007700780079007A007B007C007D007E0000 +0070007100720073007400750076007700780079007A007B007C007D007E007F 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 diff --git a/library/encoding/cp1250.enc b/library/encoding/cp1250.enc index 070ad90..f40b485 100644 --- a/library/encoding/cp1250.enc +++ b/library/encoding/cp1250.enc @@ -10,8 +10,8 @@ S 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F 
-20AC0081201A0083201E2026202020210088203001602039015A0164017D0179 -009020182019201C201D202220132014009821220161203A015B0165017E017A +20AC0000201A0000201E2026202020210000203001602039015A0164017D0179 +000020182019201C201D202220132014000021220161203A015B0165017E017A 00A002C702D8014100A4010400A600A700A800A9015E00AB00AC00AD00AE017B 00B000B102DB014200B400B500B600B700B80105015F00BB013D02DD013E017C 015400C100C2010200C40139010600C7010C00C9011800CB011A00CD00CE010E diff --git a/library/encoding/cp1251.enc b/library/encoding/cp1251.enc index 376b1b4..f9513c2 100644 --- a/library/encoding/cp1251.enc +++ b/library/encoding/cp1251.enc @@ -11,7 +11,7 @@ S 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F 04020403201A0453201E20262020202120AC203004092039040A040C040B040F -045220182019201C201D202220132014009821220459203A045A045C045B045F +045220182019201C201D202220132014000021220459203A045A045C045B045F 00A0040E045E040800A4049000A600A7040100A9040400AB00AC00AD00AE0407 00B000B104060456049100B500B600B704512116045400BB0458040504550457 0410041104120413041404150416041704180419041A041B041C041D041E041F diff --git a/library/encoding/cp1252.enc b/library/encoding/cp1252.enc index dd525ea..b45a7f8 100644 --- a/library/encoding/cp1252.enc +++ b/library/encoding/cp1252.enc @@ -10,8 +10,8 @@ S 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -20AC0081201A0192201E20262020202102C62030016020390152008D017D008F -009020182019201C201D20222013201402DC21220161203A0153009D017E0178 +20AC0000201A0192201E20262020202102C620300160203901520000017D0000 +000020182019201C201D20222013201402DC21220161203A01530000017E0178 00A000A100A200A300A400A500A600A700A800A900AA00AB00AC00AD00AE00AF 00B000B100B200B300B400B500B600B700B800B900BA00BB00BC00BD00BE00BF 
00C000C100C200C300C400C500C600C700C800C900CA00CB00CC00CD00CE00CF diff --git a/library/encoding/cp1253.enc b/library/encoding/cp1253.enc index a8754c3..dcc8084 100644 --- a/library/encoding/cp1253.enc +++ b/library/encoding/cp1253.enc @@ -10,8 +10,8 @@ S 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -20AC0081201A0192201E20262020202100882030008A2039008C008D008E008F -009020182019201C201D20222013201400982122009A203A009C009D009E009F +20AC0000201A0192201E20262020202100002030000020390000000000000000 +000020182019201C201D202220132014000021220000203A0000000000000000 00A00385038600A300A400A500A600A700A800A9000000AB00AC00AD00AE2015 00B000B100B200B3038400B500B600B703880389038A00BB038C00BD038E038F 0390039103920393039403950396039703980399039A039B039C039D039E039F diff --git a/library/encoding/cp1257.enc b/library/encoding/cp1257.enc index 4aa135d..42c6905 100644 --- a/library/encoding/cp1257.enc +++ b/library/encoding/cp1257.enc @@ -10,8 +10,8 @@ S 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -20AC0081201A0083201E20262020202100882030008A2039008C00A802C700B8 -009020182019201C201D20222013201400982122009A203A009C00AF02DB009F +20AC0000201A0000201E2026202020210000203000002039000000A802C700B8 +000020182019201C201D202220132014000021220000203A000000AF02DB0000 00A0000000A200A300A4000000A600A700D800A9015600AB00AC00AD00AE00C6 00B000B100B200B300B400B500B600B700F800B9015700BB00BC00BD00BE00E6 0104012E0100010600C400C501180112010C00C90179011601220136012A013B diff --git a/library/encoding/cp1258.enc b/library/encoding/cp1258.enc index 95fdef8..bbe2b12 100644 --- a/library/encoding/cp1258.enc +++ b/library/encoding/cp1258.enc @@ -10,8 +10,8 @@ S 
0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -20AC0081201A0192201E20262020202102C62030008A20390152008D008E008F -009020182019201C201D20222013201402DC2122009A203A0153009D009E0178 +20AC0000201A0192201E20262020202102C62030000020390152000000000000 +000020182019201C201D20222013201402DC21220000203A0153000000000178 00A000A100A200A300A400A500A600A700A800A900AA00AB00AC00AD00AE00AF 00B000B100B200B300B400B500B600B700B800B900BA00BB00BC00BD00BE00BF 00C000C100C2010200C400C500C600C700C800C900CA00CB030000CD00CE00CF diff --git a/library/encoding/cp864.enc b/library/encoding/cp864.enc index 71f9e62..dad7c20 100644 --- a/library/encoding/cp864.enc +++ b/library/encoding/cp864.enc @@ -11,7 +11,7 @@ S 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F 00B000B72219221A259225002502253C2524252C251C25342510250C25142518 -03B2221E03C600B100BD00BC224800AB00BBFEF7FEF8009B009CFEFBFEFC009F +03B2221E03C600B100BD00BC224800AB00BBFEF7FEF800000000FEFBFEFC0000 00A000ADFE8200A300A4FE8400000000FE8EFE8FFE95FE99060CFE9DFEA1FEA5 0660066106620663066406650666066706680669FED1061BFEB1FEB5FEB9061F 00A2FE80FE81FE83FE85FECAFE8BFE8DFE91FE93FE97FE9BFE9FFEA3FEA7FEA9 diff --git a/library/encoding/cp869.enc b/library/encoding/cp869.enc index 9fd2929..4670826 100644 --- a/library/encoding/cp869.enc +++ b/library/encoding/cp869.enc @@ -10,8 +10,8 @@ S 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -0080008100820083008400850386008700B700AC00A620182019038820150389 -038A03AA038C00930094038E03AB00A9038F00B200B303AC00A303AD03AE03AF +0000000000000000000000000386000000B700AC00A620182019038820150389 
+038A03AA038C00000000038E03AB00A9038F00B200B303AC00A303AD03AE03AF 03CA039003CC03CD039103920393039403950396039700BD0398039900AB00BB 25912592259325022524039A039B039C039D256325512557255D039E039F2510 25142534252C251C2500253C03A003A1255A25542569256625602550256C03A3 diff --git a/library/encoding/cp874.enc b/library/encoding/cp874.enc index 0487b97..e2e8433 100644 --- a/library/encoding/cp874.enc +++ b/library/encoding/cp874.enc @@ -10,8 +10,8 @@ S 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -20AC008100820083008420260086008700880089008A008B008C008D008E008F -009020182019201C201D20222013201400980099009A009B009C009D009E009F +20AC000000000000000020260000000000000000000000000000000000000000 +000020182019201C201D20222013201400000000000000000000000000000000 00A00E010E020E030E040E050E060E070E080E090E0A0E0B0E0C0E0D0E0E0E0F 0E100E110E120E130E140E150E160E170E180E190E1A0E1B0E1C0E1D0E1E0E1F 0E200E210E220E230E240E250E260E270E280E290E2A0E2B0E2C0E2D0E2E0E2F diff --git a/library/encoding/cp932.enc b/library/encoding/cp932.enc index 8da8cd6..0699000 100644 --- a/library/encoding/cp932.enc +++ b/library/encoding/cp932.enc @@ -10,7 +10,7 @@ M 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -0080000000000000000000850086000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000FF61FF62FF63FF64FF65FF66FF67FF68FF69FF6AFF6BFF6CFF6DFF6EFF6F FF70FF71FF72FF73FF74FF75FF76FF77FF78FF79FF7AFF7BFF7CFF7DFF7EFF7F diff --git a/library/encoding/cp949.enc b/library/encoding/cp949.enc index 2f3ec39..459dbd9 100644 --- a/library/encoding/cp949.enc +++ b/library/encoding/cp949.enc @@ -10,7 +10,7 @@ M 
0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -0080000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 diff --git a/library/encoding/cp950.enc b/library/encoding/cp950.enc index f33d785..f582bd9 100644 --- a/library/encoding/cp950.enc +++ b/library/encoding/cp950.enc @@ -10,8 +10,8 @@ M 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -0080008100820083008400850086008700880089008A008B008C008D008E008F -0090009100920093009400950096009700980099009A009B009C009D009E009F +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 diff --git a/library/encoding/tis-620.enc b/library/encoding/tis-620.enc index 2e9142a..af77326 100644 --- a/library/encoding/tis-620.enc +++ b/library/encoding/tis-620.enc @@ -9,7 +9,7 @@ S 0040004100420043004400450046004700480049004A004B004C004D004E004F 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F -0070007100720073007400750076007700780079007A007B007C007D007E0000 +0070007100720073007400750076007700780079007A007B007C007D007E007F 0000000000000000000000000000000000000000000000000000000000000000 
0000000000000000000000000000000000000000000000000000000000000000 00000E010E020E030E040E050E060E070E080E090E0A0E0B0E0C0E0D0E0E0E0F diff --git a/tools/encoding/Makefile b/tools/encoding/Makefile index 361239e..7235b47 100644 --- a/tools/encoding/Makefile +++ b/tools/encoding/Makefile @@ -67,15 +67,10 @@ encodings: clean txt2enc $(EUC_ENCODINGS) @for p in *.txt; do \ enc=`echo $$p | sed 's/\..*$$/\.enc/'`; \ echo $$enc; \ - ./txt2enc -e 0 -u 1 $$p > $$enc; \ + ./txt2enc -m $$p > $$enc; \ done @echo @echo Compiling special versions of encoding files. - @for p in ascii.txt; do \ - enc=`echo $$p | sed 's/\..*$$/\.enc/'`; \ - echo $$enc; \ - ./txt2enc -m $$p > $$enc; \ - done @for p in jis0208.txt; do \ enc=`echo $$p | sed 's/\..*$$/\.enc/'`; \ echo $$enc; \ diff --git a/tools/encoding/ascii.txt b/tools/encoding/ascii.txt index 66ba6f3..2afbaab 100644 --- a/tools/encoding/ascii.txt +++ b/tools/encoding/ascii.txt @@ -93,3 +93,4 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE \ No newline at end of file -- cgit v0.12 From 8660fd1af23543a70d94adaec5d7b98105ad3ca3 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 20 Feb 2023 20:52:06 +0000 Subject: Two more files, re-generated --- library/encoding/cp1254.enc | 4 ++-- library/encoding/cp1255.enc | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/library/encoding/cp1254.enc b/library/encoding/cp1254.enc index b9e3b3c..4922f3c 100644 --- a/library/encoding/cp1254.enc +++ b/library/encoding/cp1254.enc @@ -10,8 +10,8 @@ S 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -20AC0081201A0192201E20262020202102C62030016020390152008D008E008F -009020182019201C201D20222013201402DC21220161203A0153009D009E0178 +20AC0000201A0192201E20262020202102C62030016020390152000000000000 
+000020182019201C201D20222013201402DC21220161203A0153000000000178 00A000A100A200A300A400A500A600A700A800A900AA00AB00AC00AD00AE00AF 00B000B100B200B300B400B500B600B700B800B900BA00BB00BC00BD00BE00BF 00C000C100C200C300C400C500C600C700C800C900CA00CB00CC00CD00CE00CF diff --git a/library/encoding/cp1255.enc b/library/encoding/cp1255.enc index 6e78b95..74ef0c1 100644 --- a/library/encoding/cp1255.enc +++ b/library/encoding/cp1255.enc @@ -10,8 +10,8 @@ S 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -20AC0081201A0192201E20262020202102C62030008A2039008C008D008E008F -009020182019201C201D20222013201402DC2122009A203A009C009D009E009F +20AC0000201A0192201E20262020202102C62030000020390000000000000000 +000020182019201C201D20222013201402DC21220000203A0000000000000000 00A000A100A200A320AA00A500A600A700A800A900D700AB00AC00AD00AE00AF 00B000B100B200B300B400B500B600B700B800B900F700BB00BC00BD00BE00BF 05B005B105B205B305B405B505B605B705B805B9000005BB05BC05BD05BE05BF -- cgit v0.12 From 3315012c955111ef840365ecd7cc4ff46a15e204 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 20 Feb 2023 21:43:29 +0000 Subject: re-generated 8 more encodings --- library/encoding/big5.enc | 4 ++-- library/encoding/euc-cn.enc | 4 ++-- library/encoding/euc-jp.enc | 4 ++-- library/encoding/euc-kr.enc | 4 ++-- library/encoding/gb1988.enc | 4 ++-- library/encoding/jis0201.enc | 4 ++-- library/encoding/macJapan.enc | 2 +- library/encoding/shiftjis.enc | 2 +- tools/encoding/big5.txt | 1 + 9 files changed, 15 insertions(+), 14 deletions(-) diff --git a/library/encoding/big5.enc b/library/encoding/big5.enc index 26179f4..d6ff760 100644 --- a/library/encoding/big5.enc +++ b/library/encoding/big5.enc @@ -10,8 +10,8 @@ M 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 
0070007100720073007400750076007700780079007A007B007C007D007E007F -0080008100820083008400850086008700880089008A008B008C008D008E008F -0090009100920093009400950096009700980099009A009B009C009D009E009F +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 diff --git a/library/encoding/euc-cn.enc b/library/encoding/euc-cn.enc index 4b2f8c7..ff0f984 100644 --- a/library/encoding/euc-cn.enc +++ b/library/encoding/euc-cn.enc @@ -10,8 +10,8 @@ M 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -0080008100820083008400850086008700880089008A008B008C008D008E008F -0090009100920093009400950096009700980099009A009B009C009D009E009F +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 diff --git a/library/encoding/euc-jp.enc b/library/encoding/euc-jp.enc index db56c88..d4337d9 100644 --- a/library/encoding/euc-jp.enc +++ b/library/encoding/euc-jp.enc @@ -10,8 +10,8 @@ M 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -0080008100820083008400850086008700880089008A008B008C008D0000008F -0090009100920093009400950096009700980099009A009B009C009D009E009F +0000000000000000000000000000000000000000000000000000000000000000 
+0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 diff --git a/library/encoding/euc-kr.enc b/library/encoding/euc-kr.enc index 5e9bb93..0433260 100644 --- a/library/encoding/euc-kr.enc +++ b/library/encoding/euc-kr.enc @@ -10,8 +10,8 @@ M 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -0080008100820083008400850086008700880089008A008B008C008D008E008F -0090009100920093009400950096009700980099009A009B009C009D009E009F +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 diff --git a/library/encoding/gb1988.enc b/library/encoding/gb1988.enc index 298732c..8254684 100644 --- a/library/encoding/gb1988.enc +++ b/library/encoding/gb1988.enc @@ -10,8 +10,8 @@ S 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D203E007F -0080008100820083008400850086008700880089008A008B008C008D008E008F -0090009100920093009400950096009700980099009A009B009C009D009E009F +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 0000FF61FF62FF63FF64FF65FF66FF67FF68FF69FF6AFF6BFF6CFF6DFF6EFF6F FF70FF71FF72FF73FF74FF75FF76FF77FF78FF79FF7AFF7BFF7CFF7DFF7EFF7F FF80FF81FF82FF83FF84FF85FF86FF87FF88FF89FF8AFF8BFF8CFF8DFF8EFF8F diff --git a/library/encoding/jis0201.enc 
b/library/encoding/jis0201.enc index 64f423f..70e099d 100644 --- a/library/encoding/jis0201.enc +++ b/library/encoding/jis0201.enc @@ -10,8 +10,8 @@ S 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D203E007F -0080008100820083008400850086008700880089008A008B008C008D008E008F -0090009100920093009400950096009700980099009A009B009C009D009E009F +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 0000FF61FF62FF63FF64FF65FF66FF67FF68FF69FF6AFF6BFF6CFF6DFF6EFF6F FF70FF71FF72FF73FF74FF75FF76FF77FF78FF79FF7AFF7BFF7CFF7DFF7EFF7F FF80FF81FF82FF83FF84FF85FF86FF87FF88FF89FF8AFF8BFF8CFF8DFF8EFF8F diff --git a/library/encoding/macJapan.enc b/library/encoding/macJapan.enc index dba24bd..9f3f03b 100644 --- a/library/encoding/macJapan.enc +++ b/library/encoding/macJapan.enc @@ -10,7 +10,7 @@ M 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -0080000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 00A0FF61FF62FF63FF64FF65FF66FF67FF68FF69FF6AFF6BFF6CFF6DFF6EFF6F FF70FF71FF72FF73FF74FF75FF76FF77FF78FF79FF7AFF7BFF7CFF7DFF7EFF7F diff --git a/library/encoding/shiftjis.enc b/library/encoding/shiftjis.enc index 140aec4..3ba972e 100644 --- a/library/encoding/shiftjis.enc +++ b/library/encoding/shiftjis.enc @@ -10,7 +10,7 @@ M 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -0080000000000000000000850086008700000000000000000000000000000000 
+0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000FF61FF62FF63FF64FF65FF66FF67FF68FF69FF6AFF6BFF6CFF6DFF6EFF6F FF70FF71FF72FF73FF74FF75FF76FF77FF78FF79FF7AFF7BFF7CFF7DFF7EFF7F diff --git a/tools/encoding/big5.txt b/tools/encoding/big5.txt index 58cdfe2..06b0fac 100644 --- a/tools/encoding/big5.txt +++ b/tools/encoding/big5.txt @@ -185,6 +185,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE 0xA140 0x3000 # IDEOGRAPHIC SPACE 0xA141 0xFF0C # FULLWIDTH COMMA 0xA142 0x3001 # IDEOGRAPHIC COMMA -- cgit v0.12 From 42956f6f09023c19e2c057150f6584f0f1f40b4c Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 21 Feb 2023 16:58:13 +0000 Subject: Some test-cases start failing in 9.0-compatibility-mode (-DTCL_NO_DEPRECATED), if the system encoding is one with gaps. So, better use iso8859-1 for those testcases --- tests/chanio.test | 12 ++++++------ tests/io.test | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/chanio.test b/tests/chanio.test index fb94051..61c168f 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -6736,8 +6736,8 @@ test chan-io-52.5 {TclCopyChannel, all} -constraints {fcopy} -setup { } -body { set f1 [open $thisScript] set f2 [open $path(test1) w] - chan configure $f1 -translation lf -blocking 0 - chan configure $f2 -translation lf -blocking 0 + chan configure $f1 -translation lf -encoding iso8859-1 -blocking 0 + chan configure $f2 -translation lf -encoding iso8859-1 -blocking 0 chan copy $f1 $f2 -size -1 ;# -1 means 'copy all', same as if no -size specified. 
set result [list [chan configure $f1 -blocking] [chan configure $f2 -blocking]] chan close $f1 @@ -6752,8 +6752,8 @@ test chan-io-52.5a {TclCopyChannel, all, other negative value} -setup { } -constraints {fcopy} -body { set f1 [open $thisScript] set f2 [open $path(test1) w] - chan configure $f1 -translation lf -blocking 0 - chan configure $f2 -translation lf -blocking 0 + chan configure $f1 -translation lf -encoding iso8859-1 -blocking 0 + chan configure $f2 -translation lf -encoding iso8859-1 -blocking 0 chan copy $f1 $f2 -size -2 ;# < 0 behaves like -1, copy all set result [list [chan configure $f1 -blocking] [chan configure $f2 -blocking]] chan close $f1 @@ -6768,8 +6768,8 @@ test chan-io-52.5b {TclCopyChannel, all, wrap to negative value} -setup { } -constraints {fcopy} -body { set f1 [open $thisScript] set f2 [open $path(test1) w] - chan configure $f1 -translation lf -blocking 0 - chan configure $f2 -translation lf -blocking 0 + chan configure $f1 -translation lf -encoding iso8859-1 -blocking 0 + chan configure $f2 -translation lf -encoding iso8859-1 -blocking 0 chan copy $f1 $f2 -size 3221176172 ;# Wrapped to < 0, behaves like -1, copy all set result [list [chan configure $f1 -blocking] [chan configure $f2 -blocking]] chan close $f1 diff --git a/tests/io.test b/tests/io.test index 7b8182e..aed7f85 100644 --- a/tests/io.test +++ b/tests/io.test @@ -7280,8 +7280,8 @@ test io-52.5 {TclCopyChannel, all} {fcopy} { file delete $path(test1) set f1 [open $thisScript] set f2 [open $path(test1) w] - fconfigure $f1 -translation lf -blocking 0 - fconfigure $f2 -translation lf -blocking 0 + fconfigure $f1 -translation lf -encoding iso8859-1 -blocking 0 + fconfigure $f2 -translation lf -encoding iso8859-1 -blocking 0 fcopy $f1 $f2 -size -1 ;# -1 means 'copy all', same as if no -size specified. 
set result [list [fconfigure $f1 -blocking] [fconfigure $f2 -blocking]] close $f1 @@ -7297,8 +7297,8 @@ test io-52.5a {TclCopyChannel, all, other negative value} {fcopy} { file delete $path(test1) set f1 [open $thisScript] set f2 [open $path(test1) w] - fconfigure $f1 -translation lf -blocking 0 - fconfigure $f2 -translation lf -blocking 0 + fconfigure $f1 -translation lf -encoding iso8859-1 -blocking 0 + fconfigure $f2 -translation lf -encoding iso8859-1 -blocking 0 fcopy $f1 $f2 -size -2 ;# < 0 behaves like -1, copy all set result [list [fconfigure $f1 -blocking] [fconfigure $f2 -blocking]] close $f1 @@ -7314,8 +7314,8 @@ test io-52.5b {TclCopyChannel, all, wrap to negative value} {fcopy} { file delete $path(test1) set f1 [open $thisScript] set f2 [open $path(test1) w] - fconfigure $f1 -translation lf -blocking 0 - fconfigure $f2 -translation lf -blocking 0 + fconfigure $f1 -translation lf -encoding iso8859-1 -blocking 0 + fconfigure $f2 -translation lf -encoding iso8859-1 -blocking 0 fcopy $f1 $f2 -size 3221176172 ;# Wrapped to < 0, behaves like -1, copy all set result [list [fconfigure $f1 -blocking] [fconfigure $f2 -blocking]] close $f1 -- cgit v0.12 From c606ae1574a7d66bcbf8666506e91840875f6d45 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 21 Feb 2023 17:50:36 +0000 Subject: Proposed fix for [d19fe0a5b]: Handling incomplete byte sequences for utf-16/utf-32 --- generic/tclEncoding.c | 27 ++++++++++++++++++++++++--- tests/encoding.test | 6 ++++++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index dfa7907..ecec6e9 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2457,21 +2457,27 @@ UnicodeToUtfProc( } result = TCL_OK; - /* check alignment with utf-16 (2 == sizeof(UTF-16)) */ + /* + * Check alignment with utf-16 (2 == sizeof(UTF-16)) + */ + if ((srcLen % 2) != 0) { result = TCL_CONVERT_MULTIBYTE; srcLen--; } +#if TCL_UTF_MAX > 3 /* - * If last code point is a high 
surrogate, we cannot handle that yet. + * If last code point is a high surrogate, we cannot handle that yet, + * unless we are at the end. */ - if ((srcLen >= 2) && + if (!(flags & TCL_ENCODING_END) && (srcLen >= 2) && ((src[srcLen - (clientData?1:2)] & 0xFC) == 0xD8)) { result = TCL_CONVERT_MULTIBYTE; srcLen-= 2; } +#endif srcStart = src; srcEnd = src + srcLen; @@ -2504,6 +2510,21 @@ UnicodeToUtfProc( src += sizeof(unsigned short); } + if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) { + /* We have a single byte left-over at the end */ + if (dst > dstEnd) { + result = TCL_CONVERT_NOSPACE; + } else { + /* destination is not full, so we really are at the end now */ + if (flags & TCL_ENCODING_STOPONERROR) { + result = TCL_CONVERT_SYNTAX; + } else { + dst += Tcl_UniCharToUtf(0xFFFD, dst); + numChars++; + src++; + } + } + } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; diff --git a/tests/encoding.test b/tests/encoding.test index f558e01..f6f9abc 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -419,6 +419,12 @@ test encoding-16.3 {UnicodeToUtfProc} -body { set val [encoding convertfrom unicode "\xDC\xDC"] list $val [format %X [scan $val %c]] } -result "\uDCDC DCDC" +test encoding-16.4 {UnicodeToUtfProc, bug [d19fe0a5b]} -body { + encoding convertfrom unicode "\x41\x41\x41" +} -result \u4141\uFFFD +test encoding-16.5 {UnicodeToUtfProc, bug [d19fe0a5b]} -constraints ucs2 -body { + encoding convertfrom unicode "\xD8\xD8" +} -result \uD8D8 test encoding-17.1 {UtfToUnicodeProc} -constraints fullutf -body { encoding convertto unicode "\U460DC" -- cgit v0.12 From f95599f4d4b6e502a92971909286a8ec6533c8c2 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 21 Feb 2023 23:16:14 +0000 Subject: More encoding fixes, e.g. range 0x80-0x9F for dingbats and symbol. Remove "-m" option from txt2enc.c tool, since the same is already handled in the table encoding code in Tcl itself. 
This was what prevented Tcl from throwing exceptions correctly --- library/encoding/dingbats.enc | 4 ++-- library/encoding/ebcdic.enc | 1 + library/encoding/symbol.enc | 4 ++-- tools/encoding/Makefile | 2 +- tools/encoding/dingbats.txt | 1 + tools/encoding/gb1988.txt | 1 + tools/encoding/macTurkish.txt | 1 + tools/encoding/macUkraine.txt | 1 + tools/encoding/symbol.txt | 1 + tools/encoding/txt2enc.c | 14 +------------- 10 files changed, 12 insertions(+), 18 deletions(-) diff --git a/library/encoding/dingbats.enc b/library/encoding/dingbats.enc index 9729487..bd466b2 100644 --- a/library/encoding/dingbats.enc +++ b/library/encoding/dingbats.enc @@ -10,8 +10,8 @@ S 2730273127322733273427352736273727382739273A273B273C273D273E273F 2740274127422743274427452746274727482749274A274B25CF274D25A0274F 27502751275225B225BC25C6275625D727582759275A275B275C275D275E007F -0080008100820083008400850086008700880089008A008B008C008D008E008F -0090009100920093009400950096009700980099009A009B009C009D009E009F +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 0000276127622763276427652766276726632666266526602460246124622463 2464246524662467246824692776277727782779277A277B277C277D277E277F 2780278127822783278427852786278727882789278A278B278C278D278E278F diff --git a/library/encoding/ebcdic.enc b/library/encoding/ebcdic.enc index f451de5..f83ce7d 100644 --- a/library/encoding/ebcdic.enc +++ b/library/encoding/ebcdic.enc @@ -1,3 +1,4 @@ +# Encoding file: ebcdic, single-byte S 006F 0 1 00 diff --git a/library/encoding/symbol.enc b/library/encoding/symbol.enc index ffda9e3..ebd2f49 100644 --- a/library/encoding/symbol.enc +++ b/library/encoding/symbol.enc @@ -10,8 +10,8 @@ S 03A0039803A103A303A403A503C203A9039E03A80396005B2234005D22A5005F F8E503B103B203C703B403B503C603B303B703B903D503BA03BB03BC03BD03BF 03C003B803C103C303C403C503D603C903BE03C803B6007B007C007D223C007F 
-0080008100820083008400850086008700880089008A008B008C008D008E008F -0090009100920093009400950096009700980099009A009B009C009D009E009F +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 000003D2203222642044221E0192266326662665266021942190219121922193 00B000B12033226500D7221D2202202200F72260226122482026F8E6F8E721B5 21352111211C21182297229522052229222A2283228722842282228622082209 diff --git a/tools/encoding/Makefile b/tools/encoding/Makefile index 7235b47..a2122d5 100644 --- a/tools/encoding/Makefile +++ b/tools/encoding/Makefile @@ -67,7 +67,7 @@ encodings: clean txt2enc $(EUC_ENCODINGS) @for p in *.txt; do \ enc=`echo $$p | sed 's/\..*$$/\.enc/'`; \ echo $$enc; \ - ./txt2enc -m $$p > $$enc; \ + ./txt2enc $$p > $$enc; \ done @echo @echo Compiling special versions of encoding files. diff --git a/tools/encoding/dingbats.txt b/tools/encoding/dingbats.txt index 334f8d6..93a6081 100644 --- a/tools/encoding/dingbats.txt +++ b/tools/encoding/dingbats.txt @@ -155,6 +155,7 @@ 0x7C 0x275C # HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT 0x7D 0x275D # HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT 0x7E 0x275E # HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT +0x7F 0x275E # DELETE 0xA1 0x2761 # CURVED STEM PARAGRAPH SIGN ORNAMENT 0xA2 0x2762 # HEAVY EXCLAMATION MARK ORNAMENT 0xA3 0x2763 # HEAVY HEART EXCLAMATION MARK ORNAMENT diff --git a/tools/encoding/gb1988.txt b/tools/encoding/gb1988.txt index 800cd68..b9197e5 100644 --- a/tools/encoding/gb1988.txt +++ b/tools/encoding/gb1988.txt @@ -93,6 +93,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x203E # OVERLINE +0x7F 0x007F # DELETE 0xA1 0xFF61 # HALFWIDTH IDEOGRAPHIC FULL STOP 0xA2 0xFF62 # HALFWIDTH LEFT CORNER BRACKET 0xA3 0xFF63 # HALFWIDTH RIGHT CORNER BRACKET diff --git a/tools/encoding/macTurkish.txt b/tools/encoding/macTurkish.txt index 4a1ddab..ca3cda3 100644 --- a/tools/encoding/macTurkish.txt +++ 
b/tools/encoding/macTurkish.txt @@ -203,6 +203,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macUkraine.txt b/tools/encoding/macUkraine.txt index dba4e10..dc07cdc 100644 --- a/tools/encoding/macUkraine.txt +++ b/tools/encoding/macUkraine.txt @@ -148,6 +148,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE # 0x80 0x0410 # CYRILLIC CAPITAL LETTER A 0x81 0x0411 # CYRILLIC CAPITAL LETTER BE diff --git a/tools/encoding/symbol.txt b/tools/encoding/symbol.txt index 12dcae6..13a3ed8 100644 --- a/tools/encoding/symbol.txt +++ b/tools/encoding/symbol.txt @@ -169,6 +169,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x223C # TILDE OPERATOR +0x7F 0x007F # DELETE 0xA1 0x03D2 # GREEK UPSILON WITH HOOK SYMBOL 0xA2 0x2032 # PRIME 0xA3 0x2264 # LESS-THAN OR EQUAL TO diff --git a/tools/encoding/txt2enc.c b/tools/encoding/txt2enc.c index 7ee797b..80b44b9 100644 --- a/tools/encoding/txt2enc.c +++ b/tools/encoding/txt2enc.c @@ -26,7 +26,7 @@ main(int argc, char **argv) { FILE *fp; Rune *toUnicode[256]; - int i, multiByte, enc, uni, hi, lo, fixmissing, used, maxEnc; + int i, multiByte, enc, uni, hi, lo, used, maxEnc; int ch, encColumn, uniColumn, fallbackKnown, width; char *fallbackString, *str, *rest, *dot; unsigned int magic, type, symbol, fallbackChar; @@ -43,7 +43,6 @@ main(int argc, char **argv) fallbackKnown = 0; type = -1; symbol = 0; - fixmissing = 1; opterr = 0; while (1) { @@ -89,10 +88,6 @@ main(int argc, char **argv) symbol = 1; break; - case 'm': - fixmissing = 0; - break; - default: goto usage; } @@ -207,13 +202,6 @@ main(int argc, char **argv) for (i = 0; i < 0x20; i++) { toUnicode[0][i] = i; } - if (fixmissing) { - for (i = 0x7F; i < 0xA0; i++) { - if (toUnicode[i] == NULL && toUnicode[0][i] == 
0) { - toUnicode[0][i] = i; - } - } - } } printf("# Encoding file: %s, %s-byte\n", argv[argc - 1], typeString[type]); -- cgit v0.12 From a970bffd00117d4e762dfec90e21a94576da94fc Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 21 Feb 2023 23:22:45 +0000 Subject: Add 0x7F: DELETE to more mac* encodings (so re-generating doesn't remove it again) --- tools/encoding/macCentEuro.txt | 1 + tools/encoding/macCroatian.txt | 1 + tools/encoding/macCyrillic.txt | 1 + tools/encoding/macDingbats.txt | 1 + tools/encoding/macGreek.txt | 1 + tools/encoding/macIceland.txt | 1 + tools/encoding/macJapan.txt | 1 + tools/encoding/macRoman.txt | 1 + tools/encoding/macRomania.txt | 1 + tools/encoding/macThai.txt | 1 + 10 files changed, 10 insertions(+) diff --git a/tools/encoding/macCentEuro.txt b/tools/encoding/macCentEuro.txt index bf424c1..aa92908 100644 --- a/tools/encoding/macCentEuro.txt +++ b/tools/encoding/macCentEuro.txt @@ -188,6 +188,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x0100 # LATIN CAPITAL LETTER A WITH MACRON diff --git a/tools/encoding/macCroatian.txt b/tools/encoding/macCroatian.txt index 538eda3..2eef246 100644 --- a/tools/encoding/macCroatian.txt +++ b/tools/encoding/macCroatian.txt @@ -216,6 +216,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macCyrillic.txt b/tools/encoding/macCyrillic.txt index 695dade..2e9f8e2 100644 --- a/tools/encoding/macCyrillic.txt +++ b/tools/encoding/macCyrillic.txt @@ -213,6 +213,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE # 0x80 0x0410 # CYRILLIC CAPITAL LETTER A 0x81 0x0411 # CYRILLIC CAPITAL LETTER BE diff --git a/tools/encoding/macDingbats.txt 
b/tools/encoding/macDingbats.txt index 273d526..4b815f4 100644 --- a/tools/encoding/macDingbats.txt +++ b/tools/encoding/macDingbats.txt @@ -151,6 +151,7 @@ 0x7C 0x275C # HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT 0x7D 0x275D # HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT 0x7E 0x275E # HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT +0x7F 0x007F # DELETE 0x80 0xF8D7 # medium left parenthesis ornament 0x81 0xF8D8 # medium right parenthesis ornament 0x82 0xF8D9 # medium flattened left parenthesis ornament diff --git a/tools/encoding/macGreek.txt b/tools/encoding/macGreek.txt index 9783259..b960d68 100644 --- a/tools/encoding/macGreek.txt +++ b/tools/encoding/macGreek.txt @@ -207,6 +207,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00B9 # SUPERSCRIPT ONE diff --git a/tools/encoding/macIceland.txt b/tools/encoding/macIceland.txt index 0a0b27b..c60b8d2 100644 --- a/tools/encoding/macIceland.txt +++ b/tools/encoding/macIceland.txt @@ -234,6 +234,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macJapan.txt b/tools/encoding/macJapan.txt index 7121b3b..3c48c4a 100644 --- a/tools/encoding/macJapan.txt +++ b/tools/encoding/macJapan.txt @@ -318,6 +318,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE # Apple change +0x7F 0x007F # DELETE # 0x8140 0x3000 # IDEOGRAPHIC SPACE 0x8141 0x3001 # IDEOGRAPHIC COMMA diff --git a/tools/encoding/macRoman.txt b/tools/encoding/macRoman.txt index 7ddcf8d..43ad44b 100644 --- a/tools/encoding/macRoman.txt +++ b/tools/encoding/macRoman.txt @@ -233,6 +233,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL 
LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macRomania.txt b/tools/encoding/macRomania.txt index 2a84adc..36a0b68 100644 --- a/tools/encoding/macRomania.txt +++ b/tools/encoding/macRomania.txt @@ -154,6 +154,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macThai.txt b/tools/encoding/macThai.txt index b991833..2043621 100644 --- a/tools/encoding/macThai.txt +++ b/tools/encoding/macThai.txt @@ -168,6 +168,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE # 0x80 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x81 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -- cgit v0.12 From 78db448fff66d55223a88f8225976f4324de1b95 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 22 Feb 2023 13:31:41 +0000 Subject: Make txt2enc smarter, so we don't have to add DELETE any more in all original tables, better keep them as-is. 
--- tools/encoding/Makefile | 2 +- tools/encoding/ascii.txt | 1 - tools/encoding/big5.txt | 1 - tools/encoding/dingbats.txt | 1 - tools/encoding/gb1988.txt | 1 - tools/encoding/macCentEuro.txt | 1 - tools/encoding/macCroatian.txt | 1 - tools/encoding/macCyrillic.txt | 1 - tools/encoding/macDingbats.txt | 1 - tools/encoding/macGreek.txt | 1 - tools/encoding/macIceland.txt | 1 - tools/encoding/macJapan.txt | 1 - tools/encoding/macRoman.txt | 1 - tools/encoding/macRomania.txt | 1 - tools/encoding/macThai.txt | 1 - tools/encoding/macTurkish.txt | 1 - tools/encoding/macUkraine.txt | 1 - tools/encoding/symbol.txt | 1 - tools/encoding/txt2enc.c | 14 ++++++++++++-- 19 files changed, 13 insertions(+), 20 deletions(-) diff --git a/tools/encoding/Makefile b/tools/encoding/Makefile index a2122d5..ff19492 100644 --- a/tools/encoding/Makefile +++ b/tools/encoding/Makefile @@ -67,7 +67,7 @@ encodings: clean txt2enc $(EUC_ENCODINGS) @for p in *.txt; do \ enc=`echo $$p | sed 's/\..*$$/\.enc/'`; \ echo $$enc; \ - ./txt2enc $$p > $$enc; \ + ./txt2enc -e 0 -u 1 $$p > $$enc; \ done @echo @echo Compiling special versions of encoding files. 
diff --git a/tools/encoding/ascii.txt b/tools/encoding/ascii.txt index 2afbaab..66ba6f3 100644 --- a/tools/encoding/ascii.txt +++ b/tools/encoding/ascii.txt @@ -93,4 +93,3 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE \ No newline at end of file diff --git a/tools/encoding/big5.txt b/tools/encoding/big5.txt index 06b0fac..58cdfe2 100644 --- a/tools/encoding/big5.txt +++ b/tools/encoding/big5.txt @@ -185,7 +185,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE 0xA140 0x3000 # IDEOGRAPHIC SPACE 0xA141 0xFF0C # FULLWIDTH COMMA 0xA142 0x3001 # IDEOGRAPHIC COMMA diff --git a/tools/encoding/dingbats.txt b/tools/encoding/dingbats.txt index 93a6081..334f8d6 100644 --- a/tools/encoding/dingbats.txt +++ b/tools/encoding/dingbats.txt @@ -155,7 +155,6 @@ 0x7C 0x275C # HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT 0x7D 0x275D # HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT 0x7E 0x275E # HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT -0x7F 0x275E # DELETE 0xA1 0x2761 # CURVED STEM PARAGRAPH SIGN ORNAMENT 0xA2 0x2762 # HEAVY EXCLAMATION MARK ORNAMENT 0xA3 0x2763 # HEAVY HEART EXCLAMATION MARK ORNAMENT diff --git a/tools/encoding/gb1988.txt b/tools/encoding/gb1988.txt index b9197e5..800cd68 100644 --- a/tools/encoding/gb1988.txt +++ b/tools/encoding/gb1988.txt @@ -93,7 +93,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x203E # OVERLINE -0x7F 0x007F # DELETE 0xA1 0xFF61 # HALFWIDTH IDEOGRAPHIC FULL STOP 0xA2 0xFF62 # HALFWIDTH LEFT CORNER BRACKET 0xA3 0xFF63 # HALFWIDTH RIGHT CORNER BRACKET diff --git a/tools/encoding/macCentEuro.txt b/tools/encoding/macCentEuro.txt index aa92908..bf424c1 100644 --- a/tools/encoding/macCentEuro.txt +++ b/tools/encoding/macCentEuro.txt @@ -188,7 +188,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH 
DIAERESIS 0x81 0x0100 # LATIN CAPITAL LETTER A WITH MACRON diff --git a/tools/encoding/macCroatian.txt b/tools/encoding/macCroatian.txt index 2eef246..538eda3 100644 --- a/tools/encoding/macCroatian.txt +++ b/tools/encoding/macCroatian.txt @@ -216,7 +216,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macCyrillic.txt b/tools/encoding/macCyrillic.txt index 2e9f8e2..695dade 100644 --- a/tools/encoding/macCyrillic.txt +++ b/tools/encoding/macCyrillic.txt @@ -213,7 +213,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x0410 # CYRILLIC CAPITAL LETTER A 0x81 0x0411 # CYRILLIC CAPITAL LETTER BE diff --git a/tools/encoding/macDingbats.txt b/tools/encoding/macDingbats.txt index 4b815f4..273d526 100644 --- a/tools/encoding/macDingbats.txt +++ b/tools/encoding/macDingbats.txt @@ -151,7 +151,6 @@ 0x7C 0x275C # HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT 0x7D 0x275D # HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT 0x7E 0x275E # HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT -0x7F 0x007F # DELETE 0x80 0xF8D7 # medium left parenthesis ornament 0x81 0xF8D8 # medium right parenthesis ornament 0x82 0xF8D9 # medium flattened left parenthesis ornament diff --git a/tools/encoding/macGreek.txt b/tools/encoding/macGreek.txt index b960d68..9783259 100644 --- a/tools/encoding/macGreek.txt +++ b/tools/encoding/macGreek.txt @@ -207,7 +207,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00B9 # SUPERSCRIPT ONE diff --git a/tools/encoding/macIceland.txt b/tools/encoding/macIceland.txt index c60b8d2..0a0b27b 100644 --- a/tools/encoding/macIceland.txt +++ b/tools/encoding/macIceland.txt @@ -234,7 +234,6 @@ 0x7C 0x007C # 
VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macJapan.txt b/tools/encoding/macJapan.txt index 3c48c4a..7121b3b 100644 --- a/tools/encoding/macJapan.txt +++ b/tools/encoding/macJapan.txt @@ -318,7 +318,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE # Apple change -0x7F 0x007F # DELETE # 0x8140 0x3000 # IDEOGRAPHIC SPACE 0x8141 0x3001 # IDEOGRAPHIC COMMA diff --git a/tools/encoding/macRoman.txt b/tools/encoding/macRoman.txt index 43ad44b..7ddcf8d 100644 --- a/tools/encoding/macRoman.txt +++ b/tools/encoding/macRoman.txt @@ -233,7 +233,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macRomania.txt b/tools/encoding/macRomania.txt index 36a0b68..2a84adc 100644 --- a/tools/encoding/macRomania.txt +++ b/tools/encoding/macRomania.txt @@ -154,7 +154,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macThai.txt b/tools/encoding/macThai.txt index 2043621..b991833 100644 --- a/tools/encoding/macThai.txt +++ b/tools/encoding/macThai.txt @@ -168,7 +168,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x81 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK diff --git a/tools/encoding/macTurkish.txt b/tools/encoding/macTurkish.txt index ca3cda3..4a1ddab 100644 --- a/tools/encoding/macTurkish.txt +++ b/tools/encoding/macTurkish.txt @@ -203,7 +203,6 @@ 0x7C 0x007C # 
VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macUkraine.txt b/tools/encoding/macUkraine.txt index dc07cdc..dba4e10 100644 --- a/tools/encoding/macUkraine.txt +++ b/tools/encoding/macUkraine.txt @@ -148,7 +148,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x0410 # CYRILLIC CAPITAL LETTER A 0x81 0x0411 # CYRILLIC CAPITAL LETTER BE diff --git a/tools/encoding/symbol.txt b/tools/encoding/symbol.txt index 13a3ed8..12dcae6 100644 --- a/tools/encoding/symbol.txt +++ b/tools/encoding/symbol.txt @@ -169,7 +169,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x223C # TILDE OPERATOR -0x7F 0x007F # DELETE 0xA1 0x03D2 # GREEK UPSILON WITH HOOK SYMBOL 0xA2 0x2032 # PRIME 0xA3 0x2264 # LESS-THAN OR EQUAL TO diff --git a/tools/encoding/txt2enc.c b/tools/encoding/txt2enc.c index 80b44b9..32c7344 100644 --- a/tools/encoding/txt2enc.c +++ b/tools/encoding/txt2enc.c @@ -26,7 +26,7 @@ main(int argc, char **argv) { FILE *fp; Rune *toUnicode[256]; - int i, multiByte, enc, uni, hi, lo, used, maxEnc; + int i, multiByte, enc, uni, hi, lo, fixmissing, used, maxEnc; int ch, encColumn, uniColumn, fallbackKnown, width; char *fallbackString, *str, *rest, *dot; unsigned int magic, type, symbol, fallbackChar; @@ -43,6 +43,7 @@ main(int argc, char **argv) fallbackKnown = 0; type = -1; symbol = 0; + fixmissing = 1; opterr = 0; while (1) { @@ -88,6 +89,10 @@ main(int argc, char **argv) symbol = 1; break; + case 'm': + fixmissing = 0; + break; + default: goto usage; } @@ -101,7 +106,7 @@ main(int argc, char **argv) fputs(" -f\tfallback character (default: QUESTION MARK)\n", stderr); fputs(" -t\toverride implicit type with single, double, or multi\n", stderr); fputs(" -s\tsymbol+ascii encoding\n", stderr); - fputs(" -m\tdon't 
implicitly include range 0080 to 00FF\n", stderr); + fputs(" -m\tdon't implicitly include 007F\n", stderr); return 1; } @@ -202,6 +207,11 @@ main(int argc, char **argv) for (i = 0; i < 0x20; i++) { toUnicode[0][i] = i; } + if (fixmissing) { + if (toUnicode[0x7F] == NULL && toUnicode[0][0x7F] == 0) { + toUnicode[0][0x7F] = 0x7F; + } + } } printf("# Encoding file: %s, %s-byte\n", argv[argc - 1], typeString[type]); -- cgit v0.12 From 12345dfed8593e385a076594f4edcc545166d9ac Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 22 Feb 2023 13:34:04 +0000 Subject: re-generate macDingbats.enc, so it can now throw exceptions for the range 0x8E-0x9F --- library/encoding/macDingbats.enc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/library/encoding/macDingbats.enc b/library/encoding/macDingbats.enc index 28449cd..9fa47b5 100644 --- a/library/encoding/macDingbats.enc +++ b/library/encoding/macDingbats.enc @@ -10,8 +10,8 @@ S 2730273127322733273427352736273727382739273A273B273C273D273E273F 2740274127422743274427452746274727482749274A274B25CF274D25A0274F 27502751275225B225BC25C6275625D727582759275A275B275C275D275E007F -F8D7F8D8F8D9F8DAF8DBF8DCF8DDF8DEF8DFF8E0F8E1F8E2F8E3F8E4008E008F -0090009100920093009400950096009700980099009A009B009C009D009E009F +F8D7F8D8F8D9F8DAF8DBF8DCF8DDF8DEF8DFF8E0F8E1F8E2F8E3F8E400000000 +0000000000000000000000000000000000000000000000000000000000000000 0000276127622763276427652766276726632666266526602460246124622463 2464246524662467246824692776277727782779277A277B277C277D277E277F 2780278127822783278427852786278727882789278A278B278C278D278E278F -- cgit v0.12 From 1d76ffb03b359c7f557943523fd9b0c49a312554 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 22 Feb 2023 20:44:13 +0000 Subject: minor bug-fix in utf-16/utf-32: 2 testcases failed in Tcl 9 compatibility mode (-DTCL_NO_DEPRECATED) --- generic/tclEncoding.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/generic/tclEncoding.c 
b/generic/tclEncoding.c index 0490831..8e13b43 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -237,8 +237,13 @@ static Tcl_EncodingConvertProc Iso88591ToUtfProc; */ static const Tcl_ObjType encodingType = { - "encoding", FreeEncodingInternalRep, DupEncodingInternalRep, NULL, NULL + "encoding", + FreeEncodingInternalRep, + DupEncodingInternalRep, + NULL, + NULL }; + #define EncodingSetInternalRep(objPtr, encoding) \ do { \ Tcl_ObjInternalRep ir; \ @@ -461,7 +466,7 @@ FillEncodingFileMap(void) map = Tcl_NewDictObj(); Tcl_IncrRefCount(map); - for (i = numDirs-1; i >= 0; i--) { + for (i = numDirs-1; i != TCL_INDEX_NONE; i--) { /* * Iterate backwards through the search path so as we overwrite * entries found, we favor files earlier on the search path. @@ -1182,7 +1187,7 @@ Tcl_ExternalToUtfDString( * Tcl_ExternalToUtfDStringEx -- * * Convert a source buffer from the specified encoding into UTF-8. -* The parameter flags controls the behavior, if any of the bytes in + * The parameter flags controls the behavior, if any of the bytes in * the source buffer are invalid or cannot be represented in utf-8. 
* Possible flags values: * TCL_ENCODING_STOPONERROR: don't replace invalid characters/bytes but @@ -1458,8 +1463,9 @@ Tcl_UtfToExternalDStringEx( char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; - int dstLen, result, soFar, srcRead, dstWrote, dstChars; + int result, soFar, srcRead, dstWrote, dstChars; const char *srcStart = src; + int dstLen; Tcl_DStringInit(dstPtr); dst = Tcl_DStringValue(dstPtr); @@ -2627,9 +2633,10 @@ Utf32ToUtfProc( result = TCL_CONVERT_NOSPACE; } else { /* destination is not full, so we really are at the end now */ - if (flags & TCL_ENCODING_STOPONERROR) { + if ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) { result = TCL_CONVERT_SYNTAX; } else { + result = TCL_OK; dst += Tcl_UniCharToUtf(0xFFFD, dst); numChars++; src += bytesLeft; @@ -2854,9 +2861,10 @@ Utf16ToUtfProc( result = TCL_CONVERT_NOSPACE; } else { /* destination is not full, so we really are at the end now */ - if (flags & TCL_ENCODING_STOPONERROR) { + if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { result = TCL_CONVERT_SYNTAX; } else { + result = TCL_OK; dst += Tcl_UniCharToUtf(0xFFFD, dst); numChars++; src++; -- cgit v0.12 From bf7b1adb896dbe4f79efb038aa0ecaebbdd3919c Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 23 Feb 2023 10:44:56 +0000 Subject: See [d19fe0a5b] for follow-up to previous commit --- generic/tclEncoding.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index ecec6e9..2b3b614 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2516,13 +2516,10 @@ UnicodeToUtfProc( result = TCL_CONVERT_NOSPACE; } else { /* destination is not full, so we really are at the end now */ - if (flags & TCL_ENCODING_STOPONERROR) { - result = TCL_CONVERT_SYNTAX; - } else { - dst += Tcl_UniCharToUtf(0xFFFD, dst); - numChars++; - src++; - } + result = TCL_OK; + dst += Tcl_UniCharToUtf(0xFFFD, dst); + numChars++; + src++; } } *srcReadPtr = src - srcStart; 
-- cgit v0.12 From 1c3c25097b1f63d6b1a0446c2c441833c4ecec11 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 24 Feb 2023 08:25:27 +0000 Subject: int -> Tcl_Size in tclEncoding.c (making the diff between Tcl 8.7 and 9.0 smaller) --- generic/tclEncoding.c | 54 +++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 8e13b43..f32baac 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -36,7 +36,7 @@ typedef struct { * encoding is deleted. */ void *clientData; /* Arbitrary value associated with encoding * type. Passed to conversion functions. */ - int nullSize; /* Number of 0x00 bytes that signify + Tcl_Size nullSize; /* Number of 0x00 bytes that signify * end-of-string in this encoding. This number * is used to determine the source string * length when the srcLen argument is @@ -374,7 +374,7 @@ int Tcl_SetEncodingSearchPath( Tcl_Obj *searchPath) { - int dummy; + Tcl_Size dummy; if (TCL_ERROR == TclListObjLengthM(NULL, searchPath, &dummy)) { return TCL_ERROR; @@ -421,7 +421,7 @@ void TclSetLibraryPath( Tcl_Obj *path) { - int dummy; + Tcl_Size dummy; if (TCL_ERROR == TclListObjLengthM(NULL, path, &dummy)) { return; @@ -457,7 +457,7 @@ TclSetLibraryPath( static void FillEncodingFileMap(void) { - int i, numDirs = 0; + Tcl_Size i, numDirs = 0; Tcl_Obj *map, *searchPath; searchPath = Tcl_GetEncodingSearchPath(); @@ -472,7 +472,7 @@ FillEncodingFileMap(void) * entries found, we favor files earlier on the search path. 
*/ - int j, numFiles; + Tcl_Size j, numFiles; Tcl_Obj *directory, *matchFileList; Tcl_Obj **filev; Tcl_GlobTypeData readableFiles = { @@ -1005,7 +1005,7 @@ Tcl_GetEncodingNames( * *--------------------------------------------------------------------------- */ -int +Tcl_Size Tcl_GetEncodingNulLength( Tcl_Encoding encoding) { @@ -1171,7 +1171,7 @@ Tcl_ExternalToUtfDString( Tcl_Encoding encoding, /* The encoding for the source string, or NULL * for the default system encoding. */ const char *src, /* Source string in specified encoding. */ - int srcLen, /* Source string length in bytes, or < 0 for + Tcl_Size srcLen, /* Source string length in bytes, or < 0 for * encoding-specific string length. */ Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ @@ -1210,12 +1210,12 @@ Tcl_ExternalToUtfDString( *------------------------------------------------------------------------- */ -int +Tcl_Size Tcl_ExternalToUtfDStringEx( Tcl_Encoding encoding, /* The encoding for the source string, or NULL * for the default system encoding. */ const char *src, /* Source string in specified encoding. */ - int srcLen, /* Source string length in bytes, or < 0 for + Tcl_Size srcLen, /* Source string length in bytes, or < 0 for * encoding-specific string length. */ int flags, /* Conversion control flags. */ Tcl_DString *dstPtr) /* Uninitialized or free DString in which the @@ -1224,7 +1224,8 @@ Tcl_ExternalToUtfDStringEx( char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; - int dstLen, result, soFar, srcRead, dstWrote, dstChars; + int result, soFar, srcRead, dstWrote, dstChars; + Tcl_Size dstLen; const char *srcStart = src; Tcl_DStringInit(dstPtr); @@ -1255,7 +1256,7 @@ Tcl_ExternalToUtfDStringEx( src += srcRead; if (result != TCL_CONVERT_NOSPACE) { Tcl_DStringSetLength(dstPtr, soFar); - return (result == TCL_OK) ? TCL_INDEX_NONE : (int)(src - srcStart); + return (result == TCL_OK) ? 
TCL_INDEX_NONE : (Tcl_Size)(src - srcStart); } flags &= ~TCL_ENCODING_START; srcLen -= srcRead; @@ -1292,7 +1293,7 @@ Tcl_ExternalToUtf( Tcl_Encoding encoding, /* The encoding for the source string, or NULL * for the default system encoding. */ const char *src, /* Source string in specified encoding. */ - int srcLen, /* Source string length in bytes, or < 0 for + Tcl_Size srcLen, /* Source string length in bytes, or < 0 for * encoding-specific string length. */ int flags, /* Conversion control flags. */ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state @@ -1302,7 +1303,7 @@ Tcl_ExternalToUtf( * routine under control of flags argument. */ char *dst, /* Output buffer in which converted string is * stored. */ - int dstLen, /* The maximum length of output buffer in + Tcl_Size dstLen, /* The maximum length of output buffer in * bytes. */ int *srcReadPtr, /* Filled with the number of bytes from the * source string that were converted. This may @@ -1409,7 +1410,7 @@ Tcl_UtfToExternalDString( Tcl_Encoding encoding, /* The encoding for the converted string, or * NULL for the default system encoding. */ const char *src, /* Source string in UTF-8. */ - int srcLen, /* Source string length in bytes, or < 0 for + Tcl_Size srcLen, /* Source string length in bytes, or < 0 for * strlen(). */ Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ @@ -1449,12 +1450,12 @@ Tcl_UtfToExternalDString( *------------------------------------------------------------------------- */ -int +Tcl_Size Tcl_UtfToExternalDStringEx( Tcl_Encoding encoding, /* The encoding for the converted string, or * NULL for the default system encoding. */ const char *src, /* Source string in UTF-8. */ - int srcLen, /* Source string length in bytes, or < 0 for + Tcl_Size srcLen, /* Source string length in bytes, or < 0 for * strlen(). */ int flags, /* Conversion control flags. 
*/ Tcl_DString *dstPtr) /* Uninitialized or free DString in which the @@ -1465,7 +1466,7 @@ Tcl_UtfToExternalDStringEx( const Encoding *encodingPtr; int result, soFar, srcRead, dstWrote, dstChars; const char *srcStart = src; - int dstLen; + Tcl_Size dstLen; Tcl_DStringInit(dstPtr); dst = Tcl_DStringValue(dstPtr); @@ -1494,7 +1495,7 @@ Tcl_UtfToExternalDStringEx( while (i >= soFar) { Tcl_DStringSetLength(dstPtr, i--); } - return (result == TCL_OK) ? TCL_INDEX_NONE : (int)(src - srcStart); + return (result == TCL_OK) ? TCL_INDEX_NONE : (Tcl_Size)(src - srcStart); } flags &= ~TCL_ENCODING_START; @@ -1532,7 +1533,7 @@ Tcl_UtfToExternal( Tcl_Encoding encoding, /* The encoding for the converted string, or * NULL for the default system encoding. */ const char *src, /* Source string in UTF-8. */ - int srcLen, /* Source string length in bytes, or < 0 for + Tcl_Size srcLen, /* Source string length in bytes, or < 0 for * strlen(). */ int flags, /* Conversion control flags. */ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state @@ -1542,7 +1543,7 @@ Tcl_UtfToExternal( * routine under control of flags argument. */ char *dst, /* Output buffer in which converted string * is stored. */ - int dstLen, /* The maximum length of output buffer in + Tcl_Size dstLen, /* The maximum length of output buffer in * bytes. */ int *srcReadPtr, /* Filled with the number of bytes from the * source string that were converted. 
This may @@ -1653,7 +1654,7 @@ OpenEncodingFileChannel( Tcl_Obj *map = TclGetProcessGlobalValue(&encodingFileMap); Tcl_Obj **dir, *path, *directory = NULL; Tcl_Channel chan = NULL; - int i, numDirs; + Tcl_Size i, numDirs; TclListObjGetElementsM(NULL, searchPath, &numDirs, &dir); Tcl_IncrRefCount(nameObj); @@ -1918,7 +1919,7 @@ LoadTableEncoding( for (i = 0; i < numPages; i++) { int ch; const char *p; - int expected = 3 + 16 * (16 * 4 + 1); + Tcl_Size expected = 3 + 16 * (16 * 4 + 1); if (Tcl_ReadChars(chan, objPtr, expected, 0) != expected) { return NULL; @@ -2154,7 +2155,7 @@ LoadEscapeEncoding( Tcl_DStringInit(&escapeData); while (1) { - int argc; + Tcl_Size argc; const char **argv; char *line; Tcl_DString lineString; @@ -3919,8 +3920,7 @@ EscapeFromUtfProc( result = TCL_CONVERT_NOSPACE; break; } - memcpy(dst, subTablePtr->sequence, - subTablePtr->sequenceLen); + memcpy(dst, subTablePtr->sequence, subTablePtr->sequenceLen); dst += subTablePtr->sequenceLen; } } @@ -4138,11 +4138,11 @@ unilen4( static void InitializeEncodingSearchPath( char **valuePtr, - unsigned int *lengthPtr, + TCL_HASH_TYPE *lengthPtr, Tcl_Encoding *encodingPtr) { const char *bytes; - int i, numDirs, numBytes; + Tcl_Size i, numDirs, numBytes; Tcl_Obj *libPathObj, *encodingObj, *searchPathObj; TclNewLiteralStringObj(encodingObj, "encoding"); -- cgit v0.12