diff options
Diffstat (limited to 'Modules/cjkcodecs/_codecs_hk.c')
-rw-r--r-- | Modules/cjkcodecs/_codecs_hk.c | 56 |
1 files changed, 48 insertions, 8 deletions
diff --git a/Modules/cjkcodecs/_codecs_hk.c b/Modules/cjkcodecs/_codecs_hk.c index 221eced..4bbd622 100644 --- a/Modules/cjkcodecs/_codecs_hk.c +++ b/Modules/cjkcodecs/_codecs_hk.c @@ -26,6 +26,16 @@ CODEC_INIT(big5hkscs) return 0; } +/* + * There are four possible pair unicode -> big5hkscs maps as in HKSCS 2004: + * U+00CA U+0304 -> 8862 (U+00CA alone is mapped to 8866) + * U+00CA U+030C -> 8864 + * U+00EA U+0304 -> 88a3 (U+00EA alone is mapped to 88a7) + * U+00EA U+030C -> 88a5 + * These are handled by not mapping tables but a hand-written code. + */ +static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5}; + ENCODER(big5hkscs) { while (inleft > 0) { @@ -46,7 +56,27 @@ ENCODER(big5hkscs) REQUIRE_OUTBUF(2) if (c < 0x10000) { - TRYMAP_ENC(big5hkscs_bmp, code, c); + TRYMAP_ENC(big5hkscs_bmp, code, c) { + if (code == MULTIC) { + if (inleft >= 2 && + ((c & 0xffdf) == 0x00ca) && + (((*inbuf)[1] & 0xfff7) == 0x0304)) { + code = big5hkscs_pairenc_table[ + ((c >> 4) | + ((*inbuf)[1] >> 3)) & 3]; + insize = 2; + } + else if (inleft < 2 && + !(flags & MBENC_FLUSH)) + return MBERR_TOOFEW; + else { + if (c == 0xca) + code = 0x8866; + else /* c == 0xea */ + code = 0x88a7; + } + } + } else TRYMAP_ENC(big5, code, c); else return 1; } @@ -67,7 +97,7 @@ ENCODER(big5hkscs) return 0; } -#define BH2S(c1, c2) (((c1) - 0x88) * (0xfe - 0x40 + 1) + ((c2) - 0x40)) +#define BH2S(c1, c2) (((c1) - 0x87) * (0xfe - 0x40 + 1) + ((c2) - 0x40)) DECODER(big5hkscs) { @@ -96,19 +126,19 @@ hkscsdec: TRYMAP_DEC(big5hkscs, decoded, c, IN2) { int s = BH2S(c, IN2); const unsigned char *hintbase; - assert(0x88 <= c && c <= 0xfe); + assert(0x87 <= c && c <= 0xfe); assert(0x40 <= IN2 && IN2 <= 0xfe); - if (BH2S(0x88, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) { + if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) { hintbase = big5hkscs_phint_0; - s -= BH2S(0x88, 0x40); + s -= BH2S(0x87, 0x40); } else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){ - hintbase = big5hkscs_phint_11939; + hintbase = big5hkscs_phint_12130; s -= BH2S(0xc6, 0xa1); } else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){ - hintbase = big5hkscs_phint_21733; + hintbase = big5hkscs_phint_21924; s -= BH2S(0xf9, 0xd6); } else @@ -123,7 +153,17 @@ hkscsdec: TRYMAP_DEC(big5hkscs, decoded, c, IN2) { NEXT(2, 1) } } - else return 2; + else { + switch ((c << 8) | IN2) { + case 0x8862: WRITE2(0x00ca, 0x0304); break; + case 0x8864: WRITE2(0x00ca, 0x030c); break; + case 0x88a3: WRITE2(0x00ea, 0x0304); break; + case 0x88a5: WRITE2(0x00ea, 0x030c); break; + default: return 2; + } + + NEXT(2, 2) /* all decoded codepoints are pairs, above. */ + } } return 0; |