summary refs log tree commit diff stats
path: root/Modules/cjkcodecs/_codecs_kr.c
diff options
context:
space:
mode:
author Hye-Shik Chang <hyeshik@gmail.com> 2007-08-20 06:49:18 (GMT)
committer Hye-Shik Chang <hyeshik@gmail.com> 2007-08-20 06:49:18 (GMT)
commit 2390104d81fb90c525fb353589837fc66837e5ff (patch)
tree 873e11368d5a0759df39bfaaf98493c31ac5f9df /Modules/cjkcodecs/_codecs_kr.c
parent c553f4290766e0a9b713af7c120b9c2a0af1df64 (diff)
download cpython-2390104d81fb90c525fb353589837fc66837e5ff.zip
cpython-2390104d81fb90c525fb353589837fc66837e5ff.tar.gz
cpython-2390104d81fb90c525fb353589837fc66837e5ff.tar.bz2
Add cheot-ga-keut composed make-up sequence support in EUC-KR codec.
Diffstat (limited to 'Modules/cjkcodecs/_codecs_kr.c')
-rw-r--r-- Modules/cjkcodecs/_codecs_kr.c 111
1 files changed, 104 insertions, 7 deletions
diff --git a/Modules/cjkcodecs/_codecs_kr.c b/Modules/cjkcodecs/_codecs_kr.c
index 2a95bbe..161967e 100644
--- a/Modules/cjkcodecs/_codecs_kr.c
+++ b/Modules/cjkcodecs/_codecs_kr.c
@@ -11,6 +11,26 @@
* EUC-KR codec
*/
+#define EUCKR_JAMO_FIRSTBYTE 0xA4
+#define EUCKR_JAMO_FILLER 0xD4
+
+static const unsigned char u2cgk_choseong[19] = {
+ 0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2,
+ 0xb3, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb,
+ 0xbc, 0xbd, 0xbe
+};
+static const unsigned char u2cgk_jungseong[21] = {
+ 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6,
+ 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce,
+ 0xcf, 0xd0, 0xd1, 0xd2, 0xd3
+};
+static const unsigned char u2cgk_jongseong[28] = {
+ 0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+ 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
+ 0xb1, 0xb2, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xba,
+ 0xbb, 0xbc, 0xbd, 0xbe
+};
+
ENCODER(euc_kr)
{
while (inleft > 0) {
@@ -28,17 +48,57 @@ ENCODER(euc_kr)
TRYMAP_ENC(cp949, code, c);
else return 1;
- if (code & 0x8000) /* MSB set: CP949 */
- return 1;
+ if ((code & 0x8000) == 0) {
+ /* KS X 1001 coded character */
+ OUT1((code >> 8) | 0x80)
+ OUT2((code & 0xFF) | 0x80)
+ NEXT(1, 2)
+ }
+ else { /* Mapping is found in CP949 extension,
+ * but we encode it in KS X 1001:1998 Annex 3,
+ * make-up sequence for EUC-KR. */
- OUT1((code >> 8) | 0x80)
- OUT2((code & 0xFF) | 0x80)
- NEXT(1, 2)
+ REQUIRE_OUTBUF(8)
+
+ /* syllable composition precedence */
+ OUT1(EUCKR_JAMO_FIRSTBYTE)
+ OUT2(EUCKR_JAMO_FILLER)
+
+ /* All codepoints in CP949 extension are in unicode
+ * Hangul Syllable area. */
+ assert(0xac00 <= c && c <= 0xd7a3);
+ c -= 0xac00;
+
+ OUT3(EUCKR_JAMO_FIRSTBYTE)
+ OUT4(u2cgk_choseong[c / 588])
+ NEXT_OUT(4)
+
+ OUT1(EUCKR_JAMO_FIRSTBYTE)
+ OUT2(u2cgk_jungseong[(c / 28) % 21])
+ OUT3(EUCKR_JAMO_FIRSTBYTE)
+ OUT4(u2cgk_jongseong[c % 28])
+ NEXT(1, 4)
+ }
}
return 0;
}
+#define NONE 127
+
+static const unsigned char cgk2u_choseong[] = { /* [A1, BE] */
+ 0, 1, NONE, 2, NONE, NONE, 3, 4,
+ 5, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
+ 6, 7, 8, NONE, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18
+};
+static const unsigned char cgk2u_jongseong[] = { /* [A1, BE] */
+ 1, 2, 3, 4, 5, 6, 7, NONE,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, NONE, 18, 19, 20, 21, 22,
+ NONE, 23, 24, 25, 26, 27
+};
+
DECODER(euc_kr)
{
while (inleft > 0) {
@@ -54,13 +114,50 @@ DECODER(euc_kr)
REQUIRE_INBUF(2)
- TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
+ if (c == EUCKR_JAMO_FIRSTBYTE &&
+ IN2 == EUCKR_JAMO_FILLER) {
+ /* KS X 1001:1998 Annex 3 make-up sequence */
+ DBCHAR cho, jung, jong;
+
+ REQUIRE_INBUF(8)
+ if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE ||
+ (*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE ||
+ (*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE)
+ return 8;
+
+ c = (*inbuf)[3];
+ if (0xa1 <= c && c <= 0xbe)
+ cho = cgk2u_choseong[c - 0xa1];
+ else
+ cho = NONE;
+
+ c = (*inbuf)[5];
+ jung = (0xbf <= c && c <= 0xd3) ? c - 0xbf : NONE;
+
+ c = (*inbuf)[7];
+ if (c == EUCKR_JAMO_FILLER)
+ jong = 0;
+ else if (0xa1 <= c && c <= 0xbe)
+ jong = cgk2u_jongseong[c - 0xa1];
+ else
+ jong = NONE;
+
+ if (cho == NONE || jung == NONE || jong == NONE)
+ return 8;
+
+ OUT1(0xac00 + cho*588 + jung*28 + jong);
+ NEXT(8, 1)
+ }
+ else TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
NEXT(2, 1)
- } else return 2;
+ }
+ else
+ return 2;
}
return 0;
}
+#undef NONE
/*