diff options
author | Guido van Rossum <guido@python.org> | 2007-08-20 19:06:03 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2007-08-20 19:06:03 (GMT) |
commit | 61e21b52f1873bc4b0da34431e54a9517c0030e1 (patch) | |
tree | e569dc3bbc88512e345691a151c2d64a8e7dc910 /Modules | |
parent | 7d6068e6c9c3f8a2ee17b9b57e4f3f40b149c6eb (diff) | |
download | cpython-61e21b52f1873bc4b0da34431e54a9517c0030e1.zip cpython-61e21b52f1873bc4b0da34431e54a9517c0030e1.tar.gz cpython-61e21b52f1873bc4b0da34431e54a9517c0030e1.tar.bz2 |
Merged revisions 57152-57220 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
........
r57155 | brett.cannon | 2007-08-17 11:51:57 -0700 (Fri, 17 Aug 2007) | 2 lines
Clarify the comment about setting the PYTHON variable for the Doc Makefile.
........
r57156 | brett.cannon | 2007-08-17 11:59:58 -0700 (Fri, 17 Aug 2007) | 3 lines
Remove news entry for test.test_support.guard_warnings_filter as it has been
removed.
........
r57158 | facundo.batista | 2007-08-17 12:16:44 -0700 (Fri, 17 Aug 2007) | 9 lines
Added a flag (_send_traceback_header) to the SimpleXMLRPCServer class
that allows sending back exception/stack trace information about
internal server errors (this flag defaults to False to avoid sending
such information unless explicitly enabled). Added tests to verify
behavior of this new feature (these tests are skipped on win32 because
of problems with WSAEWOULDBLOCK). Renamed HTTPTestCase to
SimpleServerTestCase. [GSoC - Alan McIntyre]
........
r57160 | brett.cannon | 2007-08-17 13:16:15 -0700 (Fri, 17 Aug 2007) | 2 lines
Make test_warnings re-entrant.
........
r57175 | georg.brandl | 2007-08-17 23:05:56 -0700 (Fri, 17 Aug 2007) | 2 lines
Fix PDB command descriptions.
........
r57182 | brett.cannon | 2007-08-18 11:30:36 -0700 (Sat, 18 Aug 2007) | 3 lines
Remove Mikael as the maintainer of BeOS at his request. With no maintainer,
should the port be deprecated in 2.6?
........
r57199 | georg.brandl | 2007-08-19 11:43:50 -0700 (Sun, 19 Aug 2007) | 2 lines
Fix stray backticks.
........
r57211 | hyeshik.chang | 2007-08-19 23:49:18 -0700 (Sun, 19 Aug 2007) | 2 lines
Add cheot-ga-keut composed make-up sequence support in EUC-KR codec.
........
r57216 | guido.van.rossum | 2007-08-20 08:18:04 -0700 (Mon, 20 Aug 2007) | 4 lines
Add a hack (originally devised in a slightly different form by Thomas Wouters)
to prevent spurious tracebacks when a daemon thread's cleanup happens to wake
up when the world around it has already been destroyed.
........
Diffstat (limited to 'Modules')
-rw-r--r-- | Modules/cjkcodecs/_codecs_kr.c | 111 |
1 files changed, 104 insertions, 7 deletions
```diff
diff --git a/Modules/cjkcodecs/_codecs_kr.c b/Modules/cjkcodecs/_codecs_kr.c
index 2a95bbe..161967e 100644
--- a/Modules/cjkcodecs/_codecs_kr.c
+++ b/Modules/cjkcodecs/_codecs_kr.c
@@ -11,6 +11,26 @@
  * EUC-KR codec
  */
 
+#define EUCKR_JAMO_FIRSTBYTE 0xA4
+#define EUCKR_JAMO_FILLER 0xD4
+
+static const unsigned char u2cgk_choseong[19] = {
+	0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2,
+	0xb3, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb,
+	0xbc, 0xbd, 0xbe
+};
+static const unsigned char u2cgk_jungseong[21] = {
+	0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6,
+	0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce,
+	0xcf, 0xd0, 0xd1, 0xd2, 0xd3
+};
+static const unsigned char u2cgk_jongseong[28] = {
+	0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+	0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
+	0xb1, 0xb2, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xba,
+	0xbb, 0xbc, 0xbd, 0xbe
+};
+
 ENCODER(euc_kr)
 {
 	while (inleft > 0) {
@@ -28,17 +48,57 @@ ENCODER(euc_kr)
 		TRYMAP_ENC(cp949, code, c);
 		else return 1;
 
-		if (code & 0x8000) /* MSB set: CP949 */
-			return 1;
+		if ((code & 0x8000) == 0) {
+			/* KS X 1001 coded character */
+			OUT1((code >> 8) | 0x80)
+			OUT2((code & 0xFF) | 0x80)
+			NEXT(1, 2)
+		}
+		else { /* Mapping is found in CP949 extension,
+		        * but we encode it in KS X 1001:1998 Annex 3,
+		        * make-up sequence for EUC-KR. */
 
-		OUT1((code >> 8) | 0x80)
-		OUT2((code & 0xFF) | 0x80)
-		NEXT(1, 2)
+			REQUIRE_OUTBUF(8)
+
+			/* syllable composition precedence */
+			OUT1(EUCKR_JAMO_FIRSTBYTE)
+			OUT2(EUCKR_JAMO_FILLER)
+
+			/* All codepoints in CP949 extension are in unicode
+			 * Hangul Syllable area. */
+			assert(0xac00 <= c && c <= 0xd7a3);
+			c -= 0xac00;
+
+			OUT3(EUCKR_JAMO_FIRSTBYTE)
+			OUT4(u2cgk_choseong[c / 588])
+			NEXT_OUT(4)
+
+			OUT1(EUCKR_JAMO_FIRSTBYTE)
+			OUT2(u2cgk_jungseong[(c / 28) % 21])
+			OUT3(EUCKR_JAMO_FIRSTBYTE)
+			OUT4(u2cgk_jongseong[c % 28])
+			NEXT(1, 4)
+		}
 	}
 
 	return 0;
 }
 
+#define NONE 127
+
+static const unsigned char cgk2u_choseong[] = { /* [A1, BE] */
+	0, 1, NONE, 2, NONE, NONE, 3, 4,
+	5, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
+	6, 7, 8, NONE, 9, 10, 11, 12,
+	13, 14, 15, 16, 17, 18
+};
+static const unsigned char cgk2u_jongseong[] = { /* [A1, BE] */
+	1, 2, 3, 4, 5, 6, 7, NONE,
+	8, 9, 10, 11, 12, 13, 14, 15,
+	16, 17, NONE, 18, 19, 20, 21, 22,
+	NONE, 23, 24, 25, 26, 27
+};
+
 DECODER(euc_kr)
 {
 	while (inleft > 0) {
@@ -54,13 +114,50 @@ DECODER(euc_kr)
 
 		REQUIRE_INBUF(2)
 
-		TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
+		if (c == EUCKR_JAMO_FIRSTBYTE &&
+		    IN2 == EUCKR_JAMO_FILLER) {
+			/* KS X 1001:1998 Annex 3 make-up sequence */
+			DBCHAR cho, jung, jong;
+
+			REQUIRE_INBUF(8)
+			if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE ||
+			    (*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE ||
+			    (*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE)
+				return 8;
+
+			c = (*inbuf)[3];
+			if (0xa1 <= c && c <= 0xbe)
+				cho = cgk2u_choseong[c - 0xa1];
+			else
+				cho = NONE;
+
+			c = (*inbuf)[5];
+			jung = (0xbf <= c && c <= 0xd3) ? c - 0xbf : NONE;
+
+			c = (*inbuf)[7];
+			if (c == EUCKR_JAMO_FILLER)
+				jong = 0;
+			else if (0xa1 <= c && c <= 0xbe)
+				jong = cgk2u_jongseong[c - 0xa1];
+			else
+				jong = NONE;
+
+			if (cho == NONE || jung == NONE || jong == NONE)
+				return 8;
+
+			OUT1(0xac00 + cho*588 + jung*28 + jong);
+			NEXT(8, 1)
+		}
+		else TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
 			NEXT(2, 1)
-		} else return 2;
+		}
+		else
+			return 2;
 	}
 
 	return 0;
 }
+#undef NONE
 
 
 /*
```