summaryrefslogtreecommitdiffstats
path: root/Modules/cjkcodecs/_codecs_jp.c
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@gmail.com>2013-04-14 00:06:32 (GMT)
committerVictor Stinner <victor.stinner@gmail.com>2013-04-14 00:06:32 (GMT)
commitd949126995a7ff63590285aa816da65d97a31403 (patch)
tree5fae129dff24d0a40c415b2a8d1559ff613aceea /Modules/cjkcodecs/_codecs_jp.c
parent71557596b26e9c899e83adc99659732097097c4e (diff)
downloadcpython-d949126995a7ff63590285aa816da65d97a31403.zip
cpython-d949126995a7ff63590285aa816da65d97a31403.tar.gz
cpython-d949126995a7ff63590285aa816da65d97a31403.tar.bz2
Issue #17693: CJK encoders now use the new Unicode API (PEP 393)
Diffstat (limited to 'Modules/cjkcodecs/_codecs_jp.c')
-rw-r--r--Modules/cjkcodecs/_codecs_jp.c168
1 files changed, 86 insertions, 82 deletions
diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c
index 8bfb813..7ab318b 100644
--- a/Modules/cjkcodecs/_codecs_jp.c
+++ b/Modules/cjkcodecs/_codecs_jp.c
@@ -19,38 +19,39 @@
ENCODER(cp932)
{
- while (inleft > 0) {
- Py_UCS4 c = IN1;
+ while (*inpos < inlen) {
+ Py_UCS4 c = INCHAR1;
DBCHAR code;
unsigned char c1, c2;
if (c <= 0x80) {
- WRITE1((unsigned char)c)
- NEXT(1, 1)
+ WRITEBYTE1((unsigned char)c)
+ NEXT(1, 1);
continue;
}
else if (c >= 0xff61 && c <= 0xff9f) {
- WRITE1(c - 0xfec0)
- NEXT(1, 1)
+ WRITEBYTE1(c - 0xfec0)
+ NEXT(1, 1);
continue;
}
else if (c >= 0xf8f0 && c <= 0xf8f3) {
/* Windows compatibility */
REQUIRE_OUTBUF(1)
if (c == 0xf8f0)
- OUT1(0xa0)
+ OUTBYTE1(0xa0)
else
- OUT1(c - 0xfef1 + 0xfd)
- NEXT(1, 1)
+ OUTBYTE1(c - 0xfef1 + 0xfd)
+ NEXT(1, 1);
continue;
}
- UCS4INVALID(c)
+ if (c > 0xFFFF)
+ return 1;
REQUIRE_OUTBUF(2)
TRYMAP_ENC(cp932ext, code, c) {
- OUT1(code >> 8)
- OUT2(code & 0xff)
+ OUTBYTE1(code >> 8)
+ OUTBYTE2(code & 0xff)
}
else TRYMAP_ENC(jisxcommon, code, c) {
if (code & 0x8000) /* MSB set: JIS X 0212 */
@@ -61,20 +62,20 @@ ENCODER(cp932)
c2 = code & 0xff;
c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
c1 = (c1 - 0x21) >> 1;
- OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
- OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
+ OUTBYTE1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
+ OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
}
else if (c >= 0xe000 && c < 0xe758) {
/* User-defined area */
c1 = (Py_UCS4)(c - 0xe000) / 188;
c2 = (Py_UCS4)(c - 0xe000) % 188;
- OUT1(c1 + 0xf0)
- OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
+ OUTBYTE1(c1 + 0xf0)
+ OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
}
else
return 1;
- NEXT(1, 2)
+ NEXT(1, 2);
}
return 0;
@@ -83,7 +84,7 @@ ENCODER(cp932)
DECODER(cp932)
{
while (inleft > 0) {
- unsigned char c = IN1, c2;
+ unsigned char c = INBYTE1, c2;
if (c <= 0x80) {
OUTCHAR(c);
@@ -106,7 +107,7 @@ DECODER(cp932)
}
REQUIRE_INBUF(2)
- c2 = IN2;
+ c2 = INBYTE2;
TRYMAP_DEC(cp932ext, writer, c, c2);
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
@@ -145,25 +146,24 @@ DECODER(cp932)
ENCODER(euc_jis_2004)
{
- while (inleft > 0) {
- Py_UCS4 c = IN1;
+ while (*inpos < inlen) {
+ Py_UCS4 c = INCHAR1;
DBCHAR code;
Py_ssize_t insize;
if (c < 0x80) {
- WRITE1(c)
- NEXT(1, 1)
+ WRITEBYTE1(c)
+ NEXT(1, 1);
continue;
}
- DECODE_SURROGATE(c)
- insize = GET_INSIZE(c);
+ insize = 1;
if (c <= 0xFFFF) {
EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
else TRYMAP_ENC(jisx0213_bmp, code, c) {
if (code == MULTIC) {
- if (inleft < 2) {
+ if (inlen - *inpos < 2) {
if (flags & MBENC_FLUSH) {
code = find_pairencmap(
(ucs2_t)c, 0,
@@ -176,8 +176,9 @@ ENCODER(euc_jis_2004)
return MBERR_TOOFEW;
}
else {
+ Py_UCS4 c2 = INCHAR2;
code = find_pairencmap(
- (ucs2_t)c, (*inbuf)[1],
+ (ucs2_t)c, c2,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV) {
@@ -195,8 +196,8 @@ ENCODER(euc_jis_2004)
else TRYMAP_ENC(jisxcommon, code, c);
else if (c >= 0xff61 && c <= 0xff9f) {
/* JIS X 0201 half-width katakana */
- WRITE2(0x8e, c - 0xfec0)
- NEXT(1, 2)
+ WRITEBYTE2(0x8e, c - 0xfec0)
+ NEXT(1, 2);
continue;
}
else if (c == 0xff3c)
@@ -218,12 +219,12 @@ ENCODER(euc_jis_2004)
if (code & 0x8000) {
/* Codeset 2 */
- WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
- NEXT(insize, 3)
+ WRITEBYTE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
+ NEXT(insize, 3);
} else {
/* Codeset 1 */
- WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
- NEXT(insize, 2)
+ WRITEBYTE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
+ NEXT(insize, 2);
}
}
@@ -233,7 +234,7 @@ ENCODER(euc_jis_2004)
DECODER(euc_jis_2004)
{
while (inleft > 0) {
- unsigned char c = IN1;
+ unsigned char c = INBYTE1;
Py_UCS4 code;
if (c < 0x80) {
@@ -247,7 +248,7 @@ DECODER(euc_jis_2004)
unsigned char c2;
REQUIRE_INBUF(2)
- c2 = IN2;
+ c2 = INBYTE2;
if (c2 >= 0xa1 && c2 <= 0xdf) {
OUTCHAR(0xfec0 + c2);
NEXT_IN(2);
@@ -259,8 +260,8 @@ DECODER(euc_jis_2004)
unsigned char c2, c3;
REQUIRE_INBUF(3)
- c2 = IN2 ^ 0x80;
- c3 = IN3 ^ 0x80;
+ c2 = INBYTE2 ^ 0x80;
+ c3 = INBYTE3 ^ 0x80;
/* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */
EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c2, c3)
@@ -279,7 +280,7 @@ DECODER(euc_jis_2004)
REQUIRE_INBUF(2)
c ^= 0x80;
- c2 = IN2 ^ 0x80;
+ c2 = INBYTE2 ^ 0x80;
/* JIS X 0213 Plane 1 */
EMULATE_JISX0213_2000_DECODE_PLANE1(writer, c, c2)
@@ -312,35 +313,36 @@ DECODER(euc_jis_2004)
ENCODER(euc_jp)
{
- while (inleft > 0) {
- Py_UCS4 c = IN1;
+ while (*inpos < inlen) {
+ Py_UCS4 c = INCHAR1;
DBCHAR code;
if (c < 0x80) {
- WRITE1((unsigned char)c)
- NEXT(1, 1)
+ WRITEBYTE1((unsigned char)c)
+ NEXT(1, 1);
continue;
}
- UCS4INVALID(c)
+ if (c > 0xFFFF)
+ return 1;
TRYMAP_ENC(jisxcommon, code, c);
else if (c >= 0xff61 && c <= 0xff9f) {
/* JIS X 0201 half-width katakana */
- WRITE2(0x8e, c - 0xfec0)
- NEXT(1, 2)
+ WRITEBYTE2(0x8e, c - 0xfec0)
+ NEXT(1, 2);
continue;
}
#ifndef STRICT_BUILD
else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */
code = 0x2140;
else if (c == 0xa5) { /* YEN SIGN */
- WRITE1(0x5c);
- NEXT(1, 1)
+ WRITEBYTE1(0x5c);
+ NEXT(1, 1);
continue;
} else if (c == 0x203e) { /* OVERLINE */
- WRITE1(0x7e);
- NEXT(1, 1)
+ WRITEBYTE1(0x7e);
+ NEXT(1, 1);
continue;
}
#endif
@@ -349,12 +351,12 @@ ENCODER(euc_jp)
if (code & 0x8000) {
/* JIS X 0212 */
- WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
- NEXT(1, 3)
+ WRITEBYTE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
+ NEXT(1, 3);
} else {
/* JIS X 0208 */
- WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
- NEXT(1, 2)
+ WRITEBYTE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
+ NEXT(1, 2);
}
}
@@ -364,7 +366,7 @@ ENCODER(euc_jp)
DECODER(euc_jp)
{
while (inleft > 0) {
- unsigned char c = IN1;
+ unsigned char c = INBYTE1;
if (c < 0x80) {
OUTCHAR(c);
@@ -377,7 +379,7 @@ DECODER(euc_jp)
unsigned char c2;
REQUIRE_INBUF(2)
- c2 = IN2;
+ c2 = INBYTE2;
if (c2 >= 0xa1 && c2 <= 0xdf) {
OUTCHAR(0xfec0 + c2);
NEXT_IN(2);
@@ -389,8 +391,8 @@ DECODER(euc_jp)
unsigned char c2, c3;
REQUIRE_INBUF(3)
- c2 = IN2;
- c3 = IN3;
+ c2 = INBYTE2;
+ c3 = INBYTE3;
/* JIS X 0212 */
TRYMAP_DEC(jisx0212, writer, c2 ^ 0x80, c3 ^ 0x80) {
NEXT_IN(3);
@@ -402,7 +404,7 @@ DECODER(euc_jp)
unsigned char c2;
REQUIRE_INBUF(2)
- c2 = IN2;
+ c2 = INBYTE2;
/* JIS X 0208 */
#ifndef STRICT_BUILD
if (c == 0xa1 && c2 == 0xc0)
@@ -427,8 +429,8 @@ DECODER(euc_jp)
ENCODER(shift_jis)
{
- while (inleft > 0) {
- Py_UCS4 c = IN1;
+ while (*inpos < inlen) {
+ Py_UCS4 c = INCHAR1;
DBCHAR code;
unsigned char c1, c2;
@@ -440,14 +442,16 @@ ENCODER(shift_jis)
else if (c == 0x203e) code = 0x7e; /* OVERLINE */
#endif
else JISX0201_K_ENCODE(c, code)
- else UCS4INVALID(c)
- else code = NOCHAR;
+ else if (c > 0xFFFF)
+ return 1;
+ else
+ code = NOCHAR;
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
REQUIRE_OUTBUF(1)
- OUT1((unsigned char)code)
- NEXT(1, 1)
+ OUTBYTE1((unsigned char)code)
+ NEXT(1, 1);
continue;
}
@@ -470,9 +474,9 @@ ENCODER(shift_jis)
c2 = code & 0xff;
c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
c1 = (c1 - 0x21) >> 1;
- OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
- OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
- NEXT(1, 2)
+ OUTBYTE1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
+ OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
+ NEXT(1, 2);
}
return 0;
@@ -481,7 +485,7 @@ ENCODER(shift_jis)
DECODER(shift_jis)
{
while (inleft > 0) {
- unsigned char c = IN1;
+ unsigned char c = INBYTE1;
#ifdef STRICT_BUILD
JISX0201_R_DECODE(c, writer)
@@ -493,7 +497,7 @@ DECODER(shift_jis)
unsigned char c1, c2;
REQUIRE_INBUF(2)
- c2 = IN2;
+ c2 = INBYTE2;
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
return 1;
@@ -533,30 +537,29 @@ DECODER(shift_jis)
ENCODER(shift_jis_2004)
{
- while (inleft > 0) {
- Py_UCS4 c = IN1;
+ while (*inpos < inlen) {
+ Py_UCS4 c = INCHAR1;
DBCHAR code = NOCHAR;
int c1, c2;
Py_ssize_t insize;
JISX0201_ENCODE(c, code)
- else DECODE_SURROGATE(c)
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
- WRITE1((unsigned char)code)
- NEXT(1, 1)
+ WRITEBYTE1((unsigned char)code)
+ NEXT(1, 1);
continue;
}
REQUIRE_OUTBUF(2)
- insize = GET_INSIZE(c);
+ insize = 1;
if (code == NOCHAR) {
if (c <= 0xffff) {
EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
else TRYMAP_ENC(jisx0213_bmp, code, c) {
if (code == MULTIC) {
- if (inleft < 2) {
+ if (inlen - *inpos < 2) {
if (flags & MBENC_FLUSH) {
code = find_pairencmap
((ucs2_t)c, 0,
@@ -569,8 +572,9 @@ ENCODER(shift_jis_2004)
return MBERR_TOOFEW;
}
else {
+ Py_UCS4 ch2 = INCHAR2;
code = find_pairencmap(
- (ucs2_t)c, IN2,
+ (ucs2_t)c, ch2,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV) {
@@ -615,10 +619,10 @@ ENCODER(shift_jis_2004)
if (c1 & 1) c2 += 0x5e;
c1 >>= 1;
- OUT1(c1 + (c1 < 0x1f ? 0x81 : 0xc1))
- OUT2(c2 + (c2 < 0x3f ? 0x40 : 0x41))
+ OUTBYTE1(c1 + (c1 < 0x1f ? 0x81 : 0xc1))
+ OUTBYTE2(c2 + (c2 < 0x3f ? 0x40 : 0x41))
- NEXT(insize, 2)
+ NEXT(insize, 2);
}
return 0;
@@ -627,7 +631,7 @@ ENCODER(shift_jis_2004)
DECODER(shift_jis_2004)
{
while (inleft > 0) {
- unsigned char c = IN1;
+ unsigned char c = INBYTE1;
JISX0201_DECODE(c, writer)
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){
@@ -635,7 +639,7 @@ DECODER(shift_jis_2004)
Py_UCS4 code;
REQUIRE_INBUF(2)
- c2 = IN2;
+ c2 = INBYTE2;
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
return 1;