summary refs log tree commit diff stats
path: root/Modules/cjkcodecs
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/cjkcodecs')
-rw-r--r-- Modules/cjkcodecs/_codecs_cn.c 210
-rw-r--r-- Modules/cjkcodecs/_codecs_hk.c 72
-rw-r--r-- Modules/cjkcodecs/_codecs_iso2022.c 283
-rw-r--r-- Modules/cjkcodecs/_codecs_jp.c 325
-rw-r--r-- Modules/cjkcodecs/_codecs_kr.c 152
-rw-r--r-- Modules/cjkcodecs/_codecs_tw.c 60
-rw-r--r-- Modules/cjkcodecs/alg_jisx0201.h 21
-rw-r--r-- Modules/cjkcodecs/cjkcodecs.h 158
-rw-r--r-- Modules/cjkcodecs/emu_jisx0213_2000.h 5
-rw-r--r-- Modules/cjkcodecs/mappings_cn.h 2
-rw-r--r-- Modules/cjkcodecs/mappings_jisx0213_pair.h 2
-rw-r--r-- Modules/cjkcodecs/multibytecodec.c 334
-rw-r--r-- Modules/cjkcodecs/multibytecodec.h 16
13 files changed, 797 insertions, 843 deletions
diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c
index 9e9e96c..285da1e 100644
--- a/Modules/cjkcodecs/_codecs_cn.c
+++ b/Modules/cjkcodecs/_codecs_cn.c
@@ -23,12 +23,12 @@
* A844 undefined U+2015 HORIZONTAL BAR
*/
-#define GBK_DECODE(dc1, dc2, assi) \
- if ((dc1) == 0xa1 && (dc2) == 0xaa) (assi) = 0x2014; \
- else if ((dc1) == 0xa8 && (dc2) == 0x44) (assi) = 0x2015; \
- else if ((dc1) == 0xa1 && (dc2) == 0xa4) (assi) = 0x00b7; \
- else TRYMAP_DEC(gb2312, assi, dc1 ^ 0x80, dc2 ^ 0x80); \
- else TRYMAP_DEC(gbkext, assi, dc1, dc2);
+#define GBK_DECODE(dc1, dc2, writer) \
+ if ((dc1) == 0xa1 && (dc2) == 0xaa) OUTCHAR(0x2014); \
+ else if ((dc1) == 0xa8 && (dc2) == 0x44) OUTCHAR(0x2015); \
+ else if ((dc1) == 0xa1 && (dc2) == 0xa4) OUTCHAR(0x00b7); \
+ else TRYMAP_DEC(gb2312, writer, dc1 ^ 0x80, dc2 ^ 0x80); \
+ else TRYMAP_DEC(gbkext, writer, dc1, dc2);
#define GBK_ENCODE(code, assi) \
if ((code) == 0x2014) (assi) = 0xa1aa; \
@@ -42,16 +42,18 @@
ENCODER(gb2312)
{
- while (inleft > 0) {
- Py_UNICODE c = IN1;
+ while (*inpos < inlen) {
+ Py_UCS4 c = INCHAR1;
DBCHAR code;
if (c < 0x80) {
- WRITE1((unsigned char)c)
- NEXT(1, 1)
+ WRITEBYTE1((unsigned char)c)
+ NEXT(1, 1);
continue;
}
- UCS4INVALID(c)
+
+ if (c > 0xFFFF)
+ return 1;
REQUIRE_OUTBUF(2)
TRYMAP_ENC(gbcommon, code, c);
@@ -60,9 +62,9 @@ ENCODER(gb2312)
if (code & 0x8000) /* MSB set: GBK */
return 1;
- OUT1((code >> 8) | 0x80)
- OUT2((code & 0xFF) | 0x80)
- NEXT(1, 2)
+ OUTBYTE1((code >> 8) | 0x80)
+ OUTBYTE2((code & 0xFF) | 0x80)
+ NEXT(1, 2);
}
return 0;
@@ -73,17 +75,15 @@ DECODER(gb2312)
while (inleft > 0) {
unsigned char c = **inbuf;
- REQUIRE_OUTBUF(1)
-
if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
continue;
}
REQUIRE_INBUF(2)
- TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
- NEXT(2, 1)
+ TRYMAP_DEC(gb2312, writer, c ^ 0x80, INBYTE2 ^ 0x80) {
+ NEXT_IN(2);
}
else return 1;
}
@@ -98,28 +98,30 @@ DECODER(gb2312)
ENCODER(gbk)
{
- while (inleft > 0) {
- Py_UNICODE c = IN1;
+ while (*inpos < inlen) {
+ Py_UCS4 c = INCHAR1;
DBCHAR code;
if (c < 0x80) {
- WRITE1((unsigned char)c)
- NEXT(1, 1)
+ WRITEBYTE1((unsigned char)c)
+ NEXT(1, 1);
continue;
}
- UCS4INVALID(c)
+
+ if (c > 0xFFFF)
+ return 1;
REQUIRE_OUTBUF(2)
GBK_ENCODE(c, code)
else return 1;
- OUT1((code >> 8) | 0x80)
+ OUTBYTE1((code >> 8) | 0x80)
if (code & 0x8000)
- OUT2((code & 0xFF)) /* MSB set: GBK */
+ OUTBYTE2((code & 0xFF)) /* MSB set: GBK */
else
- OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
- NEXT(1, 2)
+ OUTBYTE2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
+ NEXT(1, 2);
}
return 0;
@@ -128,22 +130,20 @@ ENCODER(gbk)
DECODER(gbk)
{
while (inleft > 0) {
- unsigned char c = IN1;
-
- REQUIRE_OUTBUF(1)
+ unsigned char c = INBYTE1;
if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
continue;
}
REQUIRE_INBUF(2)
- GBK_DECODE(c, IN2, **outbuf)
+ GBK_DECODE(c, INBYTE2, writer)
else return 1;
- NEXT(2, 1)
+ NEXT_IN(2);
}
return 0;
@@ -156,41 +156,31 @@ DECODER(gbk)
ENCODER(gb18030)
{
- while (inleft > 0) {
- ucs4_t c = IN1;
+ while (*inpos < inlen) {
+ Py_UCS4 c = INCHAR1;
DBCHAR code;
if (c < 0x80) {
- WRITE1(c)
- NEXT(1, 1)
+ WRITEBYTE1(c)
+ NEXT(1, 1);
continue;
}
- DECODE_SURROGATE(c)
- if (c > 0x10FFFF)
-#if Py_UNICODE_SIZE == 2
- return 2; /* surrogates pair */
-#else
- return 1;
-#endif
- else if (c >= 0x10000) {
- ucs4_t tc = c - 0x10000;
+ if (c >= 0x10000) {
+ Py_UCS4 tc = c - 0x10000;
+ assert (c <= 0x10FFFF);
REQUIRE_OUTBUF(4)
- OUT4((unsigned char)(tc % 10) + 0x30)
+ OUTBYTE4((unsigned char)(tc % 10) + 0x30)
tc /= 10;
- OUT3((unsigned char)(tc % 126) + 0x81)
+ OUTBYTE3((unsigned char)(tc % 126) + 0x81)
tc /= 126;
- OUT2((unsigned char)(tc % 10) + 0x30)
+ OUTBYTE2((unsigned char)(tc % 10) + 0x30)
tc /= 10;
- OUT1((unsigned char)(tc + 0x90))
+ OUTBYTE1((unsigned char)(tc + 0x90))
-#if Py_UNICODE_SIZE == 2
- NEXT(2, 4) /* surrogates pair */
-#else
- NEXT(1, 4)
-#endif
+ NEXT(1, 4);
continue;
}
@@ -208,20 +198,20 @@ ENCODER(gb18030)
utrrange++)
if (utrrange->first <= c &&
c <= utrrange->last) {
- Py_UNICODE tc;
+ Py_UCS4 tc;
tc = c - utrrange->first +
utrrange->base;
- OUT4((unsigned char)(tc % 10) + 0x30)
+ OUTBYTE4((unsigned char)(tc % 10) + 0x30)
tc /= 10;
- OUT3((unsigned char)(tc % 126) + 0x81)
+ OUTBYTE3((unsigned char)(tc % 126) + 0x81)
tc /= 126;
- OUT2((unsigned char)(tc % 10) + 0x30)
+ OUTBYTE2((unsigned char)(tc % 10) + 0x30)
tc /= 10;
- OUT1((unsigned char)tc + 0x81)
+ OUTBYTE1((unsigned char)tc + 0x81)
- NEXT(1, 4)
+ NEXT(1, 4);
break;
}
@@ -230,13 +220,13 @@ ENCODER(gb18030)
continue;
}
- OUT1((code >> 8) | 0x80)
+ OUTBYTE1((code >> 8) | 0x80)
if (code & 0x8000)
- OUT2((code & 0xFF)) /* MSB set: GBK or GB18030ext */
+ OUTBYTE2((code & 0xFF)) /* MSB set: GBK or GB18030ext */
else
- OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
+ OUTBYTE2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
- NEXT(1, 2)
+ NEXT(1, 2);
}
return 0;
@@ -245,61 +235,59 @@ ENCODER(gb18030)
DECODER(gb18030)
{
while (inleft > 0) {
- unsigned char c = IN1, c2;
-
- REQUIRE_OUTBUF(1)
+ unsigned char c = INBYTE1, c2;
if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
continue;
}
REQUIRE_INBUF(2)
- c2 = IN2;
+ c2 = INBYTE2;
if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */
const struct _gb18030_to_unibmp_ranges *utr;
unsigned char c3, c4;
- ucs4_t lseq;
+ Py_UCS4 lseq;
REQUIRE_INBUF(4)
- c3 = IN3;
- c4 = IN4;
+ c3 = INBYTE3;
+ c4 = INBYTE4;
if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39)
return 1;
c -= 0x81; c2 -= 0x30;
c3 -= 0x81; c4 -= 0x30;
if (c < 4) { /* U+0080 - U+FFFF */
- lseq = ((ucs4_t)c * 10 + c2) * 1260 +
- (ucs4_t)c3 * 10 + c4;
+ lseq = ((Py_UCS4)c * 10 + c2) * 1260 +
+ (Py_UCS4)c3 * 10 + c4;
if (lseq < 39420) {
for (utr = gb18030_to_unibmp_ranges;
lseq >= (utr + 1)->base;
utr++) ;
- OUT1(utr->first - utr->base + lseq)
- NEXT(4, 1)
+ OUTCHAR(utr->first - utr->base + lseq);
+ NEXT_IN(4);
continue;
}
}
else if (c >= 15) { /* U+10000 - U+10FFFF */
- lseq = 0x10000 + (((ucs4_t)c-15) * 10 + c2)
- * 1260 + (ucs4_t)c3 * 10 + c4;
+ lseq = 0x10000 + (((Py_UCS4)c-15) * 10 + c2)
+ * 1260 + (Py_UCS4)c3 * 10 + c4;
if (lseq <= 0x10FFFF) {
- WRITEUCS4(lseq);
- NEXT_IN(4)
+ OUTCHAR(lseq);
+ NEXT_IN(4);
continue;
}
}
return 1;
}
- GBK_DECODE(c, c2, **outbuf)
- else TRYMAP_DEC(gb18030ext, **outbuf, c, c2);
+ GBK_DECODE(c, c2, writer)
+ else TRYMAP_DEC(gb18030ext, writer, c, c2);
else return 1;
- NEXT(2, 1)
+ NEXT_IN(2);
}
return 0;
@@ -319,33 +307,34 @@ ENCODER_INIT(hz)
ENCODER_RESET(hz)
{
if (state->i != 0) {
- WRITE2('~', '}')
+ WRITEBYTE2('~', '}')
state->i = 0;
- NEXT_OUT(2)
+ NEXT_OUT(2);
}
return 0;
}
ENCODER(hz)
{
- while (inleft > 0) {
- Py_UNICODE c = IN1;
+ while (*inpos < inlen) {
+ Py_UCS4 c = INCHAR1;
DBCHAR code;
if (c < 0x80) {
if (state->i == 0) {
- WRITE1((unsigned char)c)
- NEXT(1, 1)
+ WRITEBYTE1((unsigned char)c)
+ NEXT(1, 1);
}
else {
- WRITE3('~', '}', (unsigned char)c)
- NEXT(1, 3)
+ WRITEBYTE3('~', '}', (unsigned char)c)
+ NEXT(1, 3);
state->i = 0;
}
continue;
}
- UCS4INVALID(c)
+ if (c > 0xFFFF)
+ return 1;
TRYMAP_ENC(gbcommon, code, c);
else return 1;
@@ -354,13 +343,13 @@ ENCODER(hz)
return 1;
if (state->i == 0) {
- WRITE4('~', '{', code >> 8, code & 0xff)
- NEXT(1, 4)
+ WRITEBYTE4('~', '{', code >> 8, code & 0xff)
+ NEXT(1, 4);
state->i = 1;
}
else {
- WRITE2(code >> 8, code & 0xff)
- NEXT(1, 2)
+ WRITEBYTE2(code >> 8, code & 0xff)
+ NEXT(1, 2);
}
}
@@ -382,15 +371,15 @@ DECODER_RESET(hz)
DECODER(hz)
{
while (inleft > 0) {
- unsigned char c = IN1;
+ unsigned char c = INBYTE1;
if (c == '~') {
- unsigned char c2 = IN2;
+ unsigned char c2 = INBYTE2;
REQUIRE_INBUF(2)
if (c2 == '~') {
- WRITE1('~')
- NEXT(2, 1)
+ OUTCHAR('~');
+ NEXT_IN(2);
continue;
}
else if (c2 == '{' && state->i == 0)
@@ -401,7 +390,7 @@ DECODER(hz)
; /* line-continuation */
else
return 1;
- NEXT(2, 0);
+ NEXT_IN(2);
continue;
}
@@ -409,14 +398,13 @@ DECODER(hz)
return 1;
if (state->i == 0) { /* ASCII mode */
- WRITE1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
}
else { /* GB mode */
REQUIRE_INBUF(2)
- REQUIRE_OUTBUF(1)
- TRYMAP_DEC(gb2312, **outbuf, c, IN2) {
- NEXT(2, 1)
+ TRYMAP_DEC(gb2312, writer, c, INBYTE2) {
+ NEXT_IN(2);
}
else
return 1;
diff --git a/Modules/cjkcodecs/_codecs_hk.c b/Modules/cjkcodecs/_codecs_hk.c
index d3ad04b..fe5f597 100644
--- a/Modules/cjkcodecs/_codecs_hk.c
+++ b/Modules/cjkcodecs/_codecs_hk.c
@@ -38,35 +38,39 @@ static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5
ENCODER(big5hkscs)
{
- while (inleft > 0) {
- ucs4_t c = **inbuf;
+ while (*inpos < inlen) {
+ Py_UCS4 c = INCHAR1;
DBCHAR code;
Py_ssize_t insize;
if (c < 0x80) {
REQUIRE_OUTBUF(1)
**outbuf = (unsigned char)c;
- NEXT(1, 1)
+ NEXT(1, 1);
continue;
}
- DECODE_SURROGATE(c)
- insize = GET_INSIZE(c);
-
+ insize = 1;
REQUIRE_OUTBUF(2)
if (c < 0x10000) {
TRYMAP_ENC(big5hkscs_bmp, code, c) {
if (code == MULTIC) {
- if (inleft >= 2 &&
+ Py_UCS4 c2;
+ if (inlen - *inpos >= 2)
+ c2 = INCHAR2;
+ else
+ c2 = 0;
+
+ if (inlen - *inpos >= 2 &&
((c & 0xffdf) == 0x00ca) &&
- (((*inbuf)[1] & 0xfff7) == 0x0304)) {
+ ((c2 & 0xfff7) == 0x0304)) {
code = big5hkscs_pairenc_table[
((c >> 4) |
- ((*inbuf)[1] >> 3)) & 3];
+ (c2 >> 3)) & 3];
insize = 2;
}
- else if (inleft < 2 &&
+ else if (inlen - *inpos < 2 &&
!(flags & MBENC_FLUSH))
return MBERR_TOOFEW;
else {
@@ -89,9 +93,9 @@ ENCODER(big5hkscs)
else
return insize;
- OUT1(code >> 8)
- OUT2(code & 0xFF)
- NEXT(insize, 2)
+ OUTBYTE1(code >> 8)
+ OUTBYTE2(code & 0xFF)
+ NEXT(insize, 2);
}
return 0;
@@ -102,33 +106,31 @@ ENCODER(big5hkscs)
DECODER(big5hkscs)
{
while (inleft > 0) {
- unsigned char c = IN1;
- ucs4_t decoded;
-
- REQUIRE_OUTBUF(1)
+ unsigned char c = INBYTE1;
+ Py_UCS4 decoded;
if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
continue;
}
REQUIRE_INBUF(2)
- if (0xc6 > c || c > 0xc8 || (c < 0xc7 && IN2 < 0xa1)) {
- TRYMAP_DEC(big5, **outbuf, c, IN2) {
- NEXT(2, 1)
+ if (0xc6 > c || c > 0xc8 || (c < 0xc7 && INBYTE2 < 0xa1)) {
+ TRYMAP_DEC(big5, writer, c, INBYTE2) {
+ NEXT_IN(2);
continue;
}
}
- TRYMAP_DEC(big5hkscs, decoded, c, IN2)
+ TRYMAP_DEC_CHAR(big5hkscs, decoded, c, INBYTE2)
{
- int s = BH2S(c, IN2);
+ int s = BH2S(c, INBYTE2);
const unsigned char *hintbase;
assert(0x87 <= c && c <= 0xfe);
- assert(0x40 <= IN2 && IN2 <= 0xfe);
+ assert(0x40 <= INBYTE2 && INBYTE2 <= 0xfe);
if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
hintbase = big5hkscs_phint_0;
@@ -146,25 +148,25 @@ DECODER(big5hkscs)
return MBERR_INTERNAL;
if (hintbase[s >> 3] & (1 << (s & 7))) {
- WRITEUCS4(decoded | 0x20000)
- NEXT_IN(2)
+ OUTCHAR(decoded | 0x20000);
+ NEXT_IN(2);
}
else {
- OUT1(decoded)
- NEXT(2, 1)
+ OUTCHAR(decoded);
+ NEXT_IN(2);
}
continue;
}
- switch ((c << 8) | IN2) {
- case 0x8862: WRITE2(0x00ca, 0x0304); break;
- case 0x8864: WRITE2(0x00ca, 0x030c); break;
- case 0x88a3: WRITE2(0x00ea, 0x0304); break;
- case 0x88a5: WRITE2(0x00ea, 0x030c); break;
+ switch ((c << 8) | INBYTE2) {
+ case 0x8862: OUTCHAR2(0x00ca, 0x0304); break;
+ case 0x8864: OUTCHAR2(0x00ca, 0x030c); break;
+ case 0x88a3: OUTCHAR2(0x00ea, 0x0304); break;
+ case 0x88a5: OUTCHAR2(0x00ea, 0x030c); break;
default: return 1;
}
- NEXT(2, 2) /* all decoded codepoints are pairs, above. */
+ NEXT_IN(2); /* all decoded codepoints are pairs, above. */
}
return 0;
diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c
index cbc1542..bb63835 100644
--- a/Modules/cjkcodecs/_codecs_iso2022.c
+++ b/Modules/cjkcodecs/_codecs_iso2022.c
@@ -102,8 +102,8 @@
/*-*- internal data structures -*-*/
typedef int (*iso2022_init_func)(void);
-typedef ucs4_t (*iso2022_decode_func)(const unsigned char *data);
-typedef DBCHAR (*iso2022_encode_func)(const ucs4_t *data, Py_ssize_t *length);
+typedef Py_UCS4 (*iso2022_decode_func)(const unsigned char *data);
+typedef DBCHAR (*iso2022_encode_func)(const Py_UCS4 *data, Py_ssize_t *length);
struct iso2022_designation {
unsigned char mark;
@@ -141,13 +141,13 @@ ENCODER_INIT(iso2022)
ENCODER_RESET(iso2022)
{
if (STATE_GETFLAG(F_SHIFTED)) {
- WRITE1(SI)
- NEXT_OUT(1)
+ WRITEBYTE1(SI)
+ NEXT_OUT(1);
STATE_CLEARFLAG(F_SHIFTED)
}
if (STATE_G0 != CHARSET_ASCII) {
- WRITE3(ESC, '(', 'B')
- NEXT_OUT(3)
+ WRITEBYTE3(ESC, '(', 'B')
+ NEXT_OUT(3);
STATE_SETG0(CHARSET_ASCII)
}
return 0;
@@ -155,30 +155,29 @@ ENCODER_RESET(iso2022)
ENCODER(iso2022)
{
- while (inleft > 0) {
+ while (*inpos < inlen) {
const struct iso2022_designation *dsg;
DBCHAR encoded;
- ucs4_t c = **inbuf;
+ Py_UCS4 c = INCHAR1;
Py_ssize_t insize;
if (c < 0x80) {
if (STATE_G0 != CHARSET_ASCII) {
- WRITE3(ESC, '(', 'B')
+ WRITEBYTE3(ESC, '(', 'B')
STATE_SETG0(CHARSET_ASCII)
- NEXT_OUT(3)
+ NEXT_OUT(3);
}
if (STATE_GETFLAG(F_SHIFTED)) {
- WRITE1(SI)
+ WRITEBYTE1(SI)
STATE_CLEARFLAG(F_SHIFTED)
- NEXT_OUT(1)
+ NEXT_OUT(1);
}
- WRITE1((unsigned char)c)
- NEXT(1, 1)
+ WRITEBYTE1((unsigned char)c)
+ NEXT(1, 1);
continue;
}
- DECODE_SURROGATE(c)
- insize = GET_INSIZE(c);
+ insize = 1;
encoded = MAP_UNMAPPABLE;
for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {
@@ -187,24 +186,14 @@ ENCODER(iso2022)
if (encoded == MAP_MULTIPLE_AVAIL) {
/* this implementation won't work for pair
* of non-bmp characters. */
- if (inleft < 2) {
+ if (inlen - *inpos < 2) {
if (!(flags & MBENC_FLUSH))
return MBERR_TOOFEW;
length = -1;
}
else
length = 2;
-#if Py_UNICODE_SIZE == 2
- if (length == 2) {
- ucs4_t u4in[2];
- u4in[0] = (ucs4_t)IN1;
- u4in[1] = (ucs4_t)IN2;
- encoded = dsg->encoder(u4in, &length);
- } else
- encoded = dsg->encoder(&c, &length);
-#else
encoded = dsg->encoder(&c, &length);
-#endif
if (encoded != MAP_UNMAPPABLE) {
insize = length;
break;
@@ -221,47 +210,47 @@ ENCODER(iso2022)
switch (dsg->plane) {
case 0: /* G0 */
if (STATE_GETFLAG(F_SHIFTED)) {
- WRITE1(SI)
+ WRITEBYTE1(SI)
STATE_CLEARFLAG(F_SHIFTED)
- NEXT_OUT(1)
+ NEXT_OUT(1);
}
if (STATE_G0 != dsg->mark) {
if (dsg->width == 1) {
- WRITE3(ESC, '(', ESCMARK(dsg->mark))
+ WRITEBYTE3(ESC, '(', ESCMARK(dsg->mark))
STATE_SETG0(dsg->mark)
- NEXT_OUT(3)
+ NEXT_OUT(3);
}
else if (dsg->mark == CHARSET_JISX0208) {
- WRITE3(ESC, '$', ESCMARK(dsg->mark))
+ WRITEBYTE3(ESC, '$', ESCMARK(dsg->mark))
STATE_SETG0(dsg->mark)
- NEXT_OUT(3)
+ NEXT_OUT(3);
}
else {
- WRITE4(ESC, '$', '(',
+ WRITEBYTE4(ESC, '$', '(',
ESCMARK(dsg->mark))
STATE_SETG0(dsg->mark)
- NEXT_OUT(4)
+ NEXT_OUT(4);
}
}
break;
case 1: /* G1 */
if (STATE_G1 != dsg->mark) {
if (dsg->width == 1) {
- WRITE3(ESC, ')', ESCMARK(dsg->mark))
+ WRITEBYTE3(ESC, ')', ESCMARK(dsg->mark))
STATE_SETG1(dsg->mark)
- NEXT_OUT(3)
+ NEXT_OUT(3);
}
else {
- WRITE4(ESC, '$', ')',
+ WRITEBYTE4(ESC, '$', ')',
ESCMARK(dsg->mark))
STATE_SETG1(dsg->mark)
- NEXT_OUT(4)
+ NEXT_OUT(4);
}
}
if (!STATE_GETFLAG(F_SHIFTED)) {
- WRITE1(SO)
+ WRITEBYTE1(SO)
STATE_SETFLAG(F_SHIFTED)
- NEXT_OUT(1)
+ NEXT_OUT(1);
}
break;
default: /* G2 and G3 is not supported: no encoding in
@@ -270,14 +259,14 @@ ENCODER(iso2022)
}
if (dsg->width == 1) {
- WRITE1((unsigned char)encoded)
- NEXT_OUT(1)
+ WRITEBYTE1((unsigned char)encoded)
+ NEXT_OUT(1);
}
else {
- WRITE2(encoded >> 8, encoded & 0xff)
- NEXT_OUT(2)
+ WRITEBYTE2(encoded >> 8, encoded & 0xff)
+ NEXT_OUT(2);
}
- NEXT_IN(insize)
+ NEXT_INCHAR(insize);
}
return 0;
@@ -323,26 +312,26 @@ iso2022processesc(const void *config, MultibyteCodec_State *state,
switch (esclen) {
case 3:
- if (IN2 == '$') {
- charset = IN3 | CHARSET_DBCS;
+ if (INBYTE2 == '$') {
+ charset = INBYTE3 | CHARSET_DBCS;
designation = 0;
}
else {
- charset = IN3;
- if (IN2 == '(') designation = 0;
- else if (IN2 == ')') designation = 1;
- else if (CONFIG_ISSET(USE_G2) && IN2 == '.')
+ charset = INBYTE3;
+ if (INBYTE2 == '(') designation = 0;
+ else if (INBYTE2 == ')') designation = 1;
+ else if (CONFIG_ISSET(USE_G2) && INBYTE2 == '.')
designation = 2;
else return 3;
}
break;
case 4:
- if (IN2 != '$')
+ if (INBYTE2 != '$')
return 4;
- charset = IN4 | CHARSET_DBCS;
- if (IN3 == '(') designation = 0;
- else if (IN3 == ')') designation = 1;
+ charset = INBYTE4 | CHARSET_DBCS;
+ if (INBYTE3 == '(') designation = 0;
+ else if (INBYTE3 == ')') designation = 1;
else return 4;
break;
case 6: /* designation with prefix */
@@ -376,45 +365,43 @@ iso2022processesc(const void *config, MultibyteCodec_State *state,
return 0;
}
-#define ISO8859_7_DECODE(c, assi) \
- if ((c) < 0xa0) (assi) = (c); \
- else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) \
- (assi) = (c); \
- else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \
- (0xbffffd77L & (1L << ((c)-0xb4))))) \
- (assi) = 0x02d0 + (c); \
- else if ((c) == 0xa1) (assi) = 0x2018; \
- else if ((c) == 0xa2) (assi) = 0x2019; \
- else if ((c) == 0xaf) (assi) = 0x2015;
+#define ISO8859_7_DECODE(c, writer) \
+ if ((c) < 0xa0) OUTCHAR(c); \
+ else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) \
+ OUTCHAR(c); \
+ else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \
+ (0xbffffd77L & (1L << ((c)-0xb4))))) \
+ OUTCHAR(0x02d0 + (c)); \
+ else if ((c) == 0xa1) OUTCHAR(0x2018); \
+ else if ((c) == 0xa2) OUTCHAR(0x2019); \
+ else if ((c) == 0xaf) OUTCHAR(0x2015);
static Py_ssize_t
iso2022processg2(const void *config, MultibyteCodec_State *state,
const unsigned char **inbuf, Py_ssize_t *inleft,
- Py_UNICODE **outbuf, Py_ssize_t *outleft)
+ _PyUnicodeWriter *writer)
{
/* not written to use encoder, decoder functions because only few
* encodings use G2 designations in CJKCodecs */
if (STATE_G2 == CHARSET_ISO8859_1) {
- if (IN3 < 0x80)
- OUT1(IN3 + 0x80)
+ if (INBYTE3 < 0x80)
+ OUTCHAR(INBYTE3 + 0x80);
else
return 3;
}
else if (STATE_G2 == CHARSET_ISO8859_7) {
- ISO8859_7_DECODE(IN3 ^ 0x80, **outbuf)
+ ISO8859_7_DECODE(INBYTE3 ^ 0x80, writer)
else return 3;
}
else if (STATE_G2 == CHARSET_ASCII) {
- if (IN3 & 0x80) return 3;
- else **outbuf = IN3;
+ if (INBYTE3 & 0x80) return 3;
+ else OUTCHAR(INBYTE3);
}
else
return MBERR_INTERNAL;
(*inbuf) += 3;
*inleft -= 3;
- (*outbuf) += 1;
- *outleft -= 1;
return 0;
}
@@ -423,14 +410,14 @@ DECODER(iso2022)
const struct iso2022_designation *dsgcache = NULL;
while (inleft > 0) {
- unsigned char c = IN1;
+ unsigned char c = INBYTE1;
Py_ssize_t err;
if (STATE_GETFLAG(F_ESCTHROUGHOUT)) {
/* ESC throughout mode:
* for non-iso2022 escape sequences */
- WRITE1(c) /* assume as ISO-8859-1 */
- NEXT(1, 1)
+ OUTCHAR(c); /* assume as ISO-8859-1 */
+ NEXT_IN(1);
if (IS_ESCEND(c)) {
STATE_CLEARFLAG(F_ESCTHROUGHOUT)
}
@@ -440,41 +427,41 @@ DECODER(iso2022)
switch (c) {
case ESC:
REQUIRE_INBUF(2)
- if (IS_ISO2022ESC(IN2)) {
+ if (IS_ISO2022ESC(INBYTE2)) {
err = iso2022processesc(config, state,
inbuf, &inleft);
if (err != 0)
return err;
}
- else if (CONFIG_ISSET(USE_G2) && IN2 == 'N') {/* SS2 */
+ else if (CONFIG_ISSET(USE_G2) && INBYTE2 == 'N') {/* SS2 */
REQUIRE_INBUF(3)
err = iso2022processg2(config, state,
- inbuf, &inleft, outbuf, &outleft);
+ inbuf, &inleft, writer);
if (err != 0)
return err;
}
else {
- WRITE1(ESC)
+ OUTCHAR(ESC);
STATE_SETFLAG(F_ESCTHROUGHOUT)
- NEXT(1, 1)
+ NEXT_IN(1);
}
break;
case SI:
if (CONFIG_ISSET(NO_SHIFT))
goto bypass;
STATE_CLEARFLAG(F_SHIFTED)
- NEXT_IN(1)
+ NEXT_IN(1);
break;
case SO:
if (CONFIG_ISSET(NO_SHIFT))
goto bypass;
STATE_SETFLAG(F_SHIFTED)
- NEXT_IN(1)
+ NEXT_IN(1);
break;
case LF:
STATE_CLEARFLAG(F_SHIFTED)
- WRITE1(LF)
- NEXT(1, 1)
+ OUTCHAR(LF);
+ NEXT_IN(1);
break;
default:
if (c < 0x20) /* C0 */
@@ -484,7 +471,7 @@ DECODER(iso2022)
else {
const struct iso2022_designation *dsg;
unsigned char charset;
- ucs4_t decoded;
+ Py_UCS4 decoded;
if (STATE_GETFLAG(F_SHIFTED))
charset = STATE_G1;
@@ -492,8 +479,8 @@ DECODER(iso2022)
charset = STATE_G0;
if (charset == CHARSET_ASCII) {
-bypass: WRITE1(c)
- NEXT(1, 1)
+bypass: OUTCHAR(c);
+ NEXT_IN(1);
break;
}
@@ -518,17 +505,15 @@ bypass: WRITE1(c)
return dsg->width;
if (decoded < 0x10000) {
- WRITE1(decoded)
- NEXT_OUT(1)
+ OUTCHAR(decoded);
}
else if (decoded < 0x30000) {
- WRITEUCS4(decoded)
+ OUTCHAR(decoded);
}
else { /* JIS X 0213 pairs */
- WRITE2(decoded >> 16, decoded & 0xffff)
- NEXT_OUT(2)
+ OUTCHAR2(decoded >> 16, decoded & 0xffff);
}
- NEXT_IN(dsg->width)
+ NEXT_IN(dsg->width);
}
break;
}
@@ -577,18 +562,18 @@ ksx1001_init(void)
return 0;
}
-static ucs4_t
+static Py_UCS4
ksx1001_decoder(const unsigned char *data)
{
- ucs4_t u;
- TRYMAP_DEC(ksx1001, u, data[0], data[1])
+ Py_UCS4 u;
+ TRYMAP_DEC_CHAR(ksx1001, u, data[0], data[1])
return u;
else
return MAP_UNMAPPABLE;
}
static DBCHAR
-ksx1001_encoder(const ucs4_t *data, Py_ssize_t *length)
+ksx1001_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
assert(*length == 1);
@@ -613,20 +598,20 @@ jisx0208_init(void)
return 0;
}
-static ucs4_t
+static Py_UCS4
jisx0208_decoder(const unsigned char *data)
{
- ucs4_t u;
+ Py_UCS4 u;
if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
return 0xff3c;
- else TRYMAP_DEC(jisx0208, u, data[0], data[1])
+ else TRYMAP_DEC_CHAR(jisx0208, u, data[0], data[1])
return u;
else
return MAP_UNMAPPABLE;
}
static DBCHAR
-jisx0208_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0208_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
assert(*length == 1);
@@ -654,18 +639,18 @@ jisx0212_init(void)
return 0;
}
-static ucs4_t
+static Py_UCS4
jisx0212_decoder(const unsigned char *data)
{
- ucs4_t u;
- TRYMAP_DEC(jisx0212, u, data[0], data[1])
+ Py_UCS4 u;
+ TRYMAP_DEC_CHAR(jisx0212, u, data[0], data[1])
return u;
else
return MAP_UNMAPPABLE;
}
static DBCHAR
-jisx0212_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0212_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
assert(*length == 1);
@@ -705,30 +690,30 @@ jisx0213_init(void)
}
#define config ((void *)2000)
-static ucs4_t
+static Py_UCS4
jisx0213_2000_1_decoder(const unsigned char *data)
{
- ucs4_t u;
+ Py_UCS4 u;
EMULATE_JISX0213_2000_DECODE_PLANE1(u, data[0], data[1])
else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
return 0xff3c;
- else TRYMAP_DEC(jisx0208, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])
+ else TRYMAP_DEC_CHAR(jisx0208, u, data[0], data[1]);
+ else TRYMAP_DEC_CHAR(jisx0213_1_bmp, u, data[0], data[1]);
+ else TRYMAP_DEC_CHAR(jisx0213_1_emp, u, data[0], data[1])
u |= 0x20000;
- else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);
+ else TRYMAP_DEC_CHAR(jisx0213_pair, u, data[0], data[1]);
else
return MAP_UNMAPPABLE;
return u;
}
-static ucs4_t
+static Py_UCS4
jisx0213_2000_2_decoder(const unsigned char *data)
{
- ucs4_t u;
- EMULATE_JISX0213_2000_DECODE_PLANE2(u, data[0], data[1])
- TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])
+ Py_UCS4 u;
+ EMULATE_JISX0213_2000_DECODE_PLANE2_CHAR(u, data[0], data[1])
+ TRYMAP_DEC_CHAR(jisx0213_2_bmp, u, data[0], data[1]);
+ else TRYMAP_DEC_CHAR(jisx0213_2_emp, u, data[0], data[1])
u |= 0x20000;
else
return MAP_UNMAPPABLE;
@@ -736,28 +721,28 @@ jisx0213_2000_2_decoder(const unsigned char *data)
}
#undef config
-static ucs4_t
+static Py_UCS4
jisx0213_2004_1_decoder(const unsigned char *data)
{
- ucs4_t u;
+ Py_UCS4 u;
if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
return 0xff3c;
- else TRYMAP_DEC(jisx0208, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])
+ else TRYMAP_DEC_CHAR(jisx0208, u, data[0], data[1]);
+ else TRYMAP_DEC_CHAR(jisx0213_1_bmp, u, data[0], data[1]);
+ else TRYMAP_DEC_CHAR(jisx0213_1_emp, u, data[0], data[1])
u |= 0x20000;
- else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);
+ else TRYMAP_DEC_CHAR(jisx0213_pair, u, data[0], data[1]);
else
return MAP_UNMAPPABLE;
return u;
}
-static ucs4_t
+static Py_UCS4
jisx0213_2004_2_decoder(const unsigned char *data)
{
- ucs4_t u;
- TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])
+ Py_UCS4 u;
+ TRYMAP_DEC_CHAR(jisx0213_2_bmp, u, data[0], data[1]);
+ else TRYMAP_DEC_CHAR(jisx0213_2_emp, u, data[0], data[1])
u |= 0x20000;
else
return MAP_UNMAPPABLE;
@@ -765,7 +750,7 @@ jisx0213_2004_2_decoder(const unsigned char *data)
}
static DBCHAR
-jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config)
+jisx0213_encoder(const Py_UCS4 *data, Py_ssize_t *length, void *config)
{
DBCHAR coded;
@@ -819,7 +804,7 @@ jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config)
}
static DBCHAR
-jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0213_2000_1_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
@@ -831,7 +816,7 @@ jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length)
}
static DBCHAR
-jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
+jisx0213_2000_1_encoder_paironly(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
Py_ssize_t ilength = *length;
@@ -854,7 +839,7 @@ jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
}
static DBCHAR
-jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0213_2000_2_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
@@ -866,7 +851,7 @@ jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length)
}
static DBCHAR
-jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0213_2004_1_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded = jisx0213_encoder(data, length, NULL);
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
@@ -878,7 +863,7 @@ jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length)
}
static DBCHAR
-jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
+jisx0213_2004_1_encoder_paironly(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
Py_ssize_t ilength = *length;
@@ -901,7 +886,7 @@ jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
}
static DBCHAR
-jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0213_2004_2_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded = jisx0213_encoder(data, length, NULL);
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
@@ -912,17 +897,17 @@ jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length)
return MAP_UNMAPPABLE;
}
-static ucs4_t
+static Py_UCS4
jisx0201_r_decoder(const unsigned char *data)
{
- ucs4_t u;
- JISX0201_R_DECODE(*data, u)
+ Py_UCS4 u;
+ JISX0201_R_DECODE_CHAR(*data, u)
else return MAP_UNMAPPABLE;
return u;
}
static DBCHAR
-jisx0201_r_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0201_r_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
JISX0201_R_ENCODE(*data, coded)
@@ -930,17 +915,17 @@ jisx0201_r_encoder(const ucs4_t *data, Py_ssize_t *length)
return coded;
}
-static ucs4_t
+static Py_UCS4
jisx0201_k_decoder(const unsigned char *data)
{
- ucs4_t u;
- JISX0201_K_DECODE(*data ^ 0x80, u)
+ Py_UCS4 u;
+ JISX0201_K_DECODE_CHAR(*data ^ 0x80, u)
else return MAP_UNMAPPABLE;
return u;
}
static DBCHAR
-jisx0201_k_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0201_k_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
JISX0201_K_ENCODE(*data, coded)
@@ -961,18 +946,18 @@ gb2312_init(void)
return 0;
}
-static ucs4_t
+static Py_UCS4
gb2312_decoder(const unsigned char *data)
{
- ucs4_t u;
- TRYMAP_DEC(gb2312, u, data[0], data[1])
+ Py_UCS4 u;
+ TRYMAP_DEC_CHAR(gb2312, u, data[0], data[1])
return u;
else
return MAP_UNMAPPABLE;
}
static DBCHAR
-gb2312_encoder(const ucs4_t *data, Py_ssize_t *length)
+gb2312_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
assert(*length == 1);
@@ -986,14 +971,14 @@ gb2312_encoder(const ucs4_t *data, Py_ssize_t *length)
}
-static ucs4_t
+static Py_UCS4
dummy_decoder(const unsigned char *data)
{
return MAP_UNMAPPABLE;
}
static DBCHAR
-dummy_encoder(const ucs4_t *data, Py_ssize_t *length)
+dummy_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
return MAP_UNMAPPABLE;
}
diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c
index a500696..7ab318b 100644
--- a/Modules/cjkcodecs/_codecs_jp.c
+++ b/Modules/cjkcodecs/_codecs_jp.c
@@ -19,38 +19,39 @@
ENCODER(cp932)
{
- while (inleft > 0) {
- Py_UNICODE c = IN1;
+ while (*inpos < inlen) {
+ Py_UCS4 c = INCHAR1;
DBCHAR code;
unsigned char c1, c2;
if (c <= 0x80) {
- WRITE1((unsigned char)c)
- NEXT(1, 1)
+ WRITEBYTE1((unsigned char)c)
+ NEXT(1, 1);
continue;
}
else if (c >= 0xff61 && c <= 0xff9f) {
- WRITE1(c - 0xfec0)
- NEXT(1, 1)
+ WRITEBYTE1(c - 0xfec0)
+ NEXT(1, 1);
continue;
}
else if (c >= 0xf8f0 && c <= 0xf8f3) {
/* Windows compatibility */
REQUIRE_OUTBUF(1)
if (c == 0xf8f0)
- OUT1(0xa0)
+ OUTBYTE1(0xa0)
else
- OUT1(c - 0xfef1 + 0xfd)
- NEXT(1, 1)
+ OUTBYTE1(c - 0xfef1 + 0xfd)
+ NEXT(1, 1);
continue;
}
- UCS4INVALID(c)
+ if (c > 0xFFFF)
+ return 1;
REQUIRE_OUTBUF(2)
TRYMAP_ENC(cp932ext, code, c) {
- OUT1(code >> 8)
- OUT2(code & 0xff)
+ OUTBYTE1(code >> 8)
+ OUTBYTE2(code & 0xff)
}
else TRYMAP_ENC(jisxcommon, code, c) {
if (code & 0x8000) /* MSB set: JIS X 0212 */
@@ -61,20 +62,20 @@ ENCODER(cp932)
c2 = code & 0xff;
c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
c1 = (c1 - 0x21) >> 1;
- OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
- OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
+ OUTBYTE1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
+ OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
}
else if (c >= 0xe000 && c < 0xe758) {
/* User-defined area */
- c1 = (Py_UNICODE)(c - 0xe000) / 188;
- c2 = (Py_UNICODE)(c - 0xe000) % 188;
- OUT1(c1 + 0xf0)
- OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
+ c1 = (Py_UCS4)(c - 0xe000) / 188;
+ c2 = (Py_UCS4)(c - 0xe000) % 188;
+ OUTBYTE1(c1 + 0xf0)
+ OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
}
else
return 1;
- NEXT(1, 2)
+ NEXT(1, 2);
}
return 0;
@@ -83,33 +84,32 @@ ENCODER(cp932)
DECODER(cp932)
{
while (inleft > 0) {
- unsigned char c = IN1, c2;
+ unsigned char c = INBYTE1, c2;
- REQUIRE_OUTBUF(1)
if (c <= 0x80) {
- OUT1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
continue;
}
else if (c >= 0xa0 && c <= 0xdf) {
if (c == 0xa0)
- OUT1(0xf8f0) /* half-width katakana */
+ OUTCHAR(0xf8f0); /* half-width katakana */
else
- OUT1(0xfec0 + c)
- NEXT(1, 1)
+ OUTCHAR(0xfec0 + c);
+ NEXT_IN(1);
continue;
}
else if (c >= 0xfd/* && c <= 0xff*/) {
/* Windows compatibility */
- OUT1(0xf8f1 - 0xfd + c)
- NEXT(1, 1)
+ OUTCHAR(0xf8f1 - 0xfd + c);
+ NEXT_IN(1);
continue;
}
REQUIRE_INBUF(2)
- c2 = IN2;
+ c2 = INBYTE2;
- TRYMAP_DEC(cp932ext, **outbuf, c, c2);
+ TRYMAP_DEC(cp932ext, writer, c, c2);
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
return 1;
@@ -119,21 +119,21 @@ DECODER(cp932)
c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21);
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
- TRYMAP_DEC(jisx0208, **outbuf, c, c2);
+ TRYMAP_DEC(jisx0208, writer, c, c2);
else return 1;
}
else if (c >= 0xf0 && c <= 0xf9) {
if ((c2 >= 0x40 && c2 <= 0x7e) ||
(c2 >= 0x80 && c2 <= 0xfc))
- OUT1(0xe000 + 188 * (c - 0xf0) +
- (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41))
+ OUTCHAR(0xe000 + 188 * (c - 0xf0) +
+ (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41));
else
return 1;
}
else
return 1;
- NEXT(2, 1)
+ NEXT_IN(2);
}
return 0;
@@ -146,25 +146,24 @@ DECODER(cp932)
ENCODER(euc_jis_2004)
{
- while (inleft > 0) {
- ucs4_t c = IN1;
+ while (*inpos < inlen) {
+ Py_UCS4 c = INCHAR1;
DBCHAR code;
Py_ssize_t insize;
if (c < 0x80) {
- WRITE1(c)
- NEXT(1, 1)
+ WRITEBYTE1(c)
+ NEXT(1, 1);
continue;
}
- DECODE_SURROGATE(c)
- insize = GET_INSIZE(c);
+ insize = 1;
if (c <= 0xFFFF) {
EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
else TRYMAP_ENC(jisx0213_bmp, code, c) {
if (code == MULTIC) {
- if (inleft < 2) {
+ if (inlen - *inpos < 2) {
if (flags & MBENC_FLUSH) {
code = find_pairencmap(
(ucs2_t)c, 0,
@@ -177,8 +176,9 @@ ENCODER(euc_jis_2004)
return MBERR_TOOFEW;
}
else {
+ Py_UCS4 c2 = INCHAR2;
code = find_pairencmap(
- (ucs2_t)c, (*inbuf)[1],
+ (ucs2_t)c, c2,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV) {
@@ -196,8 +196,8 @@ ENCODER(euc_jis_2004)
else TRYMAP_ENC(jisxcommon, code, c);
else if (c >= 0xff61 && c <= 0xff9f) {
/* JIS X 0201 half-width katakana */
- WRITE2(0x8e, c - 0xfec0)
- NEXT(1, 2)
+ WRITEBYTE2(0x8e, c - 0xfec0)
+ NEXT(1, 2);
continue;
}
else if (c == 0xff3c)
@@ -219,12 +219,12 @@ ENCODER(euc_jis_2004)
if (code & 0x8000) {
/* Codeset 2 */
- WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
- NEXT(insize, 3)
+ WRITEBYTE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
+ NEXT(insize, 3);
} else {
/* Codeset 1 */
- WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
- NEXT(insize, 2)
+ WRITEBYTE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
+ NEXT(insize, 2);
}
}
@@ -234,14 +234,12 @@ ENCODER(euc_jis_2004)
DECODER(euc_jis_2004)
{
while (inleft > 0) {
- unsigned char c = IN1;
- ucs4_t code;
-
- REQUIRE_OUTBUF(1)
+ unsigned char c = INBYTE1;
+ Py_UCS4 code;
if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
continue;
}
@@ -250,10 +248,10 @@ DECODER(euc_jis_2004)
unsigned char c2;
REQUIRE_INBUF(2)
- c2 = IN2;
+ c2 = INBYTE2;
if (c2 >= 0xa1 && c2 <= 0xdf) {
- OUT1(0xfec0 + c2)
- NEXT(2, 1)
+ OUTCHAR(0xfec0 + c2);
+ NEXT_IN(2);
}
else
return 1;
@@ -262,46 +260,46 @@ DECODER(euc_jis_2004)
unsigned char c2, c3;
REQUIRE_INBUF(3)
- c2 = IN2 ^ 0x80;
- c3 = IN3 ^ 0x80;
+ c2 = INBYTE2 ^ 0x80;
+ c3 = INBYTE3 ^ 0x80;
/* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */
- EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, c2, c3)
- else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ;
- else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) {
- WRITEUCS4(EMPBASE | code)
- NEXT_IN(3)
+ EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c2, c3)
+ else TRYMAP_DEC(jisx0213_2_bmp, writer, c2, c3) ;
+ else TRYMAP_DEC_CHAR(jisx0213_2_emp, code, c2, c3) {
+ OUTCHAR(EMPBASE | code);
+ NEXT_IN(3);
continue;
}
- else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ;
+ else TRYMAP_DEC(jisx0212, writer, c2, c3) ;
else return 1;
- NEXT(3, 1)
+ NEXT_IN(3);
}
else {
unsigned char c2;
REQUIRE_INBUF(2)
c ^= 0x80;
- c2 = IN2 ^ 0x80;
+ c2 = INBYTE2 ^ 0x80;
/* JIS X 0213 Plane 1 */
- EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, c, c2)
- else if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c;
- else if (c == 0x22 && c2 == 0x32) **outbuf = 0xff5e;
- else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
- else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2);
- else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) {
- WRITEUCS4(EMPBASE | code)
- NEXT_IN(2)
+ EMULATE_JISX0213_2000_DECODE_PLANE1(writer, c, c2)
+ else if (c == 0x21 && c2 == 0x40) OUTCHAR(0xff3c);
+ else if (c == 0x22 && c2 == 0x32) OUTCHAR(0xff5e);
+ else TRYMAP_DEC(jisx0208, writer, c, c2);
+ else TRYMAP_DEC(jisx0213_1_bmp, writer, c, c2);
+ else TRYMAP_DEC_CHAR(jisx0213_1_emp, code, c, c2) {
+ OUTCHAR(EMPBASE | code);
+ NEXT_IN(2);
continue;
}
- else TRYMAP_DEC(jisx0213_pair, code, c, c2) {
- WRITE2(code >> 16, code & 0xffff)
- NEXT(2, 2)
+ else TRYMAP_DEC_CHAR(jisx0213_pair, code, c, c2) {
+ OUTCHAR2(code >> 16, code & 0xffff);
+ NEXT_IN(2);
continue;
}
else return 1;
- NEXT(2, 1)
+ NEXT_IN(2);
}
}
@@ -315,35 +313,36 @@ DECODER(euc_jis_2004)
ENCODER(euc_jp)
{
- while (inleft > 0) {
- Py_UNICODE c = IN1;
+ while (*inpos < inlen) {
+ Py_UCS4 c = INCHAR1;
DBCHAR code;
if (c < 0x80) {
- WRITE1((unsigned char)c)
- NEXT(1, 1)
+ WRITEBYTE1((unsigned char)c)
+ NEXT(1, 1);
continue;
}
- UCS4INVALID(c)
+ if (c > 0xFFFF)
+ return 1;
TRYMAP_ENC(jisxcommon, code, c);
else if (c >= 0xff61 && c <= 0xff9f) {
/* JIS X 0201 half-width katakana */
- WRITE2(0x8e, c - 0xfec0)
- NEXT(1, 2)
+ WRITEBYTE2(0x8e, c - 0xfec0)
+ NEXT(1, 2);
continue;
}
#ifndef STRICT_BUILD
else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */
code = 0x2140;
else if (c == 0xa5) { /* YEN SIGN */
- WRITE1(0x5c);
- NEXT(1, 1)
+ WRITEBYTE1(0x5c);
+ NEXT(1, 1);
continue;
} else if (c == 0x203e) { /* OVERLINE */
- WRITE1(0x7e);
- NEXT(1, 1)
+ WRITEBYTE1(0x7e);
+ NEXT(1, 1);
continue;
}
#endif
@@ -352,12 +351,12 @@ ENCODER(euc_jp)
if (code & 0x8000) {
/* JIS X 0212 */
- WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
- NEXT(1, 3)
+ WRITEBYTE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
+ NEXT(1, 3);
} else {
/* JIS X 0208 */
- WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
- NEXT(1, 2)
+ WRITEBYTE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
+ NEXT(1, 2);
}
}
@@ -367,13 +366,11 @@ ENCODER(euc_jp)
DECODER(euc_jp)
{
while (inleft > 0) {
- unsigned char c = IN1;
-
- REQUIRE_OUTBUF(1)
+ unsigned char c = INBYTE1;
if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
continue;
}
@@ -382,10 +379,10 @@ DECODER(euc_jp)
unsigned char c2;
REQUIRE_INBUF(2)
- c2 = IN2;
+ c2 = INBYTE2;
if (c2 >= 0xa1 && c2 <= 0xdf) {
- OUT1(0xfec0 + c2)
- NEXT(2, 1)
+ OUTCHAR(0xfec0 + c2);
+ NEXT_IN(2);
}
else
return 1;
@@ -394,11 +391,11 @@ DECODER(euc_jp)
unsigned char c2, c3;
REQUIRE_INBUF(3)
- c2 = IN2;
- c3 = IN3;
+ c2 = INBYTE2;
+ c3 = INBYTE3;
/* JIS X 0212 */
- TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80) {
- NEXT(3, 1)
+ TRYMAP_DEC(jisx0212, writer, c2 ^ 0x80, c3 ^ 0x80) {
+ NEXT_IN(3);
}
else
return 1;
@@ -407,18 +404,18 @@ DECODER(euc_jp)
unsigned char c2;
REQUIRE_INBUF(2)
- c2 = IN2;
+ c2 = INBYTE2;
/* JIS X 0208 */
#ifndef STRICT_BUILD
if (c == 0xa1 && c2 == 0xc0)
/* FULL-WIDTH REVERSE SOLIDUS */
- **outbuf = 0xff3c;
+ OUTCHAR(0xff3c);
else
#endif
- TRYMAP_DEC(jisx0208, **outbuf,
+ TRYMAP_DEC(jisx0208, writer,
c ^ 0x80, c2 ^ 0x80) ;
else return 1;
- NEXT(2, 1)
+ NEXT_IN(2);
}
}
@@ -432,8 +429,8 @@ DECODER(euc_jp)
ENCODER(shift_jis)
{
- while (inleft > 0) {
- Py_UNICODE c = IN1;
+ while (*inpos < inlen) {
+ Py_UCS4 c = INCHAR1;
DBCHAR code;
unsigned char c1, c2;
@@ -445,14 +442,16 @@ ENCODER(shift_jis)
else if (c == 0x203e) code = 0x7e; /* OVERLINE */
#endif
else JISX0201_K_ENCODE(c, code)
- else UCS4INVALID(c)
- else code = NOCHAR;
+ else if (c > 0xFFFF)
+ return 1;
+ else
+ code = NOCHAR;
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
REQUIRE_OUTBUF(1)
- OUT1((unsigned char)code)
- NEXT(1, 1)
+ OUTBYTE1((unsigned char)code)
+ NEXT(1, 1);
continue;
}
@@ -475,9 +474,9 @@ ENCODER(shift_jis)
c2 = code & 0xff;
c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
c1 = (c1 - 0x21) >> 1;
- OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
- OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
- NEXT(1, 2)
+ OUTBYTE1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
+ OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
+ NEXT(1, 2);
}
return 0;
@@ -486,21 +485,19 @@ ENCODER(shift_jis)
DECODER(shift_jis)
{
while (inleft > 0) {
- unsigned char c = IN1;
-
- REQUIRE_OUTBUF(1)
+ unsigned char c = INBYTE1;
#ifdef STRICT_BUILD
- JISX0201_R_DECODE(c, **outbuf)
+ JISX0201_R_DECODE(c, writer)
#else
- if (c < 0x80) **outbuf = c;
+ if (c < 0x80) OUTCHAR(c);
#endif
- else JISX0201_K_DECODE(c, **outbuf)
+ else JISX0201_K_DECODE(c, writer)
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
unsigned char c1, c2;
REQUIRE_INBUF(2)
- c2 = IN2;
+ c2 = INBYTE2;
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
return 1;
@@ -512,13 +509,13 @@ DECODER(shift_jis)
#ifndef STRICT_BUILD
if (c1 == 0x21 && c2 == 0x40) {
/* FULL-WIDTH REVERSE SOLIDUS */
- OUT1(0xff3c)
- NEXT(2, 1)
+ OUTCHAR(0xff3c);
+ NEXT_IN(2);
continue;
}
#endif
- TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
- NEXT(2, 1)
+ TRYMAP_DEC(jisx0208, writer, c1, c2) {
+ NEXT_IN(2);
continue;
}
else
@@ -527,7 +524,7 @@ DECODER(shift_jis)
else
return 1;
- NEXT(1, 1) /* JIS X 0201 */
+ NEXT_IN(1); /* JIS X 0201 */
}
return 0;
@@ -540,30 +537,29 @@ DECODER(shift_jis)
ENCODER(shift_jis_2004)
{
- while (inleft > 0) {
- ucs4_t c = IN1;
+ while (*inpos < inlen) {
+ Py_UCS4 c = INCHAR1;
DBCHAR code = NOCHAR;
int c1, c2;
Py_ssize_t insize;
JISX0201_ENCODE(c, code)
- else DECODE_SURROGATE(c)
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
- WRITE1((unsigned char)code)
- NEXT(1, 1)
+ WRITEBYTE1((unsigned char)code)
+ NEXT(1, 1);
continue;
}
REQUIRE_OUTBUF(2)
- insize = GET_INSIZE(c);
+ insize = 1;
if (code == NOCHAR) {
if (c <= 0xffff) {
EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
else TRYMAP_ENC(jisx0213_bmp, code, c) {
if (code == MULTIC) {
- if (inleft < 2) {
+ if (inlen - *inpos < 2) {
if (flags & MBENC_FLUSH) {
code = find_pairencmap
((ucs2_t)c, 0,
@@ -576,8 +572,9 @@ ENCODER(shift_jis_2004)
return MBERR_TOOFEW;
}
else {
+ Py_UCS4 ch2 = INCHAR2;
code = find_pairencmap(
- (ucs2_t)c, IN2,
+ (ucs2_t)c, ch2,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV) {
@@ -622,10 +619,10 @@ ENCODER(shift_jis_2004)
if (c1 & 1) c2 += 0x5e;
c1 >>= 1;
- OUT1(c1 + (c1 < 0x1f ? 0x81 : 0xc1))
- OUT2(c2 + (c2 < 0x3f ? 0x40 : 0x41))
+ OUTBYTE1(c1 + (c1 < 0x1f ? 0x81 : 0xc1))
+ OUTBYTE2(c2 + (c2 < 0x3f ? 0x40 : 0x41))
- NEXT(insize, 2)
+ NEXT(insize, 2);
}
return 0;
@@ -634,16 +631,15 @@ ENCODER(shift_jis_2004)
DECODER(shift_jis_2004)
{
while (inleft > 0) {
- unsigned char c = IN1;
+ unsigned char c = INBYTE1;
- REQUIRE_OUTBUF(1)
- JISX0201_DECODE(c, **outbuf)
+ JISX0201_DECODE(c, writer)
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){
unsigned char c1, c2;
- ucs4_t code;
+ Py_UCS4 code;
REQUIRE_INBUF(2)
- c2 = IN2;
+ c2 = INBYTE2;
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
return 1;
@@ -654,50 +650,47 @@ DECODER(shift_jis_2004)
if (c1 < 0x5e) { /* Plane 1 */
c1 += 0x21;
- EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf,
+ EMULATE_JISX0213_2000_DECODE_PLANE1(writer,
c1, c2)
- else TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
- NEXT_OUT(1)
+ else TRYMAP_DEC(jisx0208, writer, c1, c2) {
}
- else TRYMAP_DEC(jisx0213_1_bmp, **outbuf,
+ else TRYMAP_DEC(jisx0213_1_bmp, writer,
c1, c2) {
- NEXT_OUT(1)
}
- else TRYMAP_DEC(jisx0213_1_emp, code, c1, c2) {
- WRITEUCS4(EMPBASE | code)
+ else TRYMAP_DEC_CHAR(jisx0213_1_emp, code, c1, c2) {
+ OUTCHAR(EMPBASE | code);
}
- else TRYMAP_DEC(jisx0213_pair, code, c1, c2) {
- WRITE2(code >> 16, code & 0xffff)
- NEXT_OUT(2)
+ else TRYMAP_DEC_CHAR(jisx0213_pair, code, c1, c2) {
+ OUTCHAR2(code >> 16, code & 0xffff);
}
else
return 1;
- NEXT_IN(2)
+ NEXT_IN(2);
}
else { /* Plane 2 */
if (c1 >= 0x67) c1 += 0x07;
else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37;
else c1 -= 0x3d;
- EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf,
+ EMULATE_JISX0213_2000_DECODE_PLANE2(writer,
c1, c2)
- else TRYMAP_DEC(jisx0213_2_bmp, **outbuf,
- c1, c2) ;
- else TRYMAP_DEC(jisx0213_2_emp, code, c1, c2) {
- WRITEUCS4(EMPBASE | code)
- NEXT_IN(2)
+ else TRYMAP_DEC(jisx0213_2_bmp, writer,
+ c1, c2) {
+ } else TRYMAP_DEC_CHAR(jisx0213_2_emp, code, c1, c2) {
+ OUTCHAR(EMPBASE | code);
+ NEXT_IN(2);
continue;
}
else
return 1;
- NEXT(2, 1)
+ NEXT_IN(2);
}
continue;
}
else
return 1;
- NEXT(1, 1) /* JIS X 0201 */
+ NEXT_IN(1); /* JIS X 0201 */
}
return 0;
diff --git a/Modules/cjkcodecs/_codecs_kr.c b/Modules/cjkcodecs/_codecs_kr.c
index f5697dd..0c2309d 100644
--- a/Modules/cjkcodecs/_codecs_kr.c
+++ b/Modules/cjkcodecs/_codecs_kr.c
@@ -33,16 +33,18 @@ static const unsigned char u2cgk_jongseong[28] = {
ENCODER(euc_kr)
{
- while (inleft > 0) {
- Py_UNICODE c = IN1;
+ while (*inpos < inlen) {
+ Py_UCS4 c = INCHAR1;
DBCHAR code;
if (c < 0x80) {
- WRITE1((unsigned char)c)
- NEXT(1, 1)
+ WRITEBYTE1((unsigned char)c)
+ NEXT(1, 1);
continue;
}
- UCS4INVALID(c)
+
+ if (c > 0xFFFF)
+ return 1;
REQUIRE_OUTBUF(2)
TRYMAP_ENC(cp949, code, c);
@@ -50,9 +52,9 @@ ENCODER(euc_kr)
if ((code & 0x8000) == 0) {
/* KS X 1001 coded character */
- OUT1((code >> 8) | 0x80)
- OUT2((code & 0xFF) | 0x80)
- NEXT(1, 2)
+ OUTBYTE1((code >> 8) | 0x80)
+ OUTBYTE2((code & 0xFF) | 0x80)
+ NEXT(1, 2);
}
else { /* Mapping is found in CP949 extension,
* but we encode it in KS X 1001:1998 Annex 3,
@@ -61,23 +63,23 @@ ENCODER(euc_kr)
REQUIRE_OUTBUF(8)
/* syllable composition precedence */
- OUT1(EUCKR_JAMO_FIRSTBYTE)
- OUT2(EUCKR_JAMO_FILLER)
+ OUTBYTE1(EUCKR_JAMO_FIRSTBYTE)
+ OUTBYTE2(EUCKR_JAMO_FILLER)
/* All codepoints in CP949 extension are in unicode
* Hangul Syllable area. */
assert(0xac00 <= c && c <= 0xd7a3);
c -= 0xac00;
- OUT3(EUCKR_JAMO_FIRSTBYTE)
- OUT4(u2cgk_choseong[c / 588])
- NEXT_OUT(4)
+ OUTBYTE3(EUCKR_JAMO_FIRSTBYTE)
+ OUTBYTE4(u2cgk_choseong[c / 588])
+ NEXT_OUT(4);
- OUT1(EUCKR_JAMO_FIRSTBYTE)
- OUT2(u2cgk_jungseong[(c / 28) % 21])
- OUT3(EUCKR_JAMO_FIRSTBYTE)
- OUT4(u2cgk_jongseong[c % 28])
- NEXT(1, 4)
+ OUTBYTE1(EUCKR_JAMO_FIRSTBYTE)
+ OUTBYTE2(u2cgk_jungseong[(c / 28) % 21])
+ OUTBYTE3(EUCKR_JAMO_FIRSTBYTE)
+ OUTBYTE4(u2cgk_jongseong[c % 28])
+ NEXT(1, 4);
}
}
@@ -102,20 +104,18 @@ static const unsigned char cgk2u_jongseong[] = { /* [A1, BE] */
DECODER(euc_kr)
{
while (inleft > 0) {
- unsigned char c = IN1;
-
- REQUIRE_OUTBUF(1)
+ unsigned char c = INBYTE1;
if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
continue;
}
REQUIRE_INBUF(2)
if (c == EUCKR_JAMO_FIRSTBYTE &&
- IN2 == EUCKR_JAMO_FILLER) {
+ INBYTE2 == EUCKR_JAMO_FILLER) {
/* KS X 1001:1998 Annex 3 make-up sequence */
DBCHAR cho, jung, jong;
@@ -145,11 +145,11 @@ DECODER(euc_kr)
if (cho == NONE || jung == NONE || jong == NONE)
return 1;
- OUT1(0xac00 + cho*588 + jung*28 + jong);
- NEXT(8, 1)
+ OUTCHAR(0xac00 + cho*588 + jung*28 + jong);
+ NEXT_IN(8);
}
- else TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
- NEXT(2, 1)
+ else TRYMAP_DEC(ksx1001, writer, c ^ 0x80, INBYTE2 ^ 0x80) {
+ NEXT_IN(2);
}
else
return 1;
@@ -166,27 +166,29 @@ DECODER(euc_kr)
ENCODER(cp949)
{
- while (inleft > 0) {
- Py_UNICODE c = IN1;
+ while (*inpos < inlen) {
+ Py_UCS4 c = INCHAR1;
DBCHAR code;
if (c < 0x80) {
- WRITE1((unsigned char)c)
- NEXT(1, 1)
+ WRITEBYTE1((unsigned char)c)
+ NEXT(1, 1);
continue;
}
- UCS4INVALID(c)
+
+ if (c > 0xFFFF)
+ return 1;
REQUIRE_OUTBUF(2)
TRYMAP_ENC(cp949, code, c);
else return 1;
- OUT1((code >> 8) | 0x80)
+ OUTBYTE1((code >> 8) | 0x80)
if (code & 0x8000)
- OUT2(code & 0xFF) /* MSB set: CP949 */
+ OUTBYTE2(code & 0xFF) /* MSB set: CP949 */
else
- OUT2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */
- NEXT(1, 2)
+ OUTBYTE2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */
+ NEXT(1, 2);
}
return 0;
@@ -195,22 +197,20 @@ ENCODER(cp949)
DECODER(cp949)
{
while (inleft > 0) {
- unsigned char c = IN1;
-
- REQUIRE_OUTBUF(1)
+ unsigned char c = INBYTE1;
if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
continue;
}
REQUIRE_INBUF(2)
- TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);
- else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);
+ TRYMAP_DEC(ksx1001, writer, c ^ 0x80, INBYTE2 ^ 0x80);
+ else TRYMAP_DEC(cp949ext, writer, c, INBYTE2);
else return 1;
- NEXT(2, 1)
+ NEXT_IN(2);
}
return 0;
@@ -250,16 +250,18 @@ static const DBCHAR u2johabjamo[] = {
ENCODER(johab)
{
- while (inleft > 0) {
- Py_UNICODE c = IN1;
+ while (*inpos < inlen) {
+ Py_UCS4 c = INCHAR1;
DBCHAR code;
if (c < 0x80) {
- WRITE1((unsigned char)c)
- NEXT(1, 1)
+ WRITEBYTE1((unsigned char)c)
+ NEXT(1, 1);
continue;
}
- UCS4INVALID(c)
+
+ if (c > 0xFFFF)
+ return 1;
REQUIRE_OUTBUF(2)
@@ -285,9 +287,9 @@ ENCODER(johab)
t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) :
(c1 - 0x21 + 0x197));
t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21);
- OUT1(t1 >> 1)
- OUT2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43)
- NEXT(1, 2)
+ OUTBYTE1(t1 >> 1)
+ OUTBYTE2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43)
+ NEXT(1, 2);
continue;
}
else
@@ -296,9 +298,9 @@ ENCODER(johab)
else
return 1;
- OUT1(code >> 8)
- OUT2(code & 0xff)
- NEXT(1, 2)
+ OUTBYTE1(code >> 8)
+ OUTBYTE2(code & 0xff)
+ NEXT(1, 2);
}
return 0;
@@ -348,18 +350,16 @@ static const unsigned char johabjamo_jongseong[32] = {
DECODER(johab)
{
while (inleft > 0) {
- unsigned char c = IN1, c2;
-
- REQUIRE_OUTBUF(1)
+ unsigned char c = INBYTE1, c2;
if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
continue;
}
REQUIRE_INBUF(2)
- c2 = IN2;
+ c2 = INBYTE2;
if (c < 0xd8) {
/* johab hangul */
@@ -381,33 +381,33 @@ DECODER(johab)
if (i_cho == FILL) {
if (i_jung == FILL) {
if (i_jong == FILL)
- OUT1(0x3000)
+ OUTCHAR(0x3000);
else
- OUT1(0x3100 |
- johabjamo_jongseong[c_jong])
+ OUTCHAR(0x3100 |
+ johabjamo_jongseong[c_jong]);
}
else {
if (i_jong == FILL)
- OUT1(0x3100 |
- johabjamo_jungseong[c_jung])
+ OUTCHAR(0x3100 |
+ johabjamo_jungseong[c_jung]);
else
return 1;
}
} else {
if (i_jung == FILL) {
if (i_jong == FILL)
- OUT1(0x3100 |
- johabjamo_choseong[c_cho])
+ OUTCHAR(0x3100 |
+ johabjamo_choseong[c_cho]);
else
return 1;
}
else
- OUT1(0xac00 +
- i_cho * 588 +
- i_jung * 28 +
- (i_jong == FILL ? 0 : i_jong))
+ OUTCHAR(0xac00 +
+ i_cho * 588 +
+ i_jung * 28 +
+ (i_jong == FILL ? 0 : i_jong));
}
- NEXT(2, 1)
+ NEXT_IN(2);
} else {
/* KS X 1001 except hangul jamos and syllables */
if (c == 0xdf || c > 0xf9 ||
@@ -424,9 +424,9 @@ DECODER(johab)
t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
- TRYMAP_DEC(ksx1001, **outbuf, t1, t2);
+ TRYMAP_DEC(ksx1001, writer, t1, t2);
else return 1;
- NEXT(2, 1)
+ NEXT_IN(2);
}
}
}
diff --git a/Modules/cjkcodecs/_codecs_tw.c b/Modules/cjkcodecs/_codecs_tw.c
index 916298d..80e0b81 100644
--- a/Modules/cjkcodecs/_codecs_tw.c
+++ b/Modules/cjkcodecs/_codecs_tw.c
@@ -13,26 +13,28 @@
ENCODER(big5)
{
- while (inleft > 0) {
- Py_UNICODE c = **inbuf;
+ while (*inpos < inlen) {
+ Py_UCS4 c = INCHAR1;
DBCHAR code;
if (c < 0x80) {
REQUIRE_OUTBUF(1)
**outbuf = (unsigned char)c;
- NEXT(1, 1)
+ NEXT(1, 1);
continue;
}
- UCS4INVALID(c)
+
+ if (c > 0xFFFF)
+ return 1;
REQUIRE_OUTBUF(2)
TRYMAP_ENC(big5, code, c);
else return 1;
- OUT1(code >> 8)
- OUT2(code & 0xFF)
- NEXT(1, 2)
+ OUTBYTE1(code >> 8)
+ OUTBYTE2(code & 0xFF)
+ NEXT(1, 2);
}
return 0;
@@ -41,19 +43,17 @@ ENCODER(big5)
DECODER(big5)
{
while (inleft > 0) {
- unsigned char c = IN1;
-
- REQUIRE_OUTBUF(1)
+ unsigned char c = INBYTE1;
if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
continue;
}
REQUIRE_INBUF(2)
- TRYMAP_DEC(big5, **outbuf, c, IN2) {
- NEXT(2, 1)
+ TRYMAP_DEC(big5, writer, c, INBYTE2) {
+ NEXT_IN(2);
}
else return 1;
}
@@ -68,25 +68,27 @@ DECODER(big5)
ENCODER(cp950)
{
- while (inleft > 0) {
- Py_UNICODE c = IN1;
+ while (*inpos < inlen) {
+ Py_UCS4 c = INCHAR1;
DBCHAR code;
if (c < 0x80) {
- WRITE1((unsigned char)c)
- NEXT(1, 1)
+ WRITEBYTE1((unsigned char)c)
+ NEXT(1, 1);
continue;
}
- UCS4INVALID(c)
+
+ if (c > 0xFFFF)
+ return 1;
REQUIRE_OUTBUF(2)
TRYMAP_ENC(cp950ext, code, c);
else TRYMAP_ENC(big5, code, c);
else return 1;
- OUT1(code >> 8)
- OUT2(code & 0xFF)
- NEXT(1, 2)
+ OUTBYTE1(code >> 8)
+ OUTBYTE2(code & 0xFF)
+ NEXT(1, 2);
}
return 0;
@@ -95,23 +97,21 @@ ENCODER(cp950)
DECODER(cp950)
{
while (inleft > 0) {
- unsigned char c = IN1;
-
- REQUIRE_OUTBUF(1)
+ unsigned char c = INBYTE1;
if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
continue;
}
REQUIRE_INBUF(2)
- TRYMAP_DEC(cp950ext, **outbuf, c, IN2);
- else TRYMAP_DEC(big5, **outbuf, c, IN2);
+ TRYMAP_DEC(cp950ext, writer, c, INBYTE2);
+ else TRYMAP_DEC(big5, writer, c, INBYTE2);
else return 1;
- NEXT(2, 1)
+ NEXT_IN(2);
}
return 0;
diff --git a/Modules/cjkcodecs/alg_jisx0201.h b/Modules/cjkcodecs/alg_jisx0201.h
index 0bc7db5..98c63e6 100644
--- a/Modules/cjkcodecs/alg_jisx0201.h
+++ b/Modules/cjkcodecs/alg_jisx0201.h
@@ -10,15 +10,24 @@
JISX0201_R_ENCODE(c, assi) \
else JISX0201_K_ENCODE(c, assi)
-#define JISX0201_R_DECODE(c, assi) \
+#define JISX0201_R_DECODE_CHAR(c, assi) \
if ((c) < 0x5c) (assi) = (c); \
else if ((c) == 0x5c) (assi) = 0x00a5; \
else if ((c) < 0x7e) (assi) = (c); \
else if ((c) == 0x7e) (assi) = 0x203e; \
else if ((c) == 0x7f) (assi) = 0x7f;
-#define JISX0201_K_DECODE(c, assi) \
+#define JISX0201_R_DECODE(c, writer) \
+ if ((c) < 0x5c) OUTCHAR(c); \
+ else if ((c) == 0x5c) OUTCHAR(0x00a5); \
+ else if ((c) < 0x7e) OUTCHAR(c); \
+ else if ((c) == 0x7e) OUTCHAR(0x203e); \
+ else if ((c) == 0x7f) OUTCHAR(0x7f);
+#define JISX0201_K_DECODE(c, writer) \
if ((c) >= 0xa1 && (c) <= 0xdf) \
- (assi) = 0xfec0 + (c);
-#define JISX0201_DECODE(c, assi) \
- JISX0201_R_DECODE(c, assi) \
- else JISX0201_K_DECODE(c, assi)
+ OUTCHAR(0xfec0 + (c));
+#define JISX0201_K_DECODE_CHAR(c, assi) \
+ if ((c) >= 0xa1 && (c) <= 0xdf) \
+ (assi) = 0xfec0 + (c);
+#define JISX0201_DECODE(c, writer) \
+ JISX0201_R_DECODE(c, writer) \
+ else JISX0201_K_DECODE(c, writer)
diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h
index ab0682a..18cc02f 100644
--- a/Modules/cjkcodecs/cjkcodecs.h
+++ b/Modules/cjkcodecs/cjkcodecs.h
@@ -33,7 +33,7 @@ struct dbcs_index {
typedef struct dbcs_index decode_map;
struct widedbcs_index {
- const ucs4_t *map;
+ const Py_UCS4 *map;
unsigned char bottom, top;
};
typedef struct widedbcs_index widedecode_map;
@@ -56,7 +56,7 @@ struct dbcs_map {
};
struct pair_encodemap {
- ucs4_t uniseq;
+ Py_UCS4 uniseq;
DBCHAR code;
};
@@ -72,7 +72,8 @@ static const struct dbcs_map *mapping_list;
#define ENCODER(encoding) \
static Py_ssize_t encoding##_encode( \
MultibyteCodec_State *state, const void *config, \
- const Py_UNICODE **inbuf, Py_ssize_t inleft, \
+ int kind, void *data, \
+ Py_ssize_t *inpos, Py_ssize_t inlen, \
unsigned char **outbuf, Py_ssize_t outleft, int flags)
#define ENCODER_RESET(encoding) \
static Py_ssize_t encoding##_encode_reset( \
@@ -86,28 +87,30 @@ static const struct dbcs_map *mapping_list;
static Py_ssize_t encoding##_decode( \
MultibyteCodec_State *state, const void *config, \
const unsigned char **inbuf, Py_ssize_t inleft, \
- Py_UNICODE **outbuf, Py_ssize_t outleft)
+ _PyUnicodeWriter *writer)
#define DECODER_RESET(encoding) \
static Py_ssize_t encoding##_decode_reset( \
MultibyteCodec_State *state, const void *config)
-#if Py_UNICODE_SIZE == 4
-#define UCS4INVALID(code) \
- if ((code) > 0xFFFF) \
- return 1;
-#else
-#define UCS4INVALID(code) \
- if (0) ;
-#endif
-
#define NEXT_IN(i) \
- (*inbuf) += (i); \
- (inleft) -= (i);
+ do { \
+ (*inbuf) += (i); \
+ (inleft) -= (i); \
+ } while (0)
+#define NEXT_INCHAR(i) \
+ do { \
+ (*inpos) += (i); \
+ } while (0)
#define NEXT_OUT(o) \
- (*outbuf) += (o); \
- (outleft) -= (o);
+ do { \
+ (*outbuf) += (o); \
+ (outleft) -= (o); \
+ } while (0)
#define NEXT(i, o) \
- NEXT_IN(i) NEXT_OUT(o)
+ do { \
+ NEXT_INCHAR(i); \
+ NEXT_OUT(o); \
+ } while (0)
#define REQUIRE_INBUF(n) \
if (inleft < (n)) \
@@ -116,48 +119,55 @@ static const struct dbcs_map *mapping_list;
if (outleft < (n)) \
return MBERR_TOOSMALL;
-#define IN1 ((*inbuf)[0])
-#define IN2 ((*inbuf)[1])
-#define IN3 ((*inbuf)[2])
-#define IN4 ((*inbuf)[3])
-
-#define OUT1(c) ((*outbuf)[0]) = (c);
-#define OUT2(c) ((*outbuf)[1]) = (c);
-#define OUT3(c) ((*outbuf)[2]) = (c);
-#define OUT4(c) ((*outbuf)[3]) = (c);
-
-#define WRITE1(c1) \
+#define INBYTE1 ((*inbuf)[0])
+#define INBYTE2 ((*inbuf)[1])
+#define INBYTE3 ((*inbuf)[2])
+#define INBYTE4 ((*inbuf)[3])
+
+#define INCHAR1 PyUnicode_READ(kind, data, *inpos)
+#define INCHAR2 PyUnicode_READ(kind, data, *inpos + 1)
+
+#define OUTCHAR(c) /* append one code point through the in-scope `writer` */ \
+ do { \
+ if (_PyUnicodeWriter_WriteChar(writer, (c)) < 0) \
+ return MBERR_TOOSMALL; /* leaves the function that used the macro */ \
+ } while (0)
+
+#define OUTCHAR2(c1, c2) /* append two code points with one reservation */ \
+ do { \
+ Py_UCS4 _c1 = (c1); /* evaluate each macro argument exactly once */ \
+ Py_UCS4 _c2 = (c2); \
+ if (_PyUnicodeWriter_Prepare(writer, 2, Py_MAX(_c1, _c2)) < 0) \
+ return MBERR_TOOSMALL; /* leaves the function that used the macro */ \
+ PyUnicode_WRITE(writer->kind, writer->data, writer->pos, _c1); \
+ PyUnicode_WRITE(writer->kind, writer->data, writer->pos + 1, _c2); \
+ writer->pos += 2; \
+ } while (0)
+
+#define OUTBYTE1(c) ((*outbuf)[0]) = (c);
+#define OUTBYTE2(c) ((*outbuf)[1]) = (c);
+#define OUTBYTE3(c) ((*outbuf)[2]) = (c);
+#define OUTBYTE4(c) ((*outbuf)[3]) = (c);
+
+#define WRITEBYTE1(c1) \
REQUIRE_OUTBUF(1) \
(*outbuf)[0] = (c1);
-#define WRITE2(c1, c2) \
+#define WRITEBYTE2(c1, c2) \
REQUIRE_OUTBUF(2) \
(*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2);
-#define WRITE3(c1, c2, c3) \
+#define WRITEBYTE3(c1, c2, c3) \
REQUIRE_OUTBUF(3) \
(*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2); \
(*outbuf)[2] = (c3);
-#define WRITE4(c1, c2, c3, c4) \
+#define WRITEBYTE4(c1, c2, c3, c4) \
REQUIRE_OUTBUF(4) \
(*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2); \
(*outbuf)[2] = (c3); \
(*outbuf)[3] = (c4);
-#if Py_UNICODE_SIZE == 2
-# define WRITEUCS4(c) \
- REQUIRE_OUTBUF(2) \
- (*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10); \
- (*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff); \
- NEXT_OUT(2)
-#else
-# define WRITEUCS4(c) \
- REQUIRE_OUTBUF(1) \
- **outbuf = (Py_UNICODE)(c); \
- NEXT_OUT(1)
-#endif
-
#define _TRYMAP_ENC(m, assi, val) \
((m)->map != NULL && (val) >= (m)->bottom && \
(val)<= (m)->top && ((assi) = (m)->map[(val) - \
@@ -167,39 +177,41 @@ static const struct dbcs_map *mapping_list;
#define TRYMAP_ENC(charset, assi, uni) \
if TRYMAP_ENC_COND(charset, assi, uni)
-#define _TRYMAP_DEC(m, assi, val) \
- ((m)->map != NULL && (val) >= (m)->bottom && \
- (val)<= (m)->top && ((assi) = (m)->map[(val) - \
- (m)->bottom]) != UNIINV)
-#define TRYMAP_DEC(charset, assi, c1, c2) \
- if _TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
+Py_LOCAL_INLINE(int)
+_TRYMAP_DEC_WRITE(_PyUnicodeWriter *writer, Py_UCS4 c)
+{
+ /* Emit c unless it is the UNIINV marker; a failed write also yields UNIINV. */
+ if (c != UNIINV && _PyUnicodeWriter_WriteChar(writer, c) >= 0)
+ return c;
+ return UNIINV;
+}
-#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) \
- ((m)->map != NULL && (val) >= (m)->bottom && \
- (val)<= (m)->top && \
- ((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \
+#define _TRYMAP_DEC(m, writer, val) \
+ ((m)->map != NULL && \
+ (val) >= (m)->bottom && \
+ (val)<= (m)->top && \
+ _TRYMAP_DEC_WRITE(writer, (m)->map[(val) - (m)->bottom]) != UNIINV)
+#define _TRYMAP_DEC_CHAR(m, assi, val) \
+ ((m)->map != NULL && \
+ (val) >= (m)->bottom && \
+ (val)<= (m)->top && \
+ ((assi) = (m)->map[(val) - (m)->bottom]) != UNIINV)
+#define TRYMAP_DEC(charset, writer, c1, c2) \
+ if _TRYMAP_DEC(&charset##_decmap[c1], writer, c2)
+#define TRYMAP_DEC_CHAR(charset, assi, c1, c2) \
+ if _TRYMAP_DEC_CHAR(&charset##_decmap[c1], assi, c2)
+
+#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) \
+ ((m)->map != NULL && (val) >= (m)->bottom && \
+ (val)<= (m)->top && \
+ ((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \
(((asshi) = (m)->map[((val) - (m)->bottom)*3 + 1]), 1) && \
(((asslo) = (m)->map[((val) - (m)->bottom)*3 + 2]), 1))
#define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) \
if _TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8], \
assplane, asshi, asslo, (uni) & 0xff)
-#define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2) \
- if _TRYMAP_DEC(&charset##_decmap[plane][c1], assi, c2)
-
-#if Py_UNICODE_SIZE == 2
-#define DECODE_SURROGATE(c) \
- if (c >> 10 == 0xd800 >> 10) { /* high surrogate */ \
- REQUIRE_INBUF(2) \
- if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ \
- c = 0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \
- ((ucs4_t)(IN2) - 0xdc00); \
- } \
- }
-#define GET_INSIZE(c) ((c) > 0xffff ? 2 : 1)
-#else
-#define DECODE_SURROGATE(c) {;}
-#define GET_INSIZE(c) 1
-#endif
+#define TRYMAP_DEC_MPLANE(charset, writer, plane, c1, c2) \
+ if _TRYMAP_DEC(&charset##_decmap[plane][c1], writer, c2)
#define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = {
#define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL},
@@ -324,7 +336,7 @@ find_pairencmap(ucs2_t body, ucs2_t modifier,
const struct pair_encodemap *haystack, int haystacksize)
{
int pos, min, max;
- ucs4_t value = body << 16 | modifier;
+ Py_UCS4 value = body << 16 | modifier;
min = 0;
max = haystacksize;
diff --git a/Modules/cjkcodecs/emu_jisx0213_2000.h b/Modules/cjkcodecs/emu_jisx0213_2000.h
index 4227fb2..877337e 100644
--- a/Modules/cjkcodecs/emu_jisx0213_2000.h
+++ b/Modules/cjkcodecs/emu_jisx0213_2000.h
@@ -38,6 +38,9 @@
((c1) == 0x7E && (c2) == 0x7E))) \
return EMULATE_JISX0213_2000_DECODE_INVALID;
-#define EMULATE_JISX0213_2000_DECODE_PLANE2(assi, c1, c2) \
+#define EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c1, c2) \
+ if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B) \
+ OUTCHAR(0x9B1D);
+#define EMULATE_JISX0213_2000_DECODE_PLANE2_CHAR(assi, c1, c2) \
if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B) \
(assi) = 0x9B1D;
diff --git a/Modules/cjkcodecs/mappings_cn.h b/Modules/cjkcodecs/mappings_cn.h
index a6dcebf..1f8c299 100644
--- a/Modules/cjkcodecs/mappings_cn.h
+++ b/Modules/cjkcodecs/mappings_cn.h
@@ -4049,7 +4049,7 @@ __gb18030ext_encmap+3126,0,100},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
static const struct _gb18030_to_unibmp_ranges {
- Py_UNICODE first, last;
+ Py_UCS4 first, last;
DBCHAR base;
} gb18030_to_unibmp_ranges[] = {
{128,163,0},{165,166,36},{169,175,38},{178,182,45},{184,214,50},{216,223,81},{
diff --git a/Modules/cjkcodecs/mappings_jisx0213_pair.h b/Modules/cjkcodecs/mappings_jisx0213_pair.h
index eda8e9e..729e4bc 100644
--- a/Modules/cjkcodecs/mappings_jisx0213_pair.h
+++ b/Modules/cjkcodecs/mappings_jisx0213_pair.h
@@ -3,7 +3,7 @@
static const struct widedbcs_index *jisx0213_pair_decmap;
static const struct pair_encodemap *jisx0213_pair_encmap;
#else
-static const ucs4_t __jisx0213_pair_decmap[49] = {
+static const Py_UCS4 __jisx0213_pair_decmap[49] = {
810234010,810365082,810496154,810627226,810758298,816525466,816656538,
816787610,816918682,817049754,817574042,818163866,818426010,838283418,
15074048,U,U,U,39060224,39060225,42730240,42730241,39387904,39387905,39453440,
diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c
index c032cdb..b449953 100644
--- a/Modules/cjkcodecs/multibytecodec.c
+++ b/Modules/cjkcodecs/multibytecodec.c
@@ -10,15 +10,16 @@
#include "multibytecodec.h"
typedef struct {
- const Py_UNICODE *inbuf, *inbuf_top, *inbuf_end;
+ PyObject *inobj;
+ Py_ssize_t inpos, inlen;
unsigned char *outbuf, *outbuf_end;
PyObject *excobj, *outobj;
} MultibyteEncodeBuffer;
typedef struct {
const unsigned char *inbuf, *inbuf_top, *inbuf_end;
- Py_UNICODE *outbuf, *outbuf_end;
- PyObject *excobj, *outobj;
+ PyObject *excobj;
+ _PyUnicodeWriter writer;
} MultibyteDecodeBuffer;
PyDoc_STRVAR(MultibyteCodec_Encode__doc__,
@@ -45,7 +46,7 @@ static char *incrementalkwarglist[] = {"input", "final", NULL};
static char *streamkwarglist[] = {"stream", "errors", NULL};
static PyObject *multibytecodec_encode(MultibyteCodec *,
- MultibyteCodec_State *, const Py_UNICODE **, Py_ssize_t,
+ MultibyteCodec_State *, PyObject *, Py_ssize_t *,
PyObject *, int);
#define MBENC_RESET MBENC_MAX<<1 /* reset after an encoding session */
@@ -197,29 +198,6 @@ expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
goto errorexit; \
}
-static int
-expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize)
-{
- Py_ssize_t orgpos, orgsize;
-
- orgpos = (Py_ssize_t)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj));
- orgsize = PyUnicode_GET_SIZE(buf->outobj);
- if (PyUnicode_Resize(&buf->outobj, orgsize + (
- esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
- return -1;
-
- buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos;
- buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj)
- + PyUnicode_GET_SIZE(buf->outobj);
-
- return 0;
-}
-#define REQUIRE_DECODEBUFFER(buf, s) { \
- if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \
- if (expand_decodebuffer(buf, s) == -1) \
- goto errorexit; \
-}
-
/**
* MultibyteCodec object
@@ -247,7 +225,7 @@ multibytecodec_encerror(MultibyteCodec *codec,
return 0; /* retry it */
case MBERR_TOOFEW:
reason = "incomplete multibyte sequence";
- esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
+ esize = (Py_ssize_t)buf->inpos;
break;
case MBERR_INTERNAL:
PyErr_SetString(PyExc_RuntimeError,
@@ -261,14 +239,24 @@ multibytecodec_encerror(MultibyteCodec *codec,
}
if (errors == ERROR_REPLACE) {
- const Py_UNICODE replchar = '?', *inbuf = &replchar;
+ PyObject *replchar;
Py_ssize_t r;
+ Py_ssize_t inpos;
+ int kind;
+ void *data;
+ replchar = PyUnicode_FromOrdinal('?');
+ if (replchar == NULL)
+ goto errorexit;
+ kind = PyUnicode_KIND(replchar);
+ data = PyUnicode_DATA(replchar);
+
+ inpos = 0;
for (;;) {
- Py_ssize_t outleft;
+ Py_ssize_t outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
- outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
- r = codec->encode(state, codec->config, &inbuf, 1,
+ r = codec->encode(state, codec->config,
+ kind, data, &inpos, 1,
&buf->outbuf, outleft, 0);
if (r == MBERR_TOOSMALL) {
REQUIRE_ENCODEBUFFER(buf, -1);
@@ -278,25 +266,27 @@ multibytecodec_encerror(MultibyteCodec *codec,
break;
}
+ Py_DECREF(replchar);
+
if (r != 0) {
REQUIRE_ENCODEBUFFER(buf, 1);
*buf->outbuf++ = '?';
}
}
if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
- buf->inbuf += esize;
+ buf->inpos += esize;
return 0;
}
- start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
+ start = (Py_ssize_t)buf->inpos;
end = start + esize;
/* use cached exception object if available */
if (buf->excobj == NULL) {
- buf->excobj = PyUnicodeEncodeError_Create(codec->encoding,
- buf->inbuf_top,
- buf->inbuf_end - buf->inbuf_top,
- start, end, reason);
+ buf->excobj = PyObject_CallFunction(PyExc_UnicodeEncodeError,
+ "sOnns",
+ codec->encoding, buf->inobj,
+ start, end, reason);
if (buf->excobj == NULL)
goto errorexit;
}
@@ -325,10 +315,10 @@ multibytecodec_encerror(MultibyteCodec *codec,
}
if (PyUnicode_Check(tobj)) {
- const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj);
+ Py_ssize_t inpos;
- retstr = multibytecodec_encode(codec, state, &uraw,
- PyUnicode_GET_SIZE(tobj), ERROR_STRICT,
+ retstr = multibytecodec_encode(codec, state, tobj,
+ &inpos, ERROR_STRICT,
MBENC_FLUSH);
if (retstr == NULL)
goto errorexit;
@@ -347,15 +337,15 @@ multibytecodec_encerror(MultibyteCodec *codec,
newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
if (newpos < 0 && !PyErr_Occurred())
- newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
- if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
+ newpos += (Py_ssize_t)buf->inlen;
+ if (newpos < 0 || newpos > buf->inlen) {
PyErr_Clear();
PyErr_Format(PyExc_IndexError,
"position %zd from error handler out of bounds",
newpos);
goto errorexit;
}
- buf->inbuf = buf->inbuf_top + newpos;
+ buf->inpos = newpos;
Py_DECREF(retobj);
Py_DECREF(retstr);
@@ -374,7 +364,7 @@ multibytecodec_decerror(MultibyteCodec *codec,
PyObject *errors, Py_ssize_t e)
{
PyObject *retobj = NULL, *retuni = NULL;
- Py_ssize_t retunisize, newpos;
+ Py_ssize_t newpos;
const char *reason;
Py_ssize_t esize, start, end;
@@ -385,7 +375,6 @@ multibytecodec_decerror(MultibyteCodec *codec,
else {
switch (e) {
case MBERR_TOOSMALL:
- REQUIRE_DECODEBUFFER(buf, -1);
return 0; /* retry it */
case MBERR_TOOFEW:
reason = "incomplete multibyte sequence";
@@ -403,8 +392,9 @@ multibytecodec_decerror(MultibyteCodec *codec,
}
if (errors == ERROR_REPLACE) {
- REQUIRE_DECODEBUFFER(buf, 1);
- *buf->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER;
+ if (_PyUnicodeWriter_WriteChar(&buf->writer,
+ Py_UNICODE_REPLACEMENT_CHARACTER) < 0)
+ goto errorexit;
}
if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
buf->inbuf += esize;
@@ -447,15 +437,8 @@ multibytecodec_decerror(MultibyteCodec *codec,
goto errorexit;
}
- if (PyUnicode_AsUnicode(retuni) == NULL)
+ if (_PyUnicodeWriter_WriteStr(&buf->writer, retuni) < 0)
goto errorexit;
- retunisize = PyUnicode_GET_SIZE(retuni);
- if (retunisize > 0) {
- REQUIRE_DECODEBUFFER(buf, retunisize);
- memcpy((char *)buf->outbuf, PyUnicode_AS_UNICODE(retuni),
- retunisize * Py_UNICODE_SIZE);
- buf->outbuf += retunisize;
- }
newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
if (newpos < 0 && !PyErr_Occurred())
@@ -479,19 +462,29 @@ errorexit:
static PyObject *
multibytecodec_encode(MultibyteCodec *codec,
MultibyteCodec_State *state,
- const Py_UNICODE **data, Py_ssize_t datalen,
+ PyObject *text, Py_ssize_t *inpos_t,
PyObject *errors, int flags)
{
MultibyteEncodeBuffer buf;
Py_ssize_t finalsize, r = 0;
+ Py_ssize_t datalen;
+ int kind;
+ void *data;
+
+ if (PyUnicode_READY(text) < 0)
+ return NULL;
+ datalen = PyUnicode_GET_LENGTH(text);
if (datalen == 0 && !(flags & MBENC_RESET))
return PyBytes_FromStringAndSize(NULL, 0);
buf.excobj = NULL;
buf.outobj = NULL;
- buf.inbuf = buf.inbuf_top = *data;
- buf.inbuf_end = buf.inbuf_top + datalen;
+ buf.inobj = text; /* borrowed reference */
+ buf.inpos = 0;
+ buf.inlen = datalen;
+ kind = PyUnicode_KIND(buf.inobj);
+ data = PyUnicode_DATA(buf.inobj);
if (datalen > (PY_SSIZE_T_MAX - 16) / 2) {
PyErr_NoMemory();
@@ -504,14 +497,14 @@ multibytecodec_encode(MultibyteCodec *codec,
buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj);
buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj);
- while (buf.inbuf < buf.inbuf_end) {
- Py_ssize_t inleft, outleft;
-
+ while (buf.inpos < buf.inlen) {
/* we don't reuse inleft and outleft here.
* error callbacks can relocate the cursor anywhere on buffer*/
- inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
- outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
- r = codec->encode(state, codec->config, &buf.inbuf, inleft,
+ Py_ssize_t outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
+
+ r = codec->encode(state, codec->config,
+ kind, data,
+ &buf.inpos, buf.inlen,
&buf.outbuf, outleft, flags);
if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
break;
@@ -542,7 +535,8 @@ multibytecodec_encode(MultibyteCodec *codec,
if (_PyBytes_Resize(&buf.outobj, finalsize) == -1)
goto errorexit;
- *data = buf.inbuf;
+ if (inpos_t)
+ *inpos_t = buf.inpos;
Py_XDECREF(buf.excobj);
return buf.outobj;
@@ -557,7 +551,6 @@ MultibyteCodec_Encode(MultibyteCodecObject *self,
PyObject *args, PyObject *kwargs)
{
MultibyteCodec_State state;
- Py_UNICODE *data;
PyObject *errorcb, *r, *arg, *ucvt;
const char *errors = NULL;
Py_ssize_t datalen;
@@ -580,11 +573,11 @@ MultibyteCodec_Encode(MultibyteCodecObject *self,
}
}
- data = PyUnicode_AsUnicodeAndSize(arg, &datalen);
- if (data == NULL) {
+ if (PyUnicode_READY(arg) < 0) {
Py_XDECREF(ucvt);
return NULL;
}
+ datalen = PyUnicode_GET_LENGTH(arg);
errorcb = internal_error_callback(errors);
if (errorcb == NULL) {
@@ -596,7 +589,7 @@ MultibyteCodec_Encode(MultibyteCodecObject *self,
self->codec->encinit(&state, self->codec->config) != 0)
goto errorexit;
r = multibytecodec_encode(self->codec, &state,
- (const Py_UNICODE **)&data, datalen, errorcb,
+ arg, NULL, errorcb,
MBENC_FLUSH | MBENC_RESET);
if (r == NULL)
goto errorexit;
@@ -617,10 +610,10 @@ MultibyteCodec_Decode(MultibyteCodecObject *self,
{
MultibyteCodec_State state;
MultibyteDecodeBuffer buf;
- PyObject *errorcb;
+ PyObject *errorcb, *res;
Py_buffer pdata;
const char *data, *errors = NULL;
- Py_ssize_t datalen, finalsize;
+ Py_ssize_t datalen;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|z:decode",
codeckwarglist, &pdata, &errors))
@@ -640,29 +633,23 @@ MultibyteCodec_Decode(MultibyteCodecObject *self,
return make_tuple(PyUnicode_New(0, 0), 0);
}
+ _PyUnicodeWriter_Init(&buf.writer);
+ buf.writer.min_length = datalen;
buf.excobj = NULL;
buf.inbuf = buf.inbuf_top = (unsigned char *)data;
buf.inbuf_end = buf.inbuf_top + datalen;
- buf.outobj = PyUnicode_FromUnicode(NULL, datalen);
- if (buf.outobj == NULL)
- goto errorexit;
- buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj);
- if (buf.outbuf == NULL)
- goto errorexit;
- buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj);
if (self->codec->decinit != NULL &&
self->codec->decinit(&state, self->codec->config) != 0)
goto errorexit;
while (buf.inbuf < buf.inbuf_end) {
- Py_ssize_t inleft, outleft, r;
+ Py_ssize_t inleft, r;
inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
- outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
r = self->codec->decode(&state, self->codec->config,
- &buf.inbuf, inleft, &buf.outbuf, outleft);
+ &buf.inbuf, inleft, &buf.writer);
if (r == 0)
break;
else if (multibytecodec_decerror(self->codec, &state,
@@ -670,23 +657,20 @@ MultibyteCodec_Decode(MultibyteCodecObject *self,
goto errorexit;
}
- finalsize = (Py_ssize_t)(buf.outbuf -
- PyUnicode_AS_UNICODE(buf.outobj));
-
- if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
- if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
- goto errorexit;
+ res = _PyUnicodeWriter_Finish(&buf.writer);
+ if (res == NULL)
+ goto errorexit;
PyBuffer_Release(&pdata);
Py_XDECREF(buf.excobj);
ERROR_DECREF(errorcb);
- return make_tuple(buf.outobj, datalen);
+ return make_tuple(res, datalen);
errorexit:
PyBuffer_Release(&pdata);
ERROR_DECREF(errorcb);
Py_XDECREF(buf.excobj);
- Py_XDECREF(buf.outobj);
+ _PyUnicodeWriter_Dealloc(&buf.writer);
return NULL;
}
@@ -752,9 +736,9 @@ encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
PyObject *unistr, int final)
{
PyObject *ucvt, *r = NULL;
- Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL;
- Py_ssize_t datalen, origpending;
- wchar_t *data;
+ PyObject *inbuf = NULL;
+ Py_ssize_t inpos, datalen;
+ PyObject *origpending = NULL;
if (PyUnicode_Check(unistr))
ucvt = NULL;
@@ -770,69 +754,66 @@ encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
}
}
- data = PyUnicode_AsUnicodeAndSize(unistr, &datalen);
- if (data == NULL)
- goto errorexit;
- origpending = ctx->pendingsize;
+ if (ctx->pending) {
+ PyObject *inbuf_tmp;
- if (origpending > 0) {
- if (datalen > PY_SSIZE_T_MAX - ctx->pendingsize) {
- PyErr_NoMemory();
- /* inbuf_tmp == NULL */
- goto errorexit;
- }
- inbuf_tmp = PyMem_New(Py_UNICODE, datalen + ctx->pendingsize);
+ Py_INCREF(ctx->pending);
+ origpending = ctx->pending;
+
+ Py_INCREF(ctx->pending);
+ inbuf_tmp = ctx->pending;
+ PyUnicode_Append(&inbuf_tmp, unistr);
if (inbuf_tmp == NULL)
goto errorexit;
- memcpy(inbuf_tmp, ctx->pending,
- Py_UNICODE_SIZE * ctx->pendingsize);
- memcpy(inbuf_tmp + ctx->pendingsize,
- PyUnicode_AS_UNICODE(unistr),
- Py_UNICODE_SIZE * datalen);
- datalen += ctx->pendingsize;
- ctx->pendingsize = 0;
+ Py_CLEAR(ctx->pending);
inbuf = inbuf_tmp;
}
- else
- inbuf = (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr);
+ else {
+ origpending = NULL;
- inbuf_end = inbuf + datalen;
+ Py_INCREF(unistr);
+ inbuf = unistr;
+ }
+ if (PyUnicode_READY(inbuf) < 0)
+ goto errorexit;
+ inpos = 0;
+ datalen = PyUnicode_GET_LENGTH(inbuf);
r = multibytecodec_encode(ctx->codec, &ctx->state,
- (const Py_UNICODE **)&inbuf, datalen,
- ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
+ inbuf, &inpos,
+ ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
if (r == NULL) {
/* recover the original pending buffer */
- if (origpending > 0)
- memcpy(ctx->pending, inbuf_tmp,
- Py_UNICODE_SIZE * origpending);
- ctx->pendingsize = origpending;
+ Py_CLEAR(ctx->pending);
+ ctx->pending = origpending;
+ origpending = NULL;
goto errorexit;
}
+ Py_XDECREF(origpending);
- if (inbuf < inbuf_end) {
- ctx->pendingsize = (Py_ssize_t)(inbuf_end - inbuf);
- if (ctx->pendingsize > MAXENCPENDING) {
+ if (inpos < datalen) {
+ if (datalen - inpos > MAXENCPENDING) {
/* normal codecs can't reach here */
- ctx->pendingsize = 0;
PyErr_SetString(PyExc_UnicodeError,
"pending buffer overflow");
goto errorexit;
}
- memcpy(ctx->pending, inbuf,
- ctx->pendingsize * Py_UNICODE_SIZE);
+ ctx->pending = PyUnicode_Substring(inbuf, inpos, datalen);
+ if (ctx->pending == NULL) {
+ /* normal codecs can't reach here */
+ goto errorexit;
+ }
}
- if (inbuf_tmp != NULL)
- PyMem_Del(inbuf_tmp);
+ Py_DECREF(inbuf);
Py_XDECREF(ucvt);
return r;
errorexit:
- if (inbuf_tmp != NULL)
- PyMem_Del(inbuf_tmp);
Py_XDECREF(r);
Py_XDECREF(ucvt);
+ Py_XDECREF(origpending);
+ Py_XDECREF(inbuf);
return NULL;
}
@@ -859,17 +840,7 @@ decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
{
buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
buf->inbuf_end = buf->inbuf_top + size;
- if (buf->outobj == NULL) { /* only if outobj is not allocated yet */
- buf->outobj = PyUnicode_FromUnicode(NULL, size);
- if (buf->outobj == NULL)
- return -1;
- buf->outbuf = PyUnicode_AsUnicode(buf->outobj);
- if (buf->outbuf == NULL)
- return -1;
- buf->outbuf_end = buf->outbuf +
- PyUnicode_GET_SIZE(buf->outobj);
- }
-
+ buf->writer.min_length += size;
return 0;
}
@@ -878,14 +849,13 @@ decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
MultibyteDecodeBuffer *buf)
{
while (buf->inbuf < buf->inbuf_end) {
- Py_ssize_t inleft, outleft;
+ Py_ssize_t inleft;
Py_ssize_t r;
inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
- outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
r = ctx->codec->decode(&ctx->state, ctx->codec->config,
- &buf->inbuf, inleft, &buf->outbuf, outleft);
+ &buf->inbuf, inleft, &buf->writer);
if (r == 0 || r == MBERR_TOOFEW)
break;
else if (multibytecodec_decerror(ctx->codec, &ctx->state,
@@ -927,7 +897,7 @@ mbiencoder_reset(MultibyteIncrementalEncoderObject *self)
if (r != 0)
return NULL;
}
- self->pendingsize = 0;
+ Py_CLEAR(self->pending);
Py_RETURN_NONE;
}
@@ -963,7 +933,7 @@ mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
}
self->codec = ((MultibyteCodecObject *)codec)->codec;
- self->pendingsize = 0;
+ self->pending = NULL;
self->errors = internal_error_callback(errors);
if (self->errors == NULL)
goto errorexit;
@@ -1058,8 +1028,9 @@ mbidecoder_decode(MultibyteIncrementalDecoderObject *self,
MultibyteDecodeBuffer buf;
char *data, *wdata = NULL;
Py_buffer pdata;
- Py_ssize_t wsize, finalsize = 0, size, origpending;
+ Py_ssize_t wsize, size, origpending;
int final = 0;
+ PyObject *res;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i:decode",
incrementalkwarglist, &pdata, &final))
@@ -1067,7 +1038,8 @@ mbidecoder_decode(MultibyteIncrementalDecoderObject *self,
data = pdata.buf;
size = pdata.len;
- buf.outobj = buf.excobj = NULL;
+ _PyUnicodeWriter_Init(&buf.writer);
+ buf.excobj = NULL;
origpending = self->pendingsize;
if (self->pendingsize == 0) {
@@ -1109,23 +1081,22 @@ mbidecoder_decode(MultibyteIncrementalDecoderObject *self,
goto errorexit;
}
- finalsize = (Py_ssize_t)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj));
- if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
- if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
- goto errorexit;
+ res = _PyUnicodeWriter_Finish(&buf.writer);
+ if (res == NULL)
+ goto errorexit;
PyBuffer_Release(&pdata);
if (wdata != data)
PyMem_Del(wdata);
Py_XDECREF(buf.excobj);
- return buf.outobj;
+ return res;
errorexit:
PyBuffer_Release(&pdata);
if (wdata != NULL && wdata != data)
PyMem_Del(wdata);
Py_XDECREF(buf.excobj);
- Py_XDECREF(buf.outobj);
+ _PyUnicodeWriter_Dealloc(&buf.writer);
return NULL;
}
@@ -1265,13 +1236,14 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self,
const char *method, Py_ssize_t sizehint)
{
MultibyteDecodeBuffer buf;
- PyObject *cres;
- Py_ssize_t rsize, finalsize = 0;
+ PyObject *cres, *res;
+ Py_ssize_t rsize;
if (sizehint == 0)
return PyUnicode_New(0, 0);
- buf.outobj = buf.excobj = NULL;
+ _PyUnicodeWriter_Init(&buf.writer);
+ buf.excobj = NULL;
cres = NULL;
for (;;) {
@@ -1303,19 +1275,19 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self,
if (PyBytes_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) {
PyErr_NoMemory();
goto errorexit;
- }
- rsize = PyBytes_GET_SIZE(cres) + self->pendingsize;
- ctr = PyBytes_FromStringAndSize(NULL, rsize);
- if (ctr == NULL)
- goto errorexit;
- ctrdata = PyBytes_AS_STRING(ctr);
- memcpy(ctrdata, self->pending, self->pendingsize);
- memcpy(ctrdata + self->pendingsize,
- PyBytes_AS_STRING(cres),
- PyBytes_GET_SIZE(cres));
- Py_DECREF(cres);
- cres = ctr;
- self->pendingsize = 0;
+ }
+ rsize = PyBytes_GET_SIZE(cres) + self->pendingsize;
+ ctr = PyBytes_FromStringAndSize(NULL, rsize);
+ if (ctr == NULL)
+ goto errorexit;
+ ctrdata = PyBytes_AS_STRING(ctr);
+ memcpy(ctrdata, self->pending, self->pendingsize);
+ memcpy(ctrdata + self->pendingsize,
+ PyBytes_AS_STRING(cres),
+ PyBytes_GET_SIZE(cres));
+ Py_DECREF(cres);
+ cres = ctr;
+ self->pendingsize = 0;
}
rsize = PyBytes_GET_SIZE(cres);
@@ -1340,29 +1312,27 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self,
goto errorexit;
}
- finalsize = (Py_ssize_t)(buf.outbuf -
- PyUnicode_AS_UNICODE(buf.outobj));
Py_DECREF(cres);
cres = NULL;
- if (sizehint < 0 || finalsize != 0 || rsize == 0)
+ if (sizehint < 0 || buf.writer.pos != 0 || rsize == 0)
break;
sizehint = 1; /* read 1 more byte and retry */
}
- if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
- if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
- goto errorexit;
+ res = _PyUnicodeWriter_Finish(&buf.writer);
+ if (res == NULL)
+ goto errorexit;
Py_XDECREF(cres);
Py_XDECREF(buf.excobj);
- return buf.outobj;
+ return res;
errorexit:
Py_XDECREF(cres);
Py_XDECREF(buf.excobj);
- Py_XDECREF(buf.outobj);
+ _PyUnicodeWriter_Dealloc(&buf.writer);
return NULL;
}
@@ -1649,18 +1619,16 @@ mbstreamwriter_writelines(MultibyteStreamWriterObject *self, PyObject *lines)
static PyObject *
mbstreamwriter_reset(MultibyteStreamWriterObject *self)
{
- const Py_UNICODE *pending;
PyObject *pwrt;
- pending = self->pending;
pwrt = multibytecodec_encode(self->codec, &self->state,
- &pending, self->pendingsize, self->errors,
+ self->pending, NULL, self->errors,
MBENC_FLUSH | MBENC_RESET);
/* some pending buffer can be truncated when UnicodeEncodeError is
* raised on 'strict' mode. but, 'reset' method is designed to
* reset the pending buffer or states so failed string sequence
* ought to be missed */
- self->pendingsize = 0;
+ Py_CLEAR(self->pending);
if (pwrt == NULL)
return NULL;
@@ -1706,7 +1674,7 @@ mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
self->codec = ((MultibyteCodecObject *)codec)->codec;
self->stream = stream;
Py_INCREF(stream);
- self->pendingsize = 0;
+ self->pending = NULL;
self->errors = internal_error_callback(errors);
if (self->errors == NULL)
goto errorexit;
diff --git a/Modules/cjkcodecs/multibytecodec.h b/Modules/cjkcodecs/multibytecodec.h
index 1b6ef55..3050aeb 100644
--- a/Modules/cjkcodecs/multibytecodec.h
+++ b/Modules/cjkcodecs/multibytecodec.h
@@ -10,12 +10,6 @@
extern "C" {
#endif
-#ifdef uint32_t
-typedef uint32_t ucs4_t;
-#else
-typedef unsigned int ucs4_t;
-#endif
-
#ifdef uint16_t
typedef uint16_t ucs2_t, DBCHAR;
#else
@@ -27,13 +21,14 @@ typedef union {
int i;
unsigned char c[8];
ucs2_t u2[4];
- ucs4_t u4[2];
+ Py_UCS4 u4[2];
} MultibyteCodec_State;
typedef int (*mbcodec_init)(const void *config);
typedef Py_ssize_t (*mbencode_func)(MultibyteCodec_State *state,
const void *config,
- const Py_UNICODE **inbuf, Py_ssize_t inleft,
+ int kind, void *data,
+ Py_ssize_t *inpos, Py_ssize_t inlen,
unsigned char **outbuf, Py_ssize_t outleft,
int flags);
typedef int (*mbencodeinit_func)(MultibyteCodec_State *state,
@@ -44,7 +39,7 @@ typedef Py_ssize_t (*mbencodereset_func)(MultibyteCodec_State *state,
typedef Py_ssize_t (*mbdecode_func)(MultibyteCodec_State *state,
const void *config,
const unsigned char **inbuf, Py_ssize_t inleft,
- Py_UNICODE **outbuf, Py_ssize_t outleft);
+ _PyUnicodeWriter *writer);
typedef int (*mbdecodeinit_func)(MultibyteCodec_State *state,
const void *config);
typedef Py_ssize_t (*mbdecodereset_func)(MultibyteCodec_State *state,
@@ -81,8 +76,7 @@ typedef struct {
#define MAXENCPENDING 2
#define _MultibyteStatefulEncoder_HEAD \
_MultibyteStatefulCodec_HEAD \
- Py_UNICODE pending[MAXENCPENDING]; \
- Py_ssize_t pendingsize;
+ PyObject *pending;
typedef struct {
_MultibyteStatefulEncoder_HEAD
} MultibyteStatefulEncoderContext;