summaryrefslogtreecommitdiffstats
path: root/Modules/cjkcodecs
diff options
context:
space:
mode:
author Victor Stinner <victor.stinner@gmail.com> 2013-04-11 20:09:04 (GMT)
committer Victor Stinner <victor.stinner@gmail.com> 2013-04-11 20:09:04 (GMT)
commit a0dd0213cc457bdf2b04206548f5a269db256d4d (patch)
tree d282d6b615e9feef64d68fa48cac54e22fd97150 /Modules/cjkcodecs
parent d8a5cc91e6559e11ca28e6a915017433b14b12d1 (diff)
downloadcpython-a0dd0213cc457bdf2b04206548f5a269db256d4d.zip
cpython-a0dd0213cc457bdf2b04206548f5a269db256d4d.tar.gz
cpython-a0dd0213cc457bdf2b04206548f5a269db256d4d.tar.bz2
Close #17693: Rewrite CJK decoders to use the _PyUnicodeWriter API instead of
the legacy Py_UNICODE API. Also add a new _PyUnicodeWriter_WriteChar() function.
Diffstat (limited to 'Modules/cjkcodecs')
-rw-r--r-- Modules/cjkcodecs/_codecs_cn.c 89
-rw-r--r-- Modules/cjkcodecs/_codecs_hk.c 34
-rw-r--r-- Modules/cjkcodecs/_codecs_iso2022.c 186
-rw-r--r-- Modules/cjkcodecs/_codecs_jp.c 167
-rw-r--r-- Modules/cjkcodecs/_codecs_kr.c 66
-rw-r--r-- Modules/cjkcodecs/_codecs_tw.c 26
-rw-r--r-- Modules/cjkcodecs/alg_jisx0201.h 21
-rw-r--r-- Modules/cjkcodecs/cjkcodecs.h 87
-rw-r--r-- Modules/cjkcodecs/emu_jisx0213_2000.h 5
-rw-r--r-- Modules/cjkcodecs/mappings_cn.h 2
-rw-r--r-- Modules/cjkcodecs/mappings_jisx0213_pair.h 2
-rw-r--r-- Modules/cjkcodecs/multibytecodec.c 129
-rw-r--r-- Modules/cjkcodecs/multibytecodec.h 10
13 files changed, 384 insertions, 440 deletions
diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c
index 9e9e96c..ba6b4ee 100644
--- a/Modules/cjkcodecs/_codecs_cn.c
+++ b/Modules/cjkcodecs/_codecs_cn.c
@@ -23,12 +23,12 @@
* A844 undefined U+2015 HORIZONTAL BAR
*/
-#define GBK_DECODE(dc1, dc2, assi) \
- if ((dc1) == 0xa1 && (dc2) == 0xaa) (assi) = 0x2014; \
- else if ((dc1) == 0xa8 && (dc2) == 0x44) (assi) = 0x2015; \
- else if ((dc1) == 0xa1 && (dc2) == 0xa4) (assi) = 0x00b7; \
- else TRYMAP_DEC(gb2312, assi, dc1 ^ 0x80, dc2 ^ 0x80); \
- else TRYMAP_DEC(gbkext, assi, dc1, dc2);
+#define GBK_DECODE(dc1, dc2, writer) \
+ if ((dc1) == 0xa1 && (dc2) == 0xaa) OUTCHAR(0x2014); \
+ else if ((dc1) == 0xa8 && (dc2) == 0x44) OUTCHAR(0x2015); \
+ else if ((dc1) == 0xa1 && (dc2) == 0xa4) OUTCHAR(0x00b7); \
+ else TRYMAP_DEC(gb2312, writer, dc1 ^ 0x80, dc2 ^ 0x80); \
+ else TRYMAP_DEC(gbkext, writer, dc1, dc2);
#define GBK_ENCODE(code, assi) \
if ((code) == 0x2014) (assi) = 0xa1aa; \
@@ -43,7 +43,7 @@
ENCODER(gb2312)
{
while (inleft > 0) {
- Py_UNICODE c = IN1;
+ Py_UCS4 c = IN1;
DBCHAR code;
if (c < 0x80) {
@@ -73,17 +73,15 @@ DECODER(gb2312)
while (inleft > 0) {
unsigned char c = **inbuf;
- REQUIRE_OUTBUF(1)
-
if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
continue;
}
REQUIRE_INBUF(2)
- TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
- NEXT(2, 1)
+ TRYMAP_DEC(gb2312, writer, c ^ 0x80, IN2 ^ 0x80) {
+ NEXT_IN(2);
}
else return 1;
}
@@ -99,7 +97,7 @@ DECODER(gb2312)
ENCODER(gbk)
{
while (inleft > 0) {
- Py_UNICODE c = IN1;
+ Py_UCS4 c = IN1;
DBCHAR code;
if (c < 0x80) {
@@ -130,20 +128,18 @@ DECODER(gbk)
while (inleft > 0) {
unsigned char c = IN1;
- REQUIRE_OUTBUF(1)
-
if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
continue;
}
REQUIRE_INBUF(2)
- GBK_DECODE(c, IN2, **outbuf)
+ GBK_DECODE(c, IN2, writer)
else return 1;
- NEXT(2, 1)
+ NEXT_IN(2);
}
return 0;
@@ -157,7 +153,7 @@ DECODER(gbk)
ENCODER(gb18030)
{
while (inleft > 0) {
- ucs4_t c = IN1;
+ Py_UCS4 c = IN1;
DBCHAR code;
if (c < 0x80) {
@@ -174,7 +170,7 @@ ENCODER(gb18030)
return 1;
#endif
else if (c >= 0x10000) {
- ucs4_t tc = c - 0x10000;
+ Py_UCS4 tc = c - 0x10000;
REQUIRE_OUTBUF(4)
@@ -208,7 +204,7 @@ ENCODER(gb18030)
utrrange++)
if (utrrange->first <= c &&
c <= utrrange->last) {
- Py_UNICODE tc;
+ Py_UCS4 tc;
tc = c - utrrange->first +
utrrange->base;
@@ -247,11 +243,9 @@ DECODER(gb18030)
while (inleft > 0) {
unsigned char c = IN1, c2;
- REQUIRE_OUTBUF(1)
-
if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
continue;
}
@@ -261,7 +255,7 @@ DECODER(gb18030)
if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */
const struct _gb18030_to_unibmp_ranges *utr;
unsigned char c3, c4;
- ucs4_t lseq;
+ Py_UCS4 lseq;
REQUIRE_INBUF(4)
c3 = IN3;
@@ -272,34 +266,34 @@ DECODER(gb18030)
c3 -= 0x81; c4 -= 0x30;
if (c < 4) { /* U+0080 - U+FFFF */
- lseq = ((ucs4_t)c * 10 + c2) * 1260 +
- (ucs4_t)c3 * 10 + c4;
+ lseq = ((Py_UCS4)c * 10 + c2) * 1260 +
+ (Py_UCS4)c3 * 10 + c4;
if (lseq < 39420) {
for (utr = gb18030_to_unibmp_ranges;
lseq >= (utr + 1)->base;
utr++) ;
- OUT1(utr->first - utr->base + lseq)
- NEXT(4, 1)
+ OUTCHAR(utr->first - utr->base + lseq);
+ NEXT_IN(4);
continue;
}
}
else if (c >= 15) { /* U+10000 - U+10FFFF */
- lseq = 0x10000 + (((ucs4_t)c-15) * 10 + c2)
- * 1260 + (ucs4_t)c3 * 10 + c4;
+ lseq = 0x10000 + (((Py_UCS4)c-15) * 10 + c2)
+ * 1260 + (Py_UCS4)c3 * 10 + c4;
if (lseq <= 0x10FFFF) {
- WRITEUCS4(lseq);
- NEXT_IN(4)
+ OUTCHAR(lseq);
+ NEXT_IN(4);
continue;
}
}
return 1;
}
- GBK_DECODE(c, c2, **outbuf)
- else TRYMAP_DEC(gb18030ext, **outbuf, c, c2);
+ GBK_DECODE(c, c2, writer)
+ else TRYMAP_DEC(gb18030ext, writer, c, c2);
else return 1;
- NEXT(2, 1)
+ NEXT_IN(2);
}
return 0;
@@ -329,7 +323,7 @@ ENCODER_RESET(hz)
ENCODER(hz)
{
while (inleft > 0) {
- Py_UNICODE c = IN1;
+ Py_UCS4 c = IN1;
DBCHAR code;
if (c < 0x80) {
@@ -389,8 +383,8 @@ DECODER(hz)
REQUIRE_INBUF(2)
if (c2 == '~') {
- WRITE1('~')
- NEXT(2, 1)
+ OUTCHAR('~');
+ NEXT_IN(2);
continue;
}
else if (c2 == '{' && state->i == 0)
@@ -401,7 +395,7 @@ DECODER(hz)
; /* line-continuation */
else
return 1;
- NEXT(2, 0);
+ NEXT_IN(2);
continue;
}
@@ -409,14 +403,13 @@ DECODER(hz)
return 1;
if (state->i == 0) { /* ASCII mode */
- WRITE1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
}
else { /* GB mode */
REQUIRE_INBUF(2)
- REQUIRE_OUTBUF(1)
- TRYMAP_DEC(gb2312, **outbuf, c, IN2) {
- NEXT(2, 1)
+ TRYMAP_DEC(gb2312, writer, c, IN2) {
+ NEXT_IN(2);
}
else
return 1;
diff --git a/Modules/cjkcodecs/_codecs_hk.c b/Modules/cjkcodecs/_codecs_hk.c
index d3ad04b..e31664b 100644
--- a/Modules/cjkcodecs/_codecs_hk.c
+++ b/Modules/cjkcodecs/_codecs_hk.c
@@ -39,7 +39,7 @@ static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5
ENCODER(big5hkscs)
{
while (inleft > 0) {
- ucs4_t c = **inbuf;
+ Py_UCS4 c = **inbuf;
DBCHAR code;
Py_ssize_t insize;
@@ -103,26 +103,24 @@ DECODER(big5hkscs)
{
while (inleft > 0) {
unsigned char c = IN1;
- ucs4_t decoded;
-
- REQUIRE_OUTBUF(1)
+ Py_UCS4 decoded;
if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
continue;
}
REQUIRE_INBUF(2)
if (0xc6 > c || c > 0xc8 || (c < 0xc7 && IN2 < 0xa1)) {
- TRYMAP_DEC(big5, **outbuf, c, IN2) {
- NEXT(2, 1)
+ TRYMAP_DEC(big5, writer, c, IN2) {
+ NEXT_IN(2);
continue;
}
}
- TRYMAP_DEC(big5hkscs, decoded, c, IN2)
+ TRYMAP_DEC_CHAR(big5hkscs, decoded, c, IN2)
{
int s = BH2S(c, IN2);
const unsigned char *hintbase;
@@ -146,25 +144,25 @@ DECODER(big5hkscs)
return MBERR_INTERNAL;
if (hintbase[s >> 3] & (1 << (s & 7))) {
- WRITEUCS4(decoded | 0x20000)
- NEXT_IN(2)
+ OUTCHAR(decoded | 0x20000);
+ NEXT_IN(2);
}
else {
- OUT1(decoded)
- NEXT(2, 1)
+ OUTCHAR(decoded);
+ NEXT_IN(2);
}
continue;
}
switch ((c << 8) | IN2) {
- case 0x8862: WRITE2(0x00ca, 0x0304); break;
- case 0x8864: WRITE2(0x00ca, 0x030c); break;
- case 0x88a3: WRITE2(0x00ea, 0x0304); break;
- case 0x88a5: WRITE2(0x00ea, 0x030c); break;
+ case 0x8862: OUTCHAR2(0x00ca, 0x0304); break;
+ case 0x8864: OUTCHAR2(0x00ca, 0x030c); break;
+ case 0x88a3: OUTCHAR2(0x00ea, 0x0304); break;
+ case 0x88a5: OUTCHAR2(0x00ea, 0x030c); break;
default: return 1;
}
- NEXT(2, 2) /* all decoded codepoints are pairs, above. */
+ NEXT_IN(2); /* all decoded codepoints are pairs, above. */
}
return 0;
diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c
index cbc1542..ae14677 100644
--- a/Modules/cjkcodecs/_codecs_iso2022.c
+++ b/Modules/cjkcodecs/_codecs_iso2022.c
@@ -102,8 +102,8 @@
/*-*- internal data structures -*-*/
typedef int (*iso2022_init_func)(void);
-typedef ucs4_t (*iso2022_decode_func)(const unsigned char *data);
-typedef DBCHAR (*iso2022_encode_func)(const ucs4_t *data, Py_ssize_t *length);
+typedef Py_UCS4 (*iso2022_decode_func)(const unsigned char *data);
+typedef DBCHAR (*iso2022_encode_func)(const Py_UCS4 *data, Py_ssize_t *length);
struct iso2022_designation {
unsigned char mark;
@@ -158,7 +158,7 @@ ENCODER(iso2022)
while (inleft > 0) {
const struct iso2022_designation *dsg;
DBCHAR encoded;
- ucs4_t c = **inbuf;
+ Py_UCS4 c = **inbuf;
Py_ssize_t insize;
if (c < 0x80) {
@@ -196,9 +196,9 @@ ENCODER(iso2022)
length = 2;
#if Py_UNICODE_SIZE == 2
if (length == 2) {
- ucs4_t u4in[2];
- u4in[0] = (ucs4_t)IN1;
- u4in[1] = (ucs4_t)IN2;
+ Py_UCS4 u4in[2];
+ u4in[0] = (Py_UCS4)IN1;
+ u4in[1] = (Py_UCS4)IN2;
encoded = dsg->encoder(u4in, &length);
} else
encoded = dsg->encoder(&c, &length);
@@ -277,7 +277,7 @@ ENCODER(iso2022)
WRITE2(encoded >> 8, encoded & 0xff)
NEXT_OUT(2)
}
- NEXT_IN(insize)
+ NEXT_IN(insize);
}
return 0;
@@ -376,45 +376,43 @@ iso2022processesc(const void *config, MultibyteCodec_State *state,
return 0;
}
-#define ISO8859_7_DECODE(c, assi) \
- if ((c) < 0xa0) (assi) = (c); \
- else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) \
- (assi) = (c); \
- else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \
- (0xbffffd77L & (1L << ((c)-0xb4))))) \
- (assi) = 0x02d0 + (c); \
- else if ((c) == 0xa1) (assi) = 0x2018; \
- else if ((c) == 0xa2) (assi) = 0x2019; \
- else if ((c) == 0xaf) (assi) = 0x2015;
+#define ISO8859_7_DECODE(c, writer) \
+ if ((c) < 0xa0) OUTCHAR(c); \
+ else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) \
+ OUTCHAR(c); \
+ else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \
+ (0xbffffd77L & (1L << ((c)-0xb4))))) \
+ OUTCHAR(0x02d0 + (c)); \
+ else if ((c) == 0xa1) OUTCHAR(0x2018); \
+ else if ((c) == 0xa2) OUTCHAR(0x2019); \
+ else if ((c) == 0xaf) OUTCHAR(0x2015);
static Py_ssize_t
iso2022processg2(const void *config, MultibyteCodec_State *state,
const unsigned char **inbuf, Py_ssize_t *inleft,
- Py_UNICODE **outbuf, Py_ssize_t *outleft)
+ _PyUnicodeWriter *writer)
{
/* not written to use encoder, decoder functions because only few
* encodings use G2 designations in CJKCodecs */
if (STATE_G2 == CHARSET_ISO8859_1) {
if (IN3 < 0x80)
- OUT1(IN3 + 0x80)
+ OUTCHAR(IN3 + 0x80);
else
return 3;
}
else if (STATE_G2 == CHARSET_ISO8859_7) {
- ISO8859_7_DECODE(IN3 ^ 0x80, **outbuf)
+ ISO8859_7_DECODE(IN3 ^ 0x80, writer)
else return 3;
}
else if (STATE_G2 == CHARSET_ASCII) {
if (IN3 & 0x80) return 3;
- else **outbuf = IN3;
+ else OUTCHAR(IN3);
}
else
return MBERR_INTERNAL;
(*inbuf) += 3;
*inleft -= 3;
- (*outbuf) += 1;
- *outleft -= 1;
return 0;
}
@@ -429,8 +427,8 @@ DECODER(iso2022)
if (STATE_GETFLAG(F_ESCTHROUGHOUT)) {
/* ESC throughout mode:
* for non-iso2022 escape sequences */
- WRITE1(c) /* assume as ISO-8859-1 */
- NEXT(1, 1)
+ OUTCHAR(c); /* assume as ISO-8859-1 */
+ NEXT_IN(1);
if (IS_ESCEND(c)) {
STATE_CLEARFLAG(F_ESCTHROUGHOUT)
}
@@ -449,32 +447,32 @@ DECODER(iso2022)
else if (CONFIG_ISSET(USE_G2) && IN2 == 'N') {/* SS2 */
REQUIRE_INBUF(3)
err = iso2022processg2(config, state,
- inbuf, &inleft, outbuf, &outleft);
+ inbuf, &inleft, writer);
if (err != 0)
return err;
}
else {
- WRITE1(ESC)
+ OUTCHAR(ESC);
STATE_SETFLAG(F_ESCTHROUGHOUT)
- NEXT(1, 1)
+ NEXT_IN(1);
}
break;
case SI:
if (CONFIG_ISSET(NO_SHIFT))
goto bypass;
STATE_CLEARFLAG(F_SHIFTED)
- NEXT_IN(1)
+ NEXT_IN(1);
break;
case SO:
if (CONFIG_ISSET(NO_SHIFT))
goto bypass;
STATE_SETFLAG(F_SHIFTED)
- NEXT_IN(1)
+ NEXT_IN(1);
break;
case LF:
STATE_CLEARFLAG(F_SHIFTED)
- WRITE1(LF)
- NEXT(1, 1)
+ OUTCHAR(LF);
+ NEXT_IN(1);
break;
default:
if (c < 0x20) /* C0 */
@@ -484,7 +482,7 @@ DECODER(iso2022)
else {
const struct iso2022_designation *dsg;
unsigned char charset;
- ucs4_t decoded;
+ Py_UCS4 decoded;
if (STATE_GETFLAG(F_SHIFTED))
charset = STATE_G1;
@@ -492,8 +490,8 @@ DECODER(iso2022)
charset = STATE_G0;
if (charset == CHARSET_ASCII) {
-bypass: WRITE1(c)
- NEXT(1, 1)
+bypass: OUTCHAR(c);
+ NEXT_IN(1);
break;
}
@@ -518,17 +516,15 @@ bypass: WRITE1(c)
return dsg->width;
if (decoded < 0x10000) {
- WRITE1(decoded)
- NEXT_OUT(1)
+ OUTCHAR(decoded);
}
else if (decoded < 0x30000) {
- WRITEUCS4(decoded)
+ OUTCHAR(decoded);
}
else { /* JIS X 0213 pairs */
- WRITE2(decoded >> 16, decoded & 0xffff)
- NEXT_OUT(2)
+ OUTCHAR2(decoded >> 16, decoded & 0xffff);
}
- NEXT_IN(dsg->width)
+ NEXT_IN(dsg->width);
}
break;
}
@@ -577,18 +573,18 @@ ksx1001_init(void)
return 0;
}
-static ucs4_t
+static Py_UCS4
ksx1001_decoder(const unsigned char *data)
{
- ucs4_t u;
- TRYMAP_DEC(ksx1001, u, data[0], data[1])
+ Py_UCS4 u;
+ TRYMAP_DEC_CHAR(ksx1001, u, data[0], data[1])
return u;
else
return MAP_UNMAPPABLE;
}
static DBCHAR
-ksx1001_encoder(const ucs4_t *data, Py_ssize_t *length)
+ksx1001_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
assert(*length == 1);
@@ -613,20 +609,20 @@ jisx0208_init(void)
return 0;
}
-static ucs4_t
+static Py_UCS4
jisx0208_decoder(const unsigned char *data)
{
- ucs4_t u;
+ Py_UCS4 u;
if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
return 0xff3c;
- else TRYMAP_DEC(jisx0208, u, data[0], data[1])
+ else TRYMAP_DEC_CHAR(jisx0208, u, data[0], data[1])
return u;
else
return MAP_UNMAPPABLE;
}
static DBCHAR
-jisx0208_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0208_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
assert(*length == 1);
@@ -654,18 +650,18 @@ jisx0212_init(void)
return 0;
}
-static ucs4_t
+static Py_UCS4
jisx0212_decoder(const unsigned char *data)
{
- ucs4_t u;
- TRYMAP_DEC(jisx0212, u, data[0], data[1])
+ Py_UCS4 u;
+ TRYMAP_DEC_CHAR(jisx0212, u, data[0], data[1])
return u;
else
return MAP_UNMAPPABLE;
}
static DBCHAR
-jisx0212_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0212_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
assert(*length == 1);
@@ -705,30 +701,30 @@ jisx0213_init(void)
}
#define config ((void *)2000)
-static ucs4_t
+static Py_UCS4
jisx0213_2000_1_decoder(const unsigned char *data)
{
- ucs4_t u;
+ Py_UCS4 u;
EMULATE_JISX0213_2000_DECODE_PLANE1(u, data[0], data[1])
else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
return 0xff3c;
- else TRYMAP_DEC(jisx0208, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])
+ else TRYMAP_DEC_CHAR(jisx0208, u, data[0], data[1]);
+ else TRYMAP_DEC_CHAR(jisx0213_1_bmp, u, data[0], data[1]);
+ else TRYMAP_DEC_CHAR(jisx0213_1_emp, u, data[0], data[1])
u |= 0x20000;
- else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);
+ else TRYMAP_DEC_CHAR(jisx0213_pair, u, data[0], data[1]);
else
return MAP_UNMAPPABLE;
return u;
}
-static ucs4_t
+static Py_UCS4
jisx0213_2000_2_decoder(const unsigned char *data)
{
- ucs4_t u;
- EMULATE_JISX0213_2000_DECODE_PLANE2(u, data[0], data[1])
- TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])
+ Py_UCS4 u;
+ EMULATE_JISX0213_2000_DECODE_PLANE2_CHAR(u, data[0], data[1])
+ TRYMAP_DEC_CHAR(jisx0213_2_bmp, u, data[0], data[1]);
+ else TRYMAP_DEC_CHAR(jisx0213_2_emp, u, data[0], data[1])
u |= 0x20000;
else
return MAP_UNMAPPABLE;
@@ -736,28 +732,28 @@ jisx0213_2000_2_decoder(const unsigned char *data)
}
#undef config
-static ucs4_t
+static Py_UCS4
jisx0213_2004_1_decoder(const unsigned char *data)
{
- ucs4_t u;
+ Py_UCS4 u;
if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
return 0xff3c;
- else TRYMAP_DEC(jisx0208, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])
+ else TRYMAP_DEC_CHAR(jisx0208, u, data[0], data[1]);
+ else TRYMAP_DEC_CHAR(jisx0213_1_bmp, u, data[0], data[1]);
+ else TRYMAP_DEC_CHAR(jisx0213_1_emp, u, data[0], data[1])
u |= 0x20000;
- else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);
+ else TRYMAP_DEC_CHAR(jisx0213_pair, u, data[0], data[1]);
else
return MAP_UNMAPPABLE;
return u;
}
-static ucs4_t
+static Py_UCS4
jisx0213_2004_2_decoder(const unsigned char *data)
{
- ucs4_t u;
- TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])
+ Py_UCS4 u;
+ TRYMAP_DEC_CHAR(jisx0213_2_bmp, u, data[0], data[1]);
+ else TRYMAP_DEC_CHAR(jisx0213_2_emp, u, data[0], data[1])
u |= 0x20000;
else
return MAP_UNMAPPABLE;
@@ -765,7 +761,7 @@ jisx0213_2004_2_decoder(const unsigned char *data)
}
static DBCHAR
-jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config)
+jisx0213_encoder(const Py_UCS4 *data, Py_ssize_t *length, void *config)
{
DBCHAR coded;
@@ -819,7 +815,7 @@ jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config)
}
static DBCHAR
-jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0213_2000_1_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
@@ -831,7 +827,7 @@ jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length)
}
static DBCHAR
-jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
+jisx0213_2000_1_encoder_paironly(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
Py_ssize_t ilength = *length;
@@ -854,7 +850,7 @@ jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
}
static DBCHAR
-jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0213_2000_2_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
@@ -866,7 +862,7 @@ jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length)
}
static DBCHAR
-jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0213_2004_1_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded = jisx0213_encoder(data, length, NULL);
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
@@ -878,7 +874,7 @@ jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length)
}
static DBCHAR
-jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
+jisx0213_2004_1_encoder_paironly(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
Py_ssize_t ilength = *length;
@@ -901,7 +897,7 @@ jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
}
static DBCHAR
-jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0213_2004_2_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded = jisx0213_encoder(data, length, NULL);
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
@@ -912,17 +908,17 @@ jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length)
return MAP_UNMAPPABLE;
}
-static ucs4_t
+static Py_UCS4
jisx0201_r_decoder(const unsigned char *data)
{
- ucs4_t u;
- JISX0201_R_DECODE(*data, u)
+ Py_UCS4 u;
+ JISX0201_R_DECODE_CHAR(*data, u)
else return MAP_UNMAPPABLE;
return u;
}
static DBCHAR
-jisx0201_r_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0201_r_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
JISX0201_R_ENCODE(*data, coded)
@@ -930,17 +926,17 @@ jisx0201_r_encoder(const ucs4_t *data, Py_ssize_t *length)
return coded;
}
-static ucs4_t
+static Py_UCS4
jisx0201_k_decoder(const unsigned char *data)
{
- ucs4_t u;
- JISX0201_K_DECODE(*data ^ 0x80, u)
+ Py_UCS4 u;
+ JISX0201_K_DECODE_CHAR(*data ^ 0x80, u)
else return MAP_UNMAPPABLE;
return u;
}
static DBCHAR
-jisx0201_k_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0201_k_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
JISX0201_K_ENCODE(*data, coded)
@@ -961,18 +957,18 @@ gb2312_init(void)
return 0;
}
-static ucs4_t
+static Py_UCS4
gb2312_decoder(const unsigned char *data)
{
- ucs4_t u;
- TRYMAP_DEC(gb2312, u, data[0], data[1])
+ Py_UCS4 u;
+ TRYMAP_DEC_CHAR(gb2312, u, data[0], data[1])
return u;
else
return MAP_UNMAPPABLE;
}
static DBCHAR
-gb2312_encoder(const ucs4_t *data, Py_ssize_t *length)
+gb2312_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
assert(*length == 1);
@@ -986,14 +982,14 @@ gb2312_encoder(const ucs4_t *data, Py_ssize_t *length)
}
-static ucs4_t
+static Py_UCS4
dummy_decoder(const unsigned char *data)
{
return MAP_UNMAPPABLE;
}
static DBCHAR
-dummy_encoder(const ucs4_t *data, Py_ssize_t *length)
+dummy_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
return MAP_UNMAPPABLE;
}
diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c
index a500696..8bfb813 100644
--- a/Modules/cjkcodecs/_codecs_jp.c
+++ b/Modules/cjkcodecs/_codecs_jp.c
@@ -20,7 +20,7 @@
ENCODER(cp932)
{
while (inleft > 0) {
- Py_UNICODE c = IN1;
+ Py_UCS4 c = IN1;
DBCHAR code;
unsigned char c1, c2;
@@ -66,8 +66,8 @@ ENCODER(cp932)
}
else if (c >= 0xe000 && c < 0xe758) {
/* User-defined area */
- c1 = (Py_UNICODE)(c - 0xe000) / 188;
- c2 = (Py_UNICODE)(c - 0xe000) % 188;
+ c1 = (Py_UCS4)(c - 0xe000) / 188;
+ c2 = (Py_UCS4)(c - 0xe000) % 188;
OUT1(c1 + 0xf0)
OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
}
@@ -85,31 +85,30 @@ DECODER(cp932)
while (inleft > 0) {
unsigned char c = IN1, c2;
- REQUIRE_OUTBUF(1)
if (c <= 0x80) {
- OUT1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
continue;
}
else if (c >= 0xa0 && c <= 0xdf) {
if (c == 0xa0)
- OUT1(0xf8f0) /* half-width katakana */
+ OUTCHAR(0xf8f0); /* half-width katakana */
else
- OUT1(0xfec0 + c)
- NEXT(1, 1)
+ OUTCHAR(0xfec0 + c);
+ NEXT_IN(1);
continue;
}
else if (c >= 0xfd/* && c <= 0xff*/) {
/* Windows compatibility */
- OUT1(0xf8f1 - 0xfd + c)
- NEXT(1, 1)
+ OUTCHAR(0xf8f1 - 0xfd + c);
+ NEXT_IN(1);
continue;
}
REQUIRE_INBUF(2)
c2 = IN2;
- TRYMAP_DEC(cp932ext, **outbuf, c, c2);
+ TRYMAP_DEC(cp932ext, writer, c, c2);
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
return 1;
@@ -119,21 +118,21 @@ DECODER(cp932)
c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21);
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
- TRYMAP_DEC(jisx0208, **outbuf, c, c2);
+ TRYMAP_DEC(jisx0208, writer, c, c2);
else return 1;
}
else if (c >= 0xf0 && c <= 0xf9) {
if ((c2 >= 0x40 && c2 <= 0x7e) ||
(c2 >= 0x80 && c2 <= 0xfc))
- OUT1(0xe000 + 188 * (c - 0xf0) +
- (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41))
+ OUTCHAR(0xe000 + 188 * (c - 0xf0) +
+ (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41));
else
return 1;
}
else
return 1;
- NEXT(2, 1)
+ NEXT_IN(2);
}
return 0;
@@ -147,7 +146,7 @@ DECODER(cp932)
ENCODER(euc_jis_2004)
{
while (inleft > 0) {
- ucs4_t c = IN1;
+ Py_UCS4 c = IN1;
DBCHAR code;
Py_ssize_t insize;
@@ -235,13 +234,11 @@ DECODER(euc_jis_2004)
{
while (inleft > 0) {
unsigned char c = IN1;
- ucs4_t code;
-
- REQUIRE_OUTBUF(1)
+ Py_UCS4 code;
if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
continue;
}
@@ -252,8 +249,8 @@ DECODER(euc_jis_2004)
REQUIRE_INBUF(2)
c2 = IN2;
if (c2 >= 0xa1 && c2 <= 0xdf) {
- OUT1(0xfec0 + c2)
- NEXT(2, 1)
+ OUTCHAR(0xfec0 + c2);
+ NEXT_IN(2);
}
else
return 1;
@@ -266,16 +263,16 @@ DECODER(euc_jis_2004)
c3 = IN3 ^ 0x80;
/* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */
- EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, c2, c3)
- else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ;
- else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) {
- WRITEUCS4(EMPBASE | code)
- NEXT_IN(3)
+ EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c2, c3)
+ else TRYMAP_DEC(jisx0213_2_bmp, writer, c2, c3) ;
+ else TRYMAP_DEC_CHAR(jisx0213_2_emp, code, c2, c3) {
+ OUTCHAR(EMPBASE | code);
+ NEXT_IN(3);
continue;
}
- else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ;
+ else TRYMAP_DEC(jisx0212, writer, c2, c3) ;
else return 1;
- NEXT(3, 1)
+ NEXT_IN(3);
}
else {
unsigned char c2;
@@ -285,23 +282,23 @@ DECODER(euc_jis_2004)
c2 = IN2 ^ 0x80;
/* JIS X 0213 Plane 1 */
- EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, c, c2)
- else if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c;
- else if (c == 0x22 && c2 == 0x32) **outbuf = 0xff5e;
- else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
- else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2);
- else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) {
- WRITEUCS4(EMPBASE | code)
- NEXT_IN(2)
+ EMULATE_JISX0213_2000_DECODE_PLANE1(writer, c, c2)
+ else if (c == 0x21 && c2 == 0x40) OUTCHAR(0xff3c);
+ else if (c == 0x22 && c2 == 0x32) OUTCHAR(0xff5e);
+ else TRYMAP_DEC(jisx0208, writer, c, c2);
+ else TRYMAP_DEC(jisx0213_1_bmp, writer, c, c2);
+ else TRYMAP_DEC_CHAR(jisx0213_1_emp, code, c, c2) {
+ OUTCHAR(EMPBASE | code);
+ NEXT_IN(2);
continue;
}
- else TRYMAP_DEC(jisx0213_pair, code, c, c2) {
- WRITE2(code >> 16, code & 0xffff)
- NEXT(2, 2)
+ else TRYMAP_DEC_CHAR(jisx0213_pair, code, c, c2) {
+ OUTCHAR2(code >> 16, code & 0xffff);
+ NEXT_IN(2);
continue;
}
else return 1;
- NEXT(2, 1)
+ NEXT_IN(2);
}
}
@@ -316,7 +313,7 @@ DECODER(euc_jis_2004)
ENCODER(euc_jp)
{
while (inleft > 0) {
- Py_UNICODE c = IN1;
+ Py_UCS4 c = IN1;
DBCHAR code;
if (c < 0x80) {
@@ -369,11 +366,9 @@ DECODER(euc_jp)
while (inleft > 0) {
unsigned char c = IN1;
- REQUIRE_OUTBUF(1)
-
if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
continue;
}
@@ -384,8 +379,8 @@ DECODER(euc_jp)
REQUIRE_INBUF(2)
c2 = IN2;
if (c2 >= 0xa1 && c2 <= 0xdf) {
- OUT1(0xfec0 + c2)
- NEXT(2, 1)
+ OUTCHAR(0xfec0 + c2);
+ NEXT_IN(2);
}
else
return 1;
@@ -397,8 +392,8 @@ DECODER(euc_jp)
c2 = IN2;
c3 = IN3;
/* JIS X 0212 */
- TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80) {
- NEXT(3, 1)
+ TRYMAP_DEC(jisx0212, writer, c2 ^ 0x80, c3 ^ 0x80) {
+ NEXT_IN(3);
}
else
return 1;
@@ -412,13 +407,13 @@ DECODER(euc_jp)
#ifndef STRICT_BUILD
if (c == 0xa1 && c2 == 0xc0)
/* FULL-WIDTH REVERSE SOLIDUS */
- **outbuf = 0xff3c;
+ OUTCHAR(0xff3c);
else
#endif
- TRYMAP_DEC(jisx0208, **outbuf,
+ TRYMAP_DEC(jisx0208, writer,
c ^ 0x80, c2 ^ 0x80) ;
else return 1;
- NEXT(2, 1)
+ NEXT_IN(2);
}
}
@@ -433,7 +428,7 @@ DECODER(euc_jp)
ENCODER(shift_jis)
{
while (inleft > 0) {
- Py_UNICODE c = IN1;
+ Py_UCS4 c = IN1;
DBCHAR code;
unsigned char c1, c2;
@@ -488,14 +483,12 @@ DECODER(shift_jis)
while (inleft > 0) {
unsigned char c = IN1;
- REQUIRE_OUTBUF(1)
-
#ifdef STRICT_BUILD
- JISX0201_R_DECODE(c, **outbuf)
+ JISX0201_R_DECODE(c, writer)
#else
- if (c < 0x80) **outbuf = c;
+ if (c < 0x80) OUTCHAR(c);
#endif
- else JISX0201_K_DECODE(c, **outbuf)
+ else JISX0201_K_DECODE(c, writer)
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
unsigned char c1, c2;
@@ -512,13 +505,13 @@ DECODER(shift_jis)
#ifndef STRICT_BUILD
if (c1 == 0x21 && c2 == 0x40) {
/* FULL-WIDTH REVERSE SOLIDUS */
- OUT1(0xff3c)
- NEXT(2, 1)
+ OUTCHAR(0xff3c);
+ NEXT_IN(2);
continue;
}
#endif
- TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
- NEXT(2, 1)
+ TRYMAP_DEC(jisx0208, writer, c1, c2) {
+ NEXT_IN(2);
continue;
}
else
@@ -527,7 +520,7 @@ DECODER(shift_jis)
else
return 1;
- NEXT(1, 1) /* JIS X 0201 */
+ NEXT_IN(1); /* JIS X 0201 */
}
return 0;
@@ -541,7 +534,7 @@ DECODER(shift_jis)
ENCODER(shift_jis_2004)
{
while (inleft > 0) {
- ucs4_t c = IN1;
+ Py_UCS4 c = IN1;
DBCHAR code = NOCHAR;
int c1, c2;
Py_ssize_t insize;
@@ -636,11 +629,10 @@ DECODER(shift_jis_2004)
while (inleft > 0) {
unsigned char c = IN1;
- REQUIRE_OUTBUF(1)
- JISX0201_DECODE(c, **outbuf)
+ JISX0201_DECODE(c, writer)
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){
unsigned char c1, c2;
- ucs4_t code;
+ Py_UCS4 code;
REQUIRE_INBUF(2)
c2 = IN2;
@@ -654,50 +646,47 @@ DECODER(shift_jis_2004)
if (c1 < 0x5e) { /* Plane 1 */
c1 += 0x21;
- EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf,
+ EMULATE_JISX0213_2000_DECODE_PLANE1(writer,
c1, c2)
- else TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
- NEXT_OUT(1)
+ else TRYMAP_DEC(jisx0208, writer, c1, c2) {
}
- else TRYMAP_DEC(jisx0213_1_bmp, **outbuf,
+ else TRYMAP_DEC(jisx0213_1_bmp, writer,
c1, c2) {
- NEXT_OUT(1)
}
- else TRYMAP_DEC(jisx0213_1_emp, code, c1, c2) {
- WRITEUCS4(EMPBASE | code)
+ else TRYMAP_DEC_CHAR(jisx0213_1_emp, code, c1, c2) {
+ OUTCHAR(EMPBASE | code);
}
- else TRYMAP_DEC(jisx0213_pair, code, c1, c2) {
- WRITE2(code >> 16, code & 0xffff)
- NEXT_OUT(2)
+ else TRYMAP_DEC_CHAR(jisx0213_pair, code, c1, c2) {
+ OUTCHAR2(code >> 16, code & 0xffff);
}
else
return 1;
- NEXT_IN(2)
+ NEXT_IN(2);
}
else { /* Plane 2 */
if (c1 >= 0x67) c1 += 0x07;
else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37;
else c1 -= 0x3d;
- EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf,
+ EMULATE_JISX0213_2000_DECODE_PLANE2(writer,
c1, c2)
- else TRYMAP_DEC(jisx0213_2_bmp, **outbuf,
- c1, c2) ;
- else TRYMAP_DEC(jisx0213_2_emp, code, c1, c2) {
- WRITEUCS4(EMPBASE | code)
- NEXT_IN(2)
+ else TRYMAP_DEC(jisx0213_2_bmp, writer,
+ c1, c2) {
+ } else TRYMAP_DEC_CHAR(jisx0213_2_emp, code, c1, c2) {
+ OUTCHAR(EMPBASE | code);
+ NEXT_IN(2);
continue;
}
else
return 1;
- NEXT(2, 1)
+ NEXT_IN(2);
}
continue;
}
else
return 1;
- NEXT(1, 1) /* JIS X 0201 */
+ NEXT_IN(1); /* JIS X 0201 */
}
return 0;
diff --git a/Modules/cjkcodecs/_codecs_kr.c b/Modules/cjkcodecs/_codecs_kr.c
index f5697dd..ca63ee5 100644
--- a/Modules/cjkcodecs/_codecs_kr.c
+++ b/Modules/cjkcodecs/_codecs_kr.c
@@ -34,7 +34,7 @@ static const unsigned char u2cgk_jongseong[28] = {
ENCODER(euc_kr)
{
while (inleft > 0) {
- Py_UNICODE c = IN1;
+ Py_UCS4 c = IN1;
DBCHAR code;
if (c < 0x80) {
@@ -104,11 +104,9 @@ DECODER(euc_kr)
while (inleft > 0) {
unsigned char c = IN1;
- REQUIRE_OUTBUF(1)
-
if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
continue;
}
@@ -145,11 +143,11 @@ DECODER(euc_kr)
if (cho == NONE || jung == NONE || jong == NONE)
return 1;
- OUT1(0xac00 + cho*588 + jung*28 + jong);
- NEXT(8, 1)
+ OUTCHAR(0xac00 + cho*588 + jung*28 + jong);
+ NEXT_IN(8);
}
- else TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
- NEXT(2, 1)
+ else TRYMAP_DEC(ksx1001, writer, c ^ 0x80, IN2 ^ 0x80) {
+ NEXT_IN(2);
}
else
return 1;
@@ -167,7 +165,7 @@ DECODER(euc_kr)
ENCODER(cp949)
{
while (inleft > 0) {
- Py_UNICODE c = IN1;
+ Py_UCS4 c = IN1;
DBCHAR code;
if (c < 0x80) {
@@ -197,20 +195,18 @@ DECODER(cp949)
while (inleft > 0) {
unsigned char c = IN1;
- REQUIRE_OUTBUF(1)
-
if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
continue;
}
REQUIRE_INBUF(2)
- TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);
- else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);
+ TRYMAP_DEC(ksx1001, writer, c ^ 0x80, IN2 ^ 0x80);
+ else TRYMAP_DEC(cp949ext, writer, c, IN2);
else return 1;
- NEXT(2, 1)
+ NEXT_IN(2);
}
return 0;
@@ -251,7 +247,7 @@ static const DBCHAR u2johabjamo[] = {
ENCODER(johab)
{
while (inleft > 0) {
- Py_UNICODE c = IN1;
+ Py_UCS4 c = IN1;
DBCHAR code;
if (c < 0x80) {
@@ -350,11 +346,9 @@ DECODER(johab)
while (inleft > 0) {
unsigned char c = IN1, c2;
- REQUIRE_OUTBUF(1)
-
if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
continue;
}
@@ -381,33 +375,33 @@ DECODER(johab)
if (i_cho == FILL) {
if (i_jung == FILL) {
if (i_jong == FILL)
- OUT1(0x3000)
+ OUTCHAR(0x3000);
else
- OUT1(0x3100 |
- johabjamo_jongseong[c_jong])
+ OUTCHAR(0x3100 |
+ johabjamo_jongseong[c_jong]);
}
else {
if (i_jong == FILL)
- OUT1(0x3100 |
- johabjamo_jungseong[c_jung])
+ OUTCHAR(0x3100 |
+ johabjamo_jungseong[c_jung]);
else
return 1;
}
} else {
if (i_jung == FILL) {
if (i_jong == FILL)
- OUT1(0x3100 |
- johabjamo_choseong[c_cho])
+ OUTCHAR(0x3100 |
+ johabjamo_choseong[c_cho]);
else
return 1;
}
else
- OUT1(0xac00 +
- i_cho * 588 +
- i_jung * 28 +
- (i_jong == FILL ? 0 : i_jong))
+ OUTCHAR(0xac00 +
+ i_cho * 588 +
+ i_jung * 28 +
+ (i_jong == FILL ? 0 : i_jong));
}
- NEXT(2, 1)
+ NEXT_IN(2);
} else {
/* KS X 1001 except hangul jamos and syllables */
if (c == 0xdf || c > 0xf9 ||
@@ -424,9 +418,9 @@ DECODER(johab)
t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
- TRYMAP_DEC(ksx1001, **outbuf, t1, t2);
+ TRYMAP_DEC(ksx1001, writer, t1, t2);
else return 1;
- NEXT(2, 1)
+ NEXT_IN(2);
}
}
}
diff --git a/Modules/cjkcodecs/_codecs_tw.c b/Modules/cjkcodecs/_codecs_tw.c
index 916298d..a91c01b 100644
--- a/Modules/cjkcodecs/_codecs_tw.c
+++ b/Modules/cjkcodecs/_codecs_tw.c
@@ -14,7 +14,7 @@
ENCODER(big5)
{
while (inleft > 0) {
- Py_UNICODE c = **inbuf;
+ Py_UCS4 c = **inbuf;
DBCHAR code;
if (c < 0x80) {
@@ -43,17 +43,15 @@ DECODER(big5)
while (inleft > 0) {
unsigned char c = IN1;
- REQUIRE_OUTBUF(1)
-
if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
continue;
}
REQUIRE_INBUF(2)
- TRYMAP_DEC(big5, **outbuf, c, IN2) {
- NEXT(2, 1)
+ TRYMAP_DEC(big5, writer, c, IN2) {
+ NEXT_IN(2);
}
else return 1;
}
@@ -69,7 +67,7 @@ DECODER(big5)
ENCODER(cp950)
{
while (inleft > 0) {
- Py_UNICODE c = IN1;
+ Py_UCS4 c = IN1;
DBCHAR code;
if (c < 0x80) {
@@ -97,21 +95,19 @@ DECODER(cp950)
while (inleft > 0) {
unsigned char c = IN1;
- REQUIRE_OUTBUF(1)
-
if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
+ OUTCHAR(c);
+ NEXT_IN(1);
continue;
}
REQUIRE_INBUF(2)
- TRYMAP_DEC(cp950ext, **outbuf, c, IN2);
- else TRYMAP_DEC(big5, **outbuf, c, IN2);
+ TRYMAP_DEC(cp950ext, writer, c, IN2);
+ else TRYMAP_DEC(big5, writer, c, IN2);
else return 1;
- NEXT(2, 1)
+ NEXT_IN(2);
}
return 0;
diff --git a/Modules/cjkcodecs/alg_jisx0201.h b/Modules/cjkcodecs/alg_jisx0201.h
index 0bc7db5..98c63e6 100644
--- a/Modules/cjkcodecs/alg_jisx0201.h
+++ b/Modules/cjkcodecs/alg_jisx0201.h
@@ -10,15 +10,24 @@
JISX0201_R_ENCODE(c, assi) \
else JISX0201_K_ENCODE(c, assi)
-#define JISX0201_R_DECODE(c, assi) \
+#define JISX0201_R_DECODE_CHAR(c, assi) \
if ((c) < 0x5c) (assi) = (c); \
else if ((c) == 0x5c) (assi) = 0x00a5; \
else if ((c) < 0x7e) (assi) = (c); \
else if ((c) == 0x7e) (assi) = 0x203e; \
else if ((c) == 0x7f) (assi) = 0x7f;
-#define JISX0201_K_DECODE(c, assi) \
+#define JISX0201_R_DECODE(c, writer) \
+ if ((c) < 0x5c) OUTCHAR(c); \
+ else if ((c) == 0x5c) OUTCHAR(0x00a5); \
+ else if ((c) < 0x7e) OUTCHAR(c); \
+ else if ((c) == 0x7e) OUTCHAR(0x203e); \
+ else if ((c) == 0x7f) OUTCHAR(0x7f);
+#define JISX0201_K_DECODE(c, writer) \
if ((c) >= 0xa1 && (c) <= 0xdf) \
- (assi) = 0xfec0 + (c);
-#define JISX0201_DECODE(c, assi) \
- JISX0201_R_DECODE(c, assi) \
- else JISX0201_K_DECODE(c, assi)
+ OUTCHAR(0xfec0 + (c));
+#define JISX0201_K_DECODE_CHAR(c, assi) \
+ if ((c) >= 0xa1 && (c) <= 0xdf) \
+ (assi) = 0xfec0 + (c);
+#define JISX0201_DECODE(c, writer) \
+ JISX0201_R_DECODE(c, writer) \
+ else JISX0201_K_DECODE(c, writer)
diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h
index 3a00d41..65b5c07 100644
--- a/Modules/cjkcodecs/cjkcodecs.h
+++ b/Modules/cjkcodecs/cjkcodecs.h
@@ -33,7 +33,7 @@ struct dbcs_index {
typedef struct dbcs_index decode_map;
struct widedbcs_index {
- const ucs4_t *map;
+ const Py_UCS4 *map;
unsigned char bottom, top;
};
typedef struct widedbcs_index widedecode_map;
@@ -56,7 +56,7 @@ struct dbcs_map {
};
struct pair_encodemap {
- ucs4_t uniseq;
+ Py_UCS4 uniseq;
DBCHAR code;
};
@@ -86,7 +86,7 @@ static const struct dbcs_map *mapping_list;
static Py_ssize_t encoding##_decode( \
MultibyteCodec_State *state, const void *config, \
const unsigned char **inbuf, Py_ssize_t inleft, \
- Py_UNICODE **outbuf, Py_ssize_t outleft)
+ _PyUnicodeWriter *writer)
#define DECODER_RESET(encoding) \
static Py_ssize_t encoding##_decode_reset( \
MultibyteCodec_State *state, const void *config)
@@ -101,13 +101,15 @@ static const struct dbcs_map *mapping_list;
#endif
#define NEXT_IN(i) \
- (*inbuf) += (i); \
- (inleft) -= (i);
+ do { \
+ (*inbuf) += (i); \
+ (inleft) -= (i); \
+ } while (0)
#define NEXT_OUT(o) \
(*outbuf) += (o); \
(outleft) -= (o);
#define NEXT(i, o) \
- NEXT_IN(i) NEXT_OUT(o)
+ NEXT_IN(i); NEXT_OUT(o)
#define REQUIRE_INBUF(n) \
if (inleft < (n)) \
@@ -121,6 +123,23 @@ static const struct dbcs_map *mapping_list;
#define IN3 ((*inbuf)[2])
#define IN4 ((*inbuf)[3])
+#define OUTCHAR(c) /* append one character to `writer` (taken from the expansion site) */ \
+ do { \
+ if (_PyUnicodeWriter_WriteChar(writer, (c)) < 0) \
+ return MBERR_TOOSMALL; /* write failed: returns from the enclosing function */ \
+ } while (0)
+
+#define OUTCHAR2(c1, c2) /* append c1 then c2 to `writer` (taken from the expansion site) */ \
+ do { \
+ Py_UCS4 _c1 = (c1); /* capture each argument so it is evaluated exactly once */ \
+ Py_UCS4 _c2 = (c2); \
+ if (_PyUnicodeWriter_Prepare(writer, 2, Py_MAX(_c1, _c2)) < 0) \
+ return MBERR_TOOSMALL; /* reservation failed: returns from the enclosing function */ \
+ PyUnicode_WRITE(writer->kind, writer->data, writer->pos, _c1); \
+ PyUnicode_WRITE(writer->kind, writer->data, writer->pos + 1, _c2); \
+ writer->pos += 2; /* both slots were reserved by the Prepare call above */ \
+ } while (0)
+
#define OUT1(c) ((*outbuf)[0]) = (c);
#define OUT2(c) ((*outbuf)[1]) = (c);
#define OUT3(c) ((*outbuf)[2]) = (c);
@@ -145,19 +164,6 @@ static const struct dbcs_map *mapping_list;
(*outbuf)[2] = (c3); \
(*outbuf)[3] = (c4);
-#if Py_UNICODE_SIZE == 2
-# define WRITEUCS4(c) \
- REQUIRE_OUTBUF(2) \
- (*outbuf)[0] = Py_UNICODE_HIGH_SURROGATE(c); \
- (*outbuf)[1] = Py_UNICODE_LOW_SURROGATE(c); \
- NEXT_OUT(2)
-#else
-# define WRITEUCS4(c) \
- REQUIRE_OUTBUF(1) \
- **outbuf = (Py_UNICODE)(c); \
- NEXT_OUT(1)
-#endif
-
#define _TRYMAP_ENC(m, assi, val) \
((m)->map != NULL && (val) >= (m)->bottom && \
(val)<= (m)->top && ((assi) = (m)->map[(val) - \
@@ -167,24 +173,41 @@ static const struct dbcs_map *mapping_list;
#define TRYMAP_ENC(charset, assi, uni) \
if TRYMAP_ENC_COND(charset, assi, uni)
-#define _TRYMAP_DEC(m, assi, val) \
- ((m)->map != NULL && (val) >= (m)->bottom && \
- (val)<= (m)->top && ((assi) = (m)->map[(val) - \
- (m)->bottom]) != UNIINV)
-#define TRYMAP_DEC(charset, assi, c1, c2) \
- if _TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
+Py_LOCAL_INLINE(int)
+_TRYMAP_DEC_WRITE(_PyUnicodeWriter *writer, Py_UCS4 c) /* append c to writer unless it is UNIINV */
+{
+ if (c == UNIINV || _PyUnicodeWriter_WriteChar(writer, c) < 0)
+ return UNIINV; /* c was UNIINV, or the write failed; callers see the same value for both */
+ else
+ return c; /* c was appended to writer */
+}
-#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) \
- ((m)->map != NULL && (val) >= (m)->bottom && \
- (val)<= (m)->top && \
- ((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \
+#define _TRYMAP_DEC(m, writer, val) \
+ ((m)->map != NULL && \
+ (val) >= (m)->bottom && \
+ (val)<= (m)->top && \
+ _TRYMAP_DEC_WRITE(writer, (m)->map[(val) - (m)->bottom]) != UNIINV)
+#define _TRYMAP_DEC_CHAR(m, assi, val) \
+ ((m)->map != NULL && \
+ (val) >= (m)->bottom && \
+ (val)<= (m)->top && \
+ ((assi) = (m)->map[(val) - (m)->bottom]) != UNIINV)
+#define TRYMAP_DEC(charset, writer, c1, c2) \
+ if _TRYMAP_DEC(&charset##_decmap[c1], writer, c2)
+#define TRYMAP_DEC_CHAR(charset, assi, c1, c2) \
+ if _TRYMAP_DEC_CHAR(&charset##_decmap[c1], assi, c2)
+
+#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) \
+ ((m)->map != NULL && (val) >= (m)->bottom && \
+ (val)<= (m)->top && \
+ ((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \
(((asshi) = (m)->map[((val) - (m)->bottom)*3 + 1]), 1) && \
(((asslo) = (m)->map[((val) - (m)->bottom)*3 + 2]), 1))
#define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) \
if _TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8], \
assplane, asshi, asslo, (uni) & 0xff)
-#define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2) \
- if _TRYMAP_DEC(&charset##_decmap[plane][c1], assi, c2)
+#define TRYMAP_DEC_MPLANE(charset, writer, plane, c1, c2) \
+ if _TRYMAP_DEC(&charset##_decmap[plane][c1], writer, c2)
#if Py_UNICODE_SIZE == 2
#define DECODE_SURROGATE(c) \
@@ -323,7 +346,7 @@ find_pairencmap(ucs2_t body, ucs2_t modifier,
const struct pair_encodemap *haystack, int haystacksize)
{
int pos, min, max;
- ucs4_t value = body << 16 | modifier;
+ Py_UCS4 value = body << 16 | modifier;
min = 0;
max = haystacksize;
diff --git a/Modules/cjkcodecs/emu_jisx0213_2000.h b/Modules/cjkcodecs/emu_jisx0213_2000.h
index 4227fb2..877337e 100644
--- a/Modules/cjkcodecs/emu_jisx0213_2000.h
+++ b/Modules/cjkcodecs/emu_jisx0213_2000.h
@@ -38,6 +38,9 @@
((c1) == 0x7E && (c2) == 0x7E))) \
return EMULATE_JISX0213_2000_DECODE_INVALID;
-#define EMULATE_JISX0213_2000_DECODE_PLANE2(assi, c1, c2) \
+#define EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c1, c2) \
+ if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B) \
+ OUTCHAR(0x9B1D);
+#define EMULATE_JISX0213_2000_DECODE_PLANE2_CHAR(assi, c1, c2) \
if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B) \
(assi) = 0x9B1D;
diff --git a/Modules/cjkcodecs/mappings_cn.h b/Modules/cjkcodecs/mappings_cn.h
index a6dcebf..1f8c299 100644
--- a/Modules/cjkcodecs/mappings_cn.h
+++ b/Modules/cjkcodecs/mappings_cn.h
@@ -4049,7 +4049,7 @@ __gb18030ext_encmap+3126,0,100},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
static const struct _gb18030_to_unibmp_ranges {
- Py_UNICODE first, last;
+ Py_UCS4 first, last;
DBCHAR base;
} gb18030_to_unibmp_ranges[] = {
{128,163,0},{165,166,36},{169,175,38},{178,182,45},{184,214,50},{216,223,81},{
diff --git a/Modules/cjkcodecs/mappings_jisx0213_pair.h b/Modules/cjkcodecs/mappings_jisx0213_pair.h
index eda8e9e..729e4bc 100644
--- a/Modules/cjkcodecs/mappings_jisx0213_pair.h
+++ b/Modules/cjkcodecs/mappings_jisx0213_pair.h
@@ -3,7 +3,7 @@
static const struct widedbcs_index *jisx0213_pair_decmap;
static const struct pair_encodemap *jisx0213_pair_encmap;
#else
-static const ucs4_t __jisx0213_pair_decmap[49] = {
+static const Py_UCS4 __jisx0213_pair_decmap[49] = {
810234010,810365082,810496154,810627226,810758298,816525466,816656538,
816787610,816918682,817049754,817574042,818163866,818426010,838283418,
15074048,U,U,U,39060224,39060225,42730240,42730241,39387904,39387905,39453440,
diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c
index c032cdb..7e16b63 100644
--- a/Modules/cjkcodecs/multibytecodec.c
+++ b/Modules/cjkcodecs/multibytecodec.c
@@ -17,8 +17,8 @@ typedef struct {
typedef struct {
const unsigned char *inbuf, *inbuf_top, *inbuf_end;
- Py_UNICODE *outbuf, *outbuf_end;
- PyObject *excobj, *outobj;
+ PyObject *excobj;
+ _PyUnicodeWriter writer;
} MultibyteDecodeBuffer;
PyDoc_STRVAR(MultibyteCodec_Encode__doc__,
@@ -197,29 +197,6 @@ expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
goto errorexit; \
}
-static int
-expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize)
-{
- Py_ssize_t orgpos, orgsize;
-
- orgpos = (Py_ssize_t)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj));
- orgsize = PyUnicode_GET_SIZE(buf->outobj);
- if (PyUnicode_Resize(&buf->outobj, orgsize + (
- esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
- return -1;
-
- buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos;
- buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj)
- + PyUnicode_GET_SIZE(buf->outobj);
-
- return 0;
-}
-#define REQUIRE_DECODEBUFFER(buf, s) { \
- if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \
- if (expand_decodebuffer(buf, s) == -1) \
- goto errorexit; \
-}
-
/**
* MultibyteCodec object
@@ -374,7 +351,7 @@ multibytecodec_decerror(MultibyteCodec *codec,
PyObject *errors, Py_ssize_t e)
{
PyObject *retobj = NULL, *retuni = NULL;
- Py_ssize_t retunisize, newpos;
+ Py_ssize_t newpos;
const char *reason;
Py_ssize_t esize, start, end;
@@ -385,7 +362,6 @@ multibytecodec_decerror(MultibyteCodec *codec,
else {
switch (e) {
case MBERR_TOOSMALL:
- REQUIRE_DECODEBUFFER(buf, -1);
return 0; /* retry it */
case MBERR_TOOFEW:
reason = "incomplete multibyte sequence";
@@ -403,8 +379,9 @@ multibytecodec_decerror(MultibyteCodec *codec,
}
if (errors == ERROR_REPLACE) {
- REQUIRE_DECODEBUFFER(buf, 1);
- *buf->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER;
+ if (_PyUnicodeWriter_WriteChar(&buf->writer,
+ Py_UNICODE_REPLACEMENT_CHARACTER) < 0)
+ goto errorexit;
}
if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
buf->inbuf += esize;
@@ -447,15 +424,8 @@ multibytecodec_decerror(MultibyteCodec *codec,
goto errorexit;
}
- if (PyUnicode_AsUnicode(retuni) == NULL)
+ if (_PyUnicodeWriter_WriteStr(&buf->writer, retuni) < 0)
goto errorexit;
- retunisize = PyUnicode_GET_SIZE(retuni);
- if (retunisize > 0) {
- REQUIRE_DECODEBUFFER(buf, retunisize);
- memcpy((char *)buf->outbuf, PyUnicode_AS_UNICODE(retuni),
- retunisize * Py_UNICODE_SIZE);
- buf->outbuf += retunisize;
- }
newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
if (newpos < 0 && !PyErr_Occurred())
@@ -617,10 +587,10 @@ MultibyteCodec_Decode(MultibyteCodecObject *self,
{
MultibyteCodec_State state;
MultibyteDecodeBuffer buf;
- PyObject *errorcb;
+ PyObject *errorcb, *res;
Py_buffer pdata;
const char *data, *errors = NULL;
- Py_ssize_t datalen, finalsize;
+ Py_ssize_t datalen;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|z:decode",
codeckwarglist, &pdata, &errors))
@@ -640,29 +610,22 @@ MultibyteCodec_Decode(MultibyteCodecObject *self,
return make_tuple(PyUnicode_New(0, 0), 0);
}
+ _PyUnicodeWriter_Init(&buf.writer, datalen);
buf.excobj = NULL;
buf.inbuf = buf.inbuf_top = (unsigned char *)data;
buf.inbuf_end = buf.inbuf_top + datalen;
- buf.outobj = PyUnicode_FromUnicode(NULL, datalen);
- if (buf.outobj == NULL)
- goto errorexit;
- buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj);
- if (buf.outbuf == NULL)
- goto errorexit;
- buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj);
if (self->codec->decinit != NULL &&
self->codec->decinit(&state, self->codec->config) != 0)
goto errorexit;
while (buf.inbuf < buf.inbuf_end) {
- Py_ssize_t inleft, outleft, r;
+ Py_ssize_t inleft, r;
inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
- outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
r = self->codec->decode(&state, self->codec->config,
- &buf.inbuf, inleft, &buf.outbuf, outleft);
+ &buf.inbuf, inleft, &buf.writer);
if (r == 0)
break;
else if (multibytecodec_decerror(self->codec, &state,
@@ -670,23 +633,20 @@ MultibyteCodec_Decode(MultibyteCodecObject *self,
goto errorexit;
}
- finalsize = (Py_ssize_t)(buf.outbuf -
- PyUnicode_AS_UNICODE(buf.outobj));
-
- if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
- if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
- goto errorexit;
+ res = _PyUnicodeWriter_Finish(&buf.writer);
+ if (res == NULL)
+ goto errorexit;
PyBuffer_Release(&pdata);
Py_XDECREF(buf.excobj);
ERROR_DECREF(errorcb);
- return make_tuple(buf.outobj, datalen);
+ return make_tuple(res, datalen);
errorexit:
PyBuffer_Release(&pdata);
ERROR_DECREF(errorcb);
Py_XDECREF(buf.excobj);
- Py_XDECREF(buf.outobj);
+ _PyUnicodeWriter_Dealloc(&buf.writer);
return NULL;
}
@@ -859,17 +819,7 @@ decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
{
buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
buf->inbuf_end = buf->inbuf_top + size;
- if (buf->outobj == NULL) { /* only if outobj is not allocated yet */
- buf->outobj = PyUnicode_FromUnicode(NULL, size);
- if (buf->outobj == NULL)
- return -1;
- buf->outbuf = PyUnicode_AsUnicode(buf->outobj);
- if (buf->outbuf == NULL)
- return -1;
- buf->outbuf_end = buf->outbuf +
- PyUnicode_GET_SIZE(buf->outobj);
- }
-
+ _PyUnicodeWriter_Init(&buf->writer, size);
return 0;
}
@@ -878,14 +828,13 @@ decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
MultibyteDecodeBuffer *buf)
{
while (buf->inbuf < buf->inbuf_end) {
- Py_ssize_t inleft, outleft;
+ Py_ssize_t inleft;
Py_ssize_t r;
inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
- outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
r = ctx->codec->decode(&ctx->state, ctx->codec->config,
- &buf->inbuf, inleft, &buf->outbuf, outleft);
+ &buf->inbuf, inleft, &buf->writer);
if (r == 0 || r == MBERR_TOOFEW)
break;
else if (multibytecodec_decerror(ctx->codec, &ctx->state,
@@ -1058,8 +1007,9 @@ mbidecoder_decode(MultibyteIncrementalDecoderObject *self,
MultibyteDecodeBuffer buf;
char *data, *wdata = NULL;
Py_buffer pdata;
- Py_ssize_t wsize, finalsize = 0, size, origpending;
+ Py_ssize_t wsize, size, origpending;
int final = 0;
+ PyObject *res;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i:decode",
incrementalkwarglist, &pdata, &final))
@@ -1067,7 +1017,8 @@ mbidecoder_decode(MultibyteIncrementalDecoderObject *self,
data = pdata.buf;
size = pdata.len;
- buf.outobj = buf.excobj = NULL;
+ _PyUnicodeWriter_Init(&buf.writer, 1);
+ buf.excobj = NULL;
origpending = self->pendingsize;
if (self->pendingsize == 0) {
@@ -1109,23 +1060,22 @@ mbidecoder_decode(MultibyteIncrementalDecoderObject *self,
goto errorexit;
}
- finalsize = (Py_ssize_t)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj));
- if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
- if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
- goto errorexit;
+ res = _PyUnicodeWriter_Finish(&buf.writer);
+ if (res == NULL)
+ goto errorexit;
PyBuffer_Release(&pdata);
if (wdata != data)
PyMem_Del(wdata);
Py_XDECREF(buf.excobj);
- return buf.outobj;
+ return res;
errorexit:
PyBuffer_Release(&pdata);
if (wdata != NULL && wdata != data)
PyMem_Del(wdata);
Py_XDECREF(buf.excobj);
- Py_XDECREF(buf.outobj);
+ _PyUnicodeWriter_Dealloc(&buf.writer);
return NULL;
}
@@ -1265,13 +1215,14 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self,
const char *method, Py_ssize_t sizehint)
{
MultibyteDecodeBuffer buf;
- PyObject *cres;
- Py_ssize_t rsize, finalsize = 0;
+ PyObject *cres, *res;
+ Py_ssize_t rsize;
if (sizehint == 0)
return PyUnicode_New(0, 0);
- buf.outobj = buf.excobj = NULL;
+ _PyUnicodeWriter_Init(&buf.writer, 1);
+ buf.excobj = NULL;
cres = NULL;
for (;;) {
@@ -1340,29 +1291,27 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self,
goto errorexit;
}
- finalsize = (Py_ssize_t)(buf.outbuf -
- PyUnicode_AS_UNICODE(buf.outobj));
Py_DECREF(cres);
cres = NULL;
- if (sizehint < 0 || finalsize != 0 || rsize == 0)
+ if (sizehint < 0 || buf.writer.pos != 0 || rsize == 0)
break;
sizehint = 1; /* read 1 more byte and retry */
}
- if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
- if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
- goto errorexit;
+ res = _PyUnicodeWriter_Finish(&buf.writer);
+ if (res == NULL)
+ goto errorexit;
Py_XDECREF(cres);
Py_XDECREF(buf.excobj);
- return buf.outobj;
+ return res;
errorexit:
Py_XDECREF(cres);
Py_XDECREF(buf.excobj);
- Py_XDECREF(buf.outobj);
+ _PyUnicodeWriter_Dealloc(&buf.writer);
return NULL;
}
diff --git a/Modules/cjkcodecs/multibytecodec.h b/Modules/cjkcodecs/multibytecodec.h
index 1b6ef55..8e71266 100644
--- a/Modules/cjkcodecs/multibytecodec.h
+++ b/Modules/cjkcodecs/multibytecodec.h
@@ -10,12 +10,6 @@
extern "C" {
#endif
-#ifdef uint32_t
-typedef uint32_t ucs4_t;
-#else
-typedef unsigned int ucs4_t;
-#endif
-
#ifdef uint16_t
typedef uint16_t ucs2_t, DBCHAR;
#else
@@ -27,7 +21,7 @@ typedef union {
int i;
unsigned char c[8];
ucs2_t u2[4];
- ucs4_t u4[2];
+ Py_UCS4 u4[2];
} MultibyteCodec_State;
typedef int (*mbcodec_init)(const void *config);
@@ -44,7 +38,7 @@ typedef Py_ssize_t (*mbencodereset_func)(MultibyteCodec_State *state,
typedef Py_ssize_t (*mbdecode_func)(MultibyteCodec_State *state,
const void *config,
const unsigned char **inbuf, Py_ssize_t inleft,
- Py_UNICODE **outbuf, Py_ssize_t outleft);
+ _PyUnicodeWriter *writer);
typedef int (*mbdecodeinit_func)(MultibyteCodec_State *state,
const void *config);
typedef Py_ssize_t (*mbdecodereset_func)(MultibyteCodec_State *state,