diff options
Diffstat (limited to 'Modules/cjkcodecs')
-rw-r--r-- | Modules/cjkcodecs/_codecs_cn.c | 14 | ||||
-rw-r--r-- | Modules/cjkcodecs/_codecs_hk.c | 2 | ||||
-rw-r--r-- | Modules/cjkcodecs/_codecs_iso2022.c | 2 | ||||
-rw-r--r-- | Modules/cjkcodecs/_codecs_jp.c | 34 | ||||
-rw-r--r-- | Modules/cjkcodecs/_codecs_kr.c | 18 | ||||
-rw-r--r-- | Modules/cjkcodecs/_codecs_tw.c | 4 | ||||
-rw-r--r-- | Modules/cjkcodecs/multibytecodec.c | 48 |
7 files changed, 72 insertions, 50 deletions
diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c index ab4e659..9e9e96c 100644 --- a/Modules/cjkcodecs/_codecs_cn.c +++ b/Modules/cjkcodecs/_codecs_cn.c @@ -85,7 +85,7 @@ DECODER(gb2312) TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) { NEXT(2, 1) } - else return 2; + else return 1; } return 0; @@ -141,7 +141,7 @@ DECODER(gbk) REQUIRE_INBUF(2) GBK_DECODE(c, IN2, **outbuf) - else return 2; + else return 1; NEXT(2, 1) } @@ -267,7 +267,7 @@ DECODER(gb18030) c3 = IN3; c4 = IN4; if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39) - return 4; + return 1; c -= 0x81; c2 -= 0x30; c3 -= 0x81; c4 -= 0x30; @@ -292,12 +292,12 @@ DECODER(gb18030) continue; } } - return 4; + return 1; } GBK_DECODE(c, c2, **outbuf) else TRYMAP_DEC(gb18030ext, **outbuf, c, c2); - else return 2; + else return 1; NEXT(2, 1) } @@ -400,7 +400,7 @@ DECODER(hz) else if (c2 == '\n') ; /* line-continuation */ else - return 2; + return 1; NEXT(2, 0); continue; } @@ -419,7 +419,7 @@ DECODER(hz) NEXT(2, 1) } else - return 2; + return 1; } } diff --git a/Modules/cjkcodecs/_codecs_hk.c b/Modules/cjkcodecs/_codecs_hk.c index 558a42f..d3ad04b 100644 --- a/Modules/cjkcodecs/_codecs_hk.c +++ b/Modules/cjkcodecs/_codecs_hk.c @@ -161,7 +161,7 @@ DECODER(big5hkscs) case 0x8864: WRITE2(0x00ca, 0x030c); break; case 0x88a3: WRITE2(0x00ea, 0x0304); break; case 0x88a5: WRITE2(0x00ea, 0x030c); break; - default: return 2; + default: return 1; } NEXT(2, 2) /* all decoded codepoints are pairs, above. */ diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c index 25c1a36..cbc1542 100644 --- a/Modules/cjkcodecs/_codecs_iso2022.c +++ b/Modules/cjkcodecs/_codecs_iso2022.c @@ -123,7 +123,7 @@ struct iso2022_config { CODEC_INIT(iso2022) { - const struct iso2022_designation *desig = CONFIG_DESIGNATIONS; + const struct iso2022_designation *desig; for (desig = CONFIG_DESIGNATIONS; desig->mark; desig++) if (desig->initializer != NULL && desig->initializer() != 0) return -1; diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c index a05e01b..a500696 100644 --- a/Modules/cjkcodecs/_codecs_jp.c +++ b/Modules/cjkcodecs/_codecs_jp.c @@ -112,7 +112,7 @@ DECODER(cp932) TRYMAP_DEC(cp932ext, **outbuf, c, c2); else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){ if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) - return 2; + return 1; c = (c < 0xe0 ? c - 0x81 : c - 0xc1); c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); @@ -120,7 +120,7 @@ DECODER(cp932) c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21; TRYMAP_DEC(jisx0208, **outbuf, c, c2); - else return 2; + else return 1; } else if (c >= 0xf0 && c <= 0xf9) { if ((c2 >= 0x40 && c2 <= 0x7e) || @@ -128,10 +128,10 @@ DECODER(cp932) OUT1(0xe000 + 188 * (c - 0xf0) + (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41)) else - return 2; + return 1; } else - return 2; + return 1; NEXT(2, 1) } @@ -256,7 +256,7 @@ DECODER(euc_jis_2004) NEXT(2, 1) } else - return 2; + return 1; } else if (c == 0x8f) { unsigned char c2, c3; @@ -274,7 +274,7 @@ DECODER(euc_jis_2004) continue; } else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ; - else return 3; + else return 1; NEXT(3, 1) } else { @@ -300,7 +300,7 @@ DECODER(euc_jis_2004) NEXT(2, 2) continue; } - else return 2; + else return 1; NEXT(2, 1) } } @@ -388,7 +388,7 @@ DECODER(euc_jp) NEXT(2, 1) } else - return 2; + return 1; } else if (c == 0x8f) { unsigned char c2, c3; @@ -401,7 +401,7 @@ DECODER(euc_jp) NEXT(3, 1) } else - return 3; + return 1; } else { unsigned char c2; @@ -417,7 +417,7 @@ DECODER(euc_jp) #endif TRYMAP_DEC(jisx0208, **outbuf, c ^ 0x80, c2 ^ 0x80) ; - else return 2; + else return 1; NEXT(2, 1) } } @@ -502,7 +502,7 @@ DECODER(shift_jis) REQUIRE_INBUF(2) c2 = IN2; if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) - return 2; + return 1; c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1); c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); @@ -522,10 +522,10 @@ DECODER(shift_jis) continue; } else - return 2; + return 1; } else - return 2; + return 1; NEXT(1, 1) /* JIS X 0201 */ } @@ -645,7 +645,7 @@ DECODER(shift_jis_2004) REQUIRE_INBUF(2) c2 = IN2; if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) - return 2; + return 1; c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1); c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); @@ -671,7 +671,7 @@ DECODER(shift_jis_2004) NEXT_OUT(2) } else - return 2; + return 1; NEXT_IN(2) } else { /* Plane 2 */ @@ -689,13 +689,13 @@ DECODER(shift_jis_2004) continue; } else - return 2; + return 1; NEXT(2, 1) } continue; } else - return 2; + return 1; NEXT(1, 1) /* JIS X 0201 */ } diff --git a/Modules/cjkcodecs/_codecs_kr.c b/Modules/cjkcodecs/_codecs_kr.c index 9272e36..f5697dd 100644 --- a/Modules/cjkcodecs/_codecs_kr.c +++ b/Modules/cjkcodecs/_codecs_kr.c @@ -123,7 +123,7 @@ DECODER(euc_kr) if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE || (*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE || (*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE) - return 8; + return 1; c = (*inbuf)[3]; if (0xa1 <= c && c <= 0xbe) @@ -143,7 +143,7 @@ DECODER(euc_kr) jong = NONE; if (cho == NONE || jung == NONE || jong == NONE) - return 8; + return 1; OUT1(0xac00 + cho*588 + jung*28 + jong); NEXT(8, 1) @@ -152,7 +152,7 @@ DECODER(euc_kr) NEXT(2, 1) } else - return 2; + return 1; } return 0; @@ -208,7 +208,7 @@ DECODER(cp949) REQUIRE_INBUF(2) TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80); else TRYMAP_DEC(cp949ext, **outbuf, c, IN2); - else return 2; + else return 1; NEXT(2, 1) } @@ -375,7 +375,7 @@ DECODER(johab) i_jong = johabidx_jongseong[c_jong]; if (i_cho == NONE || i_jung == NONE || i_jong == NONE) - return 2; + return 1; /* we don't use U+1100 hangul jamo yet. */ if (i_cho == FILL) { @@ -391,7 +391,7 @@ DECODER(johab) OUT1(0x3100 | johabjamo_jungseong[c_jung]) else - return 2; + return 1; } } else { if (i_jung == FILL) { @@ -399,7 +399,7 @@ DECODER(johab) OUT1(0x3100 | johabjamo_choseong[c_cho]) else - return 2; + return 1; } else OUT1(0xac00 + @@ -414,7 +414,7 @@ DECODER(johab) c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) || (c2 & 0x7f) == 0x7f || (c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3))) - return 2; + return 1; else { unsigned char t1, t2; @@ -425,7 +425,7 @@ DECODER(johab) t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21; TRYMAP_DEC(ksx1001, **outbuf, t1, t2); - else return 2; + else return 1; NEXT(2, 1) } } diff --git a/Modules/cjkcodecs/_codecs_tw.c b/Modules/cjkcodecs/_codecs_tw.c index 38cf723..916298d 100644 --- a/Modules/cjkcodecs/_codecs_tw.c +++ b/Modules/cjkcodecs/_codecs_tw.c @@ -55,7 +55,7 @@ DECODER(big5) TRYMAP_DEC(big5, **outbuf, c, IN2) { NEXT(2, 1) } - else return 2; + else return 1; } return 0; @@ -109,7 +109,7 @@ DECODER(cp950) TRYMAP_DEC(cp950ext, **outbuf, c, IN2); else TRYMAP_DEC(big5, **outbuf, c, IN2); - else return 2; + else return 1; NEXT(2, 1) } diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c index 7b04f020..abad251 100644 --- a/Modules/cjkcodecs/multibytecodec.c +++ b/Modules/cjkcodecs/multibytecodec.c @@ -443,10 +443,12 @@ multibytecodec_decerror(MultibyteCodec *codec, goto errorexit; } + if (PyUnicode_AsUnicode(retuni) == NULL) + goto errorexit; retunisize = PyUnicode_GET_SIZE(retuni); if (retunisize > 0) { REQUIRE_DECODEBUFFER(buf, retunisize); - memcpy((char *)buf->outbuf, PyUnicode_AS_DATA(retuni), + memcpy((char *)buf->outbuf, PyUnicode_AS_UNICODE(retuni), retunisize * Py_UNICODE_SIZE); buf->outbuf += retunisize; } @@ -483,6 +485,7 @@ multibytecodec_encode(MultibyteCodec *codec, return PyBytes_FromStringAndSize(NULL, 0); buf.excobj = NULL; + buf.outobj = NULL; buf.inbuf = buf.inbuf_top = *data; buf.inbuf_end = buf.inbuf_top + datalen; @@ -573,8 +576,11 @@ MultibyteCodec_Encode(MultibyteCodecObject *self, } } - data = PyUnicode_AS_UNICODE(arg); - datalen = PyUnicode_GET_SIZE(arg); + data = PyUnicode_AsUnicodeAndSize(arg, &datalen); + if (data == NULL) { + Py_XDECREF(ucvt); + return NULL; + } errorcb = internal_error_callback(errors); if (errorcb == NULL) { @@ -627,7 +633,7 @@ MultibyteCodec_Decode(MultibyteCodecObject *self, if (datalen == 0) { PyBuffer_Release(&pdata); ERROR_DECREF(errorcb); - return make_tuple(PyUnicode_FromUnicode(NULL, 0), 0); + return make_tuple(PyUnicode_New(0, 0), 0); } buf.excobj = NULL; @@ -637,6 +643,8 @@ MultibyteCodec_Decode(MultibyteCodecObject *self, if (buf.outobj == NULL) goto errorexit; buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj); + if (buf.outbuf == NULL) + goto errorexit; buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj); if (self->codec->decinit != NULL && @@ -742,6 +750,7 @@ encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx, PyObject *ucvt, *r = NULL; Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL; Py_ssize_t datalen, origpending; + wchar_t *data; if (PyUnicode_Check(unistr)) ucvt = NULL; @@ -757,7 +766,9 @@ encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx, } } - datalen = PyUnicode_GET_SIZE(unistr); + data = PyUnicode_AsUnicodeAndSize(unistr, &datalen); + if (data == NULL) + goto errorexit; origpending = ctx->pendingsize; if (origpending > 0) { @@ -848,7 +859,9 @@ decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data, buf->outobj = PyUnicode_FromUnicode(NULL, size); if (buf->outobj == NULL) return -1; - buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj); + buf->outbuf = PyUnicode_AsUnicode(buf->outobj); + if (buf->outbuf == NULL) + return -1; buf->outbuf_end = buf->outbuf + PyUnicode_GET_SIZE(buf->outobj); } @@ -900,11 +913,17 @@ mbiencoder_encode(MultibyteIncrementalEncoderObject *self, static PyObject * mbiencoder_reset(MultibyteIncrementalEncoderObject *self) { - if (self->codec->decreset != NULL && - self->codec->decreset(&self->state, self->codec->config) != 0) - return NULL; + /* Longest output: 4 bytes (b'\x0F\x1F(B') with ISO 2022 */ + unsigned char buffer[4], *outbuf; + Py_ssize_t r; + if (self->codec->encreset != NULL) { + outbuf = buffer; + r = self->codec->encreset(&self->state, self->codec->config, + &outbuf, sizeof(buffer)); + if (r != 0) + return NULL; + } self->pendingsize = 0; - Py_RETURN_NONE; } @@ -1246,7 +1265,7 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self, Py_ssize_t rsize, finalsize = 0; if (sizehint == 0) - return PyUnicode_FromUnicode(NULL, 0); + return PyUnicode_New(0, 0); buf.outobj = buf.excobj = NULL; cres = NULL; @@ -1572,12 +1591,13 @@ mbstreamwriter_iwrite(MultibyteStreamWriterObject *self, PyObject *unistr) { PyObject *str, *wr; + _Py_IDENTIFIER(write); str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0); if (str == NULL) return -1; - wr = PyObject_CallMethod(self->stream, "write", "O", str); + wr = _PyObject_CallMethodId(self->stream, &PyId_write, "O", str); Py_DECREF(str); if (wr == NULL) return -1; @@ -1643,7 +1663,9 @@ mbstreamwriter_reset(MultibyteStreamWriterObject *self) assert(PyBytes_Check(pwrt)); if (PyBytes_Size(pwrt) > 0) { PyObject *wr; - wr = PyObject_CallMethod(self->stream, "write", "O", pwrt); + _Py_IDENTIFIER(write); + + wr = _PyObject_CallMethodId(self->stream, &PyId_write, "O", pwrt); if (wr == NULL) { Py_DECREF(pwrt); return NULL; |