summary refs log tree commit diff stats
path: root/Modules/cjkcodecs
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/cjkcodecs')
-rw-r--r-- Modules/cjkcodecs/_codecs_cn.c 14
-rw-r--r-- Modules/cjkcodecs/_codecs_hk.c 2
-rw-r--r-- Modules/cjkcodecs/_codecs_iso2022.c 2
-rw-r--r-- Modules/cjkcodecs/_codecs_jp.c 34
-rw-r--r-- Modules/cjkcodecs/_codecs_kr.c 18
-rw-r--r-- Modules/cjkcodecs/_codecs_tw.c 4
-rw-r--r-- Modules/cjkcodecs/multibytecodec.c 48
7 files changed, 72 insertions, 50 deletions
diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c
index ab4e659..9e9e96c 100644
--- a/Modules/cjkcodecs/_codecs_cn.c
+++ b/Modules/cjkcodecs/_codecs_cn.c
@@ -85,7 +85,7 @@ DECODER(gb2312)
TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
NEXT(2, 1)
}
- else return 2;
+ else return 1;
}
return 0;
@@ -141,7 +141,7 @@ DECODER(gbk)
REQUIRE_INBUF(2)
GBK_DECODE(c, IN2, **outbuf)
- else return 2;
+ else return 1;
NEXT(2, 1)
}
@@ -267,7 +267,7 @@ DECODER(gb18030)
c3 = IN3;
c4 = IN4;
if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39)
- return 4;
+ return 1;
c -= 0x81; c2 -= 0x30;
c3 -= 0x81; c4 -= 0x30;
@@ -292,12 +292,12 @@ DECODER(gb18030)
continue;
}
}
- return 4;
+ return 1;
}
GBK_DECODE(c, c2, **outbuf)
else TRYMAP_DEC(gb18030ext, **outbuf, c, c2);
- else return 2;
+ else return 1;
NEXT(2, 1)
}
@@ -400,7 +400,7 @@ DECODER(hz)
else if (c2 == '\n')
; /* line-continuation */
else
- return 2;
+ return 1;
NEXT(2, 0);
continue;
}
@@ -419,7 +419,7 @@ DECODER(hz)
NEXT(2, 1)
}
else
- return 2;
+ return 1;
}
}
diff --git a/Modules/cjkcodecs/_codecs_hk.c b/Modules/cjkcodecs/_codecs_hk.c
index 558a42f..d3ad04b 100644
--- a/Modules/cjkcodecs/_codecs_hk.c
+++ b/Modules/cjkcodecs/_codecs_hk.c
@@ -161,7 +161,7 @@ DECODER(big5hkscs)
case 0x8864: WRITE2(0x00ca, 0x030c); break;
case 0x88a3: WRITE2(0x00ea, 0x0304); break;
case 0x88a5: WRITE2(0x00ea, 0x030c); break;
- default: return 2;
+ default: return 1;
}
NEXT(2, 2) /* all decoded codepoints are pairs, above. */
diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c
index 25c1a36..cbc1542 100644
--- a/Modules/cjkcodecs/_codecs_iso2022.c
+++ b/Modules/cjkcodecs/_codecs_iso2022.c
@@ -123,7 +123,7 @@ struct iso2022_config {
CODEC_INIT(iso2022)
{
- const struct iso2022_designation *desig = CONFIG_DESIGNATIONS;
+ const struct iso2022_designation *desig;
for (desig = CONFIG_DESIGNATIONS; desig->mark; desig++)
if (desig->initializer != NULL && desig->initializer() != 0)
return -1;
diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c
index a05e01b..a500696 100644
--- a/Modules/cjkcodecs/_codecs_jp.c
+++ b/Modules/cjkcodecs/_codecs_jp.c
@@ -112,7 +112,7 @@ DECODER(cp932)
TRYMAP_DEC(cp932ext, **outbuf, c, c2);
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
- return 2;
+ return 1;
c = (c < 0xe0 ? c - 0x81 : c - 0xc1);
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
@@ -120,7 +120,7 @@ DECODER(cp932)
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
TRYMAP_DEC(jisx0208, **outbuf, c, c2);
- else return 2;
+ else return 1;
}
else if (c >= 0xf0 && c <= 0xf9) {
if ((c2 >= 0x40 && c2 <= 0x7e) ||
@@ -128,10 +128,10 @@ DECODER(cp932)
OUT1(0xe000 + 188 * (c - 0xf0) +
(c2 < 0x80 ? c2 - 0x40 : c2 - 0x41))
else
- return 2;
+ return 1;
}
else
- return 2;
+ return 1;
NEXT(2, 1)
}
@@ -256,7 +256,7 @@ DECODER(euc_jis_2004)
NEXT(2, 1)
}
else
- return 2;
+ return 1;
}
else if (c == 0x8f) {
unsigned char c2, c3;
@@ -274,7 +274,7 @@ DECODER(euc_jis_2004)
continue;
}
else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ;
- else return 3;
+ else return 1;
NEXT(3, 1)
}
else {
@@ -300,7 +300,7 @@ DECODER(euc_jis_2004)
NEXT(2, 2)
continue;
}
- else return 2;
+ else return 1;
NEXT(2, 1)
}
}
@@ -388,7 +388,7 @@ DECODER(euc_jp)
NEXT(2, 1)
}
else
- return 2;
+ return 1;
}
else if (c == 0x8f) {
unsigned char c2, c3;
@@ -401,7 +401,7 @@ DECODER(euc_jp)
NEXT(3, 1)
}
else
- return 3;
+ return 1;
}
else {
unsigned char c2;
@@ -417,7 +417,7 @@ DECODER(euc_jp)
#endif
TRYMAP_DEC(jisx0208, **outbuf,
c ^ 0x80, c2 ^ 0x80) ;
- else return 2;
+ else return 1;
NEXT(2, 1)
}
}
@@ -502,7 +502,7 @@ DECODER(shift_jis)
REQUIRE_INBUF(2)
c2 = IN2;
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
- return 2;
+ return 1;
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
@@ -522,10 +522,10 @@ DECODER(shift_jis)
continue;
}
else
- return 2;
+ return 1;
}
else
- return 2;
+ return 1;
NEXT(1, 1) /* JIS X 0201 */
}
@@ -645,7 +645,7 @@ DECODER(shift_jis_2004)
REQUIRE_INBUF(2)
c2 = IN2;
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
- return 2;
+ return 1;
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
@@ -671,7 +671,7 @@ DECODER(shift_jis_2004)
NEXT_OUT(2)
}
else
- return 2;
+ return 1;
NEXT_IN(2)
}
else { /* Plane 2 */
@@ -689,13 +689,13 @@ DECODER(shift_jis_2004)
continue;
}
else
- return 2;
+ return 1;
NEXT(2, 1)
}
continue;
}
else
- return 2;
+ return 1;
NEXT(1, 1) /* JIS X 0201 */
}
diff --git a/Modules/cjkcodecs/_codecs_kr.c b/Modules/cjkcodecs/_codecs_kr.c
index 9272e36..f5697dd 100644
--- a/Modules/cjkcodecs/_codecs_kr.c
+++ b/Modules/cjkcodecs/_codecs_kr.c
@@ -123,7 +123,7 @@ DECODER(euc_kr)
if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE ||
(*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE ||
(*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE)
- return 8;
+ return 1;
c = (*inbuf)[3];
if (0xa1 <= c && c <= 0xbe)
@@ -143,7 +143,7 @@ DECODER(euc_kr)
jong = NONE;
if (cho == NONE || jung == NONE || jong == NONE)
- return 8;
+ return 1;
OUT1(0xac00 + cho*588 + jung*28 + jong);
NEXT(8, 1)
@@ -152,7 +152,7 @@ DECODER(euc_kr)
NEXT(2, 1)
}
else
- return 2;
+ return 1;
}
return 0;
@@ -208,7 +208,7 @@ DECODER(cp949)
REQUIRE_INBUF(2)
TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);
else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);
- else return 2;
+ else return 1;
NEXT(2, 1)
}
@@ -375,7 +375,7 @@ DECODER(johab)
i_jong = johabidx_jongseong[c_jong];
if (i_cho == NONE || i_jung == NONE || i_jong == NONE)
- return 2;
+ return 1;
/* we don't use U+1100 hangul jamo yet. */
if (i_cho == FILL) {
@@ -391,7 +391,7 @@ DECODER(johab)
OUT1(0x3100 |
johabjamo_jungseong[c_jung])
else
- return 2;
+ return 1;
}
} else {
if (i_jung == FILL) {
@@ -399,7 +399,7 @@ DECODER(johab)
OUT1(0x3100 |
johabjamo_choseong[c_cho])
else
- return 2;
+ return 1;
}
else
OUT1(0xac00 +
@@ -414,7 +414,7 @@ DECODER(johab)
c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) ||
(c2 & 0x7f) == 0x7f ||
(c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3)))
- return 2;
+ return 1;
else {
unsigned char t1, t2;
@@ -425,7 +425,7 @@ DECODER(johab)
t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
TRYMAP_DEC(ksx1001, **outbuf, t1, t2);
- else return 2;
+ else return 1;
NEXT(2, 1)
}
}
diff --git a/Modules/cjkcodecs/_codecs_tw.c b/Modules/cjkcodecs/_codecs_tw.c
index 38cf723..916298d 100644
--- a/Modules/cjkcodecs/_codecs_tw.c
+++ b/Modules/cjkcodecs/_codecs_tw.c
@@ -55,7 +55,7 @@ DECODER(big5)
TRYMAP_DEC(big5, **outbuf, c, IN2) {
NEXT(2, 1)
}
- else return 2;
+ else return 1;
}
return 0;
@@ -109,7 +109,7 @@ DECODER(cp950)
TRYMAP_DEC(cp950ext, **outbuf, c, IN2);
else TRYMAP_DEC(big5, **outbuf, c, IN2);
- else return 2;
+ else return 1;
NEXT(2, 1)
}
diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c
index 7b04f020..abad251 100644
--- a/Modules/cjkcodecs/multibytecodec.c
+++ b/Modules/cjkcodecs/multibytecodec.c
@@ -443,10 +443,12 @@ multibytecodec_decerror(MultibyteCodec *codec,
goto errorexit;
}
+ if (PyUnicode_AsUnicode(retuni) == NULL)
+ goto errorexit;
retunisize = PyUnicode_GET_SIZE(retuni);
if (retunisize > 0) {
REQUIRE_DECODEBUFFER(buf, retunisize);
- memcpy((char *)buf->outbuf, PyUnicode_AS_DATA(retuni),
+ memcpy((char *)buf->outbuf, PyUnicode_AS_UNICODE(retuni),
retunisize * Py_UNICODE_SIZE);
buf->outbuf += retunisize;
}
@@ -483,6 +485,7 @@ multibytecodec_encode(MultibyteCodec *codec,
return PyBytes_FromStringAndSize(NULL, 0);
buf.excobj = NULL;
+ buf.outobj = NULL;
buf.inbuf = buf.inbuf_top = *data;
buf.inbuf_end = buf.inbuf_top + datalen;
@@ -573,8 +576,11 @@ MultibyteCodec_Encode(MultibyteCodecObject *self,
}
}
- data = PyUnicode_AS_UNICODE(arg);
- datalen = PyUnicode_GET_SIZE(arg);
+ data = PyUnicode_AsUnicodeAndSize(arg, &datalen);
+ if (data == NULL) {
+ Py_XDECREF(ucvt);
+ return NULL;
+ }
errorcb = internal_error_callback(errors);
if (errorcb == NULL) {
@@ -627,7 +633,7 @@ MultibyteCodec_Decode(MultibyteCodecObject *self,
if (datalen == 0) {
PyBuffer_Release(&pdata);
ERROR_DECREF(errorcb);
- return make_tuple(PyUnicode_FromUnicode(NULL, 0), 0);
+ return make_tuple(PyUnicode_New(0, 0), 0);
}
buf.excobj = NULL;
@@ -637,6 +643,8 @@ MultibyteCodec_Decode(MultibyteCodecObject *self,
if (buf.outobj == NULL)
goto errorexit;
buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj);
+ if (buf.outbuf == NULL)
+ goto errorexit;
buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj);
if (self->codec->decinit != NULL &&
@@ -742,6 +750,7 @@ encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
PyObject *ucvt, *r = NULL;
Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL;
Py_ssize_t datalen, origpending;
+ wchar_t *data;
if (PyUnicode_Check(unistr))
ucvt = NULL;
@@ -757,7 +766,9 @@ encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
}
}
- datalen = PyUnicode_GET_SIZE(unistr);
+ data = PyUnicode_AsUnicodeAndSize(unistr, &datalen);
+ if (data == NULL)
+ goto errorexit;
origpending = ctx->pendingsize;
if (origpending > 0) {
@@ -848,7 +859,9 @@ decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
buf->outobj = PyUnicode_FromUnicode(NULL, size);
if (buf->outobj == NULL)
return -1;
- buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj);
+ buf->outbuf = PyUnicode_AsUnicode(buf->outobj);
+ if (buf->outbuf == NULL)
+ return -1;
buf->outbuf_end = buf->outbuf +
PyUnicode_GET_SIZE(buf->outobj);
}
@@ -900,11 +913,17 @@ mbiencoder_encode(MultibyteIncrementalEncoderObject *self,
static PyObject *
mbiencoder_reset(MultibyteIncrementalEncoderObject *self)
{
- if (self->codec->decreset != NULL &&
- self->codec->decreset(&self->state, self->codec->config) != 0)
- return NULL;
+ /* Longest output: 4 bytes (b'\x0F\x1B(B') with ISO 2022 */
+ unsigned char buffer[4], *outbuf;
+ Py_ssize_t r;
+ if (self->codec->encreset != NULL) {
+ outbuf = buffer;
+ r = self->codec->encreset(&self->state, self->codec->config,
+ &outbuf, sizeof(buffer));
+ if (r != 0)
+ return NULL;
+ }
self->pendingsize = 0;
-
Py_RETURN_NONE;
}
@@ -1246,7 +1265,7 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self,
Py_ssize_t rsize, finalsize = 0;
if (sizehint == 0)
- return PyUnicode_FromUnicode(NULL, 0);
+ return PyUnicode_New(0, 0);
buf.outobj = buf.excobj = NULL;
cres = NULL;
@@ -1572,12 +1591,13 @@ mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
PyObject *unistr)
{
PyObject *str, *wr;
+ _Py_IDENTIFIER(write);
str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0);
if (str == NULL)
return -1;
- wr = PyObject_CallMethod(self->stream, "write", "O", str);
+ wr = _PyObject_CallMethodId(self->stream, &PyId_write, "O", str);
Py_DECREF(str);
if (wr == NULL)
return -1;
@@ -1643,7 +1663,9 @@ mbstreamwriter_reset(MultibyteStreamWriterObject *self)
assert(PyBytes_Check(pwrt));
if (PyBytes_Size(pwrt) > 0) {
PyObject *wr;
- wr = PyObject_CallMethod(self->stream, "write", "O", pwrt);
+ _Py_IDENTIFIER(write);
+
+ wr = _PyObject_CallMethodId(self->stream, &PyId_write, "O", pwrt);
if (wr == NULL) {
Py_DECREF(pwrt);
return NULL;