summaryrefslogtreecommitdiffstats
path: root/Modules/cjkcodecs/_codecs_cn.c
diff options
context:
space:
mode:
authorChristopher Thorne <libcthorne@users.noreply.github.com>2018-11-01 10:48:49 (GMT)
committerMiss Islington (bot) <31488909+miss-islington@users.noreply.github.com>2018-11-01 10:48:49 (GMT)
commitac22f6aa989f18c33c12615af1c66c73cf75d5e7 (patch)
treebb21f3018f9b5e4b40ede33ce78bba1b13980f86 /Modules/cjkcodecs/_codecs_cn.c
parent4b5e62dbb22a3593e0db266c12f805b727a42b00 (diff)
downloadcpython-ac22f6aa989f18c33c12615af1c66c73cf75d5e7.zip
cpython-ac22f6aa989f18c33c12615af1c66c73cf75d5e7.tar.gz
cpython-ac22f6aa989f18c33c12615af1c66c73cf75d5e7.tar.bz2
bpo-33578: Add getstate/setstate for CJK codec (GH-6984)
This implements getstate and setstate for the cjkcodecs multibyte incremental encoders/decoders, primarily to fix issues with seek/tell. The encoder getstate/setstate is slightly tricky as the "state" is pending bytes + MultibyteCodec_State but only an integer can be returned. The approach I've taken is to encode this data into a long, similar to how .tell() encodes a "cookie_type" as a long. https://bugs.python.org/issue33578
Diffstat (limited to 'Modules/cjkcodecs/_codecs_cn.c')
-rw-r--r--Modules/cjkcodecs/_codecs_cn.c38
1 files changed, 22 insertions, 16 deletions
diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c
index 1fcc220..8a62f7e 100644
--- a/Modules/cjkcodecs/_codecs_cn.c
+++ b/Modules/cjkcodecs/_codecs_cn.c
@@ -52,6 +52,12 @@
}
/*
+ * codecs in this file use the first byte of MultibyteCodec_State.c[8]
+ * to store a 0 or 1 state value
+ */
+#define CN_STATE_OFFSET 0
+
+/*
* GB2312 codec
*/
@@ -329,15 +335,15 @@ DECODER(gb18030)
ENCODER_INIT(hz)
{
- state->i = 0;
+ state->c[CN_STATE_OFFSET] = 0;
return 0;
}
ENCODER_RESET(hz)
{
- if (state->i != 0) {
+ if (state->c[CN_STATE_OFFSET] != 0) {
WRITEBYTE2('~', '}');
- state->i = 0;
+ state->c[CN_STATE_OFFSET] = 0;
NEXT_OUT(2);
}
return 0;
@@ -350,10 +356,10 @@ ENCODER(hz)
DBCHAR code;
if (c < 0x80) {
- if (state->i) {
+ if (state->c[CN_STATE_OFFSET]) {
WRITEBYTE2('~', '}');
NEXT_OUT(2);
- state->i = 0;
+ state->c[CN_STATE_OFFSET] = 0;
}
WRITEBYTE1((unsigned char)c);
NEXT(1, 1);
@@ -375,10 +381,10 @@ ENCODER(hz)
if (code & 0x8000) /* MSB set: GBK */
return 1;
- if (state->i == 0) {
+ if (state->c[CN_STATE_OFFSET] == 0) {
WRITEBYTE4('~', '{', code >> 8, code & 0xff);
NEXT(1, 4);
- state->i = 1;
+ state->c[CN_STATE_OFFSET] = 1;
}
else {
WRITEBYTE2(code >> 8, code & 0xff);
@@ -391,13 +397,13 @@ ENCODER(hz)
DECODER_INIT(hz)
{
- state->i = 0;
+ state->c[CN_STATE_OFFSET] = 0;
return 0;
}
DECODER_RESET(hz)
{
- state->i = 0;
+ state->c[CN_STATE_OFFSET] = 0;
return 0;
}
@@ -411,14 +417,14 @@ DECODER(hz)
unsigned char c2 = INBYTE2;
REQUIRE_INBUF(2);
- if (c2 == '~' && state->i == 0)
+ if (c2 == '~' && state->c[CN_STATE_OFFSET] == 0)
OUTCHAR('~');
- else if (c2 == '{' && state->i == 0)
- state->i = 1; /* set GB */
- else if (c2 == '\n' && state->i == 0)
+ else if (c2 == '{' && state->c[CN_STATE_OFFSET] == 0)
+ state->c[CN_STATE_OFFSET] = 1; /* set GB */
+ else if (c2 == '\n' && state->c[CN_STATE_OFFSET] == 0)
; /* line-continuation */
- else if (c2 == '}' && state->i == 1)
- state->i = 0; /* set ASCII */
+ else if (c2 == '}' && state->c[CN_STATE_OFFSET] == 1)
+ state->c[CN_STATE_OFFSET] = 0; /* set ASCII */
else
return 1;
NEXT_IN(2);
@@ -428,7 +434,7 @@ DECODER(hz)
if (c & 0x80)
return 1;
- if (state->i == 0) { /* ASCII mode */
+ if (state->c[CN_STATE_OFFSET] == 0) { /* ASCII mode */
OUTCHAR(c);
NEXT_IN(1);
}