diff options
author | Christopher Thorne <libcthorne@users.noreply.github.com> | 2018-11-01 10:48:49 (GMT) |
---|---|---|
committer | Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> | 2018-11-01 10:48:49 (GMT) |
commit | ac22f6aa989f18c33c12615af1c66c73cf75d5e7 (patch) | |
tree | bb21f3018f9b5e4b40ede33ce78bba1b13980f86 /Modules/cjkcodecs/_codecs_cn.c | |
parent | 4b5e62dbb22a3593e0db266c12f805b727a42b00 (diff) | |
download | cpython-ac22f6aa989f18c33c12615af1c66c73cf75d5e7.zip cpython-ac22f6aa989f18c33c12615af1c66c73cf75d5e7.tar.gz cpython-ac22f6aa989f18c33c12615af1c66c73cf75d5e7.tar.bz2 |
bpo-33578: Add getstate/setstate for CJK codec (GH-6984)
This implements getstate and setstate for the cjkcodecs multibyte incremental encoders/decoders, primarily to fix issues with seek/tell.
The encoder getstate/setstate is slightly tricky as the "state" is pending bytes + MultibyteCodec_State but only an integer can be returned. The approach I've taken is to encode this data into a long, similar to how .tell() encodes a "cookie_type" as a long.
https://bugs.python.org/issue33578
Diffstat (limited to 'Modules/cjkcodecs/_codecs_cn.c')
-rw-r--r-- | Modules/cjkcodecs/_codecs_cn.c | 38 |
1 files changed, 22 insertions, 16 deletions
```diff
diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c
index 1fcc220..8a62f7e 100644
--- a/Modules/cjkcodecs/_codecs_cn.c
+++ b/Modules/cjkcodecs/_codecs_cn.c
@@ -52,6 +52,12 @@
     }
 
 /*
+ * codecs in this file use the first byte of MultibyteCodec_State.c[8]
+ * to store a 0 or 1 state value
+ */
+#define CN_STATE_OFFSET 0
+
+/*
  * GB2312 codec
  */
 
@@ -329,15 +335,15 @@ DECODER(gb18030)
 
 ENCODER_INIT(hz)
 {
-    state->i = 0;
+    state->c[CN_STATE_OFFSET] = 0;
     return 0;
 }
 
 ENCODER_RESET(hz)
 {
-    if (state->i != 0) {
+    if (state->c[CN_STATE_OFFSET] != 0) {
         WRITEBYTE2('~', '}');
-        state->i = 0;
+        state->c[CN_STATE_OFFSET] = 0;
         NEXT_OUT(2);
     }
     return 0;
@@ -350,10 +356,10 @@ ENCODER(hz)
         DBCHAR code;
 
         if (c < 0x80) {
-            if (state->i) {
+            if (state->c[CN_STATE_OFFSET]) {
                 WRITEBYTE2('~', '}');
                 NEXT_OUT(2);
-                state->i = 0;
+                state->c[CN_STATE_OFFSET] = 0;
             }
             WRITEBYTE1((unsigned char)c);
             NEXT(1, 1);
@@ -375,10 +381,10 @@ ENCODER(hz)
         if (code & 0x8000) /* MSB set: GBK */
             return 1;
 
-        if (state->i == 0) {
+        if (state->c[CN_STATE_OFFSET] == 0) {
             WRITEBYTE4('~', '{', code >> 8, code & 0xff);
             NEXT(1, 4);
-            state->i = 1;
+            state->c[CN_STATE_OFFSET] = 1;
         }
         else {
             WRITEBYTE2(code >> 8, code & 0xff);
@@ -391,13 +397,13 @@ ENCODER(hz)
 
 DECODER_INIT(hz)
 {
-    state->i = 0;
+    state->c[CN_STATE_OFFSET] = 0;
     return 0;
 }
 
 DECODER_RESET(hz)
 {
-    state->i = 0;
+    state->c[CN_STATE_OFFSET] = 0;
     return 0;
 }
 
@@ -411,14 +417,14 @@ DECODER(hz)
             unsigned char c2 = INBYTE2;
 
             REQUIRE_INBUF(2);
-            if (c2 == '~' && state->i == 0)
+            if (c2 == '~' && state->c[CN_STATE_OFFSET] == 0)
                 OUTCHAR('~');
-            else if (c2 == '{' && state->i == 0)
-                state->i = 1; /* set GB */
-            else if (c2 == '\n' && state->i == 0)
+            else if (c2 == '{' && state->c[CN_STATE_OFFSET] == 0)
+                state->c[CN_STATE_OFFSET] = 1; /* set GB */
+            else if (c2 == '\n' && state->c[CN_STATE_OFFSET] == 0)
                 ; /* line-continuation */
-            else if (c2 == '}' && state->i == 1)
-                state->i = 0; /* set ASCII */
+            else if (c2 == '}' && state->c[CN_STATE_OFFSET] == 1)
+                state->c[CN_STATE_OFFSET] = 0; /* set ASCII */
             else
                 return 1;
             NEXT_IN(2);
@@ -428,7 +434,7 @@ DECODER(hz)
             if (c & 0x80)
                 return 1;
 
-            if (state->i == 0) { /* ASCII mode */
+            if (state->c[CN_STATE_OFFSET] == 0) { /* ASCII mode */
                 OUTCHAR(c);
                 NEXT_IN(1);
             }
```