diff options
author | Victor Stinner <victor.stinner@haypocalc.com> | 2011-07-07 23:45:13 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@haypocalc.com> | 2011-07-07 23:45:13 (GMT) |
commit | 2cded9c3f31d2fea4b033f44eaa828e508f03391 (patch) | |
tree | 1554d9f0baa575b7ae791ff1267c4e493a1b36bf /Modules/cjkcodecs/_codecs_jp.c | |
parent | 081fe46ff96bccb1a256c356443b625b467814c8 (diff) | |
download | cpython-2cded9c3f31d2fea4b033f44eaa828e508f03391.zip cpython-2cded9c3f31d2fea4b033f44eaa828e508f03391.tar.gz cpython-2cded9c3f31d2fea4b033f44eaa828e508f03391.tar.bz2 |
Issue #12016: Multibyte CJK decoders now resynchronize faster.
They now ignore only the first byte of an invalid byte sequence.
For example, b'\xff\n'.decode('gb2312', 'replace') gives '\ufffd\n' instead of
'\ufffd'.
Diffstat (limited to 'Modules/cjkcodecs/_codecs_jp.c')
-rw-r--r-- | Modules/cjkcodecs/_codecs_jp.c | 34 |
1 file changed, 17 insertions, 17 deletions
diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c index a05e01b..a500696 100644 --- a/Modules/cjkcodecs/_codecs_jp.c +++ b/Modules/cjkcodecs/_codecs_jp.c @@ -112,7 +112,7 @@ DECODER(cp932) TRYMAP_DEC(cp932ext, **outbuf, c, c2); else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){ if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) - return 2; + return 1; c = (c < 0xe0 ? c - 0x81 : c - 0xc1); c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); @@ -120,7 +120,7 @@ DECODER(cp932) c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21; TRYMAP_DEC(jisx0208, **outbuf, c, c2); - else return 2; + else return 1; } else if (c >= 0xf0 && c <= 0xf9) { if ((c2 >= 0x40 && c2 <= 0x7e) || @@ -128,10 +128,10 @@ DECODER(cp932) OUT1(0xe000 + 188 * (c - 0xf0) + (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41)) else - return 2; + return 1; } else - return 2; + return 1; NEXT(2, 1) } @@ -256,7 +256,7 @@ DECODER(euc_jis_2004) NEXT(2, 1) } else - return 2; + return 1; } else if (c == 0x8f) { unsigned char c2, c3; @@ -274,7 +274,7 @@ DECODER(euc_jis_2004) continue; } else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ; - else return 3; + else return 1; NEXT(3, 1) } else { @@ -300,7 +300,7 @@ DECODER(euc_jis_2004) NEXT(2, 2) continue; } - else return 2; + else return 1; NEXT(2, 1) } } @@ -388,7 +388,7 @@ DECODER(euc_jp) NEXT(2, 1) } else - return 2; + return 1; } else if (c == 0x8f) { unsigned char c2, c3; @@ -401,7 +401,7 @@ DECODER(euc_jp) NEXT(3, 1) } else - return 3; + return 1; } else { unsigned char c2; @@ -417,7 +417,7 @@ DECODER(euc_jp) #endif TRYMAP_DEC(jisx0208, **outbuf, c ^ 0x80, c2 ^ 0x80) ; - else return 2; + else return 1; NEXT(2, 1) } } @@ -502,7 +502,7 @@ DECODER(shift_jis) REQUIRE_INBUF(2) c2 = IN2; if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) - return 2; + return 1; c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1); c2 = (c2 < 0x80 ? 
c2 - 0x40 : c2 - 0x41); @@ -522,10 +522,10 @@ DECODER(shift_jis) continue; } else - return 2; + return 1; } else - return 2; + return 1; NEXT(1, 1) /* JIS X 0201 */ } @@ -645,7 +645,7 @@ DECODER(shift_jis_2004) REQUIRE_INBUF(2) c2 = IN2; if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) - return 2; + return 1; c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1); c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); @@ -671,7 +671,7 @@ DECODER(shift_jis_2004) NEXT_OUT(2) } else - return 2; + return 1; NEXT_IN(2) } else { /* Plane 2 */ @@ -689,13 +689,13 @@ DECODER(shift_jis_2004) continue; } else - return 2; + return 1; NEXT(2, 1) } continue; } else - return 2; + return 1; NEXT(1, 1) /* JIS X 0201 */ } |