diff options
author | Victor Stinner <victor.stinner@haypocalc.com> | 2011-07-07 23:45:13 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@haypocalc.com> | 2011-07-07 23:45:13 (GMT) |
commit | 2cded9c3f31d2fea4b033f44eaa828e508f03391 (patch) | |
tree | 1554d9f0baa575b7ae791ff1267c4e493a1b36bf /Modules/cjkcodecs/_codecs_kr.c | |
parent | 081fe46ff96bccb1a256c356443b625b467814c8 (diff) | |
download | cpython-2cded9c3f31d2fea4b033f44eaa828e508f03391.zip cpython-2cded9c3f31d2fea4b033f44eaa828e508f03391.tar.gz cpython-2cded9c3f31d2fea4b033f44eaa828e508f03391.tar.bz2 |
Issue #12016: Multibyte CJK decoders now resynchronize faster.
On an invalid byte sequence, they now skip only its first byte instead of the whole sequence.
For example, b'\xff\n'.decode('gb2312', 'replace') now gives '\ufffd\n' instead of
'\ufffd'.
Diffstat (limited to 'Modules/cjkcodecs/_codecs_kr.c')
-rw-r--r-- | Modules/cjkcodecs/_codecs_kr.c | 18 |
1 file changed, 9 insertions, 9 deletions
diff --git a/Modules/cjkcodecs/_codecs_kr.c b/Modules/cjkcodecs/_codecs_kr.c index 9272e36..f5697dd 100644 --- a/Modules/cjkcodecs/_codecs_kr.c +++ b/Modules/cjkcodecs/_codecs_kr.c @@ -123,7 +123,7 @@ DECODER(euc_kr) if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE || (*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE || (*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE) - return 8; + return 1; c = (*inbuf)[3]; if (0xa1 <= c && c <= 0xbe) @@ -143,7 +143,7 @@ DECODER(euc_kr) jong = NONE; if (cho == NONE || jung == NONE || jong == NONE) - return 8; + return 1; OUT1(0xac00 + cho*588 + jung*28 + jong); NEXT(8, 1) @@ -152,7 +152,7 @@ DECODER(euc_kr) NEXT(2, 1) } else - return 2; + return 1; } return 0; @@ -208,7 +208,7 @@ DECODER(cp949) REQUIRE_INBUF(2) TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80); else TRYMAP_DEC(cp949ext, **outbuf, c, IN2); - else return 2; + else return 1; NEXT(2, 1) } @@ -375,7 +375,7 @@ DECODER(johab) i_jong = johabidx_jongseong[c_jong]; if (i_cho == NONE || i_jung == NONE || i_jong == NONE) - return 2; + return 1; /* we don't use U+1100 hangul jamo yet. */ if (i_cho == FILL) { @@ -391,7 +391,7 @@ DECODER(johab) OUT1(0x3100 | johabjamo_jungseong[c_jung]) else - return 2; + return 1; } } else { if (i_jung == FILL) { @@ -399,7 +399,7 @@ DECODER(johab) OUT1(0x3100 | johabjamo_choseong[c_cho]) else - return 2; + return 1; } else OUT1(0xac00 + @@ -414,7 +414,7 @@ DECODER(johab) c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) || (c2 & 0x7f) == 0x7f || (c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3))) - return 2; + return 1; else { unsigned char t1, t2; @@ -425,7 +425,7 @@ DECODER(johab) t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21; TRYMAP_DEC(ksx1001, **outbuf, t1, t2); - else return 2; + else return 1; NEXT(2, 1) } } |