summary | refs | log | tree | commit | diff | stats
path: root/Modules/cjkcodecs/_codecs_jp.c
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@haypocalc.com> 2011-07-07 23:45:13 (GMT)
committer: Victor Stinner <victor.stinner@haypocalc.com> 2011-07-07 23:45:13 (GMT)
commit: 2cded9c3f31d2fea4b033f44eaa828e508f03391 (patch)
tree: 1554d9f0baa575b7ae791ff1267c4e493a1b36bf /Modules/cjkcodecs/_codecs_jp.c
parent: 081fe46ff96bccb1a256c356443b625b467814c8 (diff)
download: cpython-2cded9c3f31d2fea4b033f44eaa828e508f03391.zip
cpython-2cded9c3f31d2fea4b033f44eaa828e508f03391.tar.gz
cpython-2cded9c3f31d2fea4b033f44eaa828e508f03391.tar.bz2
Issue #12016: Multibyte CJK decoders now resynchronize faster
They only ignore the first byte of an invalid byte sequence. For example, b'\xff\n'.decode('gb2312', 'replace') gives '\ufffd\n' instead of '\ufffd'.
Diffstat (limited to 'Modules/cjkcodecs/_codecs_jp.c')
-rw-r--r-- Modules/cjkcodecs/_codecs_jp.c 34
1 file changed, 17 insertions, 17 deletions
diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c
index a05e01b..a500696 100644
--- a/Modules/cjkcodecs/_codecs_jp.c
+++ b/Modules/cjkcodecs/_codecs_jp.c
@@ -112,7 +112,7 @@ DECODER(cp932)
TRYMAP_DEC(cp932ext, **outbuf, c, c2);
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
- return 2;
+ return 1;
c = (c < 0xe0 ? c - 0x81 : c - 0xc1);
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
@@ -120,7 +120,7 @@ DECODER(cp932)
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
TRYMAP_DEC(jisx0208, **outbuf, c, c2);
- else return 2;
+ else return 1;
}
else if (c >= 0xf0 && c <= 0xf9) {
if ((c2 >= 0x40 && c2 <= 0x7e) ||
@@ -128,10 +128,10 @@ DECODER(cp932)
OUT1(0xe000 + 188 * (c - 0xf0) +
(c2 < 0x80 ? c2 - 0x40 : c2 - 0x41))
else
- return 2;
+ return 1;
}
else
- return 2;
+ return 1;
NEXT(2, 1)
}
@@ -256,7 +256,7 @@ DECODER(euc_jis_2004)
NEXT(2, 1)
}
else
- return 2;
+ return 1;
}
else if (c == 0x8f) {
unsigned char c2, c3;
@@ -274,7 +274,7 @@ DECODER(euc_jis_2004)
continue;
}
else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ;
- else return 3;
+ else return 1;
NEXT(3, 1)
}
else {
@@ -300,7 +300,7 @@ DECODER(euc_jis_2004)
NEXT(2, 2)
continue;
}
- else return 2;
+ else return 1;
NEXT(2, 1)
}
}
@@ -388,7 +388,7 @@ DECODER(euc_jp)
NEXT(2, 1)
}
else
- return 2;
+ return 1;
}
else if (c == 0x8f) {
unsigned char c2, c3;
@@ -401,7 +401,7 @@ DECODER(euc_jp)
NEXT(3, 1)
}
else
- return 3;
+ return 1;
}
else {
unsigned char c2;
@@ -417,7 +417,7 @@ DECODER(euc_jp)
#endif
TRYMAP_DEC(jisx0208, **outbuf,
c ^ 0x80, c2 ^ 0x80) ;
- else return 2;
+ else return 1;
NEXT(2, 1)
}
}
@@ -502,7 +502,7 @@ DECODER(shift_jis)
REQUIRE_INBUF(2)
c2 = IN2;
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
- return 2;
+ return 1;
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
@@ -522,10 +522,10 @@ DECODER(shift_jis)
continue;
}
else
- return 2;
+ return 1;
}
else
- return 2;
+ return 1;
NEXT(1, 1) /* JIS X 0201 */
}
@@ -645,7 +645,7 @@ DECODER(shift_jis_2004)
REQUIRE_INBUF(2)
c2 = IN2;
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
- return 2;
+ return 1;
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
@@ -671,7 +671,7 @@ DECODER(shift_jis_2004)
NEXT_OUT(2)
}
else
- return 2;
+ return 1;
NEXT_IN(2)
}
else { /* Plane 2 */
@@ -689,13 +689,13 @@ DECODER(shift_jis_2004)
continue;
}
else
- return 2;
+ return 1;
NEXT(2, 1)
}
continue;
}
else
- return 2;
+ return 1;
NEXT(1, 1) /* JIS X 0201 */
}