summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Xiang Zhang <angwerzx@126.com> 2017-05-22 17:03:00 (GMT)
committer GitHub <noreply@github.com> 2017-05-22 17:03:00 (GMT)
commit 54af41d42eebbe4c6afe6b34ebb0fb550de1e7ba (patch)
tree e7d7577bba787044850a6fc708de2fca0c280a2d
parent 0702cc01fa3dc52087a5d468fc5a4984f3697446 (diff)
download cpython-54af41d42eebbe4c6afe6b34ebb0fb550de1e7ba.zip
cpython-54af41d42eebbe4c6afe6b34ebb0fb550de1e7ba.tar.gz
cpython-54af41d42eebbe4c6afe6b34ebb0fb550de1e7ba.tar.bz2
bpo-30003: Fix handling escape characters in HZ codec (#1556) (#1719)
-rw-r--r-- Lib/test/test_codecencodings_cn.py 4
-rw-r--r-- Misc/NEWS 3
-rw-r--r-- Modules/cjkcodecs/_codecs_cn.c 25
3 files changed, 19 insertions, 13 deletions
diff --git a/Lib/test/test_codecencodings_cn.py b/Lib/test/test_codecencodings_cn.py
index c8a410c..2a45071 100644
--- a/Lib/test/test_codecencodings_cn.py
+++ b/Lib/test/test_codecencodings_cn.py
@@ -86,6 +86,10 @@ class Test_HZ(multibytecodec_support.TestBase, unittest.TestCase):
(b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'),
(b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'),
(b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"),
+ # issue 30003
+ ('ab~cd', 'strict', b'ab~~cd'), # escape ~
+ (b'~{Dc~~:C~}', 'strict', None), # ~~ only in ASCII mode
+ (b'~{Dc~\n:C~}', 'strict', None), # ~\n only in ASCII mode
)
if __name__ == "__main__":
diff --git a/Misc/NEWS b/Misc/NEWS
index d3c6868..ae15eef 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -43,6 +43,9 @@ Core and Builtins
Library
-------
+- bpo-30003: Fix handling escape characters in HZ codec. Based on patch
+ by Ma Lin.
+
- bpo-30301: Fix AttributeError when using SimpleQueue.empty() under
*spawn* and *forkserver* start methods.
diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c
index bda175c..1fcc220 100644
--- a/Modules/cjkcodecs/_codecs_cn.c
+++ b/Modules/cjkcodecs/_codecs_cn.c
@@ -350,15 +350,17 @@ ENCODER(hz)
DBCHAR code;
if (c < 0x80) {
- if (state->i == 0) {
- WRITEBYTE1((unsigned char)c);
- NEXT(1, 1);
- }
- else {
- WRITEBYTE3('~', '}', (unsigned char)c);
- NEXT(1, 3);
+ if (state->i) {
+ WRITEBYTE2('~', '}');
+ NEXT_OUT(2);
state->i = 0;
}
+ WRITEBYTE1((unsigned char)c);
+ NEXT(1, 1);
+ if (c == '~') {
+ WRITEBYTE1('~');
+ NEXT_OUT(1);
+ }
continue;
}
@@ -409,17 +411,14 @@ DECODER(hz)
unsigned char c2 = INBYTE2;
REQUIRE_INBUF(2);
- if (c2 == '~') {
+ if (c2 == '~' && state->i == 0)
OUTCHAR('~');
- NEXT_IN(2);
- continue;
- }
else if (c2 == '{' && state->i == 0)
state->i = 1; /* set GB */
+ else if (c2 == '\n' && state->i == 0)
+ ; /* line-continuation */
else if (c2 == '}' && state->i == 1)
state->i = 0; /* set ASCII */
- else if (c2 == '\n')
- ; /* line-continuation */
else
return 1;
NEXT_IN(2);