summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Xiang Zhang <angwerzx@126.com> 2017-05-22 14:42:05 (GMT)
committer GitHub <noreply@github.com> 2017-05-22 14:42:05 (GMT)
commit 89a5e03244370f41ce9bed5cea38e0dd620edb73 (patch)
tree 604c02dcee1283840d77863004af88397654ccfe
parent 15033d145b77207cea82267aa2fe5f1c2b71d3bd (diff)
download cpython-89a5e03244370f41ce9bed5cea38e0dd620edb73.zip
cpython-89a5e03244370f41ce9bed5cea38e0dd620edb73.tar.gz
cpython-89a5e03244370f41ce9bed5cea38e0dd620edb73.tar.bz2
bpo-30003: Fix handling escape characters in HZ codec (#1556)
-rw-r--r-- Lib/test/test_codecencodings_cn.py 4
-rw-r--r-- Misc/NEWS 3
-rw-r--r-- Modules/cjkcodecs/_codecs_cn.c 25
3 files changed, 19 insertions, 13 deletions
diff --git a/Lib/test/test_codecencodings_cn.py b/Lib/test/test_codecencodings_cn.py
index c8a410c..2a45071 100644
--- a/Lib/test/test_codecencodings_cn.py
+++ b/Lib/test/test_codecencodings_cn.py
@@ -86,6 +86,10 @@ class Test_HZ(multibytecodec_support.TestBase, unittest.TestCase):
(b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'),
(b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'),
(b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"),
+ # issue 30003
+ ('ab~cd', 'strict', b'ab~~cd'), # escape ~
+ (b'~{Dc~~:C~}', 'strict', None), # ~~ only in ASCII mode
+ (b'~{Dc~\n:C~}', 'strict', None), # ~\n only in ASCII mode
)
if __name__ == "__main__":
diff --git a/Misc/NEWS b/Misc/NEWS
index 74f7922..5a7e377 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -334,6 +334,9 @@ Extension Modules
Library
-------
+- bpo-30003: Fix handling escape characters in HZ codec. Based on patch
+ by Ma Lin.
+
- bpo-30149: inspect.signature() now supports callables with
variable-argument parameters wrapped with partialmethod.
Patch by Dong-hee Na.
diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c
index bda175c..1fcc220 100644
--- a/Modules/cjkcodecs/_codecs_cn.c
+++ b/Modules/cjkcodecs/_codecs_cn.c
@@ -350,15 +350,17 @@ ENCODER(hz)
DBCHAR code;
if (c < 0x80) {
- if (state->i == 0) {
- WRITEBYTE1((unsigned char)c);
- NEXT(1, 1);
- }
- else {
- WRITEBYTE3('~', '}', (unsigned char)c);
- NEXT(1, 3);
+ if (state->i) {
+ WRITEBYTE2('~', '}');
+ NEXT_OUT(2);
state->i = 0;
}
+ WRITEBYTE1((unsigned char)c);
+ NEXT(1, 1);
+ if (c == '~') {
+ WRITEBYTE1('~');
+ NEXT_OUT(1);
+ }
continue;
}
@@ -409,17 +411,14 @@ DECODER(hz)
unsigned char c2 = INBYTE2;
REQUIRE_INBUF(2);
- if (c2 == '~') {
+ if (c2 == '~' && state->i == 0)
OUTCHAR('~');
- NEXT_IN(2);
- continue;
- }
else if (c2 == '{' && state->i == 0)
state->i = 1; /* set GB */
+ else if (c2 == '\n' && state->i == 0)
+ ; /* line-continuation */
else if (c2 == '}' && state->i == 1)
state->i = 0; /* set ASCII */
- else if (c2 == '\n')
- ; /* line-continuation */
else
return 1;
NEXT_IN(2);