diff options
-rw-r--r-- | Lib/test/test_codecs.py | 9 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Windows/2019-03-16-16-51-17.bpo-36312.Niwm-T.rst | 2 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 21 |
3 files changed, 27 insertions, 5 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index e8c7d76..3314493 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -3066,6 +3066,15 @@ class CodePageTest(unittest.TestCase):
             ('[\U0010ffff\uDC80]', 'replace', b'[\xf4\x8f\xbf\xbf?]'),
         ))
 
+    def test_code_page_decode_flags(self):
+        # Issue #36312: For some code pages (e.g. UTF-7) flags for
+        # MultiByteToWideChar() must be set to 0.
+        for cp in (50220, 50221, 50222, 50225, 50227, 50229,
+                   *range(57002, 57011+1), 65000):
+            self.assertEqual(codecs.code_page_decode(cp, b'abc'), ('abc', 3))
+        self.assertEqual(codecs.code_page_decode(42, b'abc'),
+                         ('\uf061\uf062\uf063', 3))
+
     def test_incremental(self):
         decoded = codecs.code_page_decode(932, b'\x82', 'strict', False)
         self.assertEqual(decoded, ('', 0))
diff --git a/Misc/NEWS.d/next/Windows/2019-03-16-16-51-17.bpo-36312.Niwm-T.rst b/Misc/NEWS.d/next/Windows/2019-03-16-16-51-17.bpo-36312.Niwm-T.rst
new file mode 100644
index 0000000..8b325db
--- /dev/null
+++ b/Misc/NEWS.d/next/Windows/2019-03-16-16-51-17.bpo-36312.Niwm-T.rst
@@ -0,0 +1,2 @@
+Fixed decoders for the following code pages: 50220, 50221, 50222, 50225,
+50227, 50229, 57002 through 57011, 65000 and 42.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 6e83ed6..8ab3943 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -7083,15 +7083,21 @@ decode_code_page_strict(UINT code_page,
                         const char *in,
                         int insize)
 {
-    const DWORD flags = decode_code_page_flags(code_page);
+    DWORD flags = MB_ERR_INVALID_CHARS;
     wchar_t *out;
     DWORD outsize;
 
     /* First get the size of the result */
     assert(insize > 0);
-    outsize = MultiByteToWideChar(code_page, flags, in, insize, NULL, 0);
-    if (outsize <= 0)
-        goto error;
+    while ((outsize = MultiByteToWideChar(code_page, flags,
+                                          in, insize, NULL, 0)) <= 0)
+    {
+        if (!flags || GetLastError() != ERROR_INVALID_FLAGS) {
+            goto error;
+        }
+        /* For some code pages (e.g. UTF-7) flags must be set to 0. */
+        flags = 0;
+    }
 
     /* Extend a wchar_t* buffer */
     Py_ssize_t n = *bufsize;   /* Get the current length */
@@ -7129,7 +7135,7 @@ decode_code_page_errors(UINT code_page,
 {
     const char *startin = in;
     const char *endin = in + size;
-    const DWORD flags = decode_code_page_flags(code_page);
+    DWORD flags = MB_ERR_INVALID_CHARS;
     /* Ideally, we should get reason from FormatMessage. This is the Windows
        2000 English version of the message. */
     const char *reason = "No mapping for the Unicode character exists "
@@ -7187,6 +7193,11 @@ decode_code_page_errors(UINT code_page,
             if (outsize > 0)
                 break;
             err = GetLastError();
+            if (err == ERROR_INVALID_FLAGS && flags) {
+                /* For some code pages (e.g. UTF-7) flags must be set to 0. */
+                flags = 0;
+                continue;
+            }
             if (err != ERROR_NO_UNICODE_TRANSLATION
                 && err != ERROR_INSUFFICIENT_BUFFER)
             {