diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2009-05-29 16:22:26 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2009-05-29 16:22:26 (GMT) |
commit | 8ed91b2768137b35dd5cd45f25fa96ad53ba1066 (patch) | |
tree | 4686ac465f458492e3633b7ba2619d866426e7bc /Modules/python.c | |
parent | e23c8683a5e11307e4a3528826e0b84fa3fb5660 (diff) | |
download | cpython-8ed91b2768137b35dd5cd45f25fa96ad53ba1066.zip cpython-8ed91b2768137b35dd5cd45f25fa96ad53ba1066.tar.gz cpython-8ed91b2768137b35dd5cd45f25fa96ad53ba1066.tar.bz2 |
Issue #6097: Escape UTF-8 surrogates resulting from mbstowcs conversion
of the command line.
Diffstat (limited to 'Modules/python.c')
-rw-r--r-- | Modules/python.c | 20 |
1 file changed, 18 insertions, 2 deletions
diff --git a/Modules/python.c b/Modules/python.c
index 13c6d5b..edd33f4 100644
--- a/Modules/python.c
+++ b/Modules/python.c
@@ -38,8 +38,16 @@ char2wchar(char* arg)
         if (!res)
             goto oom;
         count = mbstowcs(res, arg, argsize+1);
-        if (count != (size_t)-1)
-            return res;
+        if (count != (size_t)-1) {
+            wchar_t *tmp;
+            /* Only use the result if it contains no
+               surrogate characters. */
+            for (tmp = res; *tmp != 0 &&
+                     (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
+                ;
+            if (*tmp == 0)
+                return res;
+        }
         PyMem_Free(res);
     }
     /* Conversion failed. Fall back to escaping with surrogateescape. */
@@ -75,6 +83,14 @@ char2wchar(char* arg)
             memset(&mbs, 0, sizeof mbs);
             continue;
         }
+        if (*out >= 0xd800 && *out <= 0xdfff) {
+            /* Surrogate character. Escape the original
+               byte sequence with surrogateescape. */
+            argsize -= converted;
+            while (converted--)
+                *out++ = 0xdc00 + *in++;
+            continue;
+        }
         /* successfully converted some bytes */
         in += converted;
         argsize -= converted;