summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Martin v. Löwis <martin@v.loewis.de> 2009-05-29 16:22:26 (GMT)
committer: Martin v. Löwis <martin@v.loewis.de> 2009-05-29 16:22:26 (GMT)
commit: 8ed91b2768137b35dd5cd45f25fa96ad53ba1066 (patch)
tree: 4686ac465f458492e3633b7ba2619d866426e7bc
parent: e23c8683a5e11307e4a3528826e0b84fa3fb5660 (diff)
download: cpython-8ed91b2768137b35dd5cd45f25fa96ad53ba1066.zip
cpython-8ed91b2768137b35dd5cd45f25fa96ad53ba1066.tar.gz
cpython-8ed91b2768137b35dd5cd45f25fa96ad53ba1066.tar.bz2
Issue #6097: Escape UTF-8 surrogates resulting from mbstowcs conversion
of the command line.
-rw-r--r-- Misc/NEWS 3
-rw-r--r-- Modules/python.c 20
2 files changed, 21 insertions, 2 deletions
diff --git a/Misc/NEWS b/Misc/NEWS
index b98a368..1e12773 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@ What's New in Python 3.1 release candidate 1?
Core and Builtins
-----------------
+- Issue #6097: Escape UTF-8 surrogates resulting from mbstowcs conversion
+ of the command line.
+
- Issue #6012: Add cleanup support to O& argument parsing.
- Issue #6089: Fixed str.format with certain invalid field specifiers
diff --git a/Modules/python.c b/Modules/python.c
index 13c6d5b..edd33f4 100644
--- a/Modules/python.c
+++ b/Modules/python.c
@@ -38,8 +38,16 @@ char2wchar(char* arg)
if (!res)
goto oom;
count = mbstowcs(res, arg, argsize+1);
- if (count != (size_t)-1)
- return res;
+ if (count != (size_t)-1) {
+ wchar_t *tmp;
+ /* Only use the result if it contains no
+ surrogate characters. */
+ for (tmp = res; *tmp != 0 &&
+ (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
+ ;
+ if (*tmp == 0)
+ return res;
+ }
PyMem_Free(res);
}
/* Conversion failed. Fall back to escaping with surrogateescape. */
@@ -75,6 +83,14 @@ char2wchar(char* arg)
memset(&mbs, 0, sizeof mbs);
continue;
}
+ if (*out >= 0xd800 && *out <= 0xdfff) {
+ /* Surrogate character. Escape the original
+ byte sequence with surrogateescape. */
+ argsize -= converted;
+ while (converted--)
+ *out++ = 0xdc00 + *in++;
+ continue;
+ }
/* successfully converted some bytes */
in += converted;
argsize -= converted;