summaryrefslogtreecommitdiffstats
path: root/Modules/python.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/python.c')
-rw-r--r--Modules/python.c113
1 files changed, 89 insertions, 24 deletions
diff --git a/Modules/python.c b/Modules/python.c
index f6da86f..4c0a55b 100644
--- a/Modules/python.c
+++ b/Modules/python.c
@@ -14,6 +14,93 @@ wmain(int argc, wchar_t **argv)
return Py_Main(argc, argv);
}
#else
+static wchar_t*
+char2wchar(char* arg)
+{
+ wchar_t *res;
+#ifdef HAVE_BROKEN_MBSTOWCS
+ /* Some platforms have a broken implementation of
+ * mbstowcs which does not count the characters that
+ * would result from conversion. Use an upper bound.
+ */
+ size_t argsize = strlen(arg);
+#else
+ size_t argsize = mbstowcs(NULL, arg, 0);
+#endif
+ size_t count;
+ unsigned char *in;
+ wchar_t *out;
+#ifdef HAVE_MBRTOWC
+ mbstate_t mbs;
+#endif
+ if (argsize != (size_t)-1) {
+ res = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
+ if (!res)
+ goto oom;
+ count = mbstowcs(res, arg, argsize+1);
+ if (count != (size_t)-1)
+ return res;
+ PyMem_Free(res);
+ }
+ /* Conversion failed. Fall back to escaping with utf8b. */
+#ifdef HAVE_MBRTOWC
+ /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
+
+ /* Overallocate; as multi-byte characters are in the argument, the
+ actual output could use less memory. */
+ argsize = strlen(arg) + 1;
+ res = PyMem_Malloc(argsize*sizeof(wchar_t));
+ if (!res) goto oom;
+ in = (unsigned char*)arg;
+ out = res;
+ memset(&mbs, 0, sizeof mbs);
+ while (argsize) {
+ size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
+ if (converted == 0)
+ /* Reached end of string; null char stored. */
+ break;
+ if (converted == (size_t)-2) {
+ /* Incomplete character. This should never happen,
+ since we provide everything that we have -
+ unless there is a bug in the C library, or I
+ misunderstood how mbrtowc works. */
+ fprintf(stderr, "unexpected mbrtowc result -2\n");
+ return NULL;
+ }
+ if (converted == (size_t)-1) {
+ /* Conversion error. Escape as UTF-8b, and start over
+ in the initial shift state. */
+ *out++ = 0xdc00 + *in++;
+ argsize--;
+ memset(&mbs, 0, sizeof mbs);
+ continue;
+ }
+ /* successfully converted some bytes */
+ in += converted;
+ argsize -= converted;
+ out++;
+ }
+#else
+ /* Cannot use C locale for escaping; manually escape as if charset
+ is ASCII (i.e. escape all bytes > 128. This will still roundtrip
+ correctly in the locale's charset, which must be an ASCII superset. */
+ res = PyMem_Malloc((strlen(arg)+1)*sizeof(wchar_t));
+ if (!res) goto oom;
+ in = (unsigned char*)arg;
+ out = res;
+ while(*in)
+ if(*in < 128)
+ *out++ = *in++;
+ else
+ *out++ = 0xdc00 + *in++;
+ *out = 0;
+#endif
+ return res;
+oom:
+ fprintf(stderr, "out of memory\n");
+ return NULL;
+}
+
int
main(int argc, char **argv)
{
@@ -40,31 +127,9 @@ main(int argc, char **argv)
oldloc = strdup(setlocale(LC_ALL, NULL));
setlocale(LC_ALL, "");
for (i = 0; i < argc; i++) {
-#ifdef HAVE_BROKEN_MBSTOWCS
- /* Some platforms have a broken implementation of
- * mbstowcs which does not count the characters that
- * would result from conversion. Use an upper bound.
- */
- size_t argsize = strlen(argv[i]);
-#else
- size_t argsize = mbstowcs(NULL, argv[i], 0);
-#endif
- size_t count;
- if (argsize == (size_t)-1) {
- fprintf(stderr, "Could not convert argument %d to string\n", i);
+ argv_copy2[i] = argv_copy[i] = char2wchar(argv[i]);
+ if (!argv_copy[i])
return 1;
- }
- argv_copy[i] = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
- argv_copy2[i] = argv_copy[i];
- if (!argv_copy[i]) {
- fprintf(stderr, "out of memory\n");
- return 1;
- }
- count = mbstowcs(argv_copy[i], argv[i], argsize+1);
- if (count == (size_t)-1) {
- fprintf(stderr, "Could not convert argument %d to string\n", i);
- return 1;
- }
}
setlocale(LC_ALL, oldloc);
free(oldloc);