summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ronald Oussoren <ronaldoussoren@mac.com> 2010-04-18 14:46:12 (GMT)
committer Ronald Oussoren <ronaldoussoren@mac.com> 2010-04-18 14:46:12 (GMT)
commit d61deca27cdef2d1e49aeb72d8556855420a77e1 (patch)
tree fa554317563eb6c130e7b6655453a787ead740cf
parent ceb513ab131aee1059f8bdc9cf9260bb306ef0e9 (diff)
download cpython-d61deca27cdef2d1e49aeb72d8556855420a77e1.zip
cpython-d61deca27cdef2d1e49aeb72d8556855420a77e1.tar.gz
cpython-d61deca27cdef2d1e49aeb72d8556855420a77e1.tar.bz2
Move _Py_char2wchar from python.c to main.c.
This fixes issue #8441: python.c is not included in the framework while main.c is, so without this patch you get a link error when building Python.framework on OS X.
-rw-r--r-- Include/Python.h 2
-rw-r--r-- Modules/main.c 104
-rw-r--r-- Modules/python.c 102
3 files changed, 105 insertions, 103 deletions
diff --git a/Include/Python.h b/Include/Python.h
index 9b26d16..8b038ac 100644
--- a/Include/Python.h
+++ b/Include/Python.h
@@ -126,7 +126,7 @@ extern "C" {
/* _Py_Mangle is defined in compile.c */
PyAPI_FUNC(PyObject*) _Py_Mangle(PyObject *p, PyObject *name);
-/* _Py_char2wchar lives in python.c */
+/* _Py_char2wchar lives in main.c */
PyAPI_FUNC(wchar_t *) _Py_char2wchar(char *);
#ifdef __cplusplus
}
diff --git a/Modules/main.c b/Modules/main.c
index fa2002f..24c36db 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -706,6 +706,110 @@ Py_GetArgcArgv(int *argc, wchar_t ***argv)
*argv = orig_argv;
}
+
+wchar_t*
+_Py_char2wchar(char* arg)
+{
+ wchar_t *res;
+#ifdef HAVE_BROKEN_MBSTOWCS
+ /* Some platforms have a broken implementation of
+ * mbstowcs which does not count the characters that
+ * would result from conversion. Use an upper bound.
+ */
+ size_t argsize = strlen(arg);
+#else
+ size_t argsize = mbstowcs(NULL, arg, 0);
+#endif
+ size_t count;
+ unsigned char *in;
+ wchar_t *out;
+#ifdef HAVE_MBRTOWC
+ mbstate_t mbs;
+#endif
+ if (argsize != (size_t)-1) {
+ res = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
+ if (!res)
+ goto oom;
+ count = mbstowcs(res, arg, argsize+1);
+ if (count != (size_t)-1) {
+ wchar_t *tmp;
+ /* Only use the result if it contains no
+ surrogate characters. */
+ for (tmp = res; *tmp != 0 &&
+ (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
+ ;
+ if (*tmp == 0)
+ return res;
+ }
+ PyMem_Free(res);
+ }
+ /* Conversion failed. Fall back to escaping with surrogateescape. */
+#ifdef HAVE_MBRTOWC
+ /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */
+
+ /* Overallocate; as multi-byte characters are in the argument, the
+ actual output could use less memory. */
+ argsize = strlen(arg) + 1;
+ res = (wchar_t*)PyMem_Malloc(argsize*sizeof(wchar_t));
+ if (!res) goto oom;
+ in = (unsigned char*)arg;
+ out = res;
+ memset(&mbs, 0, sizeof mbs);
+ while (argsize) {
+ size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
+ if (converted == 0)
+ /* Reached end of string; null char stored. */
+ break;
+ if (converted == (size_t)-2) {
+ /* Incomplete character. This should never happen,
+ since we provide everything that we have -
+ unless there is a bug in the C library, or I
+ misunderstood how mbrtowc works. */
+ fprintf(stderr, "unexpected mbrtowc result -2\n");
+ return NULL;
+ }
+ if (converted == (size_t)-1) {
+ /* Conversion error. Escape as UTF-8b, and start over
+ in the initial shift state. */
+ *out++ = 0xdc00 + *in++;
+ argsize--;
+ memset(&mbs, 0, sizeof mbs);
+ continue;
+ }
+ if (*out >= 0xd800 && *out <= 0xdfff) {
+ /* Surrogate character. Escape the original
+ byte sequence with surrogateescape. */
+ argsize -= converted;
+ while (converted--)
+ *out++ = 0xdc00 + *in++;
+ continue;
+ }
+ /* successfully converted some bytes */
+ in += converted;
+ argsize -= converted;
+ out++;
+ }
+#else
+ /* Cannot use C locale for escaping; manually escape as if charset
+ is ASCII (i.e. escape all bytes >= 128). This will still roundtrip
+ correctly in the locale's charset, which must be an ASCII superset. */
+ res = PyMem_Malloc((strlen(arg)+1)*sizeof(wchar_t));
+ if (!res) goto oom;
+ in = (unsigned char*)arg;
+ out = res;
+ while(*in)
+ if(*in < 128)
+ *out++ = *in++;
+ else
+ *out++ = 0xdc00 + *in++;
+ *out = 0;
+#endif
+ return res;
+oom:
+ fprintf(stderr, "out of memory\n");
+ return NULL;
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/Modules/python.c b/Modules/python.c
index f5d3870..03c3e2f 100644
--- a/Modules/python.c
+++ b/Modules/python.c
@@ -14,108 +14,6 @@ wmain(int argc, wchar_t **argv)
return Py_Main(argc, argv);
}
#else
-wchar_t*
-_Py_char2wchar(char* arg)
-{
- wchar_t *res;
-#ifdef HAVE_BROKEN_MBSTOWCS
- /* Some platforms have a broken implementation of
- * mbstowcs which does not count the characters that
- * would result from conversion. Use an upper bound.
- */
- size_t argsize = strlen(arg);
-#else
- size_t argsize = mbstowcs(NULL, arg, 0);
-#endif
- size_t count;
- unsigned char *in;
- wchar_t *out;
-#ifdef HAVE_MBRTOWC
- mbstate_t mbs;
-#endif
- if (argsize != (size_t)-1) {
- res = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
- if (!res)
- goto oom;
- count = mbstowcs(res, arg, argsize+1);
- if (count != (size_t)-1) {
- wchar_t *tmp;
- /* Only use the result if it contains no
- surrogate characters. */
- for (tmp = res; *tmp != 0 &&
- (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
- ;
- if (*tmp == 0)
- return res;
- }
- PyMem_Free(res);
- }
- /* Conversion failed. Fall back to escaping with surrogateescape. */
-#ifdef HAVE_MBRTOWC
- /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
-
- /* Overallocate; as multi-byte characters are in the argument, the
- actual output could use less memory. */
- argsize = strlen(arg) + 1;
- res = (wchar_t*)PyMem_Malloc(argsize*sizeof(wchar_t));
- if (!res) goto oom;
- in = (unsigned char*)arg;
- out = res;
- memset(&mbs, 0, sizeof mbs);
- while (argsize) {
- size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
- if (converted == 0)
- /* Reached end of string; null char stored. */
- break;
- if (converted == (size_t)-2) {
- /* Incomplete character. This should never happen,
- since we provide everything that we have -
- unless there is a bug in the C library, or I
- misunderstood how mbrtowc works. */
- fprintf(stderr, "unexpected mbrtowc result -2\n");
- return NULL;
- }
- if (converted == (size_t)-1) {
- /* Conversion error. Escape as UTF-8b, and start over
- in the initial shift state. */
- *out++ = 0xdc00 + *in++;
- argsize--;
- memset(&mbs, 0, sizeof mbs);
- continue;
- }
- if (*out >= 0xd800 && *out <= 0xdfff) {
- /* Surrogate character. Escape the original
- byte sequence with surrogateescape. */
- argsize -= converted;
- while (converted--)
- *out++ = 0xdc00 + *in++;
- continue;
- }
- /* successfully converted some bytes */
- in += converted;
- argsize -= converted;
- out++;
- }
-#else
- /* Cannot use C locale for escaping; manually escape as if charset
- is ASCII (i.e. escape all bytes > 128. This will still roundtrip
- correctly in the locale's charset, which must be an ASCII superset. */
- res = PyMem_Malloc((strlen(arg)+1)*sizeof(wchar_t));
- if (!res) goto oom;
- in = (unsigned char*)arg;
- out = res;
- while(*in)
- if(*in < 128)
- *out++ = *in++;
- else
- *out++ = 0xdc00 + *in++;
- *out = 0;
-#endif
- return res;
-oom:
- fprintf(stderr, "out of memory\n");
- return NULL;
-}
int
main(int argc, char **argv)