summary | refs | log | tree | commit | diff | stats
path: root/Modules/main.c
diff options
context:
space:
mode:
author Victor Stinner <victor.stinner@haypocalc.com> 2010-10-07 21:45:39 (GMT)
committer Victor Stinner <victor.stinner@haypocalc.com> 2010-10-07 21:45:39 (GMT)
commit 4e31443c4d2c1fb211a6ea90fc6a8fbd9ff81c97 (patch)
tree 5fb0a5fc704c00fcdd2b9885ac0896ab2f971309 /Modules/main.c
parent 7ae7c87b058137537bdc2b7f1d8e585aa0245c1c (diff)
download cpython-4e31443c4d2c1fb211a6ea90fc6a8fbd9ff81c97.zip
cpython-4e31443c4d2c1fb211a6ea90fc6a8fbd9ff81c97.tar.gz
cpython-4e31443c4d2c1fb211a6ea90fc6a8fbd9ff81c97.tar.bz2
Create fileutils.c/.h
* _Py_fopen() and _Py_stat() come from Python/import.c
* (_Py)_wrealpath() comes from Python/sysmodule.c
* _Py_char2wchar(), _Py_wchar2char() and _Py_wfopen() come from Modules/main.c
* (_Py)_wstat(), (_Py)_wgetcwd(), _Py_wreadlink() come from Modules/getpath.c
Diffstat (limited to 'Modules/main.c')
-rw-r--r-- Modules/main.c 208
1 files changed, 0 insertions, 208 deletions
diff --git a/Modules/main.c b/Modules/main.c
index 5e9f82a..4a10d93 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -104,31 +104,6 @@ static char *usage_5 =
#endif
;
-FILE *
-_Py_wfopen(const wchar_t *path, const wchar_t *mode)
-{
-#ifndef MS_WINDOWS
- FILE *f;
- char *cpath;
- char cmode[10];
- size_t r;
- r = wcstombs(cmode, mode, 10);
- if (r == (size_t)-1 || r >= 10) {
- errno = EINVAL;
- return NULL;
- }
- cpath = _Py_wchar2char(path);
- if (cpath == NULL)
- return NULL;
- f = fopen(cpath, cmode);
- PyMem_Free(cpath);
- return f;
-#else
- return _wfopen(path, mode);
-#endif
-}
-
-
static int
usage(int exitcode, wchar_t* program)
{
@@ -756,189 +731,6 @@ Py_GetArgcArgv(int *argc, wchar_t ***argv)
*argv = orig_argv;
}
-
-/* Encode a (wide) character string to the locale encoding with the
- surrogateescape error handler (characters in range U+DC80..U+DCFF are
- converted to bytes 0x80..0xFF).
-
- This function is the reverse of _Py_char2wchar().
-
- Return a pointer to a newly allocated byte string (use PyMem_Free() to free
- the memory), or NULL on error (conversion error or memory error). */
-char*
-_Py_wchar2char(const wchar_t *text)
-{
- const size_t len = wcslen(text);
- char *result = NULL, *bytes = NULL;
- size_t i, size, converted;
- wchar_t c, buf[2];
-
- /* The function works in two steps:
- 1. compute the length of the output buffer in bytes (size)
- 2. outputs the bytes */
- size = 0;
- buf[1] = 0;
- while (1) {
- for (i=0; i < len; i++) {
- c = text[i];
- if (c >= 0xdc80 && c <= 0xdcff) {
- /* UTF-8b surrogate */
- if (bytes != NULL) {
- *bytes++ = c - 0xdc00;
- size--;
- }
- else
- size++;
- continue;
- }
- else {
- buf[0] = c;
- if (bytes != NULL)
- converted = wcstombs(bytes, buf, size);
- else
- converted = wcstombs(NULL, buf, 0);
- if (converted == (size_t)-1) {
- if (result != NULL)
- PyMem_Free(result);
- return NULL;
- }
- if (bytes != NULL) {
- bytes += converted;
- size -= converted;
- }
- else
- size += converted;
- }
- }
- if (result != NULL) {
- *bytes = 0;
- break;
- }
-
- size += 1; /* nul byte at the end */
- result = PyMem_Malloc(size);
- if (result == NULL)
- return NULL;
- bytes = result;
- }
- return result;
-}
-
-
-/* Decode a byte string from the locale encoding with the
- surrogateescape error handler (undecodable bytes are decoded as characters
- in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
- character, escape the bytes using the surrogateescape error handler instead
- of decoding them.
-
- Use _Py_wchar2char() to encode the character string back to a byte string.
-
- Return a pointer to a newly allocated (wide) character string (use
- PyMem_Free() to free the memory), or NULL on error (conversion error or
- memory error). */
-wchar_t*
-_Py_char2wchar(char* arg)
-{
- wchar_t *res;
-#ifdef HAVE_BROKEN_MBSTOWCS
- /* Some platforms have a broken implementation of
- * mbstowcs which does not count the characters that
- * would result from conversion. Use an upper bound.
- */
- size_t argsize = strlen(arg);
-#else
- size_t argsize = mbstowcs(NULL, arg, 0);
-#endif
- size_t count;
- unsigned char *in;
- wchar_t *out;
-#ifdef HAVE_MBRTOWC
- mbstate_t mbs;
-#endif
- if (argsize != (size_t)-1) {
- res = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
- if (!res)
- goto oom;
- count = mbstowcs(res, arg, argsize+1);
- if (count != (size_t)-1) {
- wchar_t *tmp;
- /* Only use the result if it contains no
- surrogate characters. */
- for (tmp = res; *tmp != 0 &&
- (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
- ;
- if (*tmp == 0)
- return res;
- }
- PyMem_Free(res);
- }
- /* Conversion failed. Fall back to escaping with surrogateescape. */
-#ifdef HAVE_MBRTOWC
- /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */
-
- /* Overallocate; as multi-byte characters are in the argument, the
- actual output could use less memory. */
- argsize = strlen(arg) + 1;
- res = (wchar_t*)PyMem_Malloc(argsize*sizeof(wchar_t));
- if (!res) goto oom;
- in = (unsigned char*)arg;
- out = res;
- memset(&mbs, 0, sizeof mbs);
- while (argsize) {
- size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
- if (converted == 0)
- /* Reached end of string; null char stored. */
- break;
- if (converted == (size_t)-2) {
- /* Incomplete character. This should never happen,
- since we provide everything that we have -
- unless there is a bug in the C library, or I
- misunderstood how mbrtowc works. */
- fprintf(stderr, "unexpected mbrtowc result -2\n");
- return NULL;
- }
- if (converted == (size_t)-1) {
- /* Conversion error. Escape as UTF-8b, and start over
- in the initial shift state. */
- *out++ = 0xdc00 + *in++;
- argsize--;
- memset(&mbs, 0, sizeof mbs);
- continue;
- }
- if (*out >= 0xd800 && *out <= 0xdfff) {
- /* Surrogate character. Escape the original
- byte sequence with surrogateescape. */
- argsize -= converted;
- while (converted--)
- *out++ = 0xdc00 + *in++;
- continue;
- }
- /* successfully converted some bytes */
- in += converted;
- argsize -= converted;
- out++;
- }
-#else
- /* Cannot use C locale for escaping; manually escape as if charset
- is ASCII (i.e. escape all bytes >= 128). This will still roundtrip
- correctly in the locale's charset, which must be an ASCII superset. */
- res = PyMem_Malloc((strlen(arg)+1)*sizeof(wchar_t));
- if (!res) goto oom;
- in = (unsigned char*)arg;
- out = res;
- while(*in)
- if(*in < 128)
- *out++ = *in++;
- else
- *out++ = 0xdc00 + *in++;
- *out = 0;
-#endif
- return res;
-oom:
- fprintf(stderr, "out of memory\n");
- return NULL;
-}
-
#ifdef __cplusplus
}
#endif