From fb67be2f6b12e6ab07c17ece1caaf0057d610252 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 29 Aug 2007 18:38:11 +0000 Subject: Three patches from issue #1047, by Amaury Forgeot d'Arc: 1/ getargs.diff adds the 'Z' and 'Z#' format specifiers for PyArg_ParseTuple. They mimic z and z# for unicode strings, by accepting a Unicode or None (in which case the Py_UNICODE* pointer is set to NULL). With doc and tests. 2/ subprocess.diff converts file PC/_subprocess.c to unicode. We use the Unicode version of the win32 api (and Z conversion from previous patch) 3/ stdout.diff: sys.stdout must not convert the line endings, Windows already does it. Without this patch, when redirecting the output of python, the file contains \r\r\n for each line. (test_subprocess did catch this) However, I (GvR) removed the change to _fileio.c (included in the patches) that prevents file descriptors < 3 from being closed; I think that needs to be solved in a different way. --- Doc/c-api/utilities.rst | 7 ++++++ Lib/site.py | 6 +++--- Modules/_testcapimodule.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++ PC/_subprocess.c | 55 ++++++++++++++++++++++------------------------- Python/getargs.c | 28 ++++++++++++++++++++++++ 5 files changed, 118 insertions(+), 32 deletions(-) diff --git a/Doc/c-api/utilities.rst b/Doc/c-api/utilities.rst index 01c1ceb..8999316 100644 --- a/Doc/c-api/utilities.rst +++ b/Doc/c-api/utilities.rst @@ -484,6 +484,13 @@ variable(s) whose address should be passed. by interpreting their read-buffer pointer as pointer to a :ctype:`Py_UNICODE` array. +``Z`` (Unicode or ``None``) [Py_UNICODE \*] + Like ``u``, but the Python object may also be ``None``, in which case the C + pointer is set to *NULL*. + +``Z#`` (Unicode or ``None``) [Py_UNICODE \*, int] + This is to ``u#`` as ``Z`` is to ``u``. 
+ ``es`` (string, Unicode object or character buffer compatible object) [const char \*encoding, char \*\*buffer] This variant on ``s`` is used for encoding Unicode and objects convertible to Unicode into a character buffer. It only works for encoded data without embedded diff --git a/Lib/site.py b/Lib/site.py index 30c54b0..53e859e 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -414,9 +414,9 @@ def installnewio(): def __new__(cls, *args, **kwds): return io.open(*args, **kwds) __builtin__.open = open - sys.__stdin__ = sys.stdin = io.open(0, "r") - sys.__stdout__ = sys.stdout = io.open(1, "w") - sys.__stderr__ = sys.stderr = io.open(2, "w") + sys.__stdin__ = sys.stdin = io.open(0, "r", newline='\n') + sys.__stdout__ = sys.stdout = io.open(1, "w", newline='\n') + sys.__stderr__ = sys.stderr = io.open(2, "w", newline='\n') def main(): diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index ca159e5..b76c713 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -497,6 +497,59 @@ test_u_code(PyObject *self) return Py_None; } +/* Test Z and Z# codes for PyArg_ParseTuple */ +static PyObject * +test_Z_code(PyObject *self) +{ + PyObject *tuple, *obj; + Py_UNICODE *value1, *value2; + int len1, len2; + + tuple = PyTuple_New(2); + if (tuple == NULL) + return NULL; + + obj = PyUnicode_FromString("test"); + PyTuple_SET_ITEM(tuple, 0, obj); + Py_INCREF(Py_None); + PyTuple_SET_ITEM(tuple, 1, Py_None); + + /* swap values on purpose */ + value1 = NULL; + value2 = PyUnicode_AS_UNICODE(obj); + + /* Test Z for both values */ + if (PyArg_ParseTuple(tuple, "ZZ:test_Z_code", &value1, &value2) < 0) + return NULL; + if (value1 != PyUnicode_AS_UNICODE(obj)) + return raiseTestError("test_Z_code", + "Z code returned wrong value for 'test'"); + if (value2 != NULL) + return raiseTestError("test_Z_code", + "Z code returned wrong value for None"); + + value1 = NULL; + value2 = PyUnicode_AS_UNICODE(obj); + len1 = -1; + len2 = -1; + + /* Test Z# for both values */ + 
if (PyArg_ParseTuple(tuple, "Z#Z#:test_Z_code", &value1, &len1, + &value2, &len2) < 0) + return NULL; + if (value1 != PyUnicode_AS_UNICODE(obj) || + len1 != PyUnicode_GET_SIZE(obj)) + return raiseTestError("test_Z_code", + "Z# code returned wrong values for 'test'"); + if (value2 != NULL || + len2 != 0) + return raiseTestError("test_Z_code", + "Z# code returned wrong values for None'"); + + Py_DECREF(tuple); + Py_RETURN_NONE; +} + static PyObject * codec_incrementalencoder(PyObject *self, PyObject *args) { @@ -862,6 +915,7 @@ static PyMethodDef TestMethods[] = { (PyCFunction)codec_incrementaldecoder, METH_VARARGS}, #endif {"test_u_code", (PyCFunction)test_u_code, METH_NOARGS}, + {"test_Z_code", (PyCFunction)test_Z_code, METH_NOARGS}, #ifdef WITH_THREAD {"_test_thread_state", test_thread_state, METH_VARARGS}, #endif diff --git a/PC/_subprocess.c b/PC/_subprocess.c index 14e1333..e711e13 100644 --- a/PC/_subprocess.c +++ b/PC/_subprocess.c @@ -35,9 +35,6 @@ /* Licensed to PSF under a Contributor Agreement. */ /* See http://www.python.org/2.4/license for licensing details. */ -/* TODO: handle unicode command lines? */ -/* TODO: handle unicode environment? */ - #include "Python.h" #define WINDOWS_LEAN_AND_MEAN @@ -272,7 +269,7 @@ gethandle(PyObject* obj, char* name) PyErr_Clear(); /* FIXME: propagate error? */ return NULL; } - if (Py_Type(&value) != &sp_handle_type) + if (Py_Type(value) != &sp_handle_type) ret = NULL; else ret = value->handle; @@ -287,7 +284,7 @@ getenvironment(PyObject* environment) PyObject* out = NULL; PyObject* keys; PyObject* values; - char* p; + Py_UNICODE* p; /* convert environment dictionary to windows enviroment string */ if (! PyMapping_Check(environment)) { @@ -303,42 +300,42 @@ getenvironment(PyObject* environment) if (!keys || !values) goto error; - out = PyString_FromStringAndSize(NULL, 2048); + out = PyUnicode_FromUnicode(NULL, 2048); if (! 
out) goto error; - p = PyString_AS_STRING(out); + p = PyUnicode_AS_UNICODE(out); for (i = 0; i < envsize; i++) { int ksize, vsize, totalsize; PyObject* key = PyList_GET_ITEM(keys, i); PyObject* value = PyList_GET_ITEM(values, i); - if (! PyString_Check(key) || ! PyString_Check(value)) { + if (! PyUnicode_Check(key) || ! PyUnicode_Check(value)) { PyErr_SetString(PyExc_TypeError, "environment can only contain strings"); goto error; } - ksize = PyString_GET_SIZE(key); - vsize = PyString_GET_SIZE(value); - totalsize = (p - PyString_AS_STRING(out)) + ksize + 1 + + ksize = PyUnicode_GET_SIZE(key); + vsize = PyUnicode_GET_SIZE(value); + totalsize = (p - PyUnicode_AS_UNICODE(out)) + ksize + 1 + vsize + 1 + 1; - if (totalsize > PyString_GET_SIZE(out)) { - int offset = p - PyString_AS_STRING(out); - _PyString_Resize(&out, totalsize + 1024); - p = PyString_AS_STRING(out) + offset; + if (totalsize > PyUnicode_GET_SIZE(out)) { + int offset = p - PyUnicode_AS_UNICODE(out); + PyUnicode_Resize(&out, totalsize + 1024); + p = PyUnicode_AS_UNICODE(out) + offset; } - memcpy(p, PyString_AS_STRING(key), ksize); + Py_UNICODE_COPY(p, PyUnicode_AS_UNICODE(key), ksize); p += ksize; *p++ = '='; - memcpy(p, PyString_AS_STRING(value), vsize); + Py_UNICODE_COPY(p, PyUnicode_AS_UNICODE(value), vsize); p += vsize; *p++ = '\0'; } /* add trailing null byte */ *p++ = '\0'; - _PyString_Resize(&out, p - PyString_AS_STRING(out)); + PyUnicode_Resize(&out, p - PyUnicode_AS_UNICODE(out)); /* PyObject_Print(out, stdout, 0); */ @@ -359,20 +356,20 @@ sp_CreateProcess(PyObject* self, PyObject* args) { BOOL result; PROCESS_INFORMATION pi; - STARTUPINFO si; + STARTUPINFOW si; PyObject* environment; - char* application_name; - char* command_line; + Py_UNICODE* application_name; + Py_UNICODE* command_line; PyObject* process_attributes; /* ignored */ PyObject* thread_attributes; /* ignored */ int inherit_handles; int creation_flags; PyObject* env_mapping; - char* current_directory; + Py_UNICODE* current_directory; 
PyObject* startup_info; - if (! PyArg_ParseTuple(args, "zzOOiiOzO:CreateProcess", + if (! PyArg_ParseTuple(args, "ZZOOiiOZO:CreateProcess", &application_name, &command_line, &process_attributes, @@ -406,13 +403,13 @@ sp_CreateProcess(PyObject* self, PyObject* args) } Py_BEGIN_ALLOW_THREADS - result = CreateProcess(application_name, + result = CreateProcessW(application_name, command_line, NULL, NULL, inherit_handles, - creation_flags, - environment ? PyString_AS_STRING(environment) : NULL, + creation_flags | CREATE_UNICODE_ENVIRONMENT, + environment ? PyUnicode_AS_UNICODE(environment) : NULL, current_directory, &si, &pi); @@ -504,18 +501,18 @@ sp_GetModuleFileName(PyObject* self, PyObject* args) { BOOL result; long module; - TCHAR filename[MAX_PATH]; + WCHAR filename[MAX_PATH]; if (! PyArg_ParseTuple(args, "l:GetModuleFileName", &module)) return NULL; - result = GetModuleFileName((HMODULE)module, filename, MAX_PATH); + result = GetModuleFileNameW((HMODULE)module, filename, MAX_PATH); filename[MAX_PATH-1] = '\0'; if (! 
result) return PyErr_SetFromWindowsErr(GetLastError()); - return PyString_FromString(filename); + return PyUnicode_FromUnicode(filename, Py_UNICODE_strlen(filename)); } static PyMethodDef sp_functions[] = { diff --git a/Python/getargs.c b/Python/getargs.c index a0aa872..8d90111 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -921,6 +921,34 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, break; } + case 'Z': {/* unicode, may be NULL (None) */ + if (*format == '#') { /* any buffer-like object */ + Py_UNICODE **p = va_arg(*p_va, Py_UNICODE **); + FETCH_SIZE; + + if (arg == Py_None) { + *p = 0; + STORE_SIZE(0); + } + else if (PyUnicode_Check(arg)) { + *p = PyUnicode_AS_UNICODE(arg); + STORE_SIZE(PyUnicode_GET_SIZE(arg)); + } + format++; + } else { + Py_UNICODE **p = va_arg(*p_va, Py_UNICODE **); + + if (arg == Py_None) + *p = 0; + else if (PyUnicode_Check(arg)) + *p = PyUnicode_AS_UNICODE(arg); + else + return converterr("string or None", + arg, msgbuf, bufsize); + } + break; + } + case 'e': {/* encoded string */ char **buffer; const char *encoding; -- cgit v0.12