From 0740459248d1c01d393b52cff5f3a9b561e32e7c Mon Sep 17 00:00:00 2001
From: Amaury Forgeot d'Arc
Date: Mon, 12 May 2008 13:19:07 +0000
Subject: #2798: PyArg_ParseTuple did not correctly handle the "s" code in case
 of unicode strings with chars outside the 7bit ascii (s# was already
 correct).

This is necessary to allow python run from a non-ASCII directory, and seems
enough on some platforms, probably where the default PyUnicode encoding
(utf-8) is also the default filesystem encoding.
---
Review notes (text between "---" and the diffstat is not part of the commit):

  * test_s_code compared the result of PyArg_ParseTuple with "< 0".
    PyArg_ParseTuple returns 0 on failure and nonzero on success, so the
    check could never fire and the test passed even when parsing failed.
    The checks now use "!PyArg_ParseTuple(...)".
  * test_s_code leaked the tuple when PyUnicode_Decode failed, on both
    parse-failure paths and on the success path.  Every exit now releases it.
  * The hunk headers and the diffstat were recomputed for the added lines.
    The commit id above and the blob ids on the "index" line of
    Modules/_testcapimodule.c still describe the commit as originally
    published, not this revised text.

 Misc/NEWS                 |  4 ++++
 Modules/_testcapimodule.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 Python/getargs.c          | 16 ++++++++--------
 3 files changed, 54 insertions(+), 8 deletions(-)

diff --git a/Misc/NEWS b/Misc/NEWS
index 0af5a10..e40844e 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,10 @@ What's new in Python 3.0b1?
 Core and Builtins
 -----------------
 
+- Issue 2798: When parsing arguments with PyArg_ParseTuple, the "s" code now
+  allows any unicode string and returns a utf-8 encoded buffer, just like the
+  "s#" code already does.  The "z" code was corrected as well.
+
 - Issue 2801: fix bug in the float.is_integer method where a ValueError
   was sometimes incorrectly raised.
 
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index cdee975..2e68a66 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -475,6 +475,47 @@ test_k_code(PyObject *self)
 }
 
 
+/* Test the s and z codes for PyArg_ParseTuple.
+*/
+static PyObject *
+test_s_code(PyObject *self)
+{
+	/* Unicode strings should be accepted */
+	PyObject *tuple, *obj;
+	char *value;
+
+	tuple = PyTuple_New(1);
+	if (tuple == NULL)
+		return NULL;
+
+	obj = PyUnicode_Decode("t\xeate", strlen("t\xeate"),
+			       "latin-1", NULL);
+	if (obj == NULL) {
+		Py_DECREF(tuple);
+		return NULL;
+	}
+
+	/* PyTuple_SET_ITEM steals the reference to obj */
+	PyTuple_SET_ITEM(tuple, 0, obj);
+
+	/* These two blocks used to raise a TypeError:
+	 * "argument must be string without null bytes, not str"
+	 * PyArg_ParseTuple returns 0 (never a negative value) on failure.
+	 */
+	if (!PyArg_ParseTuple(tuple, "s:test_s_code1", &value)) {
+		Py_DECREF(tuple);
+		return NULL;
+	}
+
+	if (!PyArg_ParseTuple(tuple, "z:test_s_code2", &value)) {
+		Py_DECREF(tuple);
+		return NULL;
+	}
+
+	Py_DECREF(tuple);
+	Py_RETURN_NONE;
+}
+
 /* Test the u and u# codes for PyArg_ParseTuple. May leak memory in case
    of an error.
 */
@@ -952,6 +993,7 @@ static PyMethodDef TestMethods[] = {
 	{"codec_incrementaldecoder",
 	 (PyCFunction)codec_incrementaldecoder,	 METH_VARARGS},
 #endif
+	{"test_s_code",		(PyCFunction)test_s_code,	 METH_NOARGS},
 	{"test_u_code",		(PyCFunction)test_u_code,	 METH_NOARGS},
 	{"test_Z_code",		(PyCFunction)test_Z_code,	 METH_NOARGS},
 #ifdef WITH_THREAD
diff --git a/Python/getargs.c b/Python/getargs.c
index 2bbafdb..427a951 100644
--- a/Python/getargs.c
+++ b/Python/getargs.c
@@ -822,10 +822,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
 			}
 			else
 				return converterr("string", arg, msgbuf, bufsize);
-			/* XXX(gb): this test is completely wrong -- p is a
-			 * byte string while arg is a Unicode. I *think* it should
-			 * check against the size of uarg... */
-			if ((Py_ssize_t)strlen(*p) != PyUnicode_GetSize(arg))
+			if ((Py_ssize_t) strlen(*p) != PyString_GET_SIZE(uarg))
 				return converterr("string without null bytes",
 						  arg, msgbuf, bufsize);
 		}
@@ -874,11 +871,15 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
 				format++;
 			} else {
 				char **p = va_arg(*p_va, char **);
+				uarg = NULL;
 
 				if (arg == Py_None)
 					*p = 0;
-				else if (PyString_Check(arg))
+				else if (PyString_Check(arg)) {
+					/* Enable null byte check below */
+					uarg = arg;
 					*p = PyString_AS_STRING(arg);
+				}
 				else if (PyUnicode_Check(arg)) {
 					uarg = UNICODE_DEFAULT_ENCODING(arg);
 					if (uarg == NULL)
@@ -900,9 +901,8 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
 				}
 				format++;
 			}
-			/* XXX(gb): same comment as for 's' applies here... */
-			else if (*p != NULL &&
-				 (Py_ssize_t)strlen(*p) != PyUnicode_GetSize(arg))
+			else if (*p != NULL && uarg != NULL &&
+				 (Py_ssize_t) strlen(*p) != PyString_GET_SIZE(uarg))
 				return converterr(
 					"string without null bytes or None",
 					arg, msgbuf, bufsize);
-- 
cgit v0.12