summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAmaury Forgeot d'Arc <amauryfa@gmail.com>2008-05-12 13:19:07 (GMT)
committerAmaury Forgeot d'Arc <amauryfa@gmail.com>2008-05-12 13:19:07 (GMT)
commit0740459248d1c01d393b52cff5f3a9b561e32e7c (patch)
tree8f95cf7c27597429dd2a8eed7b88f45184fcd545
parente6161492fefafe039f416204289b97a084c771dc (diff)
downloadcpython-0740459248d1c01d393b52cff5f3a9b561e32e7c.zip
cpython-0740459248d1c01d393b52cff5f3a9b561e32e7c.tar.gz
cpython-0740459248d1c01d393b52cff5f3a9b561e32e7c.tar.bz2
#2798: PyArg_ParseTuple did not correctly handle the "s" code in case of unicode strings
with chars outside the 7-bit ASCII range (s# was already correct). This is necessary to allow Python to run from a non-ASCII directory, and seems to be enough on some platforms, probably those where the default PyUnicode encoding (utf-8) is also the default filesystem encoding.
-rw-r--r--Misc/NEWS4
-rw-r--r--Modules/_testcapimodule.c33
-rw-r--r--Python/getargs.c16
3 files changed, 45 insertions, 8 deletions
diff --git a/Misc/NEWS b/Misc/NEWS
index 0af5a10..e40844e 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,10 @@ What's new in Python 3.0b1?
Core and Builtins
-----------------
+- Issue 2798: When parsing arguments with PyArg_ParseTuple, the "s" code now
+ allows any unicode string and returns a utf-8 encoded buffer, just like the
+ "s#" code already does. The "z" code was corrected as well.
+
- Issue 2801: fix bug in the float.is_integer method where a ValueError
was sometimes incorrectly raised.
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index cdee975..2e68a66 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -475,6 +475,38 @@ test_k_code(PyObject *self)
}
+/* Test the s and z codes for PyArg_ParseTuple: both must accept a unicode
+   string containing non-ASCII characters.  Returns None, or NULL on error. */
+static PyObject *
+test_s_code(PyObject *self)
+{
+    PyObject *tuple, *obj, *result = NULL;
+    char *value;
+
+    tuple = PyTuple_New(1);
+    if (tuple == NULL)
+        return NULL;
+
+    obj = PyUnicode_Decode("t\xeate", strlen("t\xeate"), "latin-1", NULL);
+    if (obj == NULL)
+        goto done;
+    PyTuple_SET_ITEM(tuple, 0, obj);  /* tuple now owns obj */
+
+    /* These two calls used to raise a TypeError ("argument must be string
+     * without null bytes, not str").  PyArg_ParseTuple returns 0 (never a
+     * negative value) on failure. */
+    if (!PyArg_ParseTuple(tuple, "s:test_s_code1", &value))
+        goto done;
+    if (!PyArg_ParseTuple(tuple, "z:test_s_code2", &value))
+        goto done;
+
+    result = Py_None;
+    Py_INCREF(result);
+done:
+    Py_DECREF(tuple);  /* release the tuple (and obj) on every path */
+    return result;
+}
+
/* Test the u and u# codes for PyArg_ParseTuple. May leak memory in case
of an error.
*/
@@ -952,6 +984,7 @@ static PyMethodDef TestMethods[] = {
{"codec_incrementaldecoder",
(PyCFunction)codec_incrementaldecoder, METH_VARARGS},
#endif
+ {"test_s_code", (PyCFunction)test_s_code, METH_NOARGS},
{"test_u_code", (PyCFunction)test_u_code, METH_NOARGS},
{"test_Z_code", (PyCFunction)test_Z_code, METH_NOARGS},
#ifdef WITH_THREAD
diff --git a/Python/getargs.c b/Python/getargs.c
index 2bbafdb..427a951 100644
--- a/Python/getargs.c
+++ b/Python/getargs.c
@@ -822,10 +822,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
}
else
return converterr("string", arg, msgbuf, bufsize);
- /* XXX(gb): this test is completely wrong -- p is a
- * byte string while arg is a Unicode. I *think* it should
- * check against the size of uarg... */
- if ((Py_ssize_t)strlen(*p) != PyUnicode_GetSize(arg))
+ if ((Py_ssize_t) strlen(*p) != PyString_GET_SIZE(uarg))
return converterr("string without null bytes",
arg, msgbuf, bufsize);
}
@@ -874,11 +871,15 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
format++;
} else {
char **p = va_arg(*p_va, char **);
+ uarg = NULL;
if (arg == Py_None)
*p = 0;
- else if (PyString_Check(arg))
+ else if (PyString_Check(arg)) {
+ /* Enable null byte check below */
+ uarg = arg;
*p = PyString_AS_STRING(arg);
+ }
else if (PyUnicode_Check(arg)) {
uarg = UNICODE_DEFAULT_ENCODING(arg);
if (uarg == NULL)
@@ -900,9 +901,8 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
}
format++;
}
- /* XXX(gb): same comment as for 's' applies here... */
- else if (*p != NULL &&
- (Py_ssize_t)strlen(*p) != PyUnicode_GetSize(arg))
+ else if (*p != NULL && uarg != NULL &&
+ (Py_ssize_t) strlen(*p) != PyString_GET_SIZE(uarg))
return converterr(
"string without null bytes or None",
arg, msgbuf, bufsize);