From 0740459248d1c01d393b52cff5f3a9b561e32e7c Mon Sep 17 00:00:00 2001
From: Amaury Forgeot d'Arc <amauryfa@gmail.com>
Date: Mon, 12 May 2008 13:19:07 +0000
Subject: #2798: PyArg_ParseTuple did not correctly handle the "s" code in case
 of unicode strings with chars outside the 7bit ascii (s# was already
 correct).

This is necessary to allow Python to run from a non-ASCII directory,
and seems to be enough on some platforms, probably those where the default PyUnicode encoding (utf-8)
is also the default filesystem encoding.
---
 Misc/NEWS                 |  4 ++++
 Modules/_testcapimodule.c | 33 +++++++++++++++++++++++++++++++++
 Python/getargs.c          | 16 ++++++++--------
 3 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/Misc/NEWS b/Misc/NEWS
index 0af5a10..e40844e 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,10 @@ What's new in Python 3.0b1?
 Core and Builtins
 -----------------
 
+- Issue 2798: When parsing arguments with PyArg_ParseTuple, the "s" code now
+  allows any unicode string and returns a utf-8 encoded buffer, just like the
+  "s#" code already does.  The "z" code was corrected as well.
+
 - Issue 2801: fix bug in the float.is_integer method where a ValueError
   was sometimes incorrectly raised.
 
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index cdee975..2e68a66 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -475,6 +475,38 @@ test_k_code(PyObject *self)
 }
 
 
+/* Test the s and z codes for PyArg_ParseTuple: a unicode string holding a
+   non-ASCII character must be accepted.  Returns None, or NULL on error. */
+static PyObject *
+test_s_code(PyObject *self)
+{
+    PyObject *tuple, *obj;
+    char *value;
+
+    tuple = PyTuple_New(1);
+    if (tuple == NULL)
+        return NULL;
+
+    obj = PyUnicode_Decode("t\xeate", strlen("t\xeate"),
+                           "latin-1", NULL);
+    if (obj == NULL) {
+        Py_DECREF(tuple);
+        return NULL;
+    }
+    PyTuple_SET_ITEM(tuple, 0, obj);  /* steals the reference to obj */
+
+    /* Both calls used to raise a TypeError: "argument must be string
+     * without null bytes, not str".  PyArg_ParseTuple returns 0 (never a
+     * negative value) on failure, so test for false. */
+    if (!PyArg_ParseTuple(tuple, "s:test_s_code1", &value) ||
+        !PyArg_ParseTuple(tuple, "z:test_s_code2", &value)) {
+        Py_DECREF(tuple);
+        return NULL;
+    }
+    Py_DECREF(tuple);
+    Py_RETURN_NONE;
+}
+
 /* Test the u and u# codes for PyArg_ParseTuple. May leak memory in case
    of an error.
 */
@@ -952,6 +984,7 @@ static PyMethodDef TestMethods[] = {
 	{"codec_incrementaldecoder",
 	 (PyCFunction)codec_incrementaldecoder,	 METH_VARARGS},
 #endif
+	{"test_s_code",		(PyCFunction)test_s_code,	 METH_NOARGS},
 	{"test_u_code",		(PyCFunction)test_u_code,	 METH_NOARGS},
 	{"test_Z_code",		(PyCFunction)test_Z_code,	 METH_NOARGS},
 #ifdef WITH_THREAD
diff --git a/Python/getargs.c b/Python/getargs.c
index 2bbafdb..427a951 100644
--- a/Python/getargs.c
+++ b/Python/getargs.c
@@ -822,10 +822,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
 			}
 			else
 				return converterr("string", arg, msgbuf, bufsize);
-			/* XXX(gb): this test is completely wrong -- p is a
-			 * byte string while arg is a Unicode. I *think* it should
-			 * check against the size of uarg... */
-			if ((Py_ssize_t)strlen(*p) != PyUnicode_GetSize(arg))
+			if ((Py_ssize_t) strlen(*p) != PyString_GET_SIZE(uarg))
 				return converterr("string without null bytes",
 						  arg, msgbuf, bufsize);
 		}
@@ -874,11 +871,15 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
 			format++;
 		} else {
 			char **p = va_arg(*p_va, char **);
+			uarg = NULL;
 
 			if (arg == Py_None)
 				*p = 0;
-			else if (PyString_Check(arg))
+			else if (PyString_Check(arg)) {
+				/* Enable null byte check below */
+				uarg = arg;
 				*p = PyString_AS_STRING(arg);
+			}
 			else if (PyUnicode_Check(arg)) {
 				uarg = UNICODE_DEFAULT_ENCODING(arg);
 				if (uarg == NULL)
@@ -900,9 +901,8 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
 				}
 				format++;
 			}
-			/* XXX(gb): same comment as for 's' applies here... */
-			else if (*p != NULL &&
-				 (Py_ssize_t)strlen(*p) != PyUnicode_GetSize(arg))
+			else if (*p != NULL && uarg != NULL &&
+				(Py_ssize_t) strlen(*p) != PyString_GET_SIZE(uarg))
 				return converterr(
 					"string without null bytes or None",
 					arg, msgbuf, bufsize);
-- 
cgit v0.12