From 1205f2774e00d38d3229a3a2742c2fcbc767bdde Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 11 Sep 2010 00:54:47 +0000 Subject: Issue #9738: PyUnicode_FromFormat() and PyErr_Format() raise an error on a non-ASCII byte in the format string. Document also the encoding. --- Doc/c-api/exceptions.rst | 2 +- Doc/c-api/unicode.rst | 2 +- Include/pyerrors.h | 6 +++++- Include/unicodeobject.h | 10 ++++++++-- Lib/test/test_unicode.py | 14 ++++++++++++++ Misc/NEWS | 3 +++ Modules/_testcapimodule.c | 12 ++++++++++++ Objects/unicodeobject.c | 10 +++++++++- 8 files changed, 53 insertions(+), 6 deletions(-) diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst index fcbd50b..f969b96 100644 --- a/Doc/c-api/exceptions.rst +++ b/Doc/c-api/exceptions.rst @@ -146,7 +146,7 @@ in various ways. There is a separate error indicator for each thread. .. cfunction:: PyObject* PyErr_Format(PyObject *exception, const char *format, ...) This function sets the error indicator and returns *NULL*. *exception* should be - a Python exception (class, not an instance). *format* should be a string, + a Python exception (class, not an instance). *format* should be an ASCII-encoded string, containing format codes, similar to :cfunc:`printf`. The ``width.precision`` before a format code is parsed, but the width part is ignored. diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index a4ee03a..1b8402c 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -234,7 +234,7 @@ APIs: arguments, calculate the size of the resulting Python unicode string and return a string with the values formatted into it. The variable arguments must be C types and must correspond exactly to the format characters in the *format* - string. The following format characters are allowed: + ASCII-encoded string. The following format characters are allowed: .. % This should be exactly the same as the table in PyErr_Format. .. % The descriptions for %zd and %zu are wrong, but the truth is complicated diff --git a/Include/pyerrors.h b/Include/pyerrors.h index 1eee16d..243bc01 100644 --- a/Include/pyerrors.h +++ b/Include/pyerrors.h @@ -183,7 +183,11 @@ PyAPI_FUNC(PyObject *) PyErr_SetFromErrnoWithUnicodeFilename( PyObject *, const Py_UNICODE *); #endif /* MS_WINDOWS */ -PyAPI_FUNC(PyObject *) PyErr_Format(PyObject *, const char *, ...); +PyAPI_FUNC(PyObject *) PyErr_Format( + PyObject *exception, + const char *format, /* ASCII-encoded string */ + ... + ); #ifdef MS_WINDOWS PyAPI_FUNC(PyObject *) PyErr_SetFromWindowsErrWithFilenameObject( diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 0de0d66..820850a 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -550,8 +550,14 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromObject( register PyObject *obj /* Object */ ); -PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(const char*, va_list); -PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(const char*, ...); +PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV( + const char *format, /* ASCII-encoded string */ + va_list vargs + ); +PyAPI_FUNC(PyObject *) PyUnicode_FromFormat( + const char *format, /* ASCII-encoded string */ + ... + ); /* Format the object based on the format_spec, as defined in PEP 3101 (Advanced String Formatting). */ diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index ae5f53d..0c23daa 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1385,6 +1385,20 @@ class UnicodeTest(string_tests.CommonTest, self.assertEquals("%s" % s, '__str__ overridden') self.assertEquals("{}".format(s), '__str__ overridden') + def test_from_format(self): + # Ensure that PyUnicode_FromFormat() raises an error for a non-ascii + # format string. + from _testcapi import format_unicode + + # ascii format, non-ascii argument + text = format_unicode(b'ascii\x7f=%U', 'unicode\xe9') + self.assertEqual(text, 'ascii\x7f=unicode\xe9') + + # non-ascii format, ascii argument + self.assertRaisesRegexp(ValueError, + '^PyUnicode_FromFormatV\(\) expects an ASCII-encoded format ' + 'string, got a non-ascii byte: 0xe9$', + format_unicode, b'unicode\xe9=%s', 'ascii') def test_main(): support.run_unittest(__name__) diff --git a/Misc/NEWS b/Misc/NEWS index 04eb82f..ca64883 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,9 @@ What's New in Python 3.2 Alpha 3? Core and Builtins ----------------- +- Issue #9738: PyUnicode_FromFormat() and PyErr_Format() raise an error on + a non-ASCII byte in the format string. + - Issue #4617: Previously it was illegal to delete a name from the local namespace if it occurs as a free variable in a nested block. This limitation of the compiler has been lifted, and a new opcode introduced (DELETE_DEREF). diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index acbff34..20887b1 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -2193,6 +2193,17 @@ crash_no_current_thread(PyObject *self) return NULL; } +static PyObject * +format_unicode(PyObject *self, PyObject *args) +{ + const char *format; + PyObject *arg; + if (!PyArg_ParseTuple(args, "yU", &format, &arg)) + return NULL; + return PyUnicode_FromFormat(format, arg); + +} + static PyMethodDef TestMethods[] = { {"raise_exception", raise_exception, METH_VARARGS}, {"raise_memoryerror", (PyCFunction)raise_memoryerror, METH_NOARGS}, @@ -2272,6 +2283,7 @@ static PyMethodDef TestMethods[] = { {"make_exception_with_doc", (PyCFunction)make_exception_with_doc, METH_VARARGS | METH_KEYWORDS}, {"crash_no_current_thread", (PyCFunction)crash_no_current_thread, METH_NOARGS}, + {"format_unicode", format_unicode, METH_VARARGS}, {NULL, NULL} /* sentinel */ }; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 9cdf90d..c010b1b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1102,7 +1102,15 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) appendstring(p); goto end; } - } else + } + else if (128 <= (unsigned char)*f) { + PyErr_Format(PyExc_ValueError, + "PyUnicode_FromFormatV() expects an ASCII-encoded format " + "string, got a non-ascii byte: 0x%02x", + (unsigned char)*f); + goto fail; + } + else *s++ = *f; } -- cgit v0.12