summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@haypocalc.com> 2010-09-11 00:54:47 (GMT)
committer: Victor Stinner <victor.stinner@haypocalc.com> 2010-09-11 00:54:47 (GMT)
commit: 1205f2774e00d38d3229a3a2742c2fcbc767bdde (patch)
tree: 8f5756aa974326bf503dfaad7512aff103bde9bb
parent: cd419abe42b42c626d91d5f839d53bdbde9852e0 (diff)
download: cpython-1205f2774e00d38d3229a3a2742c2fcbc767bdde.zip
cpython-1205f2774e00d38d3229a3a2742c2fcbc767bdde.tar.gz
cpython-1205f2774e00d38d3229a3a2742c2fcbc767bdde.tar.bz2
Issue #9738: PyUnicode_FromFormat() and PyErr_Format() raise an error on
a non-ASCII byte in the format string. Document also the encoding.
-rw-r--r-- Doc/c-api/exceptions.rst 2
-rw-r--r-- Doc/c-api/unicode.rst 2
-rw-r--r-- Include/pyerrors.h 6
-rw-r--r-- Include/unicodeobject.h 10
-rw-r--r-- Lib/test/test_unicode.py 14
-rw-r--r-- Misc/NEWS 3
-rw-r--r-- Modules/_testcapimodule.c 12
-rw-r--r-- Objects/unicodeobject.c 10
8 files changed, 53 insertions, 6 deletions
diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst
index fcbd50b..f969b96 100644
--- a/Doc/c-api/exceptions.rst
+++ b/Doc/c-api/exceptions.rst
@@ -146,7 +146,7 @@ in various ways. There is a separate error indicator for each thread.
.. cfunction:: PyObject* PyErr_Format(PyObject *exception, const char *format, ...)
This function sets the error indicator and returns *NULL*. *exception* should be
- a Python exception (class, not an instance). *format* should be a string,
+ a Python exception (class, not an instance). *format* should be an ASCII-encoded string,
containing format codes, similar to :cfunc:`printf`. The ``width.precision``
before a format code is parsed, but the width part is ignored.
diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index a4ee03a..1b8402c 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -234,7 +234,7 @@ APIs:
arguments, calculate the size of the resulting Python unicode string and return
a string with the values formatted into it. The variable arguments must be C
types and must correspond exactly to the format characters in the *format*
- string. The following format characters are allowed:
+ ASCII-encoded string. The following format characters are allowed:
.. % This should be exactly the same as the table in PyErr_Format.
.. % The descriptions for %zd and %zu are wrong, but the truth is complicated
diff --git a/Include/pyerrors.h b/Include/pyerrors.h
index 1eee16d..243bc01 100644
--- a/Include/pyerrors.h
+++ b/Include/pyerrors.h
@@ -183,7 +183,11 @@ PyAPI_FUNC(PyObject *) PyErr_SetFromErrnoWithUnicodeFilename(
PyObject *, const Py_UNICODE *);
#endif /* MS_WINDOWS */
-PyAPI_FUNC(PyObject *) PyErr_Format(PyObject *, const char *, ...);
+PyAPI_FUNC(PyObject *) PyErr_Format(
+ PyObject *exception,
+ const char *format, /* ASCII-encoded string */
+ ...
+ );
#ifdef MS_WINDOWS
PyAPI_FUNC(PyObject *) PyErr_SetFromWindowsErrWithFilenameObject(
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 0de0d66..820850a 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -550,8 +550,14 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
register PyObject *obj /* Object */
);
-PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(const char*, va_list);
-PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(const char*, ...);
+PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
+ const char *format, /* ASCII-encoded string */
+ va_list vargs
+ );
+PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
+ const char *format, /* ASCII-encoded string */
+ ...
+ );
/* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index ae5f53d..0c23daa 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1385,6 +1385,20 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEquals("%s" % s, '__str__ overridden')
self.assertEquals("{}".format(s), '__str__ overridden')
+ def test_from_format(self):
+ # Ensure that PyUnicode_FromFormat() raises an error for a non-ascii
+ # format string.
+ from _testcapi import format_unicode
+
+ # ascii format, non-ascii argument
+ text = format_unicode(b'ascii\x7f=%U', 'unicode\xe9')
+ self.assertEqual(text, 'ascii\x7f=unicode\xe9')
+
+ # non-ascii format, ascii argument
+ self.assertRaisesRegexp(ValueError,
+ '^PyUnicode_FromFormatV\(\) expects an ASCII-encoded format '
+ 'string, got a non-ascii byte: 0xe9$',
+ format_unicode, b'unicode\xe9=%s', 'ascii')
def test_main():
support.run_unittest(__name__)
diff --git a/Misc/NEWS b/Misc/NEWS
index 04eb82f..ca64883 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.2 Alpha 3?
Core and Builtins
-----------------
+- Issue #9738: PyUnicode_FromFormat() and PyErr_Format() raise an error on
+ a non-ASCII byte in the format string.
+
- Issue #4617: Previously it was illegal to delete a name from the local
namespace if it occurs as a free variable in a nested block. This limitation
of the compiler has been lifted, and a new opcode introduced (DELETE_DEREF).
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index acbff34..20887b1 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -2193,6 +2193,17 @@ crash_no_current_thread(PyObject *self)
return NULL;
}
+static PyObject *
+format_unicode(PyObject *self, PyObject *args)
+{
+ const char *format;
+ PyObject *arg;
+ if (!PyArg_ParseTuple(args, "yU", &format, &arg))
+ return NULL;
+ return PyUnicode_FromFormat(format, arg);
+
+}
+
static PyMethodDef TestMethods[] = {
{"raise_exception", raise_exception, METH_VARARGS},
{"raise_memoryerror", (PyCFunction)raise_memoryerror, METH_NOARGS},
@@ -2272,6 +2283,7 @@ static PyMethodDef TestMethods[] = {
{"make_exception_with_doc", (PyCFunction)make_exception_with_doc,
METH_VARARGS | METH_KEYWORDS},
{"crash_no_current_thread", (PyCFunction)crash_no_current_thread, METH_NOARGS},
+ {"format_unicode", format_unicode, METH_VARARGS},
{NULL, NULL} /* sentinel */
};
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 9cdf90d..c010b1b 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1102,7 +1102,15 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
appendstring(p);
goto end;
}
- } else
+ }
+ else if (128 <= (unsigned char)*f) {
+ PyErr_Format(PyExc_ValueError,
+ "PyUnicode_FromFormatV() expects an ASCII-encoded format "
+ "string, got a non-ascii byte: 0x%02x",
+ (unsigned char)*f);
+ goto fail;
+ }
+ else
*s++ = *f;
}