summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- Include/unicodeobject.h | 12
-rw-r--r-- Lib/test/test_unicode.py | 8
-rw-r--r-- Misc/NEWS | 6
-rw-r--r-- Objects/unicodeobject.c | 56
-rw-r--r-- Python/bltinmodule.c | 35
5 files changed, 82 insertions, 35 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index d0a2885..2a92fe5 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -517,6 +517,18 @@ extern DL_IMPORT(int) PyUnicode_AsWideChar(
#endif
+/* --- Unicode ordinals --------------------------------------------------- */
+
+/* Create a Unicode Object from the given Unicode code point ordinal.
+
+ The ordinal must be in range(0x10000) on narrow Python builds
+ (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
+ raised and NULL is returned in case it is not.
+
+*/
+
+extern DL_IMPORT(PyObject*) PyUnicode_FromOrdinal(int ordinal);
+
/* === Builtin Codecs =====================================================
Many of these APIs take two arguments encoding and errors. These
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index a915b2e..f5f4245 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -453,6 +453,14 @@ except KeyError:
else:
verify(value == u'abc, def')
+for ordinal in (-100, 0x20000):
+    try:
+        u"%c" % ordinal
+    except ValueError:
+        pass
+    else:
+        print '*** formatting u"%%c" %% %i should give a ValueError' % ordinal
+
# formatting jobs delegated from the string implementation:
verify('...%(foo)s...' % {'foo':u"abc"} == u'...abc...')
verify('...%(foo)s...' % {'foo':"abc"} == '...abc...')
diff --git a/Misc/NEWS b/Misc/NEWS
index e12120d..a454e18 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -6,6 +6,9 @@ Type/class unification and new-style classes
Core and builtins
+- u'%c' will now raise a ValueError in case the argument is an
+ integer outside the valid range of Unicode code point ordinals.
+
- The tempfile module has been overhauled for enhanced security. The
mktemp() function is now deprecated; new, safe replacements are
mkstemp() (for files) and mkdtemp() (for directories), and the
@@ -437,6 +440,9 @@ Build
C API
+- New C API PyUnicode_FromOrdinal() which exposes unichr() at C
+ level.
+
- New functions PyErr_SetExcFromWindowsErr() and
PyErr_SetExcFromWindowsErrWithFilename(). Similar to
PyErr_SetFromWindowsErrWithFilename() and
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 145186e..d0fe24c 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -390,6 +390,45 @@ int PyUnicode_AsWideChar(PyUnicodeObject *unicode,
#endif
+PyObject *PyUnicode_FromOrdinal(int ordinal)
+{
+ Py_UNICODE s[2];
+
+#ifdef Py_UNICODE_WIDE
+ if (ordinal < 0 || ordinal > 0x10ffff) {
+ PyErr_SetString(PyExc_ValueError,
+ "unichr() arg not in range(0x110000) "
+ "(wide Python build)");
+ return NULL;
+ }
+#else
+ if (ordinal < 0 || ordinal > 0xffff) {
+ PyErr_SetString(PyExc_ValueError,
+ "unichr() arg not in range(0x10000) "
+ "(narrow Python build)");
+ return NULL;
+ }
+#endif
+
+ if (ordinal <= 0xffff) {
+ /* ordinal <= 0xffff: stored as a single Py_UNICODE element */
+ s[0] = (Py_UNICODE) ordinal;
+ return PyUnicode_FromUnicode(s, 1);
+ }
+ else {
+#ifndef Py_UNICODE_WIDE
+ /* Surrogate-pair encoding. NOTE(review): unreachable as written -- the narrow-build check above already rejects ordinal > 0xffff */
+ ordinal -= 0x10000L;
+ s[0] = 0xD800 + (Py_UNICODE) (ordinal >> 10);
+ s[1] = 0xDC00 + (Py_UNICODE) (ordinal & 0x03FF);
+ return PyUnicode_FromUnicode(s, 2);
+#else
+ s[0] = (Py_UNICODE)ordinal;
+ return PyUnicode_FromUnicode(s, 1);
+#endif
+ }
+}
+
PyObject *PyUnicode_FromObject(register PyObject *obj)
{
/* XXX Perhaps we should make this API an alias of
@@ -5373,7 +5412,22 @@ formatchar(Py_UNICODE *buf,
x = PyInt_AsLong(v);
if (x == -1 && PyErr_Occurred())
goto onError;
- buf[0] = (char) x;
+#ifdef Py_UNICODE_WIDE
+ if (x < 0 || x > 0x10ffff) {
+ PyErr_SetString(PyExc_ValueError,
+ "%c arg not in range(0x110000) "
+ "(wide Python build)");
+ return -1;
+ }
+#else
+ if (x < 0 || x > 0xffff) {
+ PyErr_SetString(PyExc_ValueError,
+ "%c arg not in range(0x10000) "
+ "(narrow Python build)");
+ return -1;
+ }
+#endif
+ buf[0] = (Py_UNICODE) x;
}
buf[1] = '\0';
return 1;
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index fec7554..7a53065 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -260,44 +260,11 @@ static PyObject *
builtin_unichr(PyObject *self, PyObject *args)
{
long x;
- Py_UNICODE s[2];
if (!PyArg_ParseTuple(args, "l:unichr", &x))
return NULL;
-#ifdef Py_UNICODE_WIDE
- if (x < 0 || x > 0x10ffff) {
- PyErr_SetString(PyExc_ValueError,
- "unichr() arg not in range(0x110000) "
- "(wide Python build)");
- return NULL;
- }
-#else
- if (x < 0 || x > 0xffff) {
- PyErr_SetString(PyExc_ValueError,
- "unichr() arg not in range(0x10000) "
- "(narrow Python build)");
- return NULL;
- }
-#endif
-
- if (x <= 0xffff) {
- /* UCS-2 character */
- s[0] = (Py_UNICODE) x;
- return PyUnicode_FromUnicode(s, 1);
- }
- else {
-#ifndef Py_UNICODE_WIDE
- /* UCS-4 character. store as two surrogate characters */
- x -= 0x10000L;
- s[0] = 0xD800 + (Py_UNICODE) (x >> 10);
- s[1] = 0xDC00 + (Py_UNICODE) (x & 0x03FF);
- return PyUnicode_FromUnicode(s, 2);
-#else
- s[0] = (Py_UNICODE)x;
- return PyUnicode_FromUnicode(s, 1);
-#endif
- }
+ return PyUnicode_FromOrdinal((x < 0 || x > 0x10ffff) ? -1 : (int) x); /* out-of-range long -> -1 (ValueError) instead of wrapping in the int conversion */
}
PyDoc_STRVAR(unichr_doc,