summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org> 2007-07-15 13:00:05 (GMT)
committer: Guido van Rossum <guido@python.org> 2007-07-15 13:00:05 (GMT)
commit: 8ac004e69895e8fd525307fdc1e093f92b15ce09 (patch)
tree: 947227b6fa3f72f1b6591af1e14181ee30f1bc5d
parent: 49c12ac04e1bfb238454d9ca2cf3a5acd9991ef5 (diff)
download: cpython-8ac004e69895e8fd525307fdc1e093f92b15ce09.zip
cpython-8ac004e69895e8fd525307fdc1e093f92b15ce09.tar.gz
cpython-8ac004e69895e8fd525307fdc1e093f92b15ce09.tar.bz2
Make chr() and ord() return/accept surrogate pairs in narrow builds.
The domain of chr() and the range of ord() are now always [0 ... 0x10FFFF].
-rw-r--r-- Lib/test/test_builtin.py 35
-rw-r--r-- Objects/unicodeobject.c 19
-rw-r--r-- Python/bltinmodule.c 25
3 files changed, 59 insertions, 20 deletions
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
index 036a9f2..7e37c29 100644
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -169,15 +169,23 @@ class BuiltinTest(unittest.TestCase):
self.assertEqual(chr(97), 'a')
self.assertEqual(chr(0xff), '\xff')
self.assertRaises(ValueError, chr, 1<<24)
- self.assertEqual(
- chr(sys.maxunicode),
- str(('\\U%08x' % (sys.maxunicode)).encode("ascii"), 'unicode-escape')
- )
- self.assertRaises(ValueError, chr, sys.maxunicode+1)
+ self.assertEqual(chr(sys.maxunicode),
+ str(('\\U%08x' % (sys.maxunicode)).encode("ascii"),
+ 'unicode-escape'))
self.assertRaises(TypeError, chr)
-
- def XXX_test_cmp(self):
- # cmp() is no longer supported
+ self.assertEqual(chr(0x0000FFFF), "\U0000FFFF")
+ self.assertEqual(chr(0x00010000), "\U00010000")
+ self.assertEqual(chr(0x00010001), "\U00010001")
+ self.assertEqual(chr(0x000FFFFE), "\U000FFFFE")
+ self.assertEqual(chr(0x000FFFFF), "\U000FFFFF")
+ self.assertEqual(chr(0x00100000), "\U00100000")
+ self.assertEqual(chr(0x00100001), "\U00100001")
+ self.assertEqual(chr(0x0010FFFE), "\U0010FFFE")
+ self.assertEqual(chr(0x0010FFFF), "\U0010FFFF")
+ self.assertRaises(ValueError, chr, -1)
+ self.assertRaises(ValueError, chr, 0x00110000)
+
+ def test_cmp(self):
self.assertEqual(cmp(-1, 1), -1)
self.assertEqual(cmp(1, -1), 1)
self.assertEqual(cmp(1, 1), 0)
@@ -1288,6 +1296,17 @@ class BuiltinTest(unittest.TestCase):
self.assertEqual(ord(chr(sys.maxunicode)), sys.maxunicode)
self.assertRaises(TypeError, ord, 42)
+ self.assertEqual(ord(chr(0x10FFFF)), 0x10FFFF)
+ self.assertEqual(ord("\U0000FFFF"), 0x0000FFFF)
+ self.assertEqual(ord("\U00010000"), 0x00010000)
+ self.assertEqual(ord("\U00010001"), 0x00010001)
+ self.assertEqual(ord("\U000FFFFE"), 0x000FFFFE)
+ self.assertEqual(ord("\U000FFFFF"), 0x000FFFFF)
+ self.assertEqual(ord("\U00100000"), 0x00100000)
+ self.assertEqual(ord("\U00100001"), 0x00100001)
+ self.assertEqual(ord("\U0010FFFE"), 0x0010FFFE)
+ self.assertEqual(ord("\U0010FFFF"), 0x0010FFFF)
+
def test_pow(self):
self.assertEqual(pow(0,0), 1)
self.assertEqual(pow(0,1), 0)
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 2728f1f..a60fa8b 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -915,21 +915,20 @@ Py_ssize_t PyUnicode_AsWideChar(PyUnicodeObject *unicode,
PyObject *PyUnicode_FromOrdinal(int ordinal)
{
- Py_UNICODE s[1];
+ Py_UNICODE s[2];
-#ifdef Py_UNICODE_WIDE
if (ordinal < 0 || ordinal > 0x10ffff) {
PyErr_SetString(PyExc_ValueError,
- "chr() arg not in range(0x110000) "
- "(wide Python build)");
+ "chr() arg not in range(0x110000)");
return NULL;
}
-#else
- if (ordinal < 0 || ordinal > 0xffff) {
- PyErr_SetString(PyExc_ValueError,
- "chr() arg not in range(0x10000) "
- "(narrow Python build)");
- return NULL;
+
+#ifndef Py_UNICODE_WIDE
+ if (ordinal > 0xffff) {
+ ordinal -= 0x10000;
+ s[0] = 0xD800 | (ordinal >> 10);
+ s[1] = 0xDC00 | (ordinal & 0x3FF);
+ return PyUnicode_FromUnicode(s, 2);
}
#endif
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index db9ac2c..08c1a00 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -317,7 +317,11 @@ builtin_chr(PyObject *self, PyObject *args)
PyDoc_STRVAR(chr_doc,
"chr(i) -> Unicode character\n\
\n\
-Return a Unicode string of one character with ordinal i; 0 <= i <= 0x10ffff.");
+Return a Unicode string of one character with ordinal i; 0 <= i <= 0x10ffff."
+#ifndef Py_UNICODE_WIDE
+"\nIf 0x10000 <= i, a surrogate pair is returned."
+#endif
+);
static PyObject *
@@ -1179,6 +1183,19 @@ builtin_ord(PyObject *self, PyObject* obj)
ord = (long)*PyUnicode_AS_UNICODE(obj);
return PyInt_FromLong(ord);
}
+#ifndef Py_UNICODE_WIDE
+ if (size == 2) {
+ /* Decode a valid surrogate pair */
+ int c0 = PyUnicode_AS_UNICODE(obj)[0];
+ int c1 = PyUnicode_AS_UNICODE(obj)[1];
+ if (0xD800 <= c0 && c0 <= 0xDBFF &&
+ 0xDC00 <= c1 && c1 <= 0xDFFF) {
+ ord = ((((c0 & 0x03FF) << 10) | (c1 & 0x03FF)) +
+ 0x00010000);
+ return PyInt_FromLong(ord);
+ }
+ }
+#endif
}
else if (PyBytes_Check(obj)) {
/* XXX Hopefully this is temporary */
@@ -1205,7 +1222,11 @@ builtin_ord(PyObject *self, PyObject* obj)
PyDoc_STRVAR(ord_doc,
"ord(c) -> integer\n\
\n\
-Return the integer ordinal of a one-character string.");
+Return the integer ordinal of a one-character string."
+#ifndef Py_UNICODE_WIDE
+"\nA valid surrogate pair is also accepted."
+#endif
+);
static PyObject *