summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Fredrik Lundh <fredrik@pythonware.com>, 2001-06-26 20:01:56 (GMT)
committer: Fredrik Lundh <fredrik@pythonware.com>, 2001-06-26 20:01:56 (GMT)
commit: 0dcf67e56d891832b53a82ee0abb60dcc2e0148e (patch)
tree: 78a90c2aae2c000b09864df57efee18a3cc75632
parent: 5b979356044281e3524500a1a6eb8d1cbdf25362 (diff)
download: cpython-0dcf67e56d891832b53a82ee0abb60dcc2e0148e.zip
cpython-0dcf67e56d891832b53a82ee0abb60dcc2e0148e.tar.gz
cpython-0dcf67e56d891832b53a82ee0abb60dcc2e0148e.tar.bz2
more unicode tweaks: make unichr(0xdddddddd) behave like u"\Udddddddd"
wrt surrogates. (this extends the valid range from 65535 to 1114111)
-rw-r--r-- Python/bltinmodule.c 23
1 files changed, 17 insertions, 6 deletions
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index 4da984f..ed5519f 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -308,23 +308,34 @@ static PyObject *
builtin_unichr(PyObject *self, PyObject *args)
{
long x;
- Py_UNICODE s[1];
+ Py_UNICODE s[2];
if (!PyArg_ParseTuple(args, "l:unichr", &x))
return NULL;
- if (x < 0 || x >= 65536) {
+
+ if (x < 0 || x > 0x10ffff) {
PyErr_SetString(PyExc_ValueError,
- "unichr() arg not in range(65536)");
+ "unichr() arg not in range(0x10ffff)");
return NULL;
}
- s[0] = (Py_UNICODE)x;
- return PyUnicode_FromUnicode(s, 1);
+
+ if (x <= 0xffff) {
+ /* UCS-2 character */
+ s[0] = (Py_UNICODE) x;
+ return PyUnicode_FromUnicode(s, 1);
+ } else {
+ /* UCS-4 character. store as two surrogate characters */
+ x -= 0x10000L;
+ s[0] = 0xD800 + (Py_UNICODE) (x >> 10);
+ s[1] = 0xDC00 + (Py_UNICODE) (x & 0x03FF);
+ return PyUnicode_FromUnicode(s, 2);
+ }
}
static char unichr_doc[] =
"unichr(i) -> Unicode character\n\
\n\
-Return a Unicode string of one character with ordinal i; 0 <= i < 65536.";
+Return a Unicode string of one character with ordinal i; 0 <= i <= 0x10ffff.";
static PyObject *