diff options
author | Fredrik Lundh <fredrik@pythonware.com> | 2001-06-26 20:01:56 (GMT) |
---|---|---|
committer | Fredrik Lundh <fredrik@pythonware.com> | 2001-06-26 20:01:56 (GMT) |
commit | 0dcf67e56d891832b53a82ee0abb60dcc2e0148e (patch) | |
tree | 78a90c2aae2c000b09864df57efee18a3cc75632 /Python/bltinmodule.c | |
parent | 5b979356044281e3524500a1a6eb8d1cbdf25362 (diff) | |
download | cpython-0dcf67e56d891832b53a82ee0abb60dcc2e0148e.zip cpython-0dcf67e56d891832b53a82ee0abb60dcc2e0148e.tar.gz cpython-0dcf67e56d891832b53a82ee0abb60dcc2e0148e.tar.bz2 |
more unicode tweaks: make unichr(0xdddddddd) behave like u"\Udddddddd"
wrt surrogates. (this extends the valid range from 65535 to 1114111)
Diffstat (limited to 'Python/bltinmodule.c')
-rw-r--r-- | Python/bltinmodule.c | 23 |
1 files changed, 17 insertions, 6 deletions
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index 4da984f..ed5519f 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -308,23 +308,34 @@ static PyObject *
 builtin_unichr(PyObject *self, PyObject *args)
 {
     long x;
-    Py_UNICODE s[1];
+    Py_UNICODE s[2];
 
     if (!PyArg_ParseTuple(args, "l:unichr", &x))
         return NULL;
-    if (x < 0 || x >= 65536) {
+
+    if (x < 0 || x > 0x10ffff) {
         PyErr_SetString(PyExc_ValueError,
-                "unichr() arg not in range(65536)");
+                "unichr() arg not in range(0x10ffff)");
         return NULL;
     }
-    s[0] = (Py_UNICODE)x;
-    return PyUnicode_FromUnicode(s, 1);
+
+    if (x <= 0xffff) {
+        /* UCS-2 character */
+        s[0] = (Py_UNICODE) x;
+        return PyUnicode_FromUnicode(s, 1);
+    } else {
+        /* UCS-4 character. store as two surrogate characters */
+        x -= 0x10000L;
+        s[0] = 0xD800 + (Py_UNICODE) (x >> 10);
+        s[1] = 0xDC00 + (Py_UNICODE) (x & 0x03FF);
+        return PyUnicode_FromUnicode(s, 2);
+    }
 }
 
 static char unichr_doc[] =
 "unichr(i) -> Unicode character\n\
 \n\
-Return a Unicode string of one character with ordinal i; 0 <= i < 65536.";
+Return a Unicode string of one character with ordinal i; 0 <= i <= 0x10ffff.";
 
 
 static PyObject *