diff options
author | Georg Brandl <georg@python.org> | 2008-06-04 13:01:30 (GMT) |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2008-06-04 13:01:30 (GMT) |
commit | a26f8ca668d09eff8ab03ef3d1cddb9bfbc6d124 (patch) | |
tree | 4b3704507702b98e0ce4107403a72093a5d6b9fc /Objects/unicodeobject.c | |
parent | f954c4b9fb8529cc13a2e24c58137c66ac836b28 (diff) | |
download | cpython-a26f8ca668d09eff8ab03ef3d1cddb9bfbc6d124.zip cpython-a26f8ca668d09eff8ab03ef3d1cddb9bfbc6d124.tar.gz cpython-a26f8ca668d09eff8ab03ef3d1cddb9bfbc6d124.tar.bz2 |
Revert r63934 -- it was mixing two patches.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 176 |
1 files changed, 59 insertions, 117 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 7f488b8..78e38b5 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -7149,36 +7149,6 @@ unicode_isidentifier(PyObject *self) return PyBool_FromLong(PyUnicode_IsIdentifier(self)); } -PyDoc_STRVAR(isprintable__doc__, -"S.isprintable() -> bool\n\ -\n\ -Return True if all characters in S are considered\n\ -printable in repr() and there is at least one character\n\ -in S, False otherwise."); - -static PyObject* -unicode_isprintable(PyObject *self) -{ - register const Py_UNICODE *p = PyUnicode_AS_UNICODE(self); - register const Py_UNICODE *e; - - /* Shortcut for single character strings */ - if (PyUnicode_GET_SIZE(self) == 1 && - Py_UNICODE_ISPRINTABLE(*p)) - return PyBool_FromLong(1); - - /* Special case for empty strings */ - if (PyUnicode_GET_SIZE(self) == 0) - return PyBool_FromLong(0); - - e = p + PyUnicode_GET_SIZE(self); - for (; p < e; p++) { - if (!Py_UNICODE_ISPRINTABLE(*p)) - return PyBool_FromLong(0); - } - return PyBool_FromLong(1); -} - PyDoc_STRVAR(join__doc__, "S.join(sequence) -> str\n\ \n\ @@ -7556,8 +7526,61 @@ PyObject *unicode_repr(PyObject *unicode) continue; } - /* Map special whitespace to '\t', \n', '\r' */ - if (ch == '\t') { +#ifdef Py_UNICODE_WIDE + /* Map 21-bit characters to '\U00xxxxxx' */ + else if (ch >= 0x10000) { + *p++ = '\\'; + *p++ = 'U'; + *p++ = hexdigits[(ch >> 28) & 0x0000000F]; + *p++ = hexdigits[(ch >> 24) & 0x0000000F]; + *p++ = hexdigits[(ch >> 20) & 0x0000000F]; + *p++ = hexdigits[(ch >> 16) & 0x0000000F]; + *p++ = hexdigits[(ch >> 12) & 0x0000000F]; + *p++ = hexdigits[(ch >> 8) & 0x0000000F]; + *p++ = hexdigits[(ch >> 4) & 0x0000000F]; + *p++ = hexdigits[ch & 0x0000000F]; + continue; + } +#else + /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */ + else if (ch >= 0xD800 && ch < 0xDC00) { + Py_UNICODE ch2; + Py_UCS4 ucs; + + ch2 = *s++; + size--; + if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) { + ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000; + *p++ = '\\'; + *p++ = 'U'; + *p++ = hexdigits[(ucs >> 28) & 0x0000000F]; + *p++ = hexdigits[(ucs >> 24) & 0x0000000F]; + *p++ = hexdigits[(ucs >> 20) & 0x0000000F]; + *p++ = hexdigits[(ucs >> 16) & 0x0000000F]; + *p++ = hexdigits[(ucs >> 12) & 0x0000000F]; + *p++ = hexdigits[(ucs >> 8) & 0x0000000F]; + *p++ = hexdigits[(ucs >> 4) & 0x0000000F]; + *p++ = hexdigits[ucs & 0x0000000F]; + continue; + } + /* Fall through: isolated surrogates are copied as-is */ + s--; + size++; + } +#endif + + /* Map 16-bit characters to '\uxxxx' */ + if (ch >= 256) { + *p++ = '\\'; + *p++ = 'u'; + *p++ = hexdigits[(ch >> 12) & 0x000F]; + *p++ = hexdigits[(ch >> 8) & 0x000F]; + *p++ = hexdigits[(ch >> 4) & 0x000F]; + *p++ = hexdigits[ch & 0x000F]; + } + + /* Map special whitespace to '\t', \n', '\r' */ + else if (ch == '\t') { *p++ = '\\'; *p++ = 't'; } @@ -7571,79 +7594,16 @@ PyObject *unicode_repr(PyObject *unicode) } /* Map non-printable US ASCII to '\xhh' */ - else if (ch < ' ' || ch == 0x7F) { + else if (ch < ' ' || ch >= 0x7F) { *p++ = '\\'; *p++ = 'x'; *p++ = hexdigits[(ch >> 4) & 0x000F]; *p++ = hexdigits[ch & 0x000F]; } - /* Copy ASCII characters as-is */ - else if (ch < 0x7F) { - *p++ = ch; - } - - /* Non-ASCII characters */ - else { - Py_UCS4 ucs = ch; - -#ifndef Py_UNICODE_WIDE - Py_UNICODE ch2 = 0; - /* Get code point from surrogate pair */ - if (size > 0) { - ch2 = *s; - if (ch >= 0xD800 && ch < 0xDC00 && ch2 >= 0xDC00 - && ch2 <= 0xDFFF) { - ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) - + 0x00010000; - s++; - size--; - } - } -#endif - /* Map Unicode whitespace and control characters - (categories Z* and C* except ASCII space) - */ - if (!Py_UNICODE_ISPRINTABLE(ucs)) { - /* Map 8-bit characters to '\xhh' */ - if (ucs <= 0xff) { - *p++ = '\\'; - *p++ = 'x'; - *p++ = hexdigits[(ch >> 4) & 0x000F]; - *p++ = hexdigits[ch & 0x000F]; - } - /* Map 21-bit characters to '\U00xxxxxx' */ - else if (ucs >= 0x10000) { - *p++ = '\\'; - *p++ = 'U'; - *p++ = hexdigits[(ucs >> 28) & 0x0000000F]; - *p++ = hexdigits[(ucs >> 24) & 0x0000000F]; - *p++ = hexdigits[(ucs >> 20) & 0x0000000F]; - *p++ = hexdigits[(ucs >> 16) & 0x0000000F]; - *p++ = hexdigits[(ucs >> 12) & 0x0000000F]; - *p++ = hexdigits[(ucs >> 8) & 0x0000000F]; - *p++ = hexdigits[(ucs >> 4) & 0x0000000F]; - *p++ = hexdigits[ucs & 0x0000000F]; - } - /* Map 16-bit characters to '\uxxxx' */ - else { - *p++ = '\\'; - *p++ = 'u'; - *p++ = hexdigits[(ucs >> 12) & 0x000F]; - *p++ = hexdigits[(ucs >> 8) & 0x000F]; - *p++ = hexdigits[(ucs >> 4) & 0x000F]; - *p++ = hexdigits[ucs & 0x000F]; - } - } - /* Copy characters as-is */ - else { - *p++ = ch; -#ifndef Py_UNICODE_WIDE - if (ucs >= 0x10000) - *p++ = ch2; -#endif - } - } + /* Copy everything else as-is */ + else + *p++ = (char) ch; } /* Add quote */ *p++ = PyUnicode_AS_UNICODE(repr)[0]; @@ -8308,7 +8268,6 @@ static PyMethodDef unicode_methods[] = { {"isalpha", (PyCFunction) unicode_isalpha, METH_NOARGS, isalpha__doc__}, {"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS, isalnum__doc__}, {"isidentifier", (PyCFunction) unicode_isidentifier, METH_NOARGS, isidentifier__doc__}, - {"isprintable", (PyCFunction) unicode_isprintable, METH_NOARGS, isprintable__doc__}, {"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__}, {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__}, {"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__}, @@ -8894,7 +8853,6 @@ PyObject *PyUnicode_Format(PyObject *format, case 's': case 'r': - case 'a': if (PyUnicode_Check(v) && c == 's') { temp = v; Py_INCREF(temp); @@ -8914,22 +8872,6 @@ PyObject *PyUnicode_Format(PyObject *format, "%s argument has non-string str()"); goto onError; } - if (c == 'a') { - PyObject *ascii = PyUnicode_EncodeASCII( - PyUnicode_AS_UNICODE(temp), - PyUnicode_GET_SIZE(temp), - "backslashreplace"); - - Py_DECREF(temp); - if (ascii == NULL) - goto onError; - - temp = PyUnicode_FromEncodedObject(ascii, - "ASCII", NULL); - Py_DECREF(ascii); - if (temp == NULL) - goto onError; - } } pbuf = PyUnicode_AS_UNICODE(temp); len = PyUnicode_GET_SIZE(temp); |