diff options
author | Georg Brandl <georg@python.org> | 2008-06-04 11:41:32 (GMT) |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2008-06-04 11:41:32 (GMT) |
commit | f954c4b9fb8529cc13a2e24c58137c66ac836b28 (patch) | |
tree | 91575068c14eec261bc4e2c44da9c881eda4efe1 /Objects/unicodeobject.c | |
parent | e5d68aceb529934e75d505bbfaf867e02493a1bc (diff) | |
download | cpython-f954c4b9fb8529cc13a2e24c58137c66ac836b28.zip cpython-f954c4b9fb8529cc13a2e24c58137c66ac836b28.tar.gz cpython-f954c4b9fb8529cc13a2e24c58137c66ac836b28.tar.bz2 |
Remove meaning of -ttt, but still accept -t option on cmdline for compatibility.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 176 |
1 files changed, 117 insertions, 59 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 78e38b5..7f488b8 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -7149,6 +7149,36 @@ unicode_isidentifier(PyObject *self) return PyBool_FromLong(PyUnicode_IsIdentifier(self)); } +PyDoc_STRVAR(isprintable__doc__, +"S.isprintable() -> bool\n\ +\n\ +Return True if all characters in S are considered\n\ +printable in repr() and there is at least one character\n\ +in S, False otherwise."); + +static PyObject* +unicode_isprintable(PyObject *self) +{ + register const Py_UNICODE *p = PyUnicode_AS_UNICODE(self); + register const Py_UNICODE *e; + + /* Shortcut for single character strings */ + if (PyUnicode_GET_SIZE(self) == 1 && + Py_UNICODE_ISPRINTABLE(*p)) + return PyBool_FromLong(1); + + /* Special case for empty strings */ + if (PyUnicode_GET_SIZE(self) == 0) + return PyBool_FromLong(0); + + e = p + PyUnicode_GET_SIZE(self); + for (; p < e; p++) { + if (!Py_UNICODE_ISPRINTABLE(*p)) + return PyBool_FromLong(0); + } + return PyBool_FromLong(1); +} + PyDoc_STRVAR(join__doc__, "S.join(sequence) -> str\n\ \n\ @@ -7526,61 +7556,8 @@ PyObject *unicode_repr(PyObject *unicode) continue; } -#ifdef Py_UNICODE_WIDE - /* Map 21-bit characters to '\U00xxxxxx' */ - else if (ch >= 0x10000) { - *p++ = '\\'; - *p++ = 'U'; - *p++ = hexdigits[(ch >> 28) & 0x0000000F]; - *p++ = hexdigits[(ch >> 24) & 0x0000000F]; - *p++ = hexdigits[(ch >> 20) & 0x0000000F]; - *p++ = hexdigits[(ch >> 16) & 0x0000000F]; - *p++ = hexdigits[(ch >> 12) & 0x0000000F]; - *p++ = hexdigits[(ch >> 8) & 0x0000000F]; - *p++ = hexdigits[(ch >> 4) & 0x0000000F]; - *p++ = hexdigits[ch & 0x0000000F]; - continue; - } -#else - /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */ - else if (ch >= 0xD800 && ch < 0xDC00) { - Py_UNICODE ch2; - Py_UCS4 ucs; - - ch2 = *s++; - size--; - if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) { - ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000; - *p++ = '\\'; - *p++ = 'U'; - *p++ = hexdigits[(ucs >> 28) & 0x0000000F]; - *p++ = hexdigits[(ucs >> 24) & 0x0000000F]; - *p++ = hexdigits[(ucs >> 20) & 0x0000000F]; - *p++ = hexdigits[(ucs >> 16) & 0x0000000F]; - *p++ = hexdigits[(ucs >> 12) & 0x0000000F]; - *p++ = hexdigits[(ucs >> 8) & 0x0000000F]; - *p++ = hexdigits[(ucs >> 4) & 0x0000000F]; - *p++ = hexdigits[ucs & 0x0000000F]; - continue; - } - /* Fall through: isolated surrogates are copied as-is */ - s--; - size++; - } -#endif - - /* Map 16-bit characters to '\uxxxx' */ - if (ch >= 256) { - *p++ = '\\'; - *p++ = 'u'; - *p++ = hexdigits[(ch >> 12) & 0x000F]; - *p++ = hexdigits[(ch >> 8) & 0x000F]; - *p++ = hexdigits[(ch >> 4) & 0x000F]; - *p++ = hexdigits[ch & 0x000F]; - } - - /* Map special whitespace to '\t', \n', '\r' */ - else if (ch == '\t') { + /* Map special whitespace to '\t', \n', '\r' */ + if (ch == '\t') { *p++ = '\\'; *p++ = 't'; } @@ -7594,16 +7571,79 @@ PyObject *unicode_repr(PyObject *unicode) } /* Map non-printable US ASCII to '\xhh' */ - else if (ch < ' ' || ch >= 0x7F) { + else if (ch < ' ' || ch == 0x7F) { *p++ = '\\'; *p++ = 'x'; *p++ = hexdigits[(ch >> 4) & 0x000F]; *p++ = hexdigits[ch & 0x000F]; } - /* Copy everything else as-is */ - else - *p++ = (char) ch; + /* Copy ASCII characters as-is */ + else if (ch < 0x7F) { + *p++ = ch; + } + + /* Non-ASCII characters */ + else { + Py_UCS4 ucs = ch; + +#ifndef Py_UNICODE_WIDE + Py_UNICODE ch2 = 0; + /* Get code point from surrogate pair */ + if (size > 0) { + ch2 = *s; + if (ch >= 0xD800 && ch < 0xDC00 && ch2 >= 0xDC00 + && ch2 <= 0xDFFF) { + ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + + 0x00010000; + s++; + size--; + } + } +#endif + /* Map Unicode whitespace and control characters + (categories Z* and C* except ASCII space) + */ + if (!Py_UNICODE_ISPRINTABLE(ucs)) { + /* Map 8-bit characters to '\xhh' */ + if (ucs <= 0xff) { + *p++ = '\\'; + *p++ = 'x'; + *p++ = hexdigits[(ch >> 4) & 0x000F]; + *p++ = hexdigits[ch & 0x000F]; + } + /* Map 21-bit characters to '\U00xxxxxx' */ + else if (ucs >= 0x10000) { + *p++ = '\\'; + *p++ = 'U'; + *p++ = hexdigits[(ucs >> 28) & 0x0000000F]; + *p++ = hexdigits[(ucs >> 24) & 0x0000000F]; + *p++ = hexdigits[(ucs >> 20) & 0x0000000F]; + *p++ = hexdigits[(ucs >> 16) & 0x0000000F]; + *p++ = hexdigits[(ucs >> 12) & 0x0000000F]; + *p++ = hexdigits[(ucs >> 8) & 0x0000000F]; + *p++ = hexdigits[(ucs >> 4) & 0x0000000F]; + *p++ = hexdigits[ucs & 0x0000000F]; + } + /* Map 16-bit characters to '\uxxxx' */ + else { + *p++ = '\\'; + *p++ = 'u'; + *p++ = hexdigits[(ucs >> 12) & 0x000F]; + *p++ = hexdigits[(ucs >> 8) & 0x000F]; + *p++ = hexdigits[(ucs >> 4) & 0x000F]; + *p++ = hexdigits[ucs & 0x000F]; + } + } + /* Copy characters as-is */ + else { + *p++ = ch; +#ifndef Py_UNICODE_WIDE + if (ucs >= 0x10000) + *p++ = ch2; +#endif + } + } } /* Add quote */ *p++ = PyUnicode_AS_UNICODE(repr)[0]; @@ -8268,6 +8308,7 @@ static PyMethodDef unicode_methods[] = { {"isalpha", (PyCFunction) unicode_isalpha, METH_NOARGS, isalpha__doc__}, {"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS, isalnum__doc__}, {"isidentifier", (PyCFunction) unicode_isidentifier, METH_NOARGS, isidentifier__doc__}, + {"isprintable", (PyCFunction) unicode_isprintable, METH_NOARGS, isprintable__doc__}, {"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__}, {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__}, {"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__}, @@ -8853,6 +8894,7 @@ PyObject *PyUnicode_Format(PyObject *format, case 's': case 'r': + case 'a': if (PyUnicode_Check(v) && c == 's') { temp = v; Py_INCREF(temp); @@ -8872,6 +8914,22 @@ PyObject *PyUnicode_Format(PyObject *format, "%s argument has non-string str()"); goto onError; } + if (c == 'a') { + PyObject *ascii = PyUnicode_EncodeASCII( + PyUnicode_AS_UNICODE(temp), + PyUnicode_GET_SIZE(temp), + "backslashreplace"); + + Py_DECREF(temp); + if (ascii == NULL) + goto onError; + + temp = PyUnicode_FromEncodedObject(ascii, + "ASCII", NULL); + Py_DECREF(ascii); + if (temp == NULL) + goto onError; + } } pbuf = PyUnicode_AS_UNICODE(temp); len = PyUnicode_GET_SIZE(temp); |