Revert r63934 -- it was mixing two patches.

author: Georg Brandl <georg@python.org> 2008-06-04 13:01:30 (GMT)
committer: Georg Brandl <georg@python.org> 2008-06-04 13:01:30 (GMT)
commit: a26f8ca668d09eff8ab03ef3d1cddb9bfbc6d124 (patch)
tree: 4b3704507702b98e0ce4107403a72093a5d6b9fc /Objects/unicodeobject.c
parent: f954c4b9fb8529cc13a2e24c58137c66ac836b28 (diff)
download: cpython-a26f8ca668d09eff8ab03ef3d1cddb9bfbc6d124.zip
cpython-a26f8ca668d09eff8ab03ef3d1cddb9bfbc6d124.tar.gz
cpython-a26f8ca668d09eff8ab03ef3d1cddb9bfbc6d124.tar.bz2
1 files changed, 59 insertions, 117 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 7f488b8..78e38b5 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -7149,36 +7149,6 @@ unicode_isidentifier(PyObject *self)
     return PyBool_FromLong(PyUnicode_IsIdentifier(self));
 }
 
-PyDoc_STRVAR(isprintable__doc__,
-"S.isprintable() -> bool\n\
-\n\
-Return True if all characters in S are considered\n\
-printable in repr() and there is at least one character\n\
-in S, False otherwise.");
-
-static PyObject*
-unicode_isprintable(PyObject *self)
-{
-    register const Py_UNICODE *p = PyUnicode_AS_UNICODE(self);
-    register const Py_UNICODE *e;
-
-    /* Shortcut for single character strings */
-    if (PyUnicode_GET_SIZE(self) == 1 &&
-	Py_UNICODE_ISPRINTABLE(*p))
-	return PyBool_FromLong(1);
-
-    /* Special case for empty strings */
-    if (PyUnicode_GET_SIZE(self) == 0)
-	return PyBool_FromLong(0);
-
-    e = p + PyUnicode_GET_SIZE(self);
-    for (; p < e; p++) {
-	if (!Py_UNICODE_ISPRINTABLE(*p))
-	    return PyBool_FromLong(0);
-    }
-    return PyBool_FromLong(1);
-}
-
 PyDoc_STRVAR(join__doc__,
 "S.join(sequence) -> str\n\
 \n\
@@ -7556,8 +7526,61 @@ PyObject *unicode_repr(PyObject *unicode)
             continue;
         }
 
-	/* Map special whitespace to '\t', \n', '\r' */
-        if (ch == '\t') {
+#ifdef Py_UNICODE_WIDE
+        /* Map 21-bit characters to '\U00xxxxxx' */
+        else if (ch >= 0x10000) {
+            *p++ = '\\';
+            *p++ = 'U';
+            *p++ = hexdigits[(ch >> 28) & 0x0000000F];
+            *p++ = hexdigits[(ch >> 24) & 0x0000000F];
+            *p++ = hexdigits[(ch >> 20) & 0x0000000F];
+            *p++ = hexdigits[(ch >> 16) & 0x0000000F];
+            *p++ = hexdigits[(ch >> 12) & 0x0000000F];
+            *p++ = hexdigits[(ch >> 8) & 0x0000000F];
+            *p++ = hexdigits[(ch >> 4) & 0x0000000F];
+            *p++ = hexdigits[ch & 0x0000000F];
+	    continue;
+        }
+#else
+	/* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
+	else if (ch >= 0xD800 && ch < 0xDC00) {
+	    Py_UNICODE ch2;
+	    Py_UCS4 ucs;
+
+	    ch2 = *s++;
+	    size--;
+	    if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
+		ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
+		*p++ = '\\';
+		*p++ = 'U';
+		*p++ = hexdigits[(ucs >> 28) & 0x0000000F];
+		*p++ = hexdigits[(ucs >> 24) & 0x0000000F];
+		*p++ = hexdigits[(ucs >> 20) & 0x0000000F];
+		*p++ = hexdigits[(ucs >> 16) & 0x0000000F];
+		*p++ = hexdigits[(ucs >> 12) & 0x0000000F];
+		*p++ = hexdigits[(ucs >> 8) & 0x0000000F];
+		*p++ = hexdigits[(ucs >> 4) & 0x0000000F];
+		*p++ = hexdigits[ucs & 0x0000000F];
+		continue;
+	    }
+	    /* Fall through: isolated surrogates are copied as-is */
+	    s--;
+	    size++;
+	}
+#endif
+
+        /* Map 16-bit characters to '\uxxxx' */
+        if (ch >= 256) {
+            *p++ = '\\';
+            *p++ = 'u';
+            *p++ = hexdigits[(ch >> 12) & 0x000F];
+            *p++ = hexdigits[(ch >> 8) & 0x000F];
+            *p++ = hexdigits[(ch >> 4) & 0x000F];
+            *p++ = hexdigits[ch & 0x000F];
+        }
+
+        /* Map special whitespace to '\t', \n', '\r' */
+        else if (ch == '\t') {
             *p++ = '\\';
             *p++ = 't';
         }
@@ -7571,79 +7594,16 @@ PyObject *unicode_repr(PyObject *unicode)
         }
 
         /* Map non-printable US ASCII to '\xhh' */
-        else if (ch < ' ' || ch == 0x7F) {
+        else if (ch < ' ' || ch >= 0x7F) {
             *p++ = '\\';
             *p++ = 'x';
             *p++ = hexdigits[(ch >> 4) & 0x000F];
             *p++ = hexdigits[ch & 0x000F];
         }
 
-        /* Copy ASCII characters as-is */
-        else if (ch < 0x7F) {
-            *p++ = ch;
-        }
-
-	/* Non-ASCII characters */
-        else {
-            Py_UCS4 ucs = ch;
-
-#ifndef Py_UNICODE_WIDE
-            Py_UNICODE ch2 = 0;
-            /* Get code point from surrogate pair */
-            if (size > 0) {
-                ch2 = *s;
-                if (ch >= 0xD800 && ch < 0xDC00 && ch2 >= 0xDC00
-                            && ch2 <= 0xDFFF) {
-                    ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) 
-                            + 0x00010000;
-                    s++; 
-                    size--;
-                }
-            }
-#endif
-            /* Map Unicode whitespace and control characters 
-               (categories Z* and C* except ASCII space)
-            */
-            if (!Py_UNICODE_ISPRINTABLE(ucs)) {
-                /* Map 8-bit characters to '\xhh' */
-                if (ucs <= 0xff) {
-                    *p++ = '\\';
-                    *p++ = 'x';
-                    *p++ = hexdigits[(ch >> 4) & 0x000F];
-                    *p++ = hexdigits[ch & 0x000F];
-                }
-                /* Map 21-bit characters to '\U00xxxxxx' */
-                else if (ucs >= 0x10000) {
-                    *p++ = '\\';
-                    *p++ = 'U';
-                    *p++ = hexdigits[(ucs >> 28) & 0x0000000F];
-                    *p++ = hexdigits[(ucs >> 24) & 0x0000000F];
-                    *p++ = hexdigits[(ucs >> 20) & 0x0000000F];
-                    *p++ = hexdigits[(ucs >> 16) & 0x0000000F];
-                    *p++ = hexdigits[(ucs >> 12) & 0x0000000F];
-                    *p++ = hexdigits[(ucs >> 8) & 0x0000000F];
-                    *p++ = hexdigits[(ucs >> 4) & 0x0000000F];
-                    *p++ = hexdigits[ucs & 0x0000000F];
-                }
-                /* Map 16-bit characters to '\uxxxx' */
-                else {
-                    *p++ = '\\';
-                    *p++ = 'u';
-                    *p++ = hexdigits[(ucs >> 12) & 0x000F];
-                    *p++ = hexdigits[(ucs >> 8) & 0x000F];
-                    *p++ = hexdigits[(ucs >> 4) & 0x000F];
-                    *p++ = hexdigits[ucs & 0x000F];
-                }
-            }
-            /* Copy characters as-is */
-            else {
-                *p++ = ch;
-#ifndef Py_UNICODE_WIDE
-                if (ucs >= 0x10000)
-                    *p++ = ch2;
-#endif
-            }
-        }
+        /* Copy everything else as-is */
+        else
+            *p++ = (char) ch;
     }
     /* Add quote */
     *p++ = PyUnicode_AS_UNICODE(repr)[0];
@@ -8308,7 +8268,6 @@ static PyMethodDef unicode_methods[] = {
     {"isalpha", (PyCFunction) unicode_isalpha, METH_NOARGS, isalpha__doc__},
     {"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS, isalnum__doc__},
     {"isidentifier", (PyCFunction) unicode_isidentifier, METH_NOARGS, isidentifier__doc__},
-    {"isprintable", (PyCFunction) unicode_isprintable, METH_NOARGS, isprintable__doc__},
     {"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__},
     {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
     {"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__},
@@ -8894,7 +8853,6 @@ PyObject *PyUnicode_Format(PyObject *format,
 
 	    case 's':
 	    case 'r':
-	    case 'a':
 		if (PyUnicode_Check(v) && c == 's') {
 		    temp = v;
 		    Py_INCREF(temp);
@@ -8914,22 +8872,6 @@ PyObject *PyUnicode_Format(PyObject *format,
 					"%s argument has non-string str()");
 			goto onError;
 		    }
-		    if (c == 'a') {
-		        PyObject *ascii = PyUnicode_EncodeASCII(
-				PyUnicode_AS_UNICODE(temp),
-				PyUnicode_GET_SIZE(temp),
-				"backslashreplace");
-
-			Py_DECREF(temp);
-			if (ascii == NULL) 
-			    goto onError;
-
-			temp = PyUnicode_FromEncodedObject(ascii, 
-				    "ASCII", NULL);
-			Py_DECREF(ascii);
-			if (temp == NULL)
-			    goto onError;
-		    }
 		}
 		pbuf = PyUnicode_AS_UNICODE(temp);
 		len = PyUnicode_GET_SIZE(temp);
author	Georg Brandl <georg@python.org>	2008-06-04 13:01:30 (GMT)
committer	Georg Brandl <georg@python.org>	2008-06-04 13:01:30 (GMT)
commit	a26f8ca668d09eff8ab03ef3d1cddb9bfbc6d124 (patch)
tree	4b3704507702b98e0ce4107403a72093a5d6b9fc /Objects/unicodeobject.c
parent	f954c4b9fb8529cc13a2e24c58137c66ac836b28 (diff)
download	cpython-a26f8ca668d09eff8ab03ef3d1cddb9bfbc6d124.zip cpython-a26f8ca668d09eff8ab03ef3d1cddb9bfbc6d124.tar.gz cpython-a26f8ca668d09eff8ab03ef3d1cddb9bfbc6d124.tar.bz2