Commit strict str/bytes distinction.

From now on, trying to write str to a binary stream is an error (I'm still working on the reverse). There are still (at least) two failing tests:
- test_asynchat
- test_urllib2_localnet

but I'm sure these will be fixed by someone.
author: Guido van Rossum <guido@python.org> 2007-08-29 04:05:57 (GMT)
committer: Guido van Rossum <guido@python.org> 2007-08-29 04:05:57 (GMT)
commit: a74184eb1d1ed8c1c139ea692b6037a7563d5540 (patch)
tree: ffa099c584ab7857a1ac61d6cd8b8f872c49ffb0
parent: 245b42ec4b07682dd44bb92dbde328c7ce78d90b (diff)
download: cpython-a74184eb1d1ed8c1c139ea692b6037a7563d5540.zip
cpython-a74184eb1d1ed8c1c139ea692b6037a7563d5540.tar.gz
cpython-a74184eb1d1ed8c1c139ea692b6037a7563d5540.tar.bz2
3 files changed, 59 insertions, 119 deletions
diff --git a/Lib/io.py b/Lib/io.py
index 3ebf5ae..7aa79ce 100644
--- a/Lib/io.py
+++ b/Lib/io.py
@@ -659,12 +659,14 @@ class BytesIO(BufferedIOBase):
     def write(self, b):
         if self.closed:
             raise ValueError("write to closed file")
+        if isinstance(b, str):
+            raise TypeError("can't write str to binary stream")
         n = len(b)
         newpos = self._pos + n
         if newpos > len(self._buffer):
             # Inserts null bytes between the current end of the file
             # and the new write position.
-            padding = '\x00' * (newpos - len(self._buffer) - n)
+            padding = b'\x00' * (newpos - len(self._buffer) - n)
             self._buffer[self._pos:newpos - n] = padding
         self._buffer[self._pos:newpos] = b
         self._pos = newpos
@@ -801,11 +803,8 @@ class BufferedWriter(_BufferedIOMixin):
     def write(self, b):
         if self.closed:
             raise ValueError("write to closed file")
-        if not isinstance(b, bytes):
-            if hasattr(b, "__index__"):
-                raise TypeError("Can't write object of type %s" %
-                                type(b).__name__)
-            b = bytes(b)
+        if isinstance(b, str):
+            raise TypeError("can't write str to binary stream")
         # XXX we can implement some more tricks to try and avoid partial writes
         if len(self._write_buf) > self.buffer_size:
             # We're full, so let's pre-flush the buffer
@@ -1099,8 +1098,6 @@ class TextIOWrapper(TextIOBase):
             s = s.replace("\n", self._writenl)
         # XXX What if we were just reading?
         b = s.encode(self._encoding)
-        if isinstance(b, str):
-            b = bytes(b)
         self.buffer.write(b)
         if haslf and self.isatty():
             self.flush()
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 182cbfc..b267cac 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -82,7 +82,13 @@ _getbuffer(PyObject *obj, PyBuffer *view)
 
     if (buffer == NULL ||
         PyUnicode_Check(obj) ||
-        buffer->bf_getbuffer == NULL) return -1;
+        buffer->bf_getbuffer == NULL)
+    {
+        PyErr_Format(PyExc_TypeError,
+                     "Type %.100s doesn't support the buffer API",
+                     Py_Type(obj)->tp_name);
+        return -1;
+    }
 
     if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
             return -1;
@@ -167,7 +173,7 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
     else if (size < alloc) {
         /* Within allocated size; quick exit */
         Py_Size(self) = size;
-	((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
+        ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
         return 0;
     }
     else if (size <= alloc * 1.125) {
@@ -181,10 +187,11 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
 
     if (((PyBytesObject *)self)->ob_exports > 0) {
             /*
-            fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports, ((PyBytesObject *)self)->ob_bytes);
+            fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports,
+                    ((PyBytesObject *)self)->ob_bytes);
             */
             PyErr_SetString(PyExc_BufferError,
-                            "Existing exports of data: object cannot be re-sized");
+                    "Existing exports of data: object cannot be re-sized");
             return -1;
     }
 
@@ -262,24 +269,24 @@ bytes_iconcat(PyBytesObject *self, PyObject *other)
     PyBuffer vo;
 
     if (_getbuffer(other, &vo) < 0) {
-            PyErr_Format(PyExc_TypeError,
-                         "can't concat bytes to %.100s", Py_Type(self)->tp_name);
-            return NULL;
+        PyErr_Format(PyExc_TypeError, "can't concat bytes to %.100s",
+                     Py_Type(self)->tp_name);
+        return NULL;
     }
 
     mysize = Py_Size(self);
     size = mysize + vo.len;
     if (size < 0) {
-            PyObject_ReleaseBuffer(other, &vo);
-            return PyErr_NoMemory();
+        PyObject_ReleaseBuffer(other, &vo);
+        return PyErr_NoMemory();
     }
     if (size < self->ob_alloc) {
-            Py_Size(self) = size;
-            self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
+        Py_Size(self) = size;
+        self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
     }
     else if (PyBytes_Resize((PyObject *)self, size) < 0) {
-            PyObject_ReleaseBuffer(other, &vo);
-            return NULL;
+        PyObject_ReleaseBuffer(other, &vo);
+        return NULL;
     }
     memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
     PyObject_ReleaseBuffer(other, &vo);
@@ -327,7 +334,7 @@ bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
         return PyErr_NoMemory();
     if (size < self->ob_alloc) {
         Py_Size(self) = size;
-	self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
+        self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
     }
     else if (PyBytes_Resize((PyObject *)self, size) < 0)
         return NULL;
@@ -507,7 +514,7 @@ bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
             memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
                     Py_Size(self) - hi);
         }
-	/* XXX(nnorwitz): need to verify this can't overflow! */
+        /* XXX(nnorwitz): need to verify this can't overflow! */
         if (PyBytes_Resize((PyObject *)self,
                            Py_Size(self) + needed - avail) < 0) {
                 res = -1;
@@ -757,8 +764,11 @@ bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
     if (PyUnicode_Check(arg)) {
         /* Encode via the codec registry */
         PyObject *encoded, *new;
-        if (encoding == NULL)
-            encoding = PyUnicode_GetDefaultEncoding();
+        if (encoding == NULL) {
+            PyErr_SetString(PyExc_TypeError,
+                            "string argument without an encoding");
+            return -1;
+        }
         encoded = PyCodec_Encode(arg, encoding, errors);
         if (encoded == NULL)
             return -1;
@@ -769,12 +779,12 @@ bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
             Py_DECREF(encoded);
             return -1;
         }
-	new = bytes_iconcat(self, encoded);
-	Py_DECREF(encoded);
-	if (new == NULL)
-	    return -1;
-	Py_DECREF(new);
-	return 0;
+        new = bytes_iconcat(self, encoded);
+        Py_DECREF(encoded);
+        if (new == NULL)
+            return -1;
+        Py_DECREF(new);
+        return 0;
     }
 
     /* If it's not unicode, there can't be encoding or errors */
@@ -954,12 +964,14 @@ bytes_richcompare(PyObject *self, PyObject *other, int op)
 
     self_size = _getbuffer(self, &self_bytes);
     if (self_size < 0) {
+        PyErr_Clear();
         Py_INCREF(Py_NotImplemented);
         return Py_NotImplemented;
     }
 
     other_size = _getbuffer(other, &other_bytes);
     if (other_size < 0) {
+        PyErr_Clear();
         PyObject_ReleaseBuffer(self, &self_bytes);
         Py_INCREF(Py_NotImplemented);
         return Py_NotImplemented;
@@ -1061,10 +1073,11 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
         sub_len = PyBytes_GET_SIZE(subobj);
     }
     /* XXX --> use the modern buffer interface */
-    else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
+    else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len)) {
         /* XXX - the "expected a character buffer object" is pretty
            confusing for a non-expert.  remap to something else ? */
         return -2;
+    }
 
     if (dir > 0)
         return stringlib_find_slice(
@@ -2021,49 +2034,24 @@ bytes_replace(PyBytesObject *self, PyObject *args)
 {
     Py_ssize_t count = -1;
     PyObject *from, *to, *res;
-    const char *from_s, *to_s;
-    Py_ssize_t from_len, to_len;
-    int relfrom=0, relto=0;
     PyBuffer vfrom, vto;
 
     if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
         return NULL;
 
-    if (PyBytes_Check(from)) {
-        from_s = PyBytes_AS_STRING(from);
-        from_len = PyBytes_GET_SIZE(from);
-    }
-    else {
-            if (PyObject_GetBuffer(from, &vfrom, PyBUF_CHARACTER) < 0)
-                    return NULL;
-            from_s = vfrom.buf;
-            from_len = vfrom.len;
-            relfrom = 1;
-    }
-
-    if (PyBytes_Check(to)) {
-        to_s = PyBytes_AS_STRING(to);
-        to_len = PyBytes_GET_SIZE(to);
-    }
-    else {
-            if (PyObject_GetBuffer(to, &vto, PyBUF_CHARACTER) < 0) {
-                    if (relfrom)
-                            PyObject_ReleaseBuffer(from, &vfrom);
-                    return NULL;
-            }
-            to_s = vto.buf;
-            to_len = vto.len;
-            relto = 1;
+    if (_getbuffer(from, &vfrom) < 0)
+        return NULL;
+    if (_getbuffer(to, &vto) < 0) {
+        PyObject_ReleaseBuffer(from, &vfrom);
+        return NULL;
     }
 
     res = (PyObject *)replace((PyBytesObject *) self,
-                              from_s, from_len,
-                              to_s, to_len, count);
+                              vfrom.buf, vfrom.len,
+                              vto.buf, vto.len, count);
 
-    if (relfrom)
-            PyObject_ReleaseBuffer(from, &vfrom);
-    if (relto)
-            PyObject_ReleaseBuffer(to, &vto);
+    PyObject_ReleaseBuffer(from, &vfrom);
+    PyObject_ReleaseBuffer(to, &vto);
     return res;
 }
 
@@ -2799,10 +2787,10 @@ bytes_reduce(PyBytesObject *self)
 {
     PyObject *latin1;
     if (self->ob_bytes)
-	latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
-					Py_Size(self), NULL);
+        latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
+                                        Py_Size(self), NULL);
     else
-	latin1 = PyUnicode_FromString("");
+        latin1 = PyUnicode_FromString("");
     return Py_BuildValue("(O(Ns))", Py_Type(self), latin1, "latin-1");
 }
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 1e8f63f..4e8b2ed 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -965,31 +965,11 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
 	return NULL;
     }
 
-#if 0
-    /* For b/w compatibility we also accept Unicode objects provided
-       that no encodings is given and then redirect to
-       PyObject_Unicode() which then applies the additional logic for
-       Unicode subclasses.
-
-       NOTE: This API should really only be used for object which
-             represent *encoded* Unicode !
-
-    */
-	if (PyUnicode_Check(obj)) {
-	    if (encoding) {
-		PyErr_SetString(PyExc_TypeError,
-				"decoding Unicode is not supported");
-	    return NULL;
-	    }
-	return PyObject_Unicode(obj);
-	    }
-#else
     if (PyUnicode_Check(obj)) {
 	PyErr_SetString(PyExc_TypeError,
 			"decoding Unicode is not supported");
 	return NULL;
 	}
-#endif
 
     /* Coerce object */
     if (PyString_Check(obj)) {
@@ -6440,26 +6420,7 @@ able to handle UnicodeDecodeErrors.");
 static PyObject *
 unicode_decode(PyUnicodeObject *self, PyObject *args)
 {
-    char *encoding = NULL;
-    char *errors = NULL;
-    PyObject *v;
-    
-    if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
-        return NULL;
-    v = PyUnicode_AsDecodedObject((PyObject *)self, encoding, errors);
-    if (v == NULL)
-        goto onError;
-    if (!PyString_Check(v) && !PyUnicode_Check(v)) {
-        PyErr_Format(PyExc_TypeError,
-                     "decoder did not return a string/unicode object "
-                     "(type=%.400s)",
-                     Py_Type(v)->tp_name);
-        Py_DECREF(v);
-        return NULL;
-    }
-    return v;
-
- onError:
+    PyErr_Format(PyExc_TypeError, "decoding str is not supported");
     return NULL;
 }
 
@@ -8136,17 +8097,11 @@ unicode_buffer_getbuffer(PyUnicodeObject *self, PyBuffer *view, int flags)
 {
 
     if (flags & PyBUF_CHARACTER) {
-        PyObject *str;
-        
-        str = _PyUnicode_AsDefaultEncodedString((PyObject *)self, NULL);
-        if (str == NULL) return -1;
-        return PyBuffer_FillInfo(view, (void *)PyString_AS_STRING(str),
-                                 PyString_GET_SIZE(str), 1, flags);
-    }
-    else {
-        return PyBuffer_FillInfo(view, (void *)self->str, 
-                                 PyUnicode_GET_DATA_SIZE(self), 1, flags);
+        PyErr_SetString(PyExc_SystemError, "can't use str as char buffer");
+        return -1;
     }
+    return PyBuffer_FillInfo(view, (void *)self->str,
+                             PyUnicode_GET_DATA_SIZE(self), 1, flags);
 }
author	Guido van Rossum <guido@python.org>	2007-08-29 04:05:57 (GMT)
committer	Guido van Rossum <guido@python.org>	2007-08-29 04:05:57 (GMT)
commit	a74184eb1d1ed8c1c139ea692b6037a7563d5540 (patch)
tree	ffa099c584ab7857a1ac61d6cd8b8f872c49ffb0
parent	245b42ec4b07682dd44bb92dbde328c7ce78d90b (diff)
download	cpython-a74184eb1d1ed8c1c139ea692b6037a7563d5540.zip cpython-a74184eb1d1ed8c1c139ea692b6037a7563d5540.tar.gz cpython-a74184eb1d1ed8c1c139ea692b6037a7563d5540.tar.bz2