diff options
author | Guido van Rossum <guido@python.org> | 2007-08-29 04:05:57 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2007-08-29 04:05:57 (GMT) |
commit | a74184eb1d1ed8c1c139ea692b6037a7563d5540 (patch) | |
tree | ffa099c584ab7857a1ac61d6cd8b8f872c49ffb0 | |
parent | 245b42ec4b07682dd44bb92dbde328c7ce78d90b (diff) | |
download | cpython-a74184eb1d1ed8c1c139ea692b6037a7563d5540.zip cpython-a74184eb1d1ed8c1c139ea692b6037a7563d5540.tar.gz cpython-a74184eb1d1ed8c1c139ea692b6037a7563d5540.tar.bz2 |
Commit strict str/bytes distinction.
From now on, trying to write str to a binary stream
is an error (I'm still working on the reverse).
There are still (at least) two failing tests:
- test_asynchat
- test_urllib2_localnet
but I'm sure these will be fixed by someone.
-rw-r--r-- | Lib/io.py | 13 |
-rw-r--r-- | Objects/bytesobject.c | 110 |
-rw-r--r-- | Objects/unicodeobject.c | 55 |
3 files changed, 59 insertions, 119 deletions
@@ -659,12 +659,14 @@ class BytesIO(BufferedIOBase): def write(self, b): if self.closed: raise ValueError("write to closed file") + if isinstance(b, str): + raise TypeError("can't write str to binary stream") n = len(b) newpos = self._pos + n if newpos > len(self._buffer): # Inserts null bytes between the current end of the file # and the new write position. - padding = '\x00' * (newpos - len(self._buffer) - n) + padding = b'\x00' * (newpos - len(self._buffer) - n) self._buffer[self._pos:newpos - n] = padding self._buffer[self._pos:newpos] = b self._pos = newpos @@ -801,11 +803,8 @@ class BufferedWriter(_BufferedIOMixin): def write(self, b): if self.closed: raise ValueError("write to closed file") - if not isinstance(b, bytes): - if hasattr(b, "__index__"): - raise TypeError("Can't write object of type %s" % - type(b).__name__) - b = bytes(b) + if isinstance(b, str): + raise TypeError("can't write str to binary stream") # XXX we can implement some more tricks to try and avoid partial writes if len(self._write_buf) > self.buffer_size: # We're full, so let's pre-flush the buffer @@ -1099,8 +1098,6 @@ class TextIOWrapper(TextIOBase): s = s.replace("\n", self._writenl) # XXX What if we were just reading? 
b = s.encode(self._encoding) - if isinstance(b, str): - b = bytes(b) self.buffer.write(b) if haslf and self.isatty(): self.flush() diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 182cbfc..b267cac 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -82,7 +82,13 @@ _getbuffer(PyObject *obj, PyBuffer *view) if (buffer == NULL || PyUnicode_Check(obj) || - buffer->bf_getbuffer == NULL) return -1; + buffer->bf_getbuffer == NULL) + { + PyErr_Format(PyExc_TypeError, + "Type %.100s doesn't support the buffer API", + Py_Type(obj)->tp_name); + return -1; + } if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0) return -1; @@ -167,7 +173,7 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size) else if (size < alloc) { /* Within allocated size; quick exit */ Py_Size(self) = size; - ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */ + ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */ return 0; } else if (size <= alloc * 1.125) { @@ -181,10 +187,11 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size) if (((PyBytesObject *)self)->ob_exports > 0) { /* - fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports, ((PyBytesObject *)self)->ob_bytes); + fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports, + ((PyBytesObject *)self)->ob_bytes); */ PyErr_SetString(PyExc_BufferError, - "Existing exports of data: object cannot be re-sized"); + "Existing exports of data: object cannot be re-sized"); return -1; } @@ -262,24 +269,24 @@ bytes_iconcat(PyBytesObject *self, PyObject *other) PyBuffer vo; if (_getbuffer(other, &vo) < 0) { - PyErr_Format(PyExc_TypeError, - "can't concat bytes to %.100s", Py_Type(self)->tp_name); - return NULL; + PyErr_Format(PyExc_TypeError, "can't concat bytes to %.100s", + Py_Type(self)->tp_name); + return NULL; } mysize = Py_Size(self); size = mysize + vo.len; if (size < 0) { - PyObject_ReleaseBuffer(other, &vo); - return PyErr_NoMemory(); + PyObject_ReleaseBuffer(other, &vo); + return 
PyErr_NoMemory(); } if (size < self->ob_alloc) { - Py_Size(self) = size; - self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */ + Py_Size(self) = size; + self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */ } else if (PyBytes_Resize((PyObject *)self, size) < 0) { - PyObject_ReleaseBuffer(other, &vo); - return NULL; + PyObject_ReleaseBuffer(other, &vo); + return NULL; } memcpy(self->ob_bytes + mysize, vo.buf, vo.len); PyObject_ReleaseBuffer(other, &vo); @@ -327,7 +334,7 @@ bytes_irepeat(PyBytesObject *self, Py_ssize_t count) return PyErr_NoMemory(); if (size < self->ob_alloc) { Py_Size(self) = size; - self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */ + self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */ } else if (PyBytes_Resize((PyObject *)self, size) < 0) return NULL; @@ -507,7 +514,7 @@ bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi, memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi, Py_Size(self) - hi); } - /* XXX(nnorwitz): need to verify this can't overflow! */ + /* XXX(nnorwitz): need to verify this can't overflow! 
*/ if (PyBytes_Resize((PyObject *)self, Py_Size(self) + needed - avail) < 0) { res = -1; @@ -757,8 +764,11 @@ bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds) if (PyUnicode_Check(arg)) { /* Encode via the codec registry */ PyObject *encoded, *new; - if (encoding == NULL) - encoding = PyUnicode_GetDefaultEncoding(); + if (encoding == NULL) { + PyErr_SetString(PyExc_TypeError, + "string argument without an encoding"); + return -1; + } encoded = PyCodec_Encode(arg, encoding, errors); if (encoded == NULL) return -1; @@ -769,12 +779,12 @@ bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds) Py_DECREF(encoded); return -1; } - new = bytes_iconcat(self, encoded); - Py_DECREF(encoded); - if (new == NULL) - return -1; - Py_DECREF(new); - return 0; + new = bytes_iconcat(self, encoded); + Py_DECREF(encoded); + if (new == NULL) + return -1; + Py_DECREF(new); + return 0; } /* If it's not unicode, there can't be encoding or errors */ @@ -954,12 +964,14 @@ bytes_richcompare(PyObject *self, PyObject *other, int op) self_size = _getbuffer(self, &self_bytes); if (self_size < 0) { + PyErr_Clear(); Py_INCREF(Py_NotImplemented); return Py_NotImplemented; } other_size = _getbuffer(other, &other_bytes); if (other_size < 0) { + PyErr_Clear(); PyObject_ReleaseBuffer(self, &self_bytes); Py_INCREF(Py_NotImplemented); return Py_NotImplemented; @@ -1061,10 +1073,11 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir) sub_len = PyBytes_GET_SIZE(subobj); } /* XXX --> use the modern buffer interface */ - else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len)) + else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len)) { /* XXX - the "expected a character buffer object" is pretty confusing for a non-expert. remap to something else ? 
*/ return -2; + } if (dir > 0) return stringlib_find_slice( @@ -2021,49 +2034,24 @@ bytes_replace(PyBytesObject *self, PyObject *args) { Py_ssize_t count = -1; PyObject *from, *to, *res; - const char *from_s, *to_s; - Py_ssize_t from_len, to_len; - int relfrom=0, relto=0; PyBuffer vfrom, vto; if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count)) return NULL; - if (PyBytes_Check(from)) { - from_s = PyBytes_AS_STRING(from); - from_len = PyBytes_GET_SIZE(from); - } - else { - if (PyObject_GetBuffer(from, &vfrom, PyBUF_CHARACTER) < 0) - return NULL; - from_s = vfrom.buf; - from_len = vfrom.len; - relfrom = 1; - } - - if (PyBytes_Check(to)) { - to_s = PyBytes_AS_STRING(to); - to_len = PyBytes_GET_SIZE(to); - } - else { - if (PyObject_GetBuffer(to, &vto, PyBUF_CHARACTER) < 0) { - if (relfrom) - PyObject_ReleaseBuffer(from, &vfrom); - return NULL; - } - to_s = vto.buf; - to_len = vto.len; - relto = 1; + if (_getbuffer(from, &vfrom) < 0) + return NULL; + if (_getbuffer(to, &vto) < 0) { + PyObject_ReleaseBuffer(from, &vfrom); + return NULL; } res = (PyObject *)replace((PyBytesObject *) self, - from_s, from_len, - to_s, to_len, count); + vfrom.buf, vfrom.len, + vto.buf, vto.len, count); - if (relfrom) - PyObject_ReleaseBuffer(from, &vfrom); - if (relto) - PyObject_ReleaseBuffer(to, &vto); + PyObject_ReleaseBuffer(from, &vfrom); + PyObject_ReleaseBuffer(to, &vto); return res; } @@ -2799,10 +2787,10 @@ bytes_reduce(PyBytesObject *self) { PyObject *latin1; if (self->ob_bytes) - latin1 = PyUnicode_DecodeLatin1(self->ob_bytes, - Py_Size(self), NULL); + latin1 = PyUnicode_DecodeLatin1(self->ob_bytes, + Py_Size(self), NULL); else - latin1 = PyUnicode_FromString(""); + latin1 = PyUnicode_FromString(""); return Py_BuildValue("(O(Ns))", Py_Type(self), latin1, "latin-1"); } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 1e8f63f..4e8b2ed 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -965,31 +965,11 @@ PyObject 
*PyUnicode_FromEncodedObject(register PyObject *obj, return NULL; } -#if 0 - /* For b/w compatibility we also accept Unicode objects provided - that no encodings is given and then redirect to - PyObject_Unicode() which then applies the additional logic for - Unicode subclasses. - - NOTE: This API should really only be used for object which - represent *encoded* Unicode ! - - */ - if (PyUnicode_Check(obj)) { - if (encoding) { - PyErr_SetString(PyExc_TypeError, - "decoding Unicode is not supported"); - return NULL; - } - return PyObject_Unicode(obj); - } -#else if (PyUnicode_Check(obj)) { PyErr_SetString(PyExc_TypeError, "decoding Unicode is not supported"); return NULL; } -#endif /* Coerce object */ if (PyString_Check(obj)) { @@ -6440,26 +6420,7 @@ able to handle UnicodeDecodeErrors."); static PyObject * unicode_decode(PyUnicodeObject *self, PyObject *args) { - char *encoding = NULL; - char *errors = NULL; - PyObject *v; - - if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors)) - return NULL; - v = PyUnicode_AsDecodedObject((PyObject *)self, encoding, errors); - if (v == NULL) - goto onError; - if (!PyString_Check(v) && !PyUnicode_Check(v)) { - PyErr_Format(PyExc_TypeError, - "decoder did not return a string/unicode object " - "(type=%.400s)", - Py_Type(v)->tp_name); - Py_DECREF(v); - return NULL; - } - return v; - - onError: + PyErr_Format(PyExc_TypeError, "decoding str is not supported"); return NULL; } @@ -8136,17 +8097,11 @@ unicode_buffer_getbuffer(PyUnicodeObject *self, PyBuffer *view, int flags) { if (flags & PyBUF_CHARACTER) { - PyObject *str; - - str = _PyUnicode_AsDefaultEncodedString((PyObject *)self, NULL); - if (str == NULL) return -1; - return PyBuffer_FillInfo(view, (void *)PyString_AS_STRING(str), - PyString_GET_SIZE(str), 1, flags); - } - else { - return PyBuffer_FillInfo(view, (void *)self->str, - PyUnicode_GET_DATA_SIZE(self), 1, flags); + PyErr_SetString(PyExc_SystemError, "can't use str as char buffer"); + return -1; } + return 
PyBuffer_FillInfo(view, (void *)self->str, + PyUnicode_GET_DATA_SIZE(self), 1, flags); } |