summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org>, 2007-08-29 04:05:57 (GMT)
committer: Guido van Rossum <guido@python.org>, 2007-08-29 04:05:57 (GMT)
commit: a74184eb1d1ed8c1c139ea692b6037a7563d5540 (patch)
tree: ffa099c584ab7857a1ac61d6cd8b8f872c49ffb0
parent: 245b42ec4b07682dd44bb92dbde328c7ce78d90b (diff)
download: cpython-a74184eb1d1ed8c1c139ea692b6037a7563d5540.zip
cpython-a74184eb1d1ed8c1c139ea692b6037a7563d5540.tar.gz
cpython-a74184eb1d1ed8c1c139ea692b6037a7563d5540.tar.bz2
Commit strict str/bytes distinction.
From now on, trying to write str to a binary stream is an error (I'm still working on the reverse). There are still (at least) two failing tests, test_asynchat and test_urllib2_localnet, but I'm sure these will be fixed by someone.
-rw-r--r-- Lib/io.py 13
-rw-r--r-- Objects/bytesobject.c 110
-rw-r--r-- Objects/unicodeobject.c 55
3 files changed, 59 insertions, 119 deletions
diff --git a/Lib/io.py b/Lib/io.py
index 3ebf5ae..7aa79ce 100644
--- a/Lib/io.py
+++ b/Lib/io.py
@@ -659,12 +659,14 @@ class BytesIO(BufferedIOBase):
def write(self, b):
if self.closed:
raise ValueError("write to closed file")
+ if isinstance(b, str):
+ raise TypeError("can't write str to binary stream")
n = len(b)
newpos = self._pos + n
if newpos > len(self._buffer):
# Inserts null bytes between the current end of the file
# and the new write position.
- padding = '\x00' * (newpos - len(self._buffer) - n)
+ padding = b'\x00' * (newpos - len(self._buffer) - n)
self._buffer[self._pos:newpos - n] = padding
self._buffer[self._pos:newpos] = b
self._pos = newpos
@@ -801,11 +803,8 @@ class BufferedWriter(_BufferedIOMixin):
def write(self, b):
if self.closed:
raise ValueError("write to closed file")
- if not isinstance(b, bytes):
- if hasattr(b, "__index__"):
- raise TypeError("Can't write object of type %s" %
- type(b).__name__)
- b = bytes(b)
+ if isinstance(b, str):
+ raise TypeError("can't write str to binary stream")
# XXX we can implement some more tricks to try and avoid partial writes
if len(self._write_buf) > self.buffer_size:
# We're full, so let's pre-flush the buffer
@@ -1099,8 +1098,6 @@ class TextIOWrapper(TextIOBase):
s = s.replace("\n", self._writenl)
# XXX What if we were just reading?
b = s.encode(self._encoding)
- if isinstance(b, str):
- b = bytes(b)
self.buffer.write(b)
if haslf and self.isatty():
self.flush()
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 182cbfc..b267cac 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -82,7 +82,13 @@ _getbuffer(PyObject *obj, PyBuffer *view)
if (buffer == NULL ||
PyUnicode_Check(obj) ||
- buffer->bf_getbuffer == NULL) return -1;
+ buffer->bf_getbuffer == NULL)
+ {
+ PyErr_Format(PyExc_TypeError,
+ "Type %.100s doesn't support the buffer API",
+ Py_Type(obj)->tp_name);
+ return -1;
+ }
if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
return -1;
@@ -167,7 +173,7 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
else if (size < alloc) {
/* Within allocated size; quick exit */
Py_Size(self) = size;
- ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
+ ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
return 0;
}
else if (size <= alloc * 1.125) {
@@ -181,10 +187,11 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
if (((PyBytesObject *)self)->ob_exports > 0) {
/*
- fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports, ((PyBytesObject *)self)->ob_bytes);
+ fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports,
+ ((PyBytesObject *)self)->ob_bytes);
*/
PyErr_SetString(PyExc_BufferError,
- "Existing exports of data: object cannot be re-sized");
+ "Existing exports of data: object cannot be re-sized");
return -1;
}
@@ -262,24 +269,24 @@ bytes_iconcat(PyBytesObject *self, PyObject *other)
PyBuffer vo;
if (_getbuffer(other, &vo) < 0) {
- PyErr_Format(PyExc_TypeError,
- "can't concat bytes to %.100s", Py_Type(self)->tp_name);
- return NULL;
+ PyErr_Format(PyExc_TypeError, "can't concat bytes to %.100s",
+ Py_Type(self)->tp_name);
+ return NULL;
}
mysize = Py_Size(self);
size = mysize + vo.len;
if (size < 0) {
- PyObject_ReleaseBuffer(other, &vo);
- return PyErr_NoMemory();
+ PyObject_ReleaseBuffer(other, &vo);
+ return PyErr_NoMemory();
}
if (size < self->ob_alloc) {
- Py_Size(self) = size;
- self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
+ Py_Size(self) = size;
+ self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
}
else if (PyBytes_Resize((PyObject *)self, size) < 0) {
- PyObject_ReleaseBuffer(other, &vo);
- return NULL;
+ PyObject_ReleaseBuffer(other, &vo);
+ return NULL;
}
memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
PyObject_ReleaseBuffer(other, &vo);
@@ -327,7 +334,7 @@ bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
return PyErr_NoMemory();
if (size < self->ob_alloc) {
Py_Size(self) = size;
- self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
+ self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
}
else if (PyBytes_Resize((PyObject *)self, size) < 0)
return NULL;
@@ -507,7 +514,7 @@ bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
Py_Size(self) - hi);
}
- /* XXX(nnorwitz): need to verify this can't overflow! */
+ /* XXX(nnorwitz): need to verify this can't overflow! */
if (PyBytes_Resize((PyObject *)self,
Py_Size(self) + needed - avail) < 0) {
res = -1;
@@ -757,8 +764,11 @@ bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
if (PyUnicode_Check(arg)) {
/* Encode via the codec registry */
PyObject *encoded, *new;
- if (encoding == NULL)
- encoding = PyUnicode_GetDefaultEncoding();
+ if (encoding == NULL) {
+ PyErr_SetString(PyExc_TypeError,
+ "string argument without an encoding");
+ return -1;
+ }
encoded = PyCodec_Encode(arg, encoding, errors);
if (encoded == NULL)
return -1;
@@ -769,12 +779,12 @@ bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
Py_DECREF(encoded);
return -1;
}
- new = bytes_iconcat(self, encoded);
- Py_DECREF(encoded);
- if (new == NULL)
- return -1;
- Py_DECREF(new);
- return 0;
+ new = bytes_iconcat(self, encoded);
+ Py_DECREF(encoded);
+ if (new == NULL)
+ return -1;
+ Py_DECREF(new);
+ return 0;
}
/* If it's not unicode, there can't be encoding or errors */
@@ -954,12 +964,14 @@ bytes_richcompare(PyObject *self, PyObject *other, int op)
self_size = _getbuffer(self, &self_bytes);
if (self_size < 0) {
+ PyErr_Clear();
Py_INCREF(Py_NotImplemented);
return Py_NotImplemented;
}
other_size = _getbuffer(other, &other_bytes);
if (other_size < 0) {
+ PyErr_Clear();
PyObject_ReleaseBuffer(self, &self_bytes);
Py_INCREF(Py_NotImplemented);
return Py_NotImplemented;
@@ -1061,10 +1073,11 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
sub_len = PyBytes_GET_SIZE(subobj);
}
/* XXX --> use the modern buffer interface */
- else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
+ else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len)) {
/* XXX - the "expected a character buffer object" is pretty
confusing for a non-expert. remap to something else ? */
return -2;
+ }
if (dir > 0)
return stringlib_find_slice(
@@ -2021,49 +2034,24 @@ bytes_replace(PyBytesObject *self, PyObject *args)
{
Py_ssize_t count = -1;
PyObject *from, *to, *res;
- const char *from_s, *to_s;
- Py_ssize_t from_len, to_len;
- int relfrom=0, relto=0;
PyBuffer vfrom, vto;
if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
return NULL;
- if (PyBytes_Check(from)) {
- from_s = PyBytes_AS_STRING(from);
- from_len = PyBytes_GET_SIZE(from);
- }
- else {
- if (PyObject_GetBuffer(from, &vfrom, PyBUF_CHARACTER) < 0)
- return NULL;
- from_s = vfrom.buf;
- from_len = vfrom.len;
- relfrom = 1;
- }
-
- if (PyBytes_Check(to)) {
- to_s = PyBytes_AS_STRING(to);
- to_len = PyBytes_GET_SIZE(to);
- }
- else {
- if (PyObject_GetBuffer(to, &vto, PyBUF_CHARACTER) < 0) {
- if (relfrom)
- PyObject_ReleaseBuffer(from, &vfrom);
- return NULL;
- }
- to_s = vto.buf;
- to_len = vto.len;
- relto = 1;
+ if (_getbuffer(from, &vfrom) < 0)
+ return NULL;
+ if (_getbuffer(to, &vto) < 0) {
+ PyObject_ReleaseBuffer(from, &vfrom);
+ return NULL;
}
res = (PyObject *)replace((PyBytesObject *) self,
- from_s, from_len,
- to_s, to_len, count);
+ vfrom.buf, vfrom.len,
+ vto.buf, vto.len, count);
- if (relfrom)
- PyObject_ReleaseBuffer(from, &vfrom);
- if (relto)
- PyObject_ReleaseBuffer(to, &vto);
+ PyObject_ReleaseBuffer(from, &vfrom);
+ PyObject_ReleaseBuffer(to, &vto);
return res;
}
@@ -2799,10 +2787,10 @@ bytes_reduce(PyBytesObject *self)
{
PyObject *latin1;
if (self->ob_bytes)
- latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
- Py_Size(self), NULL);
+ latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
+ Py_Size(self), NULL);
else
- latin1 = PyUnicode_FromString("");
+ latin1 = PyUnicode_FromString("");
return Py_BuildValue("(O(Ns))", Py_Type(self), latin1, "latin-1");
}
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 1e8f63f..4e8b2ed 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -965,31 +965,11 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
return NULL;
}
-#if 0
- /* For b/w compatibility we also accept Unicode objects provided
- that no encodings is given and then redirect to
- PyObject_Unicode() which then applies the additional logic for
- Unicode subclasses.
-
- NOTE: This API should really only be used for object which
- represent *encoded* Unicode !
-
- */
- if (PyUnicode_Check(obj)) {
- if (encoding) {
- PyErr_SetString(PyExc_TypeError,
- "decoding Unicode is not supported");
- return NULL;
- }
- return PyObject_Unicode(obj);
- }
-#else
if (PyUnicode_Check(obj)) {
PyErr_SetString(PyExc_TypeError,
"decoding Unicode is not supported");
return NULL;
}
-#endif
/* Coerce object */
if (PyString_Check(obj)) {
@@ -6440,26 +6420,7 @@ able to handle UnicodeDecodeErrors.");
static PyObject *
unicode_decode(PyUnicodeObject *self, PyObject *args)
{
- char *encoding = NULL;
- char *errors = NULL;
- PyObject *v;
-
- if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
- return NULL;
- v = PyUnicode_AsDecodedObject((PyObject *)self, encoding, errors);
- if (v == NULL)
- goto onError;
- if (!PyString_Check(v) && !PyUnicode_Check(v)) {
- PyErr_Format(PyExc_TypeError,
- "decoder did not return a string/unicode object "
- "(type=%.400s)",
- Py_Type(v)->tp_name);
- Py_DECREF(v);
- return NULL;
- }
- return v;
-
- onError:
+ PyErr_Format(PyExc_TypeError, "decoding str is not supported");
return NULL;
}
@@ -8136,17 +8097,11 @@ unicode_buffer_getbuffer(PyUnicodeObject *self, PyBuffer *view, int flags)
{
if (flags & PyBUF_CHARACTER) {
- PyObject *str;
-
- str = _PyUnicode_AsDefaultEncodedString((PyObject *)self, NULL);
- if (str == NULL) return -1;
- return PyBuffer_FillInfo(view, (void *)PyString_AS_STRING(str),
- PyString_GET_SIZE(str), 1, flags);
- }
- else {
- return PyBuffer_FillInfo(view, (void *)self->str,
- PyUnicode_GET_DATA_SIZE(self), 1, flags);
+ PyErr_SetString(PyExc_SystemError, "can't use str as char buffer");
+ return -1;
}
+ return PyBuffer_FillInfo(view, (void *)self->str,
+ PyUnicode_GET_DATA_SIZE(self), 1, flags);
}