diff options
author | Guido van Rossum <guido@python.org> | 2007-11-06 21:34:58 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2007-11-06 21:34:58 (GMT) |
commit | 98297ee7815939b124156e438b22bd652d67b5db (patch) | |
tree | a9d239ebd87c73af2571ab48003984c4e18e27e5 /Objects | |
parent | a19f80c6df2df5e8a5d0cff37131097835ef971e (diff) | |
download | cpython-98297ee7815939b124156e438b22bd652d67b5db.zip cpython-98297ee7815939b124156e438b22bd652d67b5db.tar.gz cpython-98297ee7815939b124156e438b22bd652d67b5db.tar.bz2 |
Merging the py3k-pep3137 branch back into the py3k branch.
No detailed change log; just check out the change log for the py3k-pep3137
branch. The most obvious changes:
- str8 renamed to bytes (PyString at the C level);
- bytes renamed to buffer (PyBytes at the C level);
- PyString and PyUnicode are no longer compatible.
I.e. we now have an immutable bytes type (bytes) and a mutable bytes type (buffer).
The behavior of PyString was modified quite a bit, to make it more
bytes-like. Some changes are still on the to-do list.
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/abstract.c | 105 | ||||
-rw-r--r-- | Objects/bytesobject.c | 396 | ||||
-rw-r--r-- | Objects/codeobject.c | 12 | ||||
-rw-r--r-- | Objects/exceptions.c | 115 | ||||
-rw-r--r-- | Objects/fileobject.c | 2 | ||||
-rw-r--r-- | Objects/longobject.c | 20 | ||||
-rw-r--r-- | Objects/moduleobject.c | 2 | ||||
-rw-r--r-- | Objects/object.c | 168 | ||||
-rw-r--r-- | Objects/stringlib/transmogrify.h | 14 | ||||
-rw-r--r-- | Objects/stringobject.c | 1604 | ||||
-rw-r--r-- | Objects/typeobject.c | 4 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 225 |
12 files changed, 862 insertions, 1805 deletions
diff --git a/Objects/abstract.c b/Objects/abstract.c index e848f8f..01fbcbf 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -216,7 +216,7 @@ PyObject_DelItemString(PyObject *o, char *key) } /* We release the buffer right after use of this function which could - cause issues later on. Don't use these functions in new code. + cause issues later on. Don't use these functions in new code. */ int PyObject_AsCharBuffer(PyObject *obj, @@ -248,7 +248,7 @@ PyObject_AsCharBuffer(PyObject *obj, int PyObject_CheckReadBuffer(PyObject *obj) { - PyBufferProcs *pb = obj->ob_type->tp_as_buffer; + PyBufferProcs *pb = obj->ob_type->tp_as_buffer; if (pb == NULL || pb->bf_getbuffer == NULL) @@ -305,7 +305,7 @@ int PyObject_AsWriteBuffer(PyObject *obj, if (pb == NULL || pb->bf_getbuffer == NULL || ((*pb->bf_getbuffer)(obj, &view, PyBUF_WRITABLE) != 0)) { - PyErr_SetString(PyExc_TypeError, + PyErr_SetString(PyExc_TypeError, "expected an object with a writable buffer interface"); return -1; } @@ -323,8 +323,9 @@ int PyObject_GetBuffer(PyObject *obj, Py_buffer *view, int flags) { if (!PyObject_CheckBuffer(obj)) { - PyErr_SetString(PyExc_TypeError, - "object does not have the buffer interface"); + PyErr_Format(PyExc_TypeError, + "'%100s' does not have the buffer interface", + Py_Type(obj)->tp_name); return -1; } return (*(obj->ob_type->tp_as_buffer->bf_getbuffer))(obj, view, flags); @@ -333,7 +334,7 @@ PyObject_GetBuffer(PyObject *obj, Py_buffer *view, int flags) void PyObject_ReleaseBuffer(PyObject *obj, Py_buffer *view) { - if (obj->ob_type->tp_as_buffer != NULL && + if (obj->ob_type->tp_as_buffer != NULL && obj->ob_type->tp_as_buffer->bf_releasebuffer != NULL) { (*(obj->ob_type->tp_as_buffer->bf_releasebuffer))(obj, view); } @@ -345,7 +346,7 @@ _IsFortranContiguous(Py_buffer *view) { Py_ssize_t sd, dim; int i; - + if (view->ndim == 0) return 1; if (view->strides == NULL) return (view->ndim == 1); @@ -366,7 +367,7 @@ _IsCContiguous(Py_buffer *view) { Py_ssize_t sd, dim; int i; 
- + if (view->ndim == 0) return 1; if (view->strides == NULL) return 1; @@ -379,7 +380,7 @@ _IsCContiguous(Py_buffer *view) if (view->strides[i] != sd) return 0; sd *= dim; } - return 1; + return 1; } int @@ -390,7 +391,7 @@ PyBuffer_IsContiguous(Py_buffer *view, char fort) if (fort == 'C') return _IsCContiguous(view); - else if (fort == 'F') + else if (fort == 'F') return _IsFortranContiguous(view); else if (fort == 'A') return (_IsCContiguous(view) || _IsFortranContiguous(view)); @@ -398,7 +399,7 @@ PyBuffer_IsContiguous(Py_buffer *view, char fort) } -void* +void* PyBuffer_GetPointer(Py_buffer *view, Py_ssize_t *indices) { char* pointer; @@ -414,11 +415,11 @@ PyBuffer_GetPointer(Py_buffer *view, Py_ssize_t *indices) } -void +void _add_one_to_index_F(int nd, Py_ssize_t *index, Py_ssize_t *shape) { int k; - + for (k=0; k<nd; k++) { if (index[k] < shape[k]-1) { index[k]++; @@ -430,7 +431,7 @@ _add_one_to_index_F(int nd, Py_ssize_t *index, Py_ssize_t *shape) } } -void +void _add_one_to_index_C(int nd, Py_ssize_t *index, Py_ssize_t *shape) { int k; @@ -447,11 +448,11 @@ _add_one_to_index_C(int nd, Py_ssize_t *index, Py_ssize_t *shape) } /* view is not checked for consistency in either of these. It is - assumed that the size of the buffer is view->len in + assumed that the size of the buffer is view->len in view->len / view->itemsize elements. */ -int +int PyBuffer_ToContiguous(void *buf, Py_buffer *view, Py_ssize_t len, char fort) { int k; @@ -462,7 +463,7 @@ PyBuffer_ToContiguous(void *buf, Py_buffer *view, Py_ssize_t len, char fort) if (len > view->len) { len = view->len; } - + if (PyBuffer_IsContiguous(view, fort)) { /* simplest copy is all that is needed */ memcpy(buf, view->buf, len); @@ -470,7 +471,7 @@ PyBuffer_ToContiguous(void *buf, Py_buffer *view, Py_ssize_t len, char fort) } /* Otherwise a more elaborate scheme is needed */ - + /* XXX(nnorwitz): need to check for overflow! 
*/ indices = (Py_ssize_t *)PyMem_Malloc(sizeof(Py_ssize_t)*(view->ndim)); if (indices == NULL) { @@ -480,7 +481,7 @@ PyBuffer_ToContiguous(void *buf, Py_buffer *view, Py_ssize_t len, char fort) for (k=0; k<view->ndim;k++) { indices[k] = 0; } - + if (fort == 'F') { addone = _add_one_to_index_F; } @@ -489,7 +490,7 @@ PyBuffer_ToContiguous(void *buf, Py_buffer *view, Py_ssize_t len, char fort) } dest = buf; /* XXX : This is not going to be the fastest code in the world - several optimizations are possible. + several optimizations are possible. */ elements = len / view->itemsize; while (elements--) { @@ -497,7 +498,7 @@ PyBuffer_ToContiguous(void *buf, Py_buffer *view, Py_ssize_t len, char fort) ptr = PyBuffer_GetPointer(view, indices); memcpy(dest, ptr, view->itemsize); dest += view->itemsize; - } + } PyMem_Free(indices); return 0; } @@ -521,7 +522,7 @@ PyBuffer_FromContiguous(Py_buffer *view, void *buf, Py_ssize_t len, char fort) } /* Otherwise a more elaborate scheme is needed */ - + /* XXX(nnorwitz): need to check for overflow! */ indices = (Py_ssize_t *)PyMem_Malloc(sizeof(Py_ssize_t)*(view->ndim)); if (indices == NULL) { @@ -531,7 +532,7 @@ PyBuffer_FromContiguous(Py_buffer *view, void *buf, Py_ssize_t len, char fort) for (k=0; k<view->ndim;k++) { indices[k] = 0; } - + if (fort == 'F') { addone = _add_one_to_index_F; } @@ -540,7 +541,7 @@ PyBuffer_FromContiguous(Py_buffer *view, void *buf, Py_ssize_t len, char fort) } src = buf; /* XXX : This is not going to be the fastest code in the world - several optimizations are possible. + several optimizations are possible. 
*/ elements = len / view->itemsize; while (elements--) { @@ -549,12 +550,12 @@ PyBuffer_FromContiguous(Py_buffer *view, void *buf, Py_ssize_t len, char fort) memcpy(ptr, src, view->itemsize); src += view->itemsize; } - + PyMem_Free(indices); return 0; } -int PyObject_CopyData(PyObject *dest, PyObject *src) +int PyObject_CopyData(PyObject *dest, PyObject *src) { Py_buffer view_dest, view_src; int k; @@ -576,16 +577,16 @@ int PyObject_CopyData(PyObject *dest, PyObject *src) } if (view_dest.len < view_src.len) { - PyErr_SetString(PyExc_BufferError, + PyErr_SetString(PyExc_BufferError, "destination is too small to receive data from source"); PyObject_ReleaseBuffer(dest, &view_dest); PyObject_ReleaseBuffer(src, &view_src); return -1; } - if ((PyBuffer_IsContiguous(&view_dest, 'C') && + if ((PyBuffer_IsContiguous(&view_dest, 'C') && PyBuffer_IsContiguous(&view_src, 'C')) || - (PyBuffer_IsContiguous(&view_dest, 'F') && + (PyBuffer_IsContiguous(&view_dest, 'F') && PyBuffer_IsContiguous(&view_src, 'F'))) { /* simplest copy is all that is needed */ memcpy(view_dest.buf, view_src.buf, view_src.len); @@ -595,7 +596,7 @@ int PyObject_CopyData(PyObject *dest, PyObject *src) } /* Otherwise a more elaborate copy scheme is needed */ - + /* XXX(nnorwitz): need to check for overflow! */ indices = (Py_ssize_t *)PyMem_Malloc(sizeof(Py_ssize_t)*view_src.ndim); if (indices == NULL) { @@ -606,7 +607,7 @@ int PyObject_CopyData(PyObject *dest, PyObject *src) } for (k=0; k<view_src.ndim;k++) { indices[k] = 0; - } + } elements = 1; for (k=0; k<view_src.ndim; k++) { /* XXX(nnorwitz): can this overflow? 
*/ @@ -617,7 +618,7 @@ int PyObject_CopyData(PyObject *dest, PyObject *src) dptr = PyBuffer_GetPointer(&view_dest, indices); sptr = PyBuffer_GetPointer(&view_src, indices); memcpy(dptr, sptr, view_src.itemsize); - } + } PyMem_Free(indices); PyObject_ReleaseBuffer(dest, &view_dest); PyObject_ReleaseBuffer(src, &view_src); @@ -631,13 +632,13 @@ PyBuffer_FillContiguousStrides(int nd, Py_ssize_t *shape, { int k; Py_ssize_t sd; - + sd = itemsize; if (fort == 'F') { for (k=0; k<nd; k++) { strides[k] = sd; sd *= shape[k]; - } + } } else { for (k=nd-1; k>=0; k--) { @@ -651,11 +652,11 @@ PyBuffer_FillContiguousStrides(int nd, Py_ssize_t *shape, int PyBuffer_FillInfo(Py_buffer *view, void *buf, Py_ssize_t len, int readonly, int flags) -{ +{ if (view == NULL) return 0; - if (((flags & PyBUF_LOCK) == PyBUF_LOCK) && + if (((flags & PyBUF_LOCK) == PyBUF_LOCK) && readonly >= 0) { - PyErr_SetString(PyExc_BufferError, + PyErr_SetString(PyExc_BufferError, "Cannot lock this object."); return -1; } @@ -665,13 +666,13 @@ PyBuffer_FillInfo(Py_buffer *view, void *buf, Py_ssize_t len, "Object is not writable."); return -1; } - + view->buf = buf; view->len = len; view->readonly = readonly; view->itemsize = 1; view->format = NULL; - if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) + if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) view->format = "B"; view->ndim = 1; view->shape = NULL; @@ -1143,9 +1144,9 @@ PyNumber_Absolute(PyObject *o) return type_error("bad operand type for abs(): '%.200s'", o); } -/* Return a Python Int or Long from the object item +/* Return a Python Int or Long from the object item Raise TypeError if the result is not an int-or-long - or if the object cannot be interpreted as an index. + or if the object cannot be interpreted as an index. 
*/ PyObject * PyNumber_Index(PyObject *item) @@ -1193,19 +1194,19 @@ PyNumber_AsSsize_t(PyObject *item, PyObject *err) goto finish; /* Error handling code -- only manage OverflowError differently */ - if (!PyErr_GivenExceptionMatches(runerr, PyExc_OverflowError)) + if (!PyErr_GivenExceptionMatches(runerr, PyExc_OverflowError)) goto finish; PyErr_Clear(); - /* If no error-handling desired then the default clipping + /* If no error-handling desired then the default clipping is sufficient. */ if (!err) { assert(PyLong_Check(value)); - /* Whether or not it is less than or equal to + /* Whether or not it is less than or equal to zero is determined by the sign of ob_size */ - if (_PyLong_Sign(value) < 0) + if (_PyLong_Sign(value) < 0) result = PY_SSIZE_T_MIN; else result = PY_SSIZE_T_MAX; @@ -1213,10 +1214,10 @@ PyNumber_AsSsize_t(PyObject *item, PyObject *err) else { /* Otherwise replace the error with caller's error object. */ PyErr_Format(err, - "cannot fit '%.200s' into an index-sized integer", - item->ob_type->tp_name); + "cannot fit '%.200s' into an index-sized integer", + item->ob_type->tp_name); } - + finish: Py_DECREF(value); return result; @@ -1679,7 +1680,7 @@ PySequence_Tuple(PyObject *v) if (j >= n) { Py_ssize_t oldn = n; /* The over-allocation strategy can grow a bit faster - than for lists because unlike lists the + than for lists because unlike lists the over-allocation isn't permanent -- we reclaim the excess before the end of this routine. So, grow by ten and then add 25%. @@ -1690,7 +1691,7 @@ PySequence_Tuple(PyObject *v) /* Check for overflow */ PyErr_NoMemory(); Py_DECREF(item); - goto Fail; + goto Fail; } if (_PyTuple_Resize(&result, n) != 0) { Py_DECREF(item); @@ -2147,7 +2148,7 @@ PyObject_CallMethod(PyObject *o, char *name, char *format, ...) 
} if (!PyCallable_Check(func)) { - type_error("attribute of type '%.200s' is not callable", func); + type_error("attribute of type '%.200s' is not callable", func); goto exit; } @@ -2186,7 +2187,7 @@ _PyObject_CallMethod_SizeT(PyObject *o, char *name, char *format, ...) } if (!PyCallable_Check(func)) { - type_error("attribute of type '%.200s' is not callable", func); + type_error("attribute of type '%.200s' is not callable", func); goto exit; } diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 3f2dbc2..b28cacf 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1,7 +1,5 @@ /* Bytes object implementation */ -/* XXX TO DO: optimizations */ - #define PY_SSIZE_T_CLEAN #include "Python.h" #include "structmember.h" @@ -214,26 +212,21 @@ PyBytes_Concat(PyObject *a, PyObject *b) { Py_ssize_t size; Py_buffer va, vb; - PyBytesObject *result; + PyBytesObject *result = NULL; va.len = -1; vb.len = -1; if (_getbuffer(a, &va) < 0 || _getbuffer(b, &vb) < 0) { - if (va.len != -1) - PyObject_ReleaseBuffer(a, &va); - if (vb.len != -1) - PyObject_ReleaseBuffer(b, &vb); PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s", Py_Type(a)->tp_name, Py_Type(b)->tp_name); - return NULL; + goto done; } size = va.len + vb.len; if (size < 0) { - PyObject_ReleaseBuffer(a, &va); - PyObject_ReleaseBuffer(b, &vb); return PyErr_NoMemory(); + goto done; } result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size); @@ -242,8 +235,11 @@ PyBytes_Concat(PyObject *a, PyObject *b) memcpy(result->ob_bytes + va.len, vb.buf, vb.len); } - PyObject_ReleaseBuffer(a, &va); - PyObject_ReleaseBuffer(b, &vb); + done: + if (va.len != -1) + PyObject_ReleaseBuffer(a, &va); + if (vb.len != -1) + PyObject_ReleaseBuffer(b, &vb); return (PyObject *)result; } @@ -256,12 +252,6 @@ bytes_length(PyBytesObject *self) } static PyObject * -bytes_concat(PyBytesObject *self, PyObject *other) -{ - return PyBytes_Concat((PyObject *)self, other); -} - -static PyObject * 
bytes_iconcat(PyBytesObject *self, PyObject *other) { Py_ssize_t mysize; @@ -351,51 +341,13 @@ bytes_irepeat(PyBytesObject *self, Py_ssize_t count) return (PyObject *)self; } -static int -bytes_substring(PyBytesObject *self, PyBytesObject *other) -{ - Py_ssize_t i; - - if (Py_Size(other) == 1) { - return memchr(self->ob_bytes, other->ob_bytes[0], - Py_Size(self)) != NULL; - } - if (Py_Size(other) == 0) - return 1; /* Edge case */ - for (i = 0; i + Py_Size(other) <= Py_Size(self); i++) { - /* XXX Yeah, yeah, lots of optimizations possible... */ - if (memcmp(self->ob_bytes + i, other->ob_bytes, Py_Size(other)) == 0) - return 1; - } - return 0; -} - -static int -bytes_contains(PyBytesObject *self, PyObject *value) -{ - Py_ssize_t ival; - - if (PyBytes_Check(value)) - return bytes_substring(self, (PyBytesObject *)value); - - ival = PyNumber_AsSsize_t(value, PyExc_ValueError); - if (ival == -1 && PyErr_Occurred()) - return -1; - if (ival < 0 || ival >= 256) { - PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)"); - return -1; - } - - return memchr(self->ob_bytes, ival, Py_Size(self)) != NULL; -} - static PyObject * bytes_getitem(PyBytesObject *self, Py_ssize_t i) { if (i < 0) i += Py_Size(self); if (i < 0 || i >= Py_Size(self)) { - PyErr_SetString(PyExc_IndexError, "bytes index out of range"); + PyErr_SetString(PyExc_IndexError, "buffer index out of range"); return NULL; } return PyInt_FromLong((unsigned char)(self->ob_bytes[i])); @@ -414,7 +366,7 @@ bytes_subscript(PyBytesObject *self, PyObject *item) i += PyBytes_GET_SIZE(self); if (i < 0 || i >= Py_Size(self)) { - PyErr_SetString(PyExc_IndexError, "bytes index out of range"); + PyErr_SetString(PyExc_IndexError, "buffer index out of range"); return NULL; } return PyInt_FromLong((unsigned char)(self->ob_bytes[i])); @@ -451,7 +403,7 @@ bytes_subscript(PyBytesObject *self, PyObject *item) } } else { - PyErr_SetString(PyExc_TypeError, "bytes indices must be integers"); + PyErr_SetString(PyExc_TypeError, 
"buffer indices must be integers"); return NULL; } } @@ -551,7 +503,7 @@ bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value) i += Py_Size(self); if (i < 0 || i >= Py_Size(self)) { - PyErr_SetString(PyExc_IndexError, "bytes index out of range"); + PyErr_SetString(PyExc_IndexError, "buffer index out of range"); return -1; } @@ -587,7 +539,7 @@ bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values) i += PyBytes_GET_SIZE(self); if (i < 0 || i >= Py_Size(self)) { - PyErr_SetString(PyExc_IndexError, "bytes index out of range"); + PyErr_SetString(PyExc_IndexError, "buffer index out of range"); return -1; } @@ -619,7 +571,7 @@ bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values) } } else { - PyErr_SetString(PyExc_TypeError, "bytes indices must be integer"); + PyErr_SetString(PyExc_TypeError, "buffer indices must be integer"); return -1; } @@ -772,13 +724,7 @@ bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds) encoded = PyCodec_Encode(arg, encoding, errors); if (encoded == NULL) return -1; - if (!PyBytes_Check(encoded) && !PyString_Check(encoded)) { - PyErr_Format(PyExc_TypeError, - "encoder did not return a str8 or bytes object (type=%.400s)", - Py_Type(encoded)->tp_name); - Py_DECREF(encoded); - return -1; - } + assert(PyString_Check(encoded)); new = bytes_iconcat(self, encoded); Py_DECREF(encoded); if (new == NULL) @@ -889,11 +835,15 @@ static PyObject * bytes_repr(PyBytesObject *self) { static const char *hexdigits = "0123456789abcdef"; - size_t newsize = 3 + 4 * Py_Size(self); + const char *quote_prefix = "buffer(b"; + const char *quote_postfix = ")"; + Py_ssize_t length = Py_Size(self); + /* 9 prefix + 2 postfix */ + size_t newsize = 11 + 4 * length; PyObject *v; - if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(self)) { + if (newsize > PY_SSIZE_T_MAX || newsize / 4 - 2 != length) { PyErr_SetString(PyExc_OverflowError, - "bytes object is too large to make repr"); + "buffer object is too large 
to make repr"); return NULL; } v = PyUnicode_FromUnicode(NULL, newsize); @@ -904,17 +854,36 @@ bytes_repr(PyBytesObject *self) register Py_ssize_t i; register Py_UNICODE c; register Py_UNICODE *p; - int quote = '\''; + int quote; + + /* Figure out which quote to use; single is preferred */ + quote = '\''; + { + char *test, *start; + start = PyBytes_AS_STRING(self); + for (test = start; test < start+length; ++test) { + if (*test == '"') { + quote = '\''; /* back to single */ + goto decided; + } + else if (*test == '\'') + quote = '"'; + } + decided: + ; + } p = PyUnicode_AS_UNICODE(v); - *p++ = 'b'; + while (*quote_prefix) + *p++ = *quote_prefix++; *p++ = quote; - for (i = 0; i < Py_Size(self); i++) { + + for (i = 0; i < length; i++) { /* There's at least enough room for a hex escape and a closing quote. */ assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5); c = self->ob_bytes[i]; - if (c == quote || c == '\\') + if (c == '\'' || c == '\\') *p++ = '\\', *p++ = c; else if (c == '\t') *p++ = '\\', *p++ = 't'; @@ -935,6 +904,9 @@ bytes_repr(PyBytesObject *self) } assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1); *p++ = quote; + while (*quote_postfix) { + *p++ = *quote_postfix++; + } *p = '\0'; if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) { Py_DECREF(v); @@ -945,9 +917,14 @@ bytes_repr(PyBytesObject *self) } static PyObject * -bytes_str(PyBytesObject *self) +bytes_str(PyObject *op) { - return PyString_FromStringAndSize(self->ob_bytes, Py_Size(self)); + if (Py_BytesWarningFlag) { + if (PyErr_WarnEx(PyExc_BytesWarning, + "str() on a buffer instance", 1)) + return NULL; + } + return bytes_repr((PyBytesObject*)op); } static PyObject * @@ -964,6 +941,12 @@ bytes_richcompare(PyObject *self, PyObject *other, int op) error, even if the comparison is for equality. 
*/ if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) || PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) { + if (Py_BytesWarningFlag && op == Py_EQ) { + if (PyErr_WarnEx(PyExc_BytesWarning, + "Comparsion between buffer and string", 1)) + return NULL; + } + Py_INCREF(Py_NotImplemented); return Py_NotImplemented; } @@ -1112,7 +1095,7 @@ bytes_find(PyBytesObject *self, PyObject *args) } PyDoc_STRVAR(count__doc__, -"B.count(sub[, start[, end]]) -> int\n\ +"B.count(sub [,start [,end]]) -> int\n\ \n\ Return the number of non-overlapping occurrences of subsection sub in\n\ bytes B[start:end]. Optional arguments start and end are interpreted\n\ @@ -1203,6 +1186,30 @@ bytes_rindex(PyBytesObject *self, PyObject *args) } +static int +bytes_contains(PyObject *self, PyObject *arg) +{ + Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError); + if (ival == -1 && PyErr_Occurred()) { + Py_buffer varg; + int pos; + PyErr_Clear(); + if (_getbuffer(arg, &varg) < 0) + return -1; + pos = stringlib_find(PyBytes_AS_STRING(self), Py_Size(self), + varg.buf, varg.len, 0); + PyObject_ReleaseBuffer(arg, &varg); + return pos >= 0; + } + if (ival < 0 || ival >= 256) { + PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)"); + return -1; + } + + return memchr(PyBytes_AS_STRING(self), ival, Py_Size(self)) != NULL; +} + + /* Matches the end (direction >= 0) or start (direction < 0) of self * against substr, using the start and end arguments. Returns * -1 on error, 0 if not found and 1 if found. 
@@ -1247,7 +1254,7 @@ done: PyDoc_STRVAR(startswith__doc__, -"B.startswith(prefix[, start[, end]]) -> bool\n\ +"B.startswith(prefix [,start [,end]]) -> bool\n\ \n\ Return True if B starts with the specified prefix, False otherwise.\n\ With optional start, test B beginning at that position.\n\ @@ -1287,7 +1294,7 @@ bytes_startswith(PyBytesObject *self, PyObject *args) } PyDoc_STRVAR(endswith__doc__, -"B.endswith(suffix[, start[, end]]) -> bool\n\ +"B.endswith(suffix [,start [,end]]) -> bool\n\ \n\ Return True if B ends with the specified suffix, False otherwise.\n\ With optional start, test B beginning at that position.\n\ @@ -1328,12 +1335,12 @@ bytes_endswith(PyBytesObject *self, PyObject *args) PyDoc_STRVAR(translate__doc__, -"B.translate(table [,deletechars]) -> bytes\n\ +"B.translate(table[, deletechars]) -> buffer\n\ \n\ -Return a copy of the bytes B, where all characters occurring\n\ -in the optional argument deletechars are removed, and the\n\ -remaining characters have been mapped through the given\n\ -translation table, which must be a bytes of length 256."); +Return a copy of B, where all characters occurring in the\n\ +optional argument deletechars are removed, and the remaining\n\ +characters have been mapped through the given translation\n\ +table, which must be a bytes object of length 256."); static PyObject * bytes_translate(PyBytesObject *self, PyObject *args) @@ -2026,9 +2033,9 @@ replace(PyBytesObject *self, PyDoc_STRVAR(replace__doc__, -"B.replace (old, new[, count]) -> bytes\n\ +"B.replace(old, new[, count]) -> bytes\n\ \n\ -Return a copy of bytes B with all occurrences of subsection\n\ +Return a copy of B with all occurrences of subsection\n\ old replaced by new. 
If the optional argument count is\n\ given, only the first count occurrences are replaced."); @@ -2149,23 +2156,23 @@ split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount) return NULL; for (i = j = 0; i < len; ) { - /* find a token */ - while (i < len && ISSPACE(s[i])) - i++; - j = i; - while (i < len && !ISSPACE(s[i])) - i++; - if (j < i) { - if (maxcount-- <= 0) - break; - SPLIT_ADD(s, j, i); - while (i < len && ISSPACE(s[i])) - i++; - j = i; - } + /* find a token */ + while (i < len && ISSPACE(s[i])) + i++; + j = i; + while (i < len && !ISSPACE(s[i])) + i++; + if (j < i) { + if (maxcount-- <= 0) + break; + SPLIT_ADD(s, j, i); + while (i < len && ISSPACE(s[i])) + i++; + j = i; + } } if (j < len) { - SPLIT_ADD(s, j, len); + SPLIT_ADD(s, j, len); } FIX_PREALLOC_SIZE(list); return list; @@ -2176,10 +2183,10 @@ split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount) } PyDoc_STRVAR(split__doc__, -"B.split([sep [, maxsplit]]) -> list of bytes\n\ +"B.split([sep[, maxsplit]]) -> list of buffer\n\ \n\ -Return a list of the bytes in the string B, using sep as the delimiter.\n\ -If sep is not given, B is split on ASCII whitespace charcters\n\ +Return a list of the sections in B, using sep as the delimiter.\n\ +If sep is not given, B is split on ASCII whitespace characters\n\ (space, tab, return, newline, formfeed, vertical tab).\n\ If maxsplit is given, at most maxsplit splits are done."); @@ -2255,12 +2262,37 @@ bytes_split(PyBytesObject *self, PyObject *args) return NULL; } +/* stringlib's partition shares nullbytes in some cases. + undo this, we don't want the nullbytes to be shared. 
*/ +static PyObject * +make_nullbytes_unique(PyObject *result) +{ + if (result != NULL) { + int i; + assert(PyTuple_Check(result)); + assert(PyTuple_GET_SIZE(result) == 3); + for (i = 0; i < 3; i++) { + if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) { + PyObject *new = PyBytes_FromStringAndSize(NULL, 0); + if (new == NULL) { + Py_DECREF(result); + result = NULL; + break; + } + Py_DECREF(nullbytes); + PyTuple_SET_ITEM(result, i, new); + } + } + } + return result; +} + PyDoc_STRVAR(partition__doc__, "B.partition(sep) -> (head, sep, tail)\n\ \n\ Searches for the separator sep in B, and returns the part before it,\n\ the separator itself, and the part after it. If the separator is not\n\ -found, returns B and two empty bytes."); +found, returns B and two empty buffer."); static PyObject * bytes_partition(PyBytesObject *self, PyObject *sep_obj) @@ -2279,15 +2311,16 @@ bytes_partition(PyBytesObject *self, PyObject *sep_obj) ); Py_DECREF(bytesep); - return result; + return make_nullbytes_unique(result); } PyDoc_STRVAR(rpartition__doc__, "B.rpartition(sep) -> (tail, sep, head)\n\ \n\ -Searches for the separator sep in B, starting at the end of B, and returns\n\ -the part before it, the separator itself, and the part after it. If the\n\ -separator is not found, returns two empty bytes and B."); +Searches for the separator sep in B, starting at the end of B,\n\ +and returns the part before it, the separator itself, and the\n\ +part after it. 
If the separator is not found, returns two empty\n\ +buffer objects and B."); static PyObject * bytes_rpartition(PyBytesObject *self, PyObject *sep_obj) @@ -2306,7 +2339,7 @@ bytes_rpartition(PyBytesObject *self, PyObject *sep_obj) ); Py_DECREF(bytesep); - return result; + return make_nullbytes_unique(result); } Py_LOCAL_INLINE(PyObject *) @@ -2354,23 +2387,23 @@ rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount) return NULL; for (i = j = len - 1; i >= 0; ) { - /* find a token */ - while (i >= 0 && Py_UNICODE_ISSPACE(s[i])) - i--; - j = i; - while (i >= 0 && !Py_UNICODE_ISSPACE(s[i])) - i--; - if (j > i) { - if (maxcount-- <= 0) - break; - SPLIT_ADD(s, i + 1, j + 1); - while (i >= 0 && Py_UNICODE_ISSPACE(s[i])) - i--; - j = i; - } + /* find a token */ + while (i >= 0 && Py_UNICODE_ISSPACE(s[i])) + i--; + j = i; + while (i >= 0 && !Py_UNICODE_ISSPACE(s[i])) + i--; + if (j > i) { + if (maxcount-- <= 0) + break; + SPLIT_ADD(s, i + 1, j + 1); + while (i >= 0 && Py_UNICODE_ISSPACE(s[i])) + i--; + j = i; + } } if (j >= 0) { - SPLIT_ADD(s, 0, j + 1); + SPLIT_ADD(s, 0, j + 1); } FIX_PREALLOC_SIZE(list); if (PyList_Reverse(list) < 0) @@ -2384,10 +2417,10 @@ rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount) } PyDoc_STRVAR(rsplit__doc__, -"B.rsplit(sep [,maxsplit]) -> list of bytes\n\ +"B.rsplit(sep[, maxsplit]) -> list of buffer\n\ \n\ -Return a list of the sections in the byte B, using sep as the delimiter,\n\ -starting at the end of the bytes and working to the front.\n\ +Return a list of the sections in B, using sep as the delimiter,\n\ +starting at the end of B and working to the front.\n\ If sep is not given, B is split on ASCII whitespace characters\n\ (space, tab, return, newline, formfeed, vertical tab).\n\ If maxsplit is given, at most maxsplit splits are done."); @@ -2458,7 +2491,7 @@ PyDoc_STRVAR(extend__doc__, "B.extend(iterable int) -> None\n\ \n\ Append all the elements from the iterator or sequence to the\n\ -end of the 
bytes."); +end of B."); static PyObject * bytes_extend(PyBytesObject *self, PyObject *arg) { @@ -2475,7 +2508,7 @@ bytes_extend(PyBytesObject *self, PyObject *arg) PyDoc_STRVAR(reverse__doc__, "B.reverse() -> None\n\ \n\ -Reverse the order of the values in bytes in place."); +Reverse the order of the values in B in place."); static PyObject * bytes_reverse(PyBytesObject *self, PyObject *unused) { @@ -2497,7 +2530,7 @@ bytes_reverse(PyBytesObject *self, PyObject *unused) PyDoc_STRVAR(insert__doc__, "B.insert(index, int) -> None\n\ \n\ -Insert a single item into the bytes before the given index."); +Insert a single item into the buffer before the given index."); static PyObject * bytes_insert(PyBytesObject *self, PyObject *args) { @@ -2536,7 +2569,7 @@ bytes_insert(PyBytesObject *self, PyObject *args) PyDoc_STRVAR(append__doc__, "B.append(int) -> None\n\ \n\ -Append a single item to the end of the bytes."); +Append a single item to the end of B."); static PyObject * bytes_append(PyBytesObject *self, PyObject *arg) { @@ -2561,7 +2594,7 @@ bytes_append(PyBytesObject *self, PyObject *arg) PyDoc_STRVAR(pop__doc__, "B.pop([index]) -> int\n\ \n\ -Remove and return a single item from the bytes. If no index\n\ +Remove and return a single item from B. 
If no index\n\ argument is give, will pop the last value."); static PyObject * bytes_pop(PyBytesObject *self, PyObject *args) @@ -2595,7 +2628,7 @@ bytes_pop(PyBytesObject *self, PyObject *args) PyDoc_STRVAR(remove__doc__, "B.remove(int) -> None\n\ \n\ -Remove the first occurance of a value in bytes"); +Remove the first occurance of a value in B."); static PyObject * bytes_remove(PyBytesObject *self, PyObject *arg) { @@ -2644,7 +2677,7 @@ rstrip_helper(unsigned char *myptr, Py_ssize_t mysize, } PyDoc_STRVAR(strip__doc__, -"B.strip([bytes]) -> bytes\n\ +"B.strip([bytes]) -> buffer\n\ \n\ Strip leading and trailing bytes contained in the argument.\n\ If the argument is omitted, strip ASCII whitespace."); @@ -2662,10 +2695,10 @@ bytes_strip(PyBytesObject *self, PyObject *args) argsize = 6; } else { - if (_getbuffer(arg, &varg) < 0) - return NULL; - argptr = varg.buf; - argsize = varg.len; + if (_getbuffer(arg, &varg) < 0) + return NULL; + argptr = varg.buf; + argsize = varg.len; } myptr = self->ob_bytes; mysize = Py_Size(self); @@ -2675,12 +2708,12 @@ bytes_strip(PyBytesObject *self, PyObject *args) else right = rstrip_helper(myptr, mysize, argptr, argsize); if (arg != Py_None) - PyObject_ReleaseBuffer(arg, &varg); + PyObject_ReleaseBuffer(arg, &varg); return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left); } PyDoc_STRVAR(lstrip__doc__, -"B.lstrip([bytes]) -> bytes\n\ +"B.lstrip([bytes]) -> buffer\n\ \n\ Strip leading bytes contained in the argument.\n\ If the argument is omitted, strip leading ASCII whitespace."); @@ -2698,22 +2731,22 @@ bytes_lstrip(PyBytesObject *self, PyObject *args) argsize = 6; } else { - if (_getbuffer(arg, &varg) < 0) - return NULL; - argptr = varg.buf; - argsize = varg.len; + if (_getbuffer(arg, &varg) < 0) + return NULL; + argptr = varg.buf; + argsize = varg.len; } myptr = self->ob_bytes; mysize = Py_Size(self); left = lstrip_helper(myptr, mysize, argptr, argsize); right = mysize; if (arg != Py_None) - 
PyObject_ReleaseBuffer(arg, &varg); + PyObject_ReleaseBuffer(arg, &varg); return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left); } PyDoc_STRVAR(rstrip__doc__, -"B.rstrip([bytes]) -> bytes\n\ +"B.rstrip([bytes]) -> buffer\n\ \n\ Strip trailing bytes contained in the argument.\n\ If the argument is omitted, strip trailing ASCII whitespace."); @@ -2731,27 +2764,27 @@ bytes_rstrip(PyBytesObject *self, PyObject *args) argsize = 6; } else { - if (_getbuffer(arg, &varg) < 0) - return NULL; - argptr = varg.buf; - argsize = varg.len; + if (_getbuffer(arg, &varg) < 0) + return NULL; + argptr = varg.buf; + argsize = varg.len; } myptr = self->ob_bytes; mysize = Py_Size(self); left = 0; right = rstrip_helper(myptr, mysize, argptr, argsize); if (arg != Py_None) - PyObject_ReleaseBuffer(arg, &varg); + PyObject_ReleaseBuffer(arg, &varg); return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left); } PyDoc_STRVAR(decode_doc, -"B.decode([encoding[,errors]]) -> unicode obect.\n\ +"B.decode([encoding[, errors]]) -> unicode object.\n\ \n\ Decodes B using the codec registered for encoding. encoding defaults\n\ to the default encoding. errors may be given to set a different error\n\ -handling scheme. Default is 'strict' meaning that encoding errors raise\n\ -a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\ +handling scheme. Default is 'strict' meaning that encoding errors raise\n\ +a UnicodeDecodeError. 
Other possible values are 'ignore' and 'replace'\n\ as well as any other name registerd with codecs.register_error that is\n\ able to handle UnicodeDecodeErrors."); @@ -2782,8 +2815,7 @@ bytes_alloc(PyBytesObject *self) PyDoc_STRVAR(join_doc, "B.join(iterable_of_bytes) -> bytes\n\ \n\ -Concatenates any number of bytes objects, with B in between each pair.\n\ -Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'."); +Concatenates any number of buffer objects, with B in between each pair."); static PyObject * bytes_join(PyBytesObject *self, PyObject *it) @@ -2804,9 +2836,10 @@ bytes_join(PyBytesObject *self, PyObject *it) items = PySequence_Fast_ITEMS(seq); /* Compute the total size, and check that they are all bytes */ + /* XXX Shouldn't we use _getbuffer() on these items instead? */ for (i = 0; i < n; i++) { PyObject *obj = items[i]; - if (!PyBytes_Check(obj)) { + if (!PyBytes_Check(obj) && !PyString_Check(obj)) { PyErr_Format(PyExc_TypeError, "can only join an iterable of bytes " "(item %ld has type '%.100s')", @@ -2816,7 +2849,7 @@ bytes_join(PyBytesObject *self, PyObject *it) } if (i > 0) totalsize += mysize; - totalsize += PyBytes_GET_SIZE(obj); + totalsize += Py_Size(obj); if (totalsize < 0) { PyErr_NoMemory(); goto error; @@ -2830,12 +2863,17 @@ bytes_join(PyBytesObject *self, PyObject *it) dest = PyBytes_AS_STRING(result); for (i = 0; i < n; i++) { PyObject *obj = items[i]; - Py_ssize_t size = PyBytes_GET_SIZE(obj); - if (i > 0) { + Py_ssize_t size = Py_Size(obj); + char *buf; + if (PyBytes_Check(obj)) + buf = PyBytes_AS_STRING(obj); + else + buf = PyString_AS_STRING(obj); + if (i) { memcpy(dest, self->ob_bytes, mysize); dest += mysize; } - memcpy(dest, PyBytes_AS_STRING(obj), size); + memcpy(dest, buf, size); dest += size; } @@ -2850,11 +2888,11 @@ bytes_join(PyBytesObject *self, PyObject *it) } PyDoc_STRVAR(fromhex_doc, -"bytes.fromhex(string) -> bytes\n\ +"buffer.fromhex(string) -> buffer\n\ \n\ -Create a bytes object from a string of hexadecimal 
numbers.\n\ -Spaces between two numbers are accepted. Example:\n\ -bytes.fromhex('10 1112') -> b'\\x10\\x11\\x12'."); +Create a buffer object from a string of hexadecimal numbers.\n\ +Spaces between two numbers are accepted.\n\ +Example: buffer.fromhex('B9 01EF') -> buffer(b'\\xb9\\x01\\xef')."); static int hex_digit_to_int(Py_UNICODE c) @@ -2940,7 +2978,7 @@ bytes_reduce(PyBytesObject *self) static PySequenceMethods bytes_as_sequence = { (lenfunc)bytes_length, /* sq_length */ - (binaryfunc)bytes_concat, /* sq_concat */ + (binaryfunc)PyBytes_Concat, /* sq_concat */ (ssizeargfunc)bytes_repeat, /* sq_repeat */ (ssizeargfunc)bytes_getitem, /* sq_item */ 0, /* sq_slice */ @@ -3027,15 +3065,27 @@ bytes_methods[] = { }; PyDoc_STRVAR(bytes_doc, -"bytes([iterable]) -> new array of bytes.\n\ +"buffer(iterable_of_ints) -> buffer.\n\ +buffer(string, encoding[, errors]) -> buffer.\n\ +buffer(bytes_or_buffer) -> mutable copy of bytes_or_buffer.\n\ +buffer(memory_view) -> buffer.\n\ +\n\ +Construct an mutable buffer object from:\n\ + - an iterable yielding integers in range(256)\n\ + - a text string encoded using the specified encoding\n\ + - a bytes or a buffer object\n\ + - any object implementing the buffer API.\n\ \n\ -If an argument is given it must be an iterable yielding ints in range(256)."); +buffer(int) -> buffer.\n\ +\n\ +Construct a zero-initialized buffer of the given length."); + static PyObject *bytes_iter(PyObject *seq); PyTypeObject PyBytes_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - "bytes", + "buffer", sizeof(PyBytesObject), 0, (destructor)bytes_dealloc, /* tp_dealloc */ @@ -3049,7 +3099,7 @@ PyTypeObject PyBytes_Type = { &bytes_as_mapping, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ - (reprfunc)bytes_str, /* tp_str */ + bytes_str, /* tp_str */ PyObject_GenericGetAttr, /* tp_getattro */ 0, /* tp_setattro */ &bytes_as_buffer, /* tp_as_buffer */ diff --git a/Objects/codeobject.c b/Objects/codeobject.c index b9a26ba..80c2df9 100644 --- 
a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -8,7 +8,7 @@ /* all_name_chars(s): true iff all chars in s are valid NAME_CHARS */ static int -all_name_chars(unsigned char *s) +all_name_chars(Py_UNICODE *s) { static char ok_name_char[256]; static unsigned char *name_chars = (unsigned char *)NAME_CHARS; @@ -19,6 +19,8 @@ all_name_chars(unsigned char *s) ok_name_char[*p] = 1; } while (*s) { + if (*s >= 128) + return 0; if (ok_name_char[*s++] == 0) return 0; } @@ -73,11 +75,11 @@ PyCode_New(int argcount, int kwonlyargcount, /* Intern selected string constants */ for (i = PyTuple_Size(consts); --i >= 0; ) { PyObject *v = PyTuple_GetItem(consts, i); - if (!PyString_Check(v)) + if (!PyUnicode_Check(v)) continue; - if (!all_name_chars((unsigned char *)PyString_AS_STRING(v))) + if (!all_name_chars(PyUnicode_AS_UNICODE(v))) continue; - PyString_InternInPlace(&PyTuple_GET_ITEM(consts, i)); + PyUnicode_InternInPlace(&PyTuple_GET_ITEM(consts, i)); } co = PyObject_NEW(PyCodeObject, &PyCode_Type); if (co != NULL) { @@ -202,7 +204,7 @@ code_new(PyTypeObject *type, PyObject *args, PyObject *kw) int firstlineno; PyObject *lnotab; - if (!PyArg_ParseTuple(args, "iiiiiSO!O!O!SSiS|O!O!:code", + if (!PyArg_ParseTuple(args, "iiiiiSO!O!O!UUiS|O!O!:code", &argcount, &kwonlyargcount, &nlocals, &stacksize, &flags, &code, diff --git a/Objects/exceptions.c b/Objects/exceptions.c index abe4bde..6ef765b 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -1045,14 +1045,14 @@ SimpleExtendsException(PyExc_ValueError, UnicodeError, "Unicode related error."); static PyObject * -get_bytes(PyObject *attr, const char *name) +get_string(PyObject *attr, const char *name) { if (!attr) { PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name); return NULL; } - if (!PyBytes_Check(attr)) { + if (!PyString_Check(attr)) { PyErr_Format(PyExc_TypeError, "%.200s attribute must be bytes", name); return NULL; } @@ -1109,7 +1109,7 @@ PyUnicodeEncodeError_GetObject(PyObject *exc) PyObject * 
PyUnicodeDecodeError_GetObject(PyObject *exc) { - return get_bytes(((PyUnicodeErrorObject *)exc)->object, "object"); + return get_string(((PyUnicodeErrorObject *)exc)->object, "object"); } PyObject * @@ -1141,10 +1141,10 @@ int PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start) { Py_ssize_t size; - PyObject *obj = get_bytes(((PyUnicodeErrorObject *)exc)->object, "object"); + PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object, "object"); if (!obj) return -1; - size = PyBytes_GET_SIZE(obj); + size = PyString_GET_SIZE(obj); *start = ((PyUnicodeErrorObject *)exc)->start; if (*start<0) *start = 0; @@ -1209,10 +1209,10 @@ int PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end) { Py_ssize_t size; - PyObject *obj = get_bytes(((PyUnicodeErrorObject *)exc)->object, "object"); + PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object, "object"); if (!obj) return -1; - size = PyBytes_GET_SIZE(obj); + size = PyString_GET_SIZE(obj); *end = ((PyUnicodeErrorObject *)exc)->end; if (*end<1) *end = 1; @@ -1299,31 +1299,6 @@ PyUnicodeTranslateError_SetReason(PyObject *exc, const char *reason) static int -UnicodeError_init(PyUnicodeErrorObject *self, PyObject *args, PyObject *kwds, - PyTypeObject *objecttype) -{ - Py_CLEAR(self->encoding); - Py_CLEAR(self->object); - Py_CLEAR(self->reason); - - if (!PyArg_ParseTuple(args, "O!O!nnO!", - &PyUnicode_Type, &self->encoding, - objecttype, &self->object, - &self->start, - &self->end, - &PyUnicode_Type, &self->reason)) { - self->encoding = self->object = self->reason = NULL; - return -1; - } - - Py_INCREF(self->encoding); - Py_INCREF(self->object); - Py_INCREF(self->reason); - - return 0; -} - -static int UnicodeError_clear(PyUnicodeErrorObject *self) { Py_CLEAR(self->encoding); @@ -1371,10 +1346,32 @@ static PyMemberDef UnicodeError_members[] = { static int UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds) { + PyUnicodeErrorObject *err; + if 
(BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) return -1; - return UnicodeError_init((PyUnicodeErrorObject *)self, args, - kwds, &PyUnicode_Type); + + err = (PyUnicodeErrorObject *)self; + + Py_CLEAR(err->encoding); + Py_CLEAR(err->object); + Py_CLEAR(err->reason); + + if (!PyArg_ParseTuple(args, "O!O!nnO!", + &PyUnicode_Type, &err->encoding, + &PyUnicode_Type, &err->object, + &err->start, + &err->end, + &PyUnicode_Type, &err->reason)) { + err->encoding = err->object = err->reason = NULL; + return -1; + } + + Py_INCREF(err->encoding); + Py_INCREF(err->object); + Py_INCREF(err->reason); + + return 0; } static PyObject * @@ -1439,10 +1436,44 @@ PyUnicodeEncodeError_Create( static int UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds) { + PyUnicodeErrorObject *ude; + const char *data; + Py_ssize_t size; + if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) return -1; - return UnicodeError_init((PyUnicodeErrorObject *)self, args, - kwds, &PyBytes_Type); + + ude = (PyUnicodeErrorObject *)self; + + Py_CLEAR(ude->encoding); + Py_CLEAR(ude->object); + Py_CLEAR(ude->reason); + + if (!PyArg_ParseTuple(args, "O!OnnO!", + &PyUnicode_Type, &ude->encoding, + &ude->object, + &ude->start, + &ude->end, + &PyUnicode_Type, &ude->reason)) { + ude->encoding = ude->object = ude->reason = NULL; + return -1; + } + + if (!PyString_Check(ude->object)) { + if (PyObject_AsReadBuffer(ude->object, (const void **)&data, &size)) { + ude->encoding = ude->object = ude->reason = NULL; + return -1; + } + ude->object = PyString_FromStringAndSize(data, size); + } + else { + Py_INCREF(ude->object); + } + + Py_INCREF(ude->encoding); + Py_INCREF(ude->reason); + + return 0; } static PyObject * @@ -1451,7 +1482,7 @@ UnicodeDecodeError_str(PyObject *self) PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self; if (uself->end==uself->start+1) { - int byte = (int)(PyBytes_AS_STRING(((PyUnicodeErrorObject *)self)->object)[uself->start]&0xff); 
+ int byte = (int)(PyString_AS_STRING(((PyUnicodeErrorObject *)self)->object)[uself->start]&0xff); return PyUnicode_FromFormat( "'%U' codec can't decode byte 0x%02x in position %zd: %U", ((PyUnicodeErrorObject *)self)->encoding, @@ -1709,6 +1740,14 @@ SimpleExtendsException(PyExc_Warning, UnicodeWarning, "Base class for warnings about Unicode related problems, mostly\n" "related to conversion problems."); +/* + * BytesWarning extends Warning + */ +SimpleExtendsException(PyExc_Warning, BytesWarning, + "Base class for warnings about bytes and buffer related problems, mostly\n" + "related to conversion from str or comparing to str."); + + /* Pre-computed MemoryError instance. Best to create this as early as * possible and not wait until a MemoryError is actually raised! @@ -1808,6 +1847,7 @@ _PyExc_Init(void) PRE_INIT(FutureWarning) PRE_INIT(ImportWarning) PRE_INIT(UnicodeWarning) + PRE_INIT(BytesWarning) bltinmod = PyImport_ImportModule("__builtin__"); if (bltinmod == NULL) @@ -1868,6 +1908,7 @@ _PyExc_Init(void) POST_INIT(FutureWarning) POST_INIT(ImportWarning) POST_INIT(UnicodeWarning) + POST_INIT(BytesWarning) PyExc_MemoryErrorInst = BaseException_new(&_PyExc_MemoryError, NULL, NULL); if (!PyExc_MemoryErrorInst) diff --git a/Objects/fileobject.c b/Objects/fileobject.c index 97c2756..c6c7d8e 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -146,7 +146,7 @@ PyFile_WriteObject(PyObject *v, PyObject *f, int flags) if (writer == NULL) return -1; if (flags & Py_PRINT_RAW) { - value = _PyObject_Str(v); + value = PyObject_Str(v); } else value = PyObject_Repr(v); diff --git a/Objects/longobject.c b/Objects/longobject.c index 8ebc31c..d827e7e 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -3462,14 +3462,22 @@ long_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return PyLong_FromLong(0L); if (base == -909) return PyNumber_Long(x); - else if (PyBytes_Check(x)) { + else if (PyUnicode_Check(x)) + return 
PyLong_FromUnicode(PyUnicode_AS_UNICODE(x), + PyUnicode_GET_SIZE(x), + base); + else if (PyBytes_Check(x) || PyString_Check(x)) { /* Since PyLong_FromString doesn't have a length parameter, * check here for possible NULs in the string. */ - char *string = PyBytes_AS_STRING(x); - int size = PyBytes_GET_SIZE(x); + char *string; + int size = Py_Size(x); + if (PyBytes_Check(x)) + string = PyBytes_AS_STRING(x); + else + string = PyString_AS_STRING(x); if (strlen(string) != size) { /* We only see this if there's a null byte in x, - x is a str8 or a bytes, *and* a base is given. */ + x is a bytes or buffer, *and* a base is given. */ PyErr_Format(PyExc_ValueError, "invalid literal for int() with base %d: %R", base, x); @@ -3477,10 +3485,6 @@ long_new(PyTypeObject *type, PyObject *args, PyObject *kwds) } return PyLong_FromString(string, NULL, base); } - else if (PyUnicode_Check(x)) - return PyLong_FromUnicode(PyUnicode_AS_UNICODE(x), - PyUnicode_GET_SIZE(x), - base); else { PyErr_SetString(PyExc_TypeError, "int() can't convert non-string with explicit base"); diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index 13c1ab4..b8b2b8e 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -151,7 +151,7 @@ module_init(PyModuleObject *m, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"name", "doc", NULL}; PyObject *dict, *name = Py_None, *doc = Py_None; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "S|O:module.__init__", + if (!PyArg_ParseTupleAndKeywords(args, kwds, "U|O:module.__init__", kwlist, &name, &doc)) return -1; dict = m->md_dict; diff --git a/Objects/object.c b/Objects/object.c index 40b8b42..df93a19 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -372,50 +372,34 @@ PyObject_Repr(PyObject *v) #endif if (v == NULL) return PyUnicode_FromString("<NULL>"); - else if (Py_Type(v)->tp_repr == NULL) - return PyUnicode_FromFormat("<%s object at %p>", v->ob_type->tp_name, v); - else { - res = (*v->ob_type->tp_repr)(v); - if (res != 
NULL && !PyUnicode_Check(res)) { - PyErr_Format(PyExc_TypeError, - "__repr__ returned non-string (type %.200s)", - res->ob_type->tp_name); - Py_DECREF(res); - return NULL; - } - return res; - } -} - -PyObject * -PyObject_ReprStr8(PyObject *v) -{ - PyObject *resu = PyObject_Repr(v); - if (resu) { - PyObject *resb = PyUnicode_AsEncodedString(resu, NULL, NULL); - Py_DECREF(resu); - if (resb) { - PyObject *ress = PyString_FromStringAndSize( - PyBytes_AS_STRING(resb), - PyBytes_GET_SIZE(resb) - ); - Py_DECREF(resb); - return ress; - } - } - return NULL; + if (Py_Type(v)->tp_repr == NULL) + return PyUnicode_FromFormat("<%s object at %p>", + v->ob_type->tp_name, v); + res = (*v->ob_type->tp_repr)(v); + if (res != NULL && !PyUnicode_Check(res)) { + PyErr_Format(PyExc_TypeError, + "__repr__ returned non-string (type %.200s)", + res->ob_type->tp_name); + Py_DECREF(res); + return NULL; + } + return res; } PyObject * -_PyObject_Str(PyObject *v) +PyObject_Str(PyObject *v) { PyObject *res; + if (PyErr_CheckSignals()) + return NULL; +#ifdef USE_STACKCHECK + if (PyOS_CheckStack()) { + PyErr_SetString(PyExc_MemoryError, "stack overflow"); + return NULL; + } +#endif if (v == NULL) return PyUnicode_FromString("<NULL>"); - if (PyString_CheckExact(v)) { - Py_INCREF(v); - return v; - } if (PyUnicode_CheckExact(v)) { Py_INCREF(v); return v; @@ -431,7 +415,7 @@ _PyObject_Str(PyObject *v) Py_LeaveRecursiveCall(); if (res == NULL) return NULL; - if (!(PyString_Check(res) || PyUnicode_Check(res))) { + if (!PyUnicode_Check(res)) { PyErr_Format(PyExc_TypeError, "__str__ returned non-string (type %.200s)", Py_Type(res)->tp_name); @@ -441,90 +425,12 @@ _PyObject_Str(PyObject *v) return res; } -PyObject * -PyObject_Str(PyObject *v) -{ - PyObject *res = _PyObject_Str(v); - if (res == NULL) - return NULL; - if (PyUnicode_Check(res)) { - PyObject* str; - str = _PyUnicode_AsDefaultEncodedString(res, NULL); - Py_XINCREF(str); - Py_DECREF(res); - if (str) - res = str; - else - return NULL; - } - 
assert(PyString_Check(res)); - return res; -} - -PyObject * -PyObject_Unicode(PyObject *v) -{ - PyObject *res; - PyObject *func; - PyObject *str; - static PyObject *unicodestr; - - if (v == NULL) - return PyUnicode_FromString("<NULL>"); - else if (PyUnicode_CheckExact(v)) { - Py_INCREF(v); - return v; - } - /* XXX As soon as we have a tp_unicode slot, we should - check this before trying the __unicode__ - method. */ - if (unicodestr == NULL) { - unicodestr= PyUnicode_InternFromString("__unicode__"); - if (unicodestr == NULL) - return NULL; - } - func = PyObject_GetAttr(v, unicodestr); - if (func != NULL) { - res = PyEval_CallObject(func, (PyObject *)NULL); - Py_DECREF(func); - } - else { - PyErr_Clear(); - if (PyUnicode_Check(v) && - v->ob_type->tp_str == PyUnicode_Type.tp_str) { - /* For a Unicode subtype that's didn't overwrite - __unicode__ or __str__, - return a true Unicode object with the same data. */ - return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(v), - PyUnicode_GET_SIZE(v)); - } - if (PyString_CheckExact(v)) { - Py_INCREF(v); - res = v; - } - else { - if (Py_Type(v)->tp_str != NULL) - res = (*Py_Type(v)->tp_str)(v); - else - res = PyObject_Repr(v); - } - } - if (res == NULL) - return NULL; - if (!PyUnicode_Check(res)) { - str = PyUnicode_FromEncodedObject(res, NULL, "strict"); - Py_DECREF(res); - res = str; - } - return res; -} - /* The new comparison philosophy is: we completely separate three-way comparison from rich comparison. That is, PyObject_Compare() and PyObject_Cmp() *just* use the tp_compare slot. And PyObject_RichCompare() and PyObject_RichCompareBool() *just* use the tp_richcompare slot. - + See (*) below for practical amendments. 
IOW, only cmp() uses tp_compare; the comparison operators (==, !=, <=, <, @@ -580,7 +486,7 @@ do_compare(PyObject *v, PyObject *w) cmpfunc f; int ok; - if (v->ob_type == w->ob_type && + if (v->ob_type == w->ob_type && (f = v->ob_type->tp_compare) != NULL) { return (*f)(v, w); } @@ -738,25 +644,25 @@ Py_CmpToRich(int op, int cmp) return NULL; switch (op) { case Py_LT: - ok = cmp < 0; + ok = cmp < 0; break; case Py_LE: - ok = cmp <= 0; + ok = cmp <= 0; break; case Py_EQ: - ok = cmp == 0; + ok = cmp == 0; break; case Py_NE: - ok = cmp != 0; + ok = cmp != 0; break; - case Py_GT: - ok = cmp > 0; + case Py_GT: + ok = cmp > 0; break; case Py_GE: - ok = cmp >= 0; + ok = cmp >= 0; break; default: - PyErr_BadArgument(); + PyErr_BadArgument(); return NULL; } res = ok ? Py_True : Py_False; @@ -1335,10 +1241,10 @@ _dir_locals(void) } /* Helper for PyObject_Dir of type objects: returns __dict__ and __bases__. - We deliberately don't suck up its __class__, as methods belonging to the - metaclass would probably be more confusing than helpful. + We deliberately don't suck up its __class__, as methods belonging to the + metaclass would probably be more confusing than helpful. */ -static PyObject * +static PyObject * _specialized_dir_type(PyObject *obj) { PyObject *result = NULL; @@ -1381,7 +1287,7 @@ _generic_dir(PyObject *obj) PyObject *result = NULL; PyObject *dict = NULL; PyObject *itsclass = NULL; - + /* Get __dict__ (which may or may not be a real dict...) */ dict = PyObject_GetAttrString(obj, "__dict__"); if (dict == NULL) { @@ -1486,7 +1392,7 @@ PyObject_Dir(PyObject *obj) Py_DECREF(result); result = NULL; } - + return result; } diff --git a/Objects/stringlib/transmogrify.h b/Objects/stringlib/transmogrify.h index 1ee8e75..fe478c3 100644 --- a/Objects/stringlib/transmogrify.h +++ b/Objects/stringlib/transmogrify.h @@ -12,7 +12,7 @@ shared code in bytes_methods.c to cut down on duplicate code bloat. 
*/ PyDoc_STRVAR(expandtabs__doc__, -"B.expandtabs([tabsize]) -> modified copy of B\n\ +"B.expandtabs([tabsize]) -> copy of B\n\ \n\ Return a copy of B where all tab characters are expanded using spaces.\n\ If tabsize is not given, a tab size of 8 characters is assumed."); @@ -133,7 +133,7 @@ pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill) } PyDoc_STRVAR(ljust__doc__, -"B.ljust(width[, fillchar]) -> modified copy of B\n" +"B.ljust(width[, fillchar]) -> copy of B\n" "\n" "Return B left justified in a string of length width. Padding is\n" "done using the specified fill character (default is a space)."); @@ -163,7 +163,7 @@ stringlib_ljust(PyObject *self, PyObject *args) PyDoc_STRVAR(rjust__doc__, -"B.rjust(width[, fillchar]) -> modified copy of B\n" +"B.rjust(width[, fillchar]) -> copy of B\n" "\n" "Return B right justified in a string of length width. Padding is\n" "done using the specified fill character (default is a space)"); @@ -193,10 +193,10 @@ stringlib_rjust(PyObject *self, PyObject *args) PyDoc_STRVAR(center__doc__, -"B.center(width[, fillchar]) -> modified copy of B\n" +"B.center(width[, fillchar]) -> copy of B\n" "\n" -"Return B centered in a string of length width. Padding is\n" -"done using the specified fill character (default is a space)"); +"Return B centered in a string of length width. Padding is\n" +"done using the specified fill character (default is a space)."); static PyObject * stringlib_center(PyObject *self, PyObject *args) @@ -226,7 +226,7 @@ stringlib_center(PyObject *self, PyObject *args) } PyDoc_STRVAR(zfill__doc__, -"B.zfill(width) -> modified copy of B\n" +"B.zfill(width) -> copy of B\n" "\n" "Pad a numeric string B with zeros on the left, to fill a field\n" "of the specified width. 
B is never truncated."); diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 3dd1051..8761477 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -1,11 +1,32 @@ /* String object implementation */ +/* XXX This is now called 'bytes' as far as the user is concerned. + Many docstrings and error messages need to be cleaned up. */ + #define PY_SSIZE_T_CLEAN #include "Python.h" #include "bytes_methods.h" +static Py_ssize_t +_getbuffer(PyObject *obj, Py_buffer *view) +{ + PyBufferProcs *buffer = Py_Type(obj)->tp_as_buffer; + + if (buffer == NULL || buffer->bf_getbuffer == NULL) + { + PyErr_Format(PyExc_TypeError, + "Type %.100s doesn't support the buffer API", + Py_Type(obj)->tp_name); + return -1; + } + + if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0) + return -1; + return view->len; +} + #ifdef COUNT_ALLOCS int null_strings, one_strings; #endif @@ -13,16 +34,6 @@ int null_strings, one_strings; static PyStringObject *characters[UCHAR_MAX + 1]; static PyStringObject *nullstring; -/* This dictionary holds all interned strings. Note that references to - strings in this dictionary are *not* counted in the string's ob_refcnt. - When the interned string reaches a refcnt of 0 the string deallocation - function will delete the reference from this dictionary. 
- - Another way to look at this is that to say that the actual reference - count of a string is: s->ob_refcnt + (s->ob_sstate?2:0) -*/ -static PyObject *interned; - /* For both PyString_FromString() and PyString_FromStringAndSize(), the parameter `size' denotes number of characters to allocate, not counting any @@ -77,21 +88,14 @@ PyString_FromStringAndSize(const char *str, Py_ssize_t size) return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; - op->ob_sstate = SSTATE_NOT_INTERNED; if (str != NULL) Py_MEMCPY(op->ob_sval, str, size); op->ob_sval[size] = '\0'; /* share short strings */ if (size == 0) { - PyObject *t = (PyObject *)op; - PyString_InternInPlace(&t); - op = (PyStringObject *)t; nullstring = op; Py_INCREF(op); } else if (size == 1 && str != NULL) { - PyObject *t = (PyObject *)op; - PyString_InternInPlace(&t); - op = (PyStringObject *)t; characters[*str & UCHAR_MAX] = op; Py_INCREF(op); } @@ -132,19 +136,12 @@ PyString_FromString(const char *str) return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; - op->ob_sstate = SSTATE_NOT_INTERNED; Py_MEMCPY(op->ob_sval, str, size+1); /* share short strings */ if (size == 0) { - PyObject *t = (PyObject *)op; - PyString_InternInPlace(&t); - op = (PyStringObject *)t; nullstring = op; Py_INCREF(op); } else if (size == 1) { - PyObject *t = (PyObject *)op; - PyString_InternInPlace(&t); - op = (PyStringObject *)t; characters[*str & UCHAR_MAX] = op; Py_INCREF(op); } @@ -351,174 +348,9 @@ PyString_FromFormat(const char *format, ...) 
return ret; } - -PyObject *PyString_Decode(const char *s, - Py_ssize_t size, - const char *encoding, - const char *errors) -{ - PyObject *v, *str; - - str = PyString_FromStringAndSize(s, size); - if (str == NULL) - return NULL; - v = PyString_AsDecodedString(str, encoding, errors); - Py_DECREF(str); - return v; -} - -PyObject *PyString_AsDecodedObject(PyObject *str, - const char *encoding, - const char *errors) -{ - PyObject *v; - - if (!PyString_Check(str)) { - PyErr_BadArgument(); - goto onError; - } - - if (encoding == NULL) { - encoding = PyUnicode_GetDefaultEncoding(); - } - - /* Decode via the codec registry */ - v = PyCodec_Decode(str, encoding, errors); - if (v == NULL) - goto onError; - - return v; - - onError: - return NULL; -} - -PyObject *PyString_AsDecodedString(PyObject *str, - const char *encoding, - const char *errors) -{ - PyObject *v; - - v = PyString_AsDecodedObject(str, encoding, errors); - if (v == NULL) - goto onError; - - /* Convert Unicode to a string using the default encoding */ - if (PyUnicode_Check(v)) { - PyObject *temp = v; - v = PyUnicode_AsEncodedString(v, NULL, NULL); - Py_DECREF(temp); - if (v == NULL) - goto onError; - } - if (!PyString_Check(v)) { - PyErr_Format(PyExc_TypeError, - "decoder did not return a string object (type=%.400s)", - Py_Type(v)->tp_name); - Py_DECREF(v); - goto onError; - } - - return v; - - onError: - return NULL; -} - -PyObject *PyString_Encode(const char *s, - Py_ssize_t size, - const char *encoding, - const char *errors) -{ - PyObject *v, *str; - - str = PyString_FromStringAndSize(s, size); - if (str == NULL) - return NULL; - v = PyString_AsEncodedString(str, encoding, errors); - Py_DECREF(str); - return v; -} - -PyObject *PyString_AsEncodedObject(PyObject *str, - const char *encoding, - const char *errors) -{ - PyObject *v; - - if (!PyString_Check(str)) { - PyErr_BadArgument(); - goto onError; - } - - if (encoding == NULL) { - encoding = PyUnicode_GetDefaultEncoding(); - } - - /* Encode via the codec 
registry */ - v = PyCodec_Encode(str, encoding, errors); - if (v == NULL) - goto onError; - - return v; - - onError: - return NULL; -} - -PyObject *PyString_AsEncodedString(PyObject *str, - const char *encoding, - const char *errors) -{ - PyObject *v; - - v = PyString_AsEncodedObject(str, encoding, errors); - if (v == NULL) - goto onError; - - /* Convert Unicode to a string using the default encoding */ - if (PyUnicode_Check(v)) { - PyObject *temp = v; - v = PyUnicode_AsEncodedString(v, NULL, NULL); - Py_DECREF(temp); - if (v == NULL) - goto onError; - } - if (!PyString_Check(v)) { - PyErr_Format(PyExc_TypeError, - "encoder did not return a string object (type=%.400s)", - Py_Type(v)->tp_name); - Py_DECREF(v); - goto onError; - } - - return v; - - onError: - return NULL; -} - static void string_dealloc(PyObject *op) { - switch (PyString_CHECK_INTERNED(op)) { - case SSTATE_NOT_INTERNED: - break; - - case SSTATE_INTERNED_MORTAL: - /* revive dead object temporarily for DelItem */ - Py_Refcnt(op) = 3; - if (PyDict_DelItem(interned, op) != 0) - Py_FatalError( - "deletion of interned string failed"); - break; - - case SSTATE_INTERNED_IMMORTAL: - Py_FatalError("Immortal interned string died."); - - default: - Py_FatalError("Inconsistent interned string state."); - } Py_Type(op)->tp_free(op); } @@ -577,7 +409,7 @@ PyObject *PyString_DecodeEscape(const char *s, continue; } s++; - if (s==end) { + if (s==end) { PyErr_SetString(PyExc_ValueError, "Trailing \\ in string"); goto failed; @@ -639,8 +471,8 @@ PyObject *PyString_DecodeEscape(const char *s, /* do nothing */; else { PyErr_Format(PyExc_ValueError, - "decoding error; " - "unknown error handling code: %.400s", + "decoding error; unknown " + "error handling code: %.400s", errors); goto failed; } @@ -665,8 +497,8 @@ PyObject *PyString_DecodeEscape(const char *s, static Py_ssize_t string_getsize(register PyObject *op) { - char *s; - Py_ssize_t len; + char *s; + Py_ssize_t len; if (PyString_AsStringAndSize(op, &s, &len)) 
return -1; return len; @@ -675,8 +507,8 @@ string_getsize(register PyObject *op) static /*const*/ char * string_getbuffer(register PyObject *op) { - char *s; - Py_ssize_t len; + char *s; + Py_ssize_t len; if (PyString_AsStringAndSize(op, &s, &len)) return NULL; return s; @@ -753,7 +585,7 @@ PyString_AsStringAndSize(register PyObject *obj, #define STRINGLIB_LEN PyString_GET_SIZE #define STRINGLIB_NEW PyString_FromStringAndSize #define STRINGLIB_STR PyString_AS_STRING -#define STRINGLIB_WANT_CONTAINS_OBJ 1 +/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */ #define STRINGLIB_EMPTY nullstring #define STRINGLIB_CHECK_EXACT PyString_CheckExact @@ -773,12 +605,12 @@ PyString_Repr(PyObject *obj, int smartquotes) { static const char *hexdigits = "0123456789abcdef"; register PyStringObject* op = (PyStringObject*) obj; - Py_ssize_t length = PyString_GET_SIZE(op); - size_t newsize = 3 + 4 * Py_Size(op); + Py_ssize_t length = Py_Size(op); + size_t newsize = 3 + 4 * length; PyObject *v; - if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(op)) { + if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) { PyErr_SetString(PyExc_OverflowError, - "string is too large to make repr"); + "bytes object is too large to make repr"); } v = PyUnicode_FromUnicode(NULL, newsize); if (v == NULL) { @@ -790,14 +622,14 @@ PyString_Repr(PyObject *obj, int smartquotes) register Py_UNICODE *p = PyUnicode_AS_UNICODE(v); int quote; - /* figure out which quote to use; single is preferred */ + /* Figure out which quote to use; single is preferred */ quote = '\''; if (smartquotes) { char *test, *start; start = PyString_AS_STRING(op); for (test = start; test < start+length; ++test) { if (*test == '"') { - quote = '\''; /* switch back to single quote */ + quote = '\''; /* back to single */ goto decided; } else if (*test == '\'') @@ -807,8 +639,8 @@ PyString_Repr(PyObject *obj, int smartquotes) ; } - *p++ = 's', *p++ = quote; - for (i = 0; i < Py_Size(op); i++) { + *p++ = 'b', *p++ = quote; + for (i = 0; i 
< length; i++) { /* There's at least enough room for a hex escape and a closing quote. */ assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5); @@ -848,18 +680,14 @@ string_repr(PyObject *op) } static PyObject * -string_str(PyObject *s) +string_str(PyObject *op) { - assert(PyString_Check(s)); - if (PyString_CheckExact(s)) { - Py_INCREF(s); - return s; - } - else { - /* Subtype -- return genuine string with the same value. */ - PyStringObject *t = (PyStringObject *) s; - return PyString_FromStringAndSize(t->ob_sval, Py_Size(t)); + if (Py_BytesWarningFlag) { + if (PyErr_WarnEx(PyExc_BytesWarning, + "str() on a bytes instance", 1)) + return NULL; } + return string_repr(op); } static Py_ssize_t @@ -868,51 +696,53 @@ string_length(PyStringObject *a) return Py_Size(a); } +/* This is also used by PyString_Concat() */ static PyObject * -string_concat(register PyStringObject *a, register PyObject *bb) +string_concat(PyObject *a, PyObject *b) { - register Py_ssize_t size; - register PyStringObject *op; - if (!PyString_Check(bb)) { - if (PyUnicode_Check(bb)) - return PyUnicode_Concat((PyObject *)a, bb); - if (PyBytes_Check(bb)) - return PyBytes_Concat((PyObject *)a, bb); - PyErr_Format(PyExc_TypeError, - "cannot concatenate 'str8' and '%.200s' objects", - Py_Type(bb)->tp_name); - return NULL; + Py_ssize_t size; + Py_buffer va, vb; + PyObject *result = NULL; + + va.len = -1; + vb.len = -1; + if (_getbuffer(a, &va) < 0 || + _getbuffer(b, &vb) < 0) { + PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s", + Py_Type(a)->tp_name, Py_Type(b)->tp_name); + goto done; } -#define b ((PyStringObject *)bb) - /* Optimize cases with empty left or right operand */ - if ((Py_Size(a) == 0 || Py_Size(b) == 0) && - PyString_CheckExact(a) && PyString_CheckExact(b)) { - if (Py_Size(a) == 0) { - Py_INCREF(bb); - return bb; - } - Py_INCREF(a); - return (PyObject *)a; + + /* Optimize end cases */ + if (va.len == 0 && PyString_CheckExact(b)) { + result = b; + Py_INCREF(result); + goto done; 
+ } + if (vb.len == 0 && PyString_CheckExact(a)) { + result = a; + Py_INCREF(result); + goto done; } - size = Py_Size(a) + Py_Size(b); + + size = va.len + vb.len; if (size < 0) { - PyErr_SetString(PyExc_OverflowError, - "strings are too large to concat"); - return NULL; + PyErr_NoMemory(); + goto done; } - /* Inline PyObject_NewVar */ - op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size); - if (op == NULL) - return PyErr_NoMemory(); - PyObject_INIT_VAR(op, &PyString_Type, size); - op->ob_shash = -1; - op->ob_sstate = SSTATE_NOT_INTERNED; - Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a)); - Py_MEMCPY(op->ob_sval + Py_Size(a), b->ob_sval, Py_Size(b)); - op->ob_sval[size] = '\0'; - return (PyObject *) op; -#undef b + result = PyString_FromStringAndSize(NULL, size); + if (result != NULL) { + memcpy(PyString_AS_STRING(result), va.buf, va.len); + memcpy(PyString_AS_STRING(result) + va.len, vb.buf, vb.len); + } + + done: + if (va.len != -1) + PyObject_ReleaseBuffer(a, &va); + if (vb.len != -1) + PyObject_ReleaseBuffer(b, &vb); + return result; } static PyObject * @@ -950,7 +780,6 @@ string_repeat(register PyStringObject *a, register Py_ssize_t n) return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; - op->ob_sstate = SSTATE_NOT_INTERNED; op->ob_sval[size] = '\0'; if (Py_Size(a) == 1 && n > 0) { memset(op->ob_sval, a->ob_sval[0] , n); @@ -970,20 +799,36 @@ string_repeat(register PyStringObject *a, register Py_ssize_t n) } static int -string_contains(PyObject *str_obj, PyObject *sub_obj) +string_contains(PyObject *self, PyObject *arg) +{ + Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError); + if (ival == -1 && PyErr_Occurred()) { + Py_buffer varg; + int pos; + PyErr_Clear(); + if (_getbuffer(arg, &varg) < 0) + return -1; + pos = stringlib_find(PyString_AS_STRING(self), Py_Size(self), + varg.buf, varg.len, 0); + PyObject_ReleaseBuffer(arg, &varg); + return pos >= 0; + } + if (ival < 0 || ival >= 256) { + 
PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)"); + return -1; + } + + return memchr(PyString_AS_STRING(self), ival, Py_Size(self)) != NULL; +} + +static PyObject * +string_item(PyStringObject *a, register Py_ssize_t i) { - if (!PyString_CheckExact(sub_obj)) { - if (PyUnicode_Check(sub_obj)) - return PyUnicode_Contains(str_obj, sub_obj); - if (!PyString_Check(sub_obj)) { - PyErr_Format(PyExc_TypeError, - "'in <string>' requires string as left operand, " - "not %.200s", Py_Type(sub_obj)->tp_name); - return -1; - } + if (i < 0 || i >= Py_Size(a)) { + PyErr_SetString(PyExc_IndexError, "string index out of range"); + return NULL; } - - return stringlib_contains_obj(str_obj, sub_obj); + return PyInt_FromLong((unsigned char)a->ob_sval[i]); } static PyObject* @@ -996,6 +841,15 @@ string_richcompare(PyStringObject *a, PyStringObject *b, int op) /* Make sure both arguments are strings. */ if (!(PyString_Check(a) && PyString_Check(b))) { + if (Py_BytesWarningFlag && (op == Py_EQ) && + (PyObject_IsInstance((PyObject*)a, + (PyObject*)&PyUnicode_Type) || + PyObject_IsInstance((PyObject*)b, + (PyObject*)&PyUnicode_Type))) { + if (PyErr_WarnEx(PyExc_BytesWarning, + "Comparsion between bytes and string", 1)) + return NULL; + } result = Py_NotImplemented; goto out; } @@ -1053,9 +907,9 @@ _PyString_Eq(PyObject *o1, PyObject *o2) { PyStringObject *a = (PyStringObject*) o1; PyStringObject *b = (PyStringObject*) o2; - return Py_Size(a) == Py_Size(b) - && *a->ob_sval == *b->ob_sval - && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0; + return Py_Size(a) == Py_Size(b) + && *a->ob_sval == *b->ob_sval + && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0; } static long @@ -1088,12 +942,12 @@ string_subscript(PyStringObject* self, PyObject* item) return NULL; if (i < 0) i += PyString_GET_SIZE(self); - if (i < 0 || i >= PyString_GET_SIZE(self)) { + if (i < 0 || i >= PyString_GET_SIZE(self)) { PyErr_SetString(PyExc_IndexError, "string index out of range"); return NULL; - } - 
return PyInt_FromLong((unsigned char)self->ob_sval[i]); + } + return PyInt_FromLong((unsigned char)self->ob_sval[i]); } else if (PySlice_Check(item)) { Py_ssize_t start, stop, step, slicelength, cur, i; @@ -1149,14 +1003,15 @@ string_subscript(PyStringObject* self, PyObject* item) static int string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags) { - return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_Size(self), 0, flags); + return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_Size(self), + 0, flags); } static PySequenceMethods string_as_sequence = { (lenfunc)string_length, /*sq_length*/ (binaryfunc)string_concat, /*sq_concat*/ (ssizeargfunc)string_repeat, /*sq_repeat*/ - 0, /*sq_item*/ + (ssizeargfunc)string_item, /*sq_item*/ 0, /*sq_slice*/ 0, /*sq_ass_item*/ 0, /*sq_ass_slice*/ @@ -1171,7 +1026,7 @@ static PyMappingMethods string_as_mapping = { static PyBufferProcs string_as_buffer = { (getbufferproc)string_buffer_getbuffer, - NULL, + NULL, }; @@ -1297,12 +1152,12 @@ split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount) } PyDoc_STRVAR(split__doc__, -"S.split([sep [,maxsplit]]) -> list of strings\n\ +"B.split([sep[, maxsplit]]) -> list of bytes\n\ \n\ -Return a list of the words in the string S, using sep as the\n\ -delimiter string. If maxsplit is given, at most maxsplit\n\ -splits are done. 
If sep is not specified or is None, any\n\ -whitespace string is a separator."); +Return a list of the sections in B, using sep as the delimiter.\n\ +If sep is not given, B is split on ASCII whitespace characters\n\ +(space, tab, return, newline, formfeed, vertical tab).\n\ +If maxsplit is given, at most maxsplit splits are done."); static PyObject * string_split(PyStringObject *self, PyObject *args) @@ -1310,6 +1165,7 @@ string_split(PyStringObject *self, PyObject *args) Py_ssize_t len = PyString_GET_SIZE(self), n, i, j; Py_ssize_t maxsplit = -1, count=0; const char *s = PyString_AS_STRING(self), *sub; + Py_buffer vsub; PyObject *list, *str, *subobj = Py_None; #ifdef USE_FAST Py_ssize_t pos; @@ -1321,25 +1177,27 @@ string_split(PyStringObject *self, PyObject *args) maxsplit = PY_SSIZE_T_MAX; if (subobj == Py_None) return split_whitespace(s, len, maxsplit); - if (PyString_Check(subobj)) { - sub = PyString_AS_STRING(subobj); - n = PyString_GET_SIZE(subobj); - } - else if (PyUnicode_Check(subobj)) - return PyUnicode_Split((PyObject *)self, subobj, maxsplit); - else if (PyObject_AsCharBuffer(subobj, &sub, &n)) + if (_getbuffer(subobj, &vsub) < 0) return NULL; + sub = vsub.buf; + n = vsub.len; if (n == 0) { PyErr_SetString(PyExc_ValueError, "empty separator"); + PyObject_ReleaseBuffer(subobj, &vsub); return NULL; } - else if (n == 1) - return split_char(s, len, sub[0], maxsplit); + else if (n == 1) { + char ch = sub[0]; + PyObject_ReleaseBuffer(subobj, &vsub); + return split_char(s, len, ch, maxsplit); + } list = PyList_New(PREALLOC_SIZE(maxsplit)); - if (list == NULL) + if (list == NULL) { + PyObject_ReleaseBuffer(subobj, &vsub); return NULL; + } #ifdef USE_FAST i = j = 0; @@ -1365,19 +1223,21 @@ string_split(PyStringObject *self, PyObject *args) #endif SPLIT_ADD(s, i, len); FIX_PREALLOC_SIZE(list); + PyObject_ReleaseBuffer(subobj, &vsub); return list; onError: Py_DECREF(list); + PyObject_ReleaseBuffer(subobj, &vsub); return NULL; } PyDoc_STRVAR(partition__doc__, 
-"S.partition(sep) -> (head, sep, tail)\n\ +"B.partition(sep) -> (head, sep, tail)\n\ \n\ -Searches for the separator sep in S, and returns the part before it,\n\ +Searches for the separator sep in B, and returns the part before it,\n\ the separator itself, and the part after it. If the separator is not\n\ -found, returns S and two empty strings."); +found, returns B and two empty bytes objects."); static PyObject * string_partition(PyStringObject *self, PyObject *sep_obj) @@ -1402,11 +1262,12 @@ string_partition(PyStringObject *self, PyObject *sep_obj) } PyDoc_STRVAR(rpartition__doc__, -"S.rpartition(sep) -> (tail, sep, head)\n\ +"B.rpartition(sep) -> (tail, sep, head)\n\ \n\ -Searches for the separator sep in S, starting at the end of S, and returns\n\ -the part before it, the separator itself, and the part after it. If the\n\ -separator is not found, returns two empty strings and S."); +Searches for the separator sep in B, starting at the end of B,\n\ +and returns the part before it, the separator itself, and the\n\ +part after it. If the separator is not found, returns two empty\n\ +bytes objects and B."); static PyObject * string_rpartition(PyStringObject *self, PyObject *sep_obj) @@ -1450,8 +1311,8 @@ rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) SPLIT_ADD(s, i + 1, j + 1); } if (i >= 0) { - /* Only occurs when maxsplit was reached */ - /* Skip any remaining whitespace and copy to beginning of string */ + /* Only occurs when maxsplit was reached. Skip any remaining + whitespace and copy to beginning of string. 
*/ RSKIP_SPACE(s, i); if (i >= 0) SPLIT_ADD(s, 0, i + 1); @@ -1500,13 +1361,14 @@ rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount) } PyDoc_STRVAR(rsplit__doc__, -"S.rsplit([sep [,maxsplit]]) -> list of strings\n\ +"B.rsplit([sep[, maxsplit]]) -> list of strings\n\ \n\ -Return a list of the words in the string S, using sep as the\n\ -delimiter string, starting at the end of the string and working\n\ -to the front. If maxsplit is given, at most maxsplit splits are\n\ -done. If sep is not specified or is None, any whitespace string\n\ -is a separator."); +Return a list of the sections in B, using sep as the delimiter,\n\ +starting at the end of B and working to the front.\n\ +If sep is not given, B is split on ASCII whitespace characters\n\ +(space, tab, return, newline, formfeed, vertical tab).\n\ +If maxsplit is given, at most maxsplit splits are done."); + static PyObject * string_rsplit(PyStringObject *self, PyObject *args) @@ -1514,6 +1376,7 @@ string_rsplit(PyStringObject *self, PyObject *args) Py_ssize_t len = PyString_GET_SIZE(self), n, i, j; Py_ssize_t maxsplit = -1, count=0; const char *s = PyString_AS_STRING(self), *sub; + Py_buffer vsub; PyObject *list, *str, *subobj = Py_None; if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit)) @@ -1522,25 +1385,27 @@ string_rsplit(PyStringObject *self, PyObject *args) maxsplit = PY_SSIZE_T_MAX; if (subobj == Py_None) return rsplit_whitespace(s, len, maxsplit); - if (PyString_Check(subobj)) { - sub = PyString_AS_STRING(subobj); - n = PyString_GET_SIZE(subobj); - } - else if (PyUnicode_Check(subobj)) - return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit); - else if (PyObject_AsCharBuffer(subobj, &sub, &n)) + if (_getbuffer(subobj, &vsub) < 0) return NULL; + sub = vsub.buf; + n = vsub.len; if (n == 0) { PyErr_SetString(PyExc_ValueError, "empty separator"); + PyObject_ReleaseBuffer(subobj, &vsub); return NULL; } - else if (n == 1) - return rsplit_char(s, len, sub[0], maxsplit); + 
else if (n == 1) { + char ch = sub[0]; + PyObject_ReleaseBuffer(subobj, &vsub); + return rsplit_char(s, len, ch, maxsplit); + } list = PyList_New(PREALLOC_SIZE(maxsplit)); - if (list == NULL) + if (list == NULL) { + PyObject_ReleaseBuffer(subobj, &vsub); return NULL; + } j = len; i = j - n; @@ -1559,10 +1424,12 @@ string_rsplit(PyStringObject *self, PyObject *args) FIX_PREALLOC_SIZE(list); if (PyList_Reverse(list) < 0) goto onError; + PyObject_ReleaseBuffer(subobj, &vsub); return list; onError: Py_DECREF(list); + PyObject_ReleaseBuffer(subobj, &vsub); return NULL; } @@ -1572,13 +1439,13 @@ onError: PyDoc_STRVAR(join__doc__, -"S.join(sequence) -> string\n\ +"B.join(iterable_of_bytes) -> bytes\n\ \n\ -Return a string which is the concatenation of the strings in the\n\ -sequence. The separator between elements is S."); +Concatenates any number of bytes objects, with B in between each pair.\n\ +Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'."); static PyObject * -string_join(PyStringObject *self, PyObject *orig) +string_join(PyObject *self, PyObject *orig) { char *sep = PyString_AS_STRING(self); const Py_ssize_t seplen = PyString_GET_SIZE(self); @@ -1601,7 +1468,7 @@ string_join(PyStringObject *self, PyObject *orig) } if (seqlen == 1) { item = PySequence_Fast_GET_ITEM(seq, 0); - if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) { + if (PyString_CheckExact(item)) { Py_INCREF(item); Py_DECREF(seq); return item; @@ -1611,37 +1478,26 @@ string_join(PyStringObject *self, PyObject *orig) /* There are at least two things to join, or else we have a subclass * of the builtin types in the sequence. * Do a pre-pass to figure out the total amount of space we'll - * need (sz), see whether any argument is absurd, and defer to - * the Unicode join if appropriate. + * need (sz), and see whether all argument are bytes. */ + /* XXX Shouldn't we use _getbuffer() on these items instead? 
*/ for (i = 0; i < seqlen; i++) { const size_t old_sz = sz; item = PySequence_Fast_GET_ITEM(seq, i); - if (!PyString_Check(item)){ - if (PyUnicode_Check(item)) { - /* Defer to Unicode join. - * CAUTION: There's no gurantee that the - * original sequence can be iterated over - * again, so we must pass seq here. - */ - PyObject *result; - result = PyUnicode_Join((PyObject *)self, seq); - Py_DECREF(seq); - return result; - } + if (!PyString_Check(item) && !PyBytes_Check(item)) { PyErr_Format(PyExc_TypeError, - "sequence item %zd: expected string," + "sequence item %zd: expected bytes," " %.80s found", i, Py_Type(item)->tp_name); Py_DECREF(seq); return NULL; } - sz += PyString_GET_SIZE(item); + sz += Py_Size(item); if (i != 0) sz += seplen; if (sz < old_sz || sz > PY_SSIZE_T_MAX) { PyErr_SetString(PyExc_OverflowError, - "join() result is too long for a Python string"); + "join() result is too long for a Python string"); Py_DECREF(seq); return NULL; } @@ -1655,17 +1511,24 @@ string_join(PyStringObject *self, PyObject *orig) } /* Catenate everything. */ + /* I'm not worried about a PyBytes item growing because there's + nowhere in this function where we release the GIL. 
*/ p = PyString_AS_STRING(res); for (i = 0; i < seqlen; ++i) { size_t n; - item = PySequence_Fast_GET_ITEM(seq, i); - n = PyString_GET_SIZE(item); - Py_MEMCPY(p, PyString_AS_STRING(item), n); - p += n; - if (i < seqlen - 1) { + char *q; + if (i) { Py_MEMCPY(p, sep, seplen); p += seplen; } + item = PySequence_Fast_GET_ITEM(seq, i); + n = Py_Size(item); + if (PyString_Check(item)) + q = PyString_AS_STRING(item); + else + q = PyBytes_AS_STRING(item); + Py_MEMCPY(p, q, n); + p += n; } Py_DECREF(seq); @@ -1677,7 +1540,7 @@ _PyString_Join(PyObject *sep, PyObject *x) { assert(sep != NULL && PyString_Check(sep)); assert(x != NULL); - return string_join((PyStringObject *)sep, x); + return string_join(sep, x); } Py_LOCAL_INLINE(void) @@ -1730,7 +1593,7 @@ string_find_internal(PyStringObject *self, PyObject *args, int dir) PyDoc_STRVAR(find__doc__, -"S.find(sub [,start [,end]]) -> int\n\ +"B.find(sub [,start [,end]]) -> int\n\ \n\ Return the lowest index in S where substring sub is found,\n\ such that sub is contained within s[start:end]. Optional\n\ @@ -1749,9 +1612,9 @@ string_find(PyStringObject *self, PyObject *args) PyDoc_STRVAR(index__doc__, -"S.index(sub [,start [,end]]) -> int\n\ +"B.index(sub [,start [,end]]) -> int\n\ \n\ -Like S.find() but raise ValueError when the substring is not found."); +Like B.find() but raise ValueError when the substring is not found."); static PyObject * string_index(PyStringObject *self, PyObject *args) @@ -1769,9 +1632,9 @@ string_index(PyStringObject *self, PyObject *args) PyDoc_STRVAR(rfind__doc__, -"S.rfind(sub [,start [,end]]) -> int\n\ +"B.rfind(sub [,start [,end]]) -> int\n\ \n\ -Return the highest index in S where substring sub is found,\n\ +Return the highest index in B where substring sub is found,\n\ such that sub is contained within s[start:end]. 
Optional\n\ arguments start and end are interpreted as in slice notation.\n\ \n\ @@ -1788,9 +1651,9 @@ string_rfind(PyStringObject *self, PyObject *args) PyDoc_STRVAR(rindex__doc__, -"S.rindex(sub [,start [,end]]) -> int\n\ +"B.rindex(sub [,start [,end]]) -> int\n\ \n\ -Like S.rfind() but raise ValueError when the substring is not found."); +Like B.rfind() but raise ValueError when the substring is not found."); static PyObject * string_rindex(PyStringObject *self, PyObject *args) @@ -1810,12 +1673,18 @@ string_rindex(PyStringObject *self, PyObject *args) Py_LOCAL_INLINE(PyObject *) do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj) { + Py_buffer vsep; char *s = PyString_AS_STRING(self); Py_ssize_t len = PyString_GET_SIZE(self); - char *sep = PyString_AS_STRING(sepobj); - Py_ssize_t seplen = PyString_GET_SIZE(sepobj); + char *sep; + Py_ssize_t seplen; Py_ssize_t i, j; + if (_getbuffer(sepobj, &vsep) < 0) + return NULL; + sep = vsep.buf; + seplen = vsep.len; + i = 0; if (striptype != RIGHTSTRIP) { while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) { @@ -1831,6 +1700,8 @@ do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj) j++; } + PyObject_ReleaseBuffer(sepobj, &vsep); + if (i == 0 && j == len && PyString_CheckExact(self)) { Py_INCREF(self); return (PyObject*)self; @@ -1879,36 +1750,17 @@ do_argstrip(PyStringObject *self, int striptype, PyObject *args) return NULL; if (sep != NULL && sep != Py_None) { - if (PyString_Check(sep)) - return do_xstrip(self, striptype, sep); - else if (PyUnicode_Check(sep)) { - PyObject *uniself = PyUnicode_FromObject((PyObject *)self); - PyObject *res; - if (uniself==NULL) - return NULL; - res = _PyUnicode_XStrip((PyUnicodeObject *)uniself, - striptype, sep); - Py_DECREF(uniself); - return res; - } - PyErr_Format(PyExc_TypeError, - "%s arg must be None or string", - STRIPNAME(striptype)); - return NULL; + return do_xstrip(self, striptype, sep); } - return do_strip(self, striptype); } 
PyDoc_STRVAR(strip__doc__, -"S.strip([chars]) -> string\n\ +"B.strip([bytes]) -> bytes\n\ \n\ -Return a copy of the string S with leading and trailing\n\ -whitespace removed.\n\ -If chars is given and not None, remove characters in chars instead.\n\ -If chars is unicode, S will be converted to unicode before stripping"); - +Strip leading and trailing bytes contained in the argument.\n\ +If the argument is omitted, strip trailing ASCII whitespace."); static PyObject * string_strip(PyStringObject *self, PyObject *args) { @@ -1920,12 +1772,10 @@ string_strip(PyStringObject *self, PyObject *args) PyDoc_STRVAR(lstrip__doc__, -"S.lstrip([chars]) -> string\n\ +"B.lstrip([bytes]) -> bytes\n\ \n\ -Return a copy of the string S with leading whitespace removed.\n\ -If chars is given and not None, remove characters in chars instead.\n\ -If chars is unicode, S will be converted to unicode before stripping"); - +Strip leading bytes contained in the argument.\n\ +If the argument is omitted, strip leading ASCII whitespace."); static PyObject * string_lstrip(PyStringObject *self, PyObject *args) { @@ -1937,12 +1787,10 @@ string_lstrip(PyStringObject *self, PyObject *args) PyDoc_STRVAR(rstrip__doc__, -"S.rstrip([chars]) -> string\n\ +"B.rstrip([bytes]) -> bytes\n\ \n\ -Return a copy of the string S with trailing whitespace removed.\n\ -If chars is given and not None, remove characters in chars instead.\n\ -If chars is unicode, S will be converted to unicode before stripping"); - +Strip trailing bytes contained in the argument.\n\ +If the argument is omitted, strip trailing ASCII whitespace."); static PyObject * string_rstrip(PyStringObject *self, PyObject *args) { @@ -1954,7 +1802,7 @@ string_rstrip(PyStringObject *self, PyObject *args) PyDoc_STRVAR(count__doc__, -"S.count(sub[, start[, end]]) -> int\n\ +"B.count(sub [,start [,end]]) -> int\n\ \n\ Return the number of non-overlapping occurrences of substring sub in\n\ string S[start:end]. 
Optional arguments start and end are interpreted\n\ @@ -1996,12 +1844,12 @@ string_count(PyStringObject *self, PyObject *args) PyDoc_STRVAR(translate__doc__, -"S.translate(table [,deletechars]) -> string\n\ +"B.translate(table[, deletechars]) -> bytes\n\ \n\ -Return a copy of the string S, where all characters occurring\n\ -in the optional argument deletechars are removed, and the\n\ -remaining characters have been mapped through the given\n\ -translation table, which must be a string of length 256."); +Return a copy of B, where all characters occurring in the\n\ +optional argument deletechars are removed, and the remaining\n\ +characters have been mapped through the given translation\n\ +table, which must be a bytes object of length 256."); static PyObject * string_translate(PyStringObject *self, PyObject *args) @@ -2187,7 +2035,7 @@ findstring(const char *target, Py_ssize_t target_len, return end; } else { for (; start <= end; start++) - if (Py_STRING_MATCH(target, start, pattern, pattern_len)) + if (Py_STRING_MATCH(target, start,pattern,pattern_len)) return start; } return -1; @@ -2225,14 +2073,15 @@ countstring(const char *target, Py_ssize_t target_len, end -= pattern_len; if (direction < 0) { for (; (end >= start); end--) - if (Py_STRING_MATCH(target, end, pattern, pattern_len)) { + if (Py_STRING_MATCH(target, end,pattern,pattern_len)) { count++; if (--maxcount <= 0) break; end -= pattern_len-1; } } else { for (; (start <= end); start++) - if (Py_STRING_MATCH(target, start, pattern, pattern_len)) { + if (Py_STRING_MATCH(target, start, + pattern, pattern_len)) { count++; if (--maxcount <= 0) break; @@ -2522,12 +2371,14 @@ replace_single_character(PyStringObject *self, /* result_len = self_len + count * (to_len-1) */ product = count * (to_len-1); if (product / (to_len-1) != count) { - PyErr_SetString(PyExc_OverflowError, "replace string is too long"); + PyErr_SetString(PyExc_OverflowError, + "replace string is too long"); return NULL; } result_len = self_len + 
product; if (result_len < 0) { - PyErr_SetString(PyExc_OverflowError, "replace string is too long"); + PyErr_SetString(PyExc_OverflowError, + "replace string is too long"); return NULL; } @@ -2590,12 +2441,14 @@ replace_substring(PyStringObject *self, /* result_len = self_len + count * (to_len-from_len) */ product = count * (to_len-from_len); if (product / (to_len-from_len) != count) { - PyErr_SetString(PyExc_OverflowError, "replace string is too long"); + PyErr_SetString(PyExc_OverflowError, + "replace string is too long"); return NULL; } result_len = self_len + product; if (result_len < 0) { - PyErr_SetString(PyExc_OverflowError, "replace string is too long"); + PyErr_SetString(PyExc_OverflowError, + "replace string is too long"); return NULL; } @@ -2675,7 +2528,8 @@ replace(PyStringObject *self, return replace_delete_single_character( self, from_s[0], maxcount); } else { - return replace_delete_substring(self, from_s, from_len, maxcount); + return replace_delete_substring(self, from_s, + from_len, maxcount); } } @@ -2690,7 +2544,8 @@ replace(PyStringObject *self, maxcount); } else { return replace_substring_in_place( - self, from_s, from_len, to_s, to_len, maxcount); + self, from_s, from_len, to_s, to_len, + maxcount); } } @@ -2700,14 +2555,15 @@ replace(PyStringObject *self, to_s, to_len, maxcount); } else { /* len('from')>=2, len('to')>=1 */ - return replace_substring(self, from_s, from_len, to_s, to_len, maxcount); + return replace_substring(self, from_s, from_len, to_s, to_len, + maxcount); } } PyDoc_STRVAR(replace__doc__, -"S.replace (old, new[, count]) -> string\n\ +"B.replace(old, new[, count]) -> bytes\n\ \n\ -Return a copy of string S with all occurrences of substring\n\ +Return a copy of B with all occurrences of subsection\n\ old replaced by new. 
If the optional argument count is\n\ given, only the first count occurrences are replaced."); @@ -2794,11 +2650,11 @@ _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start, PyDoc_STRVAR(startswith__doc__, -"S.startswith(prefix[, start[, end]]) -> bool\n\ +"B.startswith(prefix [,start [,end]]) -> bool\n\ \n\ -Return True if S starts with the specified prefix, False otherwise.\n\ -With optional start, test S beginning at that position.\n\ -With optional end, stop comparing S at that position.\n\ +Return True if B starts with the specified prefix, False otherwise.\n\ +With optional start, test B beginning at that position.\n\ +With optional end, stop comparing B at that position.\n\ prefix can also be a tuple of strings to try."); static PyObject * @@ -2835,11 +2691,11 @@ string_startswith(PyStringObject *self, PyObject *args) PyDoc_STRVAR(endswith__doc__, -"S.endswith(suffix[, start[, end]]) -> bool\n\ +"B.endswith(suffix [,start [,end]]) -> bool\n\ \n\ -Return True if S ends with the specified suffix, False otherwise.\n\ -With optional start, test S beginning at that position.\n\ -With optional end, stop comparing S at that position.\n\ +Return True if B ends with the specified suffix, False otherwise.\n\ +With optional start, test B beginning at that position.\n\ +With optional end, stop comparing B at that position.\n\ suffix can also be a tuple of strings to try."); static PyObject * @@ -2876,63 +2732,50 @@ string_endswith(PyStringObject *self, PyObject *args) PyDoc_STRVAR(decode__doc__, -"S.decode([encoding[,errors]]) -> object\n\ +"B.decode([encoding[, errors]]) -> object\n\ \n\ Decodes S using the codec registered for encoding. encoding defaults\n\ to the default encoding. errors may be given to set a different error\n\ -handling scheme. Default is 'strict' meaning that encoding errors raise\n\ -a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\ +handling scheme. 
Default is 'strict' meaning that encoding errors raise\n\ +a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\ as well as any other name registerd with codecs.register_error that is\n\ able to handle UnicodeDecodeErrors."); static PyObject * -string_decode(PyStringObject *self, PyObject *args) +string_decode(PyObject *self, PyObject *args) { - char *encoding = NULL; - char *errors = NULL; - PyObject *v; - - if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors)) - return NULL; - v = PyString_AsDecodedObject((PyObject *)self, encoding, errors); - if (v == NULL) - goto onError; - if (!PyString_Check(v) && !PyUnicode_Check(v)) { - PyErr_Format(PyExc_TypeError, - "decoder did not return a string/unicode object " - "(type=%.400s)", - Py_Type(v)->tp_name); - Py_DECREF(v); - return NULL; - } - return v; + const char *encoding = NULL; + const char *errors = NULL; - onError: - return NULL; + if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors)) + return NULL; + if (encoding == NULL) + encoding = PyUnicode_GetDefaultEncoding(); + return PyCodec_Decode(self, encoding, errors); } PyDoc_STRVAR(fromhex_doc, -"str8.fromhex(string) -> str8\n\ +"bytes.fromhex(string) -> bytes\n\ \n\ -Create a str8 object from a string of hexadecimal numbers.\n\ -Spaces between two numbers are accepted. 
Example:\n\ -str8.fromhex('10 1112') -> s'\\x10\\x11\\x12'."); +Create a bytes object from a string of hexadecimal numbers.\n\ +Spaces between two numbers are accepted.\n\ +Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'."); static int hex_digit_to_int(Py_UNICODE c) { - if (c >= 128) - return -1; - if (ISDIGIT(c)) - return c - '0'; - else { - if (ISUPPER(c)) - c = TOLOWER(c); - if (c >= 'a' && c <= 'f') - return c - 'a' + 10; - } - return -1; + if (c >= 128) + return -1; + if (ISDIGIT(c)) + return c - '0'; + else { + if (ISUPPER(c)) + c = TOLOWER(c); + if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + } + return -1; } static PyObject * @@ -2975,7 +2818,7 @@ string_fromhex(PyObject *cls, PyObject *args) return newstring; error: - Py_DECREF(newstring); + Py_XDECREF(newstring); return NULL; } @@ -3058,11 +2901,11 @@ string_new(PyTypeObject *type, PyObject *args, PyObject *kwds) const char *errors = NULL; PyObject *new = NULL; Py_ssize_t i, size; - static char *kwlist[] = {"object", "encoding", "errors", 0}; + static char *kwlist[] = {"source", "encoding", "errors", 0}; if (type != &PyString_Type) return str_subtype_new(type, args, kwds); - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str8", kwlist, &x, + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x, &encoding, &errors)) return NULL; if (x == NULL) { @@ -3085,34 +2928,37 @@ string_new(PyTypeObject *type, PyObject *args, PyObject *kwds) new = PyCodec_Encode(x, encoding, errors); if (new == NULL) return NULL; - /* XXX(gb): must accept bytes here since codecs output bytes - at the moment */ - if (PyBytes_Check(new)) { - PyObject *str; - str = PyString_FromString(PyBytes_AsString(new)); - Py_DECREF(new); - if (!str) - return NULL; - return str; - } - if (!PyString_Check(new)) { - PyErr_Format(PyExc_TypeError, - "encoder did not return a str8 " - "object (type=%.400s)", - Py_Type(new)->tp_name); - Py_DECREF(new); - return NULL; - } + assert(PyString_Check(new)); return new; } /* If 
it's not unicode, there can't be encoding or errors */ if (encoding != NULL || errors != NULL) { PyErr_SetString(PyExc_TypeError, - "encoding or errors without a string argument"); + "encoding or errors without a string argument"); return NULL; } + /* Is it an int? */ + size = PyNumber_AsSsize_t(x, PyExc_ValueError); + if (size == -1 && PyErr_Occurred()) { + PyErr_Clear(); + } + else { + if (size < 0) { + PyErr_SetString(PyExc_ValueError, "negative count"); + return NULL; + } + new = PyString_FromStringAndSize(NULL, size); + if (new == NULL) { + return NULL; + } + if (size > 0) { + memset(((PyStringObject*)new)->ob_sval, 0, size); + } + return new; + } + /* Use the modern buffer interface */ if (PyObject_CheckBuffer(x)) { Py_buffer view; @@ -3133,8 +2979,10 @@ string_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; } - /* For the iterator version, create a string object and resize as needed. */ - /* XXX(gb): is 64 a good value? also, optimize this if length is known */ + /* For iterator version, create a string object and resize as needed */ + /* XXX(gb): is 64 a good value? also, optimize if length is known */ + /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the + input being a truly long iterator. 
*/ size = 64; new = PyString_FromStringAndSize(NULL, size); if (new == NULL) @@ -3158,9 +3006,9 @@ string_new(PyTypeObject *type, PyObject *args, PyObject *kwds) item = iternext(it); if (item == NULL) { if (PyErr_Occurred()) { - if (!PyErr_ExceptionMatches(PyExc_StopIteration)) - goto error; - PyErr_Clear(); + if (!PyErr_ExceptionMatches(PyExc_StopIteration)) + goto error; + PyErr_Clear(); } break; } @@ -3193,7 +3041,7 @@ string_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return new; error: - /* Error handling when it != NULL */ + /* Error handling when new != NULL */ Py_XDECREF(it); Py_DECREF(new); return NULL; @@ -3213,43 +3061,32 @@ str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) n = PyString_GET_SIZE(tmp); pnew = type->tp_alloc(type, n); if (pnew != NULL) { - Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1); + Py_MEMCPY(PyString_AS_STRING(pnew), + PyString_AS_STRING(tmp), n+1); ((PyStringObject *)pnew)->ob_shash = ((PyStringObject *)tmp)->ob_shash; - ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED; } Py_DECREF(tmp); return pnew; } -static PyObject * -string_mod(PyObject *v, PyObject *w) -{ - if (!PyString_Check(v)) { - Py_INCREF(Py_NotImplemented); - return Py_NotImplemented; - } - return PyString_Format(v, w); -} - -static PyNumberMethods string_as_number = { - 0, /*nb_add*/ - 0, /*nb_subtract*/ - 0, /*nb_multiply*/ - string_mod, /*nb_remainder*/ -}; - PyDoc_STRVAR(string_doc, -"str(object) -> string\n\ +"bytes(iterable_of_ints) -> bytes.\n\ +bytes(string, encoding[, errors]) -> bytes\n\ +bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer.\n\ +bytes(memory_view) -> bytes.\n\ \n\ -Return a nice string representation of the object.\n\ -If the argument is a string, the return value is the same object."); +Construct an immutable array of bytes from:\n\ + - an iterable yielding integers in range(256)\n\ + - a text string encoded using the specified encoding\n\ + - a bytes or a buffer object\n\ + - 
any object implementing the buffer API."); static PyObject *str_iter(PyObject *seq); PyTypeObject PyString_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - "str8", + "bytes", sizeof(PyStringObject), sizeof(char), string_dealloc, /* tp_dealloc */ @@ -3257,8 +3094,8 @@ PyTypeObject PyString_Type = { 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_compare */ - string_repr, /* tp_repr */ - &string_as_number, /* tp_as_number */ + (reprfunc)string_repr, /* tp_repr */ + 0, /* tp_as_number */ &string_as_sequence, /* tp_as_sequence */ &string_as_mapping, /* tp_as_mapping */ (hashfunc)string_hash, /* tp_hash */ @@ -3294,14 +3131,15 @@ void PyString_Concat(register PyObject **pv, register PyObject *w) { register PyObject *v; + assert(pv != NULL); if (*pv == NULL) return; - if (w == NULL || !PyString_Check(*pv)) { + if (w == NULL) { Py_DECREF(*pv); *pv = NULL; return; } - v = string_concat((PyStringObject *) *pv, w); + v = string_concat(*pv, w); Py_DECREF(*pv); *pv = v; } @@ -3334,8 +3172,7 @@ _PyString_Resize(PyObject **pv, Py_ssize_t newsize) register PyObject *v; register PyStringObject *sv; v = *pv; - if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0 || - PyString_CHECK_INTERNED(v)) { + if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0) { *pv = 0; Py_DECREF(v); PyErr_BadInternalCall(); @@ -3359,85 +3196,6 @@ _PyString_Resize(PyObject **pv, Py_ssize_t newsize) return 0; } -/* Helpers for formatstring */ - -Py_LOCAL_INLINE(PyObject *) -getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) -{ - Py_ssize_t argidx = *p_argidx; - if (argidx < arglen) { - (*p_argidx)++; - if (arglen < 0) - return args; - else - return PyTuple_GetItem(args, argidx); - } - PyErr_SetString(PyExc_TypeError, - "not enough arguments for format string"); - return NULL; -} - -/* Format codes - * F_LJUST '-' - * F_SIGN '+' - * F_BLANK ' ' - * F_ALT '#' - * F_ZERO '0' - */ -#define F_LJUST (1<<0) -#define F_SIGN (1<<1) -#define F_BLANK (1<<2) -#define F_ALT (1<<3) -#define 
F_ZERO (1<<4) - -Py_LOCAL_INLINE(int) -formatfloat(char *buf, size_t buflen, int flags, - int prec, int type, PyObject *v) -{ - /* fmt = '%#.' + `prec` + `type` - worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/ - char fmt[20]; - double x; - x = PyFloat_AsDouble(v); - if (x == -1.0 && PyErr_Occurred()) { - PyErr_Format(PyExc_TypeError, "float argument required, " - "not %.200s", Py_Type(v)->tp_name); - return -1; - } - if (prec < 0) - prec = 6; - if (type == 'f' && fabs(x)/1e25 >= 1e25) - type = 'g'; - /* Worst case length calc to ensure no buffer overrun: - - 'g' formats: - fmt = %#.<prec>g - buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp - for any double rep.) - len = 1 + prec + 1 + 2 + 5 = 9 + prec - - 'f' formats: - buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50) - len = 1 + 50 + 1 + prec = 52 + prec - - If prec=0 the effective precision is 1 (the leading digit is - always given), therefore increase the length by one. - - */ - if (((type == 'g' || type == 'G') && - buflen <= (size_t)10 + (size_t)prec) || - (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) { - PyErr_SetString(PyExc_OverflowError, - "formatted float is too long (precision too large?)"); - return -1; - } - PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c", - (flags&F_ALT) ? "#" : "", - prec, type); - PyOS_ascii_formatd(buf, buflen, fmt, x); - return (int)strlen(buf); -} - /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and * the F_ALT flag, for Python's long (unbounded) ints. It's not used for * Python's regular ints. 
@@ -3516,7 +3274,8 @@ _PyString_FormatLong(PyObject *val, int flags, int prec, int type, } llen = PyString_Size(result); if (llen > INT_MAX) { - PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong"); + PyErr_SetString(PyExc_ValueError, + "string too large in _PyString_FormatLong"); return NULL; } len = (int)llen; @@ -3534,7 +3293,7 @@ _PyString_FormatLong(PyObject *val, int flags, int prec, int type, (type == 'o' || type == 'x' || type == 'X'))) { assert(buf[sign] == '0'); assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' || - buf[sign+1] == 'o'); + buf[sign+1] == 'o'); numnondigits -= 2; buf += 2; len -= 2; @@ -3580,623 +3339,6 @@ _PyString_FormatLong(PyObject *val, int flags, int prec, int type, return result; } -Py_LOCAL_INLINE(int) -formatint(char *buf, size_t buflen, int flags, - int prec, int type, PyObject *v) -{ - /* fmt = '%#.' + `prec` + 'l' + `type` - worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine) - + 1 + 1 = 24 */ - char fmt[64]; /* plenty big enough! 
*/ - char *sign; - long x; - - x = PyInt_AsLong(v); - if (x == -1 && PyErr_Occurred()) { - PyErr_Format(PyExc_TypeError, "int argument required, not %.200s", - Py_Type(v)->tp_name); - return -1; - } - if (x < 0 && type == 'u') { - type = 'd'; - } - if (x < 0 && (type == 'x' || type == 'X' || type == 'o')) - sign = "-"; - else - sign = ""; - if (prec < 0) - prec = 1; - - if ((flags & F_ALT) && - (type == 'x' || type == 'X' || type == 'o')) { - /* When converting under %#o, %#x or %#X, there are a number - * of issues that cause pain: - * - for %#o, we want a different base marker than C - * - when 0 is being converted, the C standard leaves off - * the '0x' or '0X', which is inconsistent with other - * %#x/%#X conversions and inconsistent with Python's - * hex() function - * - there are platforms that violate the standard and - * convert 0 with the '0x' or '0X' - * (Metrowerks, Compaq Tru64) - * - there are platforms that give '0x' when converting - * under %#X, but convert 0 in accordance with the - * standard (OS/2 EMX) - * - * We can achieve the desired consistency by inserting our - * own '0x' or '0X' prefix, and substituting %x/%X in place - * of %#x/%#X. - * - * Note that this is the same approach as used in - * formatint() in unicodeobject.c - */ - PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c", - sign, type, prec, type); - } - else { - PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c", - sign, (flags&F_ALT) ? 
"#" : "", - prec, type); - } - - /* buf = '+'/'-'/'' + '0o'/'0x'/'' + '[0-9]'*max(prec, len(x in octal)) - * worst case buf = '-0x' + [0-9]*prec, where prec >= 11 - */ - if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) { - PyErr_SetString(PyExc_OverflowError, - "formatted integer is too long (precision too large?)"); - return -1; - } - if (sign[0]) - PyOS_snprintf(buf, buflen, fmt, -x); - else - PyOS_snprintf(buf, buflen, fmt, x); - return (int)strlen(buf); -} - -Py_LOCAL_INLINE(int) -formatchar(char *buf, size_t buflen, PyObject *v) -{ - /* presume that the buffer is at least 2 characters long */ - if (PyString_Check(v)) { - if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0])) - return -1; - } - else { - if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0])) - return -1; - } - buf[1] = '\0'; - return 1; -} - -/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) - - FORMATBUFLEN is the length of the buffer in which the floats, ints, & - chars are formatted. XXX This is a magic number. Each formatting - routine does bounds checking to ensure no overflow, but a better - solution may be to malloc a buffer of appropriate size for each - format. For now, the current solution is sufficient. 
-*/ -#define FORMATBUFLEN (size_t)120 - -PyObject * -PyString_Format(PyObject *format, PyObject *args) -{ - char *fmt, *res; - Py_ssize_t arglen, argidx; - Py_ssize_t reslen, rescnt, fmtcnt; - int args_owned = 0; - PyObject *result, *orig_args; - PyObject *v, *w; - PyObject *dict = NULL; - if (format == NULL || !PyString_Check(format) || args == NULL) { - PyErr_BadInternalCall(); - return NULL; - } - orig_args = args; - fmt = PyString_AS_STRING(format); - fmtcnt = PyString_GET_SIZE(format); - reslen = rescnt = fmtcnt + 100; - result = PyString_FromStringAndSize((char *)NULL, reslen); - if (result == NULL) - return NULL; - res = PyString_AsString(result); - if (PyTuple_Check(args)) { - arglen = PyTuple_GET_SIZE(args); - argidx = 0; - } - else { - arglen = -1; - argidx = -2; - } - if (Py_Type(args)->tp_as_mapping && !PyTuple_Check(args) && - !PyString_Check(args) && !PyUnicode_Check(args)) - dict = args; - while (--fmtcnt >= 0) { - if (*fmt != '%') { - if (--rescnt < 0) { - rescnt = fmtcnt + 100; - reslen += rescnt; - if (_PyString_Resize(&result, reslen) < 0) - return NULL; - res = PyString_AS_STRING(result) - + reslen - rescnt; - --rescnt; - } - *res++ = *fmt++; - } - else { - /* Got a format specifier */ - int flags = 0; - Py_ssize_t width = -1; - int prec = -1; - int c = '\0'; - int fill; - PyObject *v = NULL; - PyObject *temp = NULL; - char *pbuf; - int sign; - Py_ssize_t len; - char formatbuf[FORMATBUFLEN]; - /* For format{float,int,char}() */ - char *fmt_start = fmt; - Py_ssize_t argidx_start = argidx; - - fmt++; - if (*fmt == '(') { - char *keystart; - Py_ssize_t keylen; - PyObject *key; - int pcount = 1; - - if (dict == NULL) { - PyErr_SetString(PyExc_TypeError, - "format requires a mapping"); - goto error; - } - ++fmt; - --fmtcnt; - keystart = fmt; - /* Skip over balanced parentheses */ - while (pcount > 0 && --fmtcnt >= 0) { - if (*fmt == ')') - --pcount; - else if (*fmt == '(') - ++pcount; - fmt++; - } - keylen = fmt - keystart - 1; - if (fmtcnt < 0 || 
pcount > 0) { - PyErr_SetString(PyExc_ValueError, - "incomplete format key"); - goto error; - } - key = PyString_FromStringAndSize(keystart, - keylen); - if (key == NULL) - goto error; - if (args_owned) { - Py_DECREF(args); - args_owned = 0; - } - args = PyObject_GetItem(dict, key); - Py_DECREF(key); - if (args == NULL) { - goto error; - } - args_owned = 1; - arglen = -1; - argidx = -2; - } - while (--fmtcnt >= 0) { - switch (c = *fmt++) { - case '-': flags |= F_LJUST; continue; - case '+': flags |= F_SIGN; continue; - case ' ': flags |= F_BLANK; continue; - case '#': flags |= F_ALT; continue; - case '0': flags |= F_ZERO; continue; - } - break; - } - if (c == '*') { - v = getnextarg(args, arglen, &argidx); - if (v == NULL) - goto error; - if (!PyInt_Check(v)) { - PyErr_SetString(PyExc_TypeError, - "* wants int"); - goto error; - } - width = PyInt_AsLong(v); - if (width == -1 && PyErr_Occurred()) - goto error; - if (width < 0) { - flags |= F_LJUST; - width = -width; - } - if (--fmtcnt >= 0) - c = *fmt++; - } - else if (c >= 0 && ISDIGIT(c)) { - width = c - '0'; - while (--fmtcnt >= 0) { - c = Py_CHARMASK(*fmt++); - if (!ISDIGIT(c)) - break; - if ((width*10) / 10 != width) { - PyErr_SetString( - PyExc_ValueError, - "width too big"); - goto error; - } - width = width*10 + (c - '0'); - } - } - if (c == '.') { - prec = 0; - if (--fmtcnt >= 0) - c = *fmt++; - if (c == '*') { - v = getnextarg(args, arglen, &argidx); - if (v == NULL) - goto error; - if (!PyInt_Check(v)) { - PyErr_SetString( - PyExc_TypeError, - "* wants int"); - goto error; - } - prec = PyInt_AsLong(v); - if (prec == -1 && PyErr_Occurred()) - goto error; - if (prec < 0) - prec = 0; - if (--fmtcnt >= 0) - c = *fmt++; - } - else if (c >= 0 && ISDIGIT(c)) { - prec = c - '0'; - while (--fmtcnt >= 0) { - c = Py_CHARMASK(*fmt++); - if (!ISDIGIT(c)) - break; - if ((prec*10) / 10 != prec) { - PyErr_SetString( - PyExc_ValueError, - "prec too big"); - goto error; - } - prec = prec*10 + (c - '0'); - } - } - } /* prec 
*/ - if (fmtcnt >= 0) { - if (c == 'h' || c == 'l' || c == 'L') { - if (--fmtcnt >= 0) - c = *fmt++; - } - } - if (fmtcnt < 0) { - PyErr_SetString(PyExc_ValueError, - "incomplete format"); - goto error; - } - if (c != '%') { - v = getnextarg(args, arglen, &argidx); - if (v == NULL) - goto error; - } - sign = 0; - fill = ' '; - switch (c) { - case '%': - pbuf = "%"; - len = 1; - break; - case 's': - if (PyUnicode_Check(v)) { - fmt = fmt_start; - argidx = argidx_start; - goto unicode; - } - temp = _PyObject_Str(v); - if (temp != NULL && PyUnicode_Check(temp)) { - Py_DECREF(temp); - fmt = fmt_start; - argidx = argidx_start; - goto unicode; - } - /* Fall through */ - case 'r': - if (c == 'r') - temp = PyObject_ReprStr8(v); - if (temp == NULL) - goto error; - if (!PyString_Check(temp)) { - PyErr_SetString(PyExc_TypeError, - "%s argument has non-string str()/repr()"); - Py_DECREF(temp); - goto error; - } - pbuf = PyString_AS_STRING(temp); - len = PyString_GET_SIZE(temp); - if (prec >= 0 && len > prec) - len = prec; - break; - case 'i': - case 'd': - case 'u': - case 'o': - case 'x': - case 'X': - if (c == 'i') - c = 'd'; - if (PyLong_Check(v)) { - int ilen; - temp = _PyString_FormatLong(v, flags, - prec, c, &pbuf, &ilen); - len = ilen; - if (!temp) - goto error; - sign = 1; - } - else { - pbuf = formatbuf; - len = formatint(pbuf, - sizeof(formatbuf), - flags, prec, c, v); - if (len < 0) - goto error; - sign = 1; - } - if (flags & F_ZERO) - fill = '0'; - break; - case 'e': - case 'E': - case 'f': - case 'F': - case 'g': - case 'G': - if (c == 'F') - c = 'f'; - pbuf = formatbuf; - len = formatfloat(pbuf, sizeof(formatbuf), - flags, prec, c, v); - if (len < 0) - goto error; - sign = 1; - if (flags & F_ZERO) - fill = '0'; - break; - case 'c': - if (PyUnicode_Check(v)) { - fmt = fmt_start; - argidx = argidx_start; - goto unicode; - } - pbuf = formatbuf; - len = formatchar(pbuf, sizeof(formatbuf), v); - if (len < 0) - goto error; - break; - default: - 
PyErr_Format(PyExc_ValueError, - "unsupported format character '%c' (0x%x) " - "at index %zd", - c, c, - (Py_ssize_t)(fmt - 1 - - PyString_AsString(format))); - goto error; - } - if (sign) { - if (*pbuf == '-' || *pbuf == '+') { - sign = *pbuf++; - len--; - } - else if (flags & F_SIGN) - sign = '+'; - else if (flags & F_BLANK) - sign = ' '; - else - sign = 0; - } - if (width < len) - width = len; - if (rescnt - (sign != 0) < width) { - reslen -= rescnt; - rescnt = width + fmtcnt + 100; - reslen += rescnt; - if (reslen < 0) { - Py_DECREF(result); - Py_XDECREF(temp); - return PyErr_NoMemory(); - } - if (_PyString_Resize(&result, reslen) < 0) { - Py_XDECREF(temp); - return NULL; - } - res = PyString_AS_STRING(result) - + reslen - rescnt; - } - if (sign) { - if (fill != ' ') - *res++ = sign; - rescnt--; - if (width > len) - width--; - } - if ((flags & F_ALT) && - (c == 'x' || c == 'X' || c == 'o')) { - assert(pbuf[0] == '0'); - assert(pbuf[1] == c); - if (fill != ' ') { - *res++ = *pbuf++; - *res++ = *pbuf++; - } - rescnt -= 2; - width -= 2; - if (width < 0) - width = 0; - len -= 2; - } - if (width > len && !(flags & F_LJUST)) { - do { - --rescnt; - *res++ = fill; - } while (--width > len); - } - if (fill == ' ') { - if (sign) - *res++ = sign; - if ((flags & F_ALT) && - (c == 'x' || c == 'X' || c == 'o')) { - assert(pbuf[0] == '0'); - assert(pbuf[1] == c); - *res++ = *pbuf++; - *res++ = *pbuf++; - } - } - Py_MEMCPY(res, pbuf, len); - res += len; - rescnt -= len; - while (--width >= len) { - --rescnt; - *res++ = ' '; - } - if (dict && (argidx < arglen) && c != '%') { - PyErr_SetString(PyExc_TypeError, - "not all arguments converted during string formatting"); - Py_XDECREF(temp); - goto error; - } - Py_XDECREF(temp); - } /* '%' */ - } /* until end */ - if (argidx < arglen && !dict) { - PyErr_SetString(PyExc_TypeError, - "not all arguments converted during string formatting"); - goto error; - } - if (args_owned) { - Py_DECREF(args); - } - _PyString_Resize(&result, reslen 
- rescnt); - return result; - - unicode: - if (args_owned) { - Py_DECREF(args); - args_owned = 0; - } - /* Fiddle args right (remove the first argidx arguments) */ - if (PyTuple_Check(orig_args) && argidx > 0) { - PyObject *v; - Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx; - v = PyTuple_New(n); - if (v == NULL) - goto error; - while (--n >= 0) { - PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx); - Py_INCREF(w); - PyTuple_SET_ITEM(v, n, w); - } - args = v; - } else { - Py_INCREF(orig_args); - args = orig_args; - } - args_owned = 1; - /* Take what we have of the result and let the Unicode formatting - function format the rest of the input. */ - rescnt = res - PyString_AS_STRING(result); - if (_PyString_Resize(&result, rescnt)) - goto error; - fmtcnt = PyString_GET_SIZE(format) - \ - (fmt - PyString_AS_STRING(format)); - format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL); - if (format == NULL) - goto error; - v = PyUnicode_Format(format, args); - Py_DECREF(format); - if (v == NULL) - goto error; - /* Paste what we have (result) to what the Unicode formatting - function returned (v) and return the result (or error) */ - w = PyUnicode_Concat(result, v); - Py_DECREF(result); - Py_DECREF(v); - Py_DECREF(args); - return w; - - error: - Py_DECREF(result); - if (args_owned) { - Py_DECREF(args); - } - return NULL; -} - -void -PyString_InternInPlace(PyObject **p) -{ - register PyStringObject *s = (PyStringObject *)(*p); - PyObject *t; - if (s == NULL || !PyString_Check(s)) - Py_FatalError("PyString_InternInPlace: strings only please!"); - /* If it's a string subclass, we don't really know what putting - it in the interned dict might do. 
*/ - if (!PyString_CheckExact(s)) - return; - if (PyString_CHECK_INTERNED(s)) - return; - if (interned == NULL) { - interned = PyDict_New(); - if (interned == NULL) { - PyErr_Clear(); /* Don't leave an exception */ - return; - } - } - t = PyDict_GetItem(interned, (PyObject *)s); - if (t) { - Py_INCREF(t); - Py_DECREF(*p); - *p = t; - return; - } - - if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) { - PyErr_Clear(); - return; - } - /* The two references in interned are not counted by refcnt. - The string deallocator will take care of this */ - Py_Refcnt(s) -= 2; - PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL; -} - -void -PyString_InternImmortal(PyObject **p) -{ - PyString_InternInPlace(p); - if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) { - PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL; - Py_INCREF(*p); - } -} - - -PyObject * -PyString_InternFromString(const char *cp) -{ - PyObject *s = PyString_FromString(cp); - if (s == NULL) - return NULL; - PyString_InternInPlace(&s); - return s; -} - void PyString_Fini(void) { @@ -4209,58 +3351,6 @@ PyString_Fini(void) nullstring = NULL; } -void _Py_ReleaseInternedStrings(void) -{ - PyObject *keys; - PyStringObject *s; - Py_ssize_t i, n; - Py_ssize_t immortal_size = 0, mortal_size = 0; - - if (interned == NULL || !PyDict_Check(interned)) - return; - keys = PyDict_Keys(interned); - if (keys == NULL || !PyList_Check(keys)) { - PyErr_Clear(); - return; - } - - /* Since _Py_ReleaseInternedStrings() is intended to help a leak - detector, interned strings are not forcibly deallocated; rather, we - give them their stolen references back, and then clear and DECREF - the interned dict. 
*/ - - n = PyList_GET_SIZE(keys); - fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n", - n); - for (i = 0; i < n; i++) { - s = (PyStringObject *) PyList_GET_ITEM(keys, i); - switch (s->ob_sstate) { - case SSTATE_NOT_INTERNED: - /* XXX Shouldn't happen */ - break; - case SSTATE_INTERNED_IMMORTAL: - Py_Refcnt(s) += 1; - immortal_size += Py_Size(s); - break; - case SSTATE_INTERNED_MORTAL: - Py_Refcnt(s) += 2; - mortal_size += Py_Size(s); - break; - default: - Py_FatalError("Inconsistent interned string state."); - } - s->ob_sstate = SSTATE_NOT_INTERNED; - } - fprintf(stderr, "total size of all interned strings: " - "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d " - "mortal/immortal\n", mortal_size, immortal_size); - Py_DECREF(keys); - PyDict_Clear(interned); - Py_DECREF(interned); - interned = NULL; -} - - /*********************** Str Iterator ****************************/ typedef struct { diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 44cf5f1..4266a7c 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1015,7 +1015,7 @@ class_name(PyObject *cls) if (name == NULL) { PyErr_Clear(); Py_XDECREF(name); - name = PyObject_ReprStr8(cls); + name = PyObject_Repr(cls); } if (name == NULL) return NULL; @@ -1654,7 +1654,7 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds) } /* Check arguments: (name, bases, dict) */ - if (!PyArg_ParseTupleAndKeywords(args, kwds, "SO!O!:type", kwlist, + if (!PyArg_ParseTupleAndKeywords(args, kwds, "UO!O!:type", kwlist, &name, &PyTuple_Type, &bases, &PyDict_Type, &dict)) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index c568a8e..ae34c9e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -101,7 +101,7 @@ extern "C" { function will delete the reference from this dictionary. Another way to look at this is that to say that the actual reference - count of a string is: s->ob_refcnt + (s->ob_sstate?2:0) + count of a string is: s->ob_refcnt + (s->state ? 
2 : 0) */ static PyObject *interned; @@ -998,7 +998,10 @@ PyObject *PyUnicode_FromObject(register PyObject *obj) return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(obj), PyUnicode_GET_SIZE(obj)); } - return PyUnicode_FromEncodedObject(obj, NULL, "strict"); + PyErr_Format(PyExc_TypeError, + "Can't convert '%.100s' object to str implicitly", + Py_Type(obj)->tp_name); + return NULL; } PyObject *PyUnicode_FromEncodedObject(register PyObject *obj, @@ -1219,22 +1222,7 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode, v = PyCodec_Encode(unicode, encoding, errors); if (v == NULL) goto onError; - if (!PyBytes_Check(v)) { - if (PyString_Check(v)) { - /* Old codec, turn it into bytes */ - PyObject *b = PyBytes_FromObject(v); - Py_DECREF(v); - return b; - } - PyErr_Format(PyExc_TypeError, - "encoder did not return a bytes object " - "(type=%.400s, encoding=%.20s, errors=%.20s)", - v->ob_type->tp_name, - encoding ? encoding : "NULL", - errors ? errors : "NULL"); - Py_DECREF(v); - goto onError; - } + assert(PyString_Check(v)); return v; onError: @@ -1245,19 +1233,15 @@ PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode, const char *errors) { PyObject *v = ((PyUnicodeObject *)unicode)->defenc; - PyObject *b; if (v) return v; if (errors != NULL) Py_FatalError("non-NULL encoding in _PyUnicode_AsDefaultEncodedString"); - b = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), + v = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), PyUnicode_GET_SIZE(unicode), NULL); - if (!b) + if (!v) return NULL; - v = PyString_FromStringAndSize(PyBytes_AsString(b), - PyBytes_Size(b)); - Py_DECREF(b); ((PyUnicodeObject *)unicode)->defenc = v; return v; } @@ -1420,11 +1404,11 @@ int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject); if (!inputobj) goto onError; - if (!PyBytes_Check(inputobj)) { + if (!PyString_Check(inputobj)) { PyErr_Format(PyExc_TypeError, "exception attribute object must be 
bytes"); } - *input = PyBytes_AS_STRING(inputobj); - insize = PyBytes_GET_SIZE(inputobj); + *input = PyString_AS_STRING(inputobj); + insize = PyString_GET_SIZE(inputobj); *inend = *input + insize; /* we can DECREF safely, as the exception has another reference, so the object won't go away. */ @@ -1674,7 +1658,7 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s, int encodeWhiteSpace, const char *errors) { - PyObject *v; + PyObject *v, *result; /* It might be possible to tighten this worst case */ Py_ssize_t cbAllocated = 5 * size; int inShift = 0; @@ -1685,7 +1669,7 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s, char * start; if (size == 0) - return PyBytes_FromStringAndSize(NULL, 0); + return PyString_FromStringAndSize(NULL, 0); v = PyBytes_FromStringAndSize(NULL, cbAllocated); if (v == NULL) @@ -1757,11 +1741,9 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s, *out++ = '-'; } - if (PyBytes_Resize(v, out - start)) { - Py_DECREF(v); - return NULL; - } - return v; + result = PyString_FromStringAndSize(PyBytes_AS_STRING(v), out - start); + Py_DECREF(v); + return result; } #undef SPECIAL @@ -2001,11 +1983,11 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s, { #define MAX_SHORT_UNICHARS 300 /* largest size we'll do on the stack */ - Py_ssize_t i; /* index into s of next input byte */ - PyObject *v; /* result string object */ - char *p; /* next free byte in output buffer */ - Py_ssize_t nallocated; /* number of result bytes allocated */ - Py_ssize_t nneeded; /* number of result bytes needed */ + Py_ssize_t i; /* index into s of next input byte */ + PyObject *result; /* result string object */ + char *p; /* next free byte in output buffer */ + Py_ssize_t nallocated; /* number of result bytes allocated */ + Py_ssize_t nneeded; /* number of result bytes needed */ char stackbuf[MAX_SHORT_UNICHARS * 4]; assert(s != NULL); @@ -2017,7 +1999,7 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s, * turns out we need. 
*/ nallocated = Py_SAFE_DOWNCAST(sizeof(stackbuf), size_t, int); - v = NULL; /* will allocate after we're done */ + result = NULL; /* will allocate after we're done */ p = stackbuf; } else { @@ -2025,10 +2007,10 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s, nallocated = size * 4; if (nallocated / 4 != size) /* overflow! */ return PyErr_NoMemory(); - v = PyBytes_FromStringAndSize(NULL, nallocated); - if (v == NULL) + result = PyString_FromStringAndSize(NULL, nallocated); + if (result == NULL) return NULL; - p = PyBytes_AS_STRING(v); + p = PyString_AS_STRING(result); } for (i = 0; i < size;) { @@ -2072,19 +2054,19 @@ encodeUCS4: } } - if (v == NULL) { + if (result == NULL) { /* This was stack allocated. */ nneeded = p - stackbuf; assert(nneeded <= nallocated); - v = PyBytes_FromStringAndSize(stackbuf, nneeded); + result = PyString_FromStringAndSize(stackbuf, nneeded); } else { /* Cut back to size actually needed. */ - nneeded = p - PyBytes_AS_STRING(v); + nneeded = p - PyString_AS_STRING(result); assert(nneeded <= nallocated); - PyBytes_Resize(v, nneeded); + _PyString_Resize(&result, nneeded); } - return v; + return result; #undef MAX_SHORT_UNICHARS } @@ -2279,7 +2261,7 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s, const char *errors, int byteorder) { - PyObject *v; + PyObject *v, *result; unsigned char *p; #ifndef Py_UNICODE_WIDE int i, pairs; @@ -2319,7 +2301,7 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s, if (byteorder == 0) STORECHAR(0xFEFF); if (size == 0) - return v; + goto done; if (byteorder == -1) { /* force LE */ @@ -2350,7 +2332,11 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s, #endif STORECHAR(ch); } - return v; + + done: + result = PyString_FromStringAndSize(PyBytes_AS_STRING(v), Py_Size(v)); + Py_DECREF(v); + return result; #undef STORECHAR } @@ -2549,7 +2535,7 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s, const char *errors, int byteorder) { - PyObject *v; + PyObject *v, *result; unsigned char *p; #ifdef Py_UNICODE_WIDE int i, pairs; @@ -2584,7 +2570,7 
@@ PyUnicode_EncodeUTF16(const Py_UNICODE *s, if (byteorder == 0) STORECHAR(0xFEFF); if (size == 0) - return v; + goto done; if (byteorder == -1) { /* force LE */ @@ -2610,7 +2596,11 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s, if (ch2) STORECHAR(ch2); } - return v; + + done: + result = PyString_FromStringAndSize(PyBytes_AS_STRING(v), Py_Size(v)); + Py_DECREF(v); + return result; #undef STORECHAR } @@ -2900,7 +2890,7 @@ static const char *hexdigits = "0123456789abcdef"; PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s, Py_ssize_t size) { - PyObject *repr; + PyObject *repr, *result; char *p; /* XXX(nnorwitz): rather than over-allocating, it would be @@ -3023,12 +3013,10 @@ PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s, *p++ = (char) ch; } - *p = '\0'; - if (PyBytes_Resize(repr, p - PyBytes_AS_STRING(repr))) { - Py_DECREF(repr); - return NULL; - } - return repr; + result = PyString_FromStringAndSize(PyBytes_AS_STRING(repr), + p - PyBytes_AS_STRING(repr)); + Py_DECREF(repr); + return result; } PyObject *PyUnicode_AsUnicodeEscapeString(PyObject *unicode) @@ -3159,7 +3147,7 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s, PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s, Py_ssize_t size) { - PyObject *repr; + PyObject *repr, *result; char *p; char *q; @@ -3171,7 +3159,7 @@ PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s, if (repr == NULL) return NULL; if (size == 0) - return repr; + goto done; p = q = PyBytes_AS_STRING(repr); while (size-- > 0) { @@ -3205,12 +3193,12 @@ PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s, else *p++ = (char) ch; } - *p = '\0'; - if (PyBytes_Resize(repr, p - q)) { - Py_DECREF(repr); - return NULL; - } - return repr; + size = p - q; + + done: + result = PyString_FromStringAndSize(PyBytes_AS_STRING(repr), size); + Py_DECREF(repr); + return result; } PyObject *PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode) @@ -3445,23 +3433,23 @@ static PyObject 
*unicode_encode_ucs1(const Py_UNICODE *p, /* pointer into the output */ char *str; /* current output position */ - Py_ssize_t respos = 0; Py_ssize_t ressize; const char *encoding = (limit == 256) ? "latin-1" : "ascii"; const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)"; PyObject *errorHandler = NULL; PyObject *exc = NULL; + PyObject *result = NULL; /* the following variable is used for caching string comparisons * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */ int known_errorHandler = -1; /* allocate enough for a simple encoding without replacements, if we need more, we'll resize */ + if (size == 0) + return PyString_FromStringAndSize(NULL, 0); res = PyBytes_FromStringAndSize(NULL, size); if (res == NULL) - goto onError; - if (size == 0) - return res; + return NULL; str = PyBytes_AS_STRING(res); ressize = size; @@ -3589,20 +3577,13 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, } } } - /* Resize if we allocated to much */ - respos = str - PyBytes_AS_STRING(res); - if (respos<ressize) - /* If this falls res will be NULL */ - PyBytes_Resize(res, respos); - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - return res; - - onError: - Py_XDECREF(res); + result = PyString_FromStringAndSize(PyBytes_AS_STRING(res), + str - PyBytes_AS_STRING(res)); + onError: + Py_DECREF(res); Py_XDECREF(errorHandler); Py_XDECREF(exc); - return NULL; + return result; } PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p, @@ -3848,20 +3829,20 @@ static int encode_mbcs(PyObject **repr, if (*repr == NULL) { /* Create string object */ - *repr = PyBytes_FromStringAndSize(NULL, mbcssize); + *repr = PyString_FromStringAndSize(NULL, mbcssize); if (*repr == NULL) return -1; } else { /* Extend string object */ - n = PyBytes_Size(*repr); - if (PyBytes_Resize(*repr, n + mbcssize) < 0) + n = PyString_Size(*repr); + if (_PyString_Resize(repr, n + mbcssize) < 0) return -1; } /* Do the conversion */ if (size > 0) 
{ - char *s = PyBytes_AS_STRING(*repr) + n; + char *s = PyString_AS_STRING(*repr) + n; if (0 == WideCharToMultiByte(CP_ACP, 0, p, size, s, mbcssize, NULL, NULL)) { PyErr_SetFromWindowsErrWithFilename(0, NULL); return -1; @@ -4341,16 +4322,14 @@ static PyObject *charmapencode_lookup(Py_UNICODE c, PyObject *mapping) } static int -charmapencode_resize(PyObject *outobj, Py_ssize_t *outpos, Py_ssize_t requiredsize) +charmapencode_resize(PyObject **outobj, Py_ssize_t *outpos, Py_ssize_t requiredsize) { - Py_ssize_t outsize = PyBytes_GET_SIZE( outobj); + Py_ssize_t outsize = PyString_GET_SIZE(*outobj); /* exponentially overallocate to minimize reallocations */ if (requiredsize < 2*outsize) requiredsize = 2*outsize; - if (PyBytes_Resize(outobj, requiredsize)) { - Py_DECREF(outobj); + if (_PyString_Resize(outobj, requiredsize)) return -1; - } return 0; } @@ -4365,21 +4344,21 @@ typedef enum charmapencode_result { reallocation error occurred. The caller must decref the result */ static charmapencode_result charmapencode_output(Py_UNICODE c, PyObject *mapping, - PyObject *outobj, Py_ssize_t *outpos) + PyObject **outobj, Py_ssize_t *outpos) { PyObject *rep; char *outstart; - Py_ssize_t outsize = PyBytes_GET_SIZE(outobj); + Py_ssize_t outsize = PyString_GET_SIZE(*outobj); if (Py_Type(mapping) == &EncodingMapType) { int res = encoding_map_lookup(c, mapping); Py_ssize_t requiredsize = *outpos+1; if (res == -1) return enc_FAILED; - if (outsize<requiredsize) + if (outsize<requiredsize) if (charmapencode_resize(outobj, outpos, requiredsize)) return enc_EXCEPTION; - outstart = PyBytes_AS_STRING(outobj); + outstart = PyString_AS_STRING(*outobj); outstart[(*outpos)++] = (char)res; return enc_SUCCESS; } @@ -4398,7 +4377,7 @@ charmapencode_result charmapencode_output(Py_UNICODE c, PyObject *mapping, Py_DECREF(rep); return enc_EXCEPTION; } - outstart = PyBytes_AS_STRING(outobj); + outstart = PyString_AS_STRING(*outobj); outstart[(*outpos)++] = (char)PyInt_AS_LONG(rep); } else { @@ -4410,7 
+4389,7 @@ charmapencode_result charmapencode_output(Py_UNICODE c, PyObject *mapping, Py_DECREF(rep); return enc_EXCEPTION; } - outstart = PyBytes_AS_STRING(outobj); + outstart = PyString_AS_STRING(*outobj); memcpy(outstart + *outpos, repchars, repsize); *outpos += repsize; } @@ -4426,7 +4405,7 @@ int charmap_encoding_error( const Py_UNICODE *p, Py_ssize_t size, Py_ssize_t *inpos, PyObject *mapping, PyObject **exceptionObject, int *known_errorHandler, PyObject **errorHandler, const char *errors, - PyObject *res, Py_ssize_t *respos) + PyObject **res, Py_ssize_t *respos) { PyObject *repunicode = NULL; /* initialize to prevent gcc warning */ Py_ssize_t repsize; @@ -4561,7 +4540,7 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p, /* allocate enough for a simple encoding without replacements, if we need more, we'll resize */ - res = PyBytes_FromStringAndSize(NULL, size); + res = PyString_FromStringAndSize(NULL, size); if (res == NULL) goto onError; if (size == 0) @@ -4569,14 +4548,14 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p, while (inpos<size) { /* try to encode it */ - charmapencode_result x = charmapencode_output(p[inpos], mapping, res, &respos); + charmapencode_result x = charmapencode_output(p[inpos], mapping, &res, &respos); if (x==enc_EXCEPTION) /* error */ goto onError; if (x==enc_FAILED) { /* unencodable character */ if (charmap_encoding_error(p, size, &inpos, mapping, &exc, &known_errorHandler, &errorHandler, errors, - res, &respos)) { + &res, &respos)) { goto onError; } } @@ -4586,10 +4565,9 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p, } /* Resize if we allocated to much */ - if (respos<PyBytes_GET_SIZE(res)) { - if (PyBytes_Resize(res, respos)) - goto onError; - } + if (respos<PyString_GET_SIZE(res)) + _PyString_Resize(&res, respos); + Py_XDECREF(exc); Py_XDECREF(errorHandler); return res; @@ -5483,20 +5461,14 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) item = PySequence_Fast_GET_ITEM(fseq, i); /* Convert item to 
Unicode. */ - if (!PyString_Check(item) && !PyUnicode_Check(item)) - { - if (PyBytes_Check(item)) - { - PyErr_Format(PyExc_TypeError, - "sequence item %d: join() will not operate on " - "bytes objects", i); - goto onError; - } - item = PyObject_Unicode(item); + if (!PyUnicode_Check(item)) { + PyErr_Format(PyExc_TypeError, + "sequence item %zd: expected str instance," + " %.80s found", + i, Py_Type(item)->tp_name); + goto onError; } - else - item = PyUnicode_FromObject(item); - + item = PyUnicode_FromObject(item); if (item == NULL) goto onError; /* We own a reference to item from here on. */ @@ -6396,9 +6368,6 @@ PyObject *PyUnicode_Concat(PyObject *left, { PyUnicodeObject *u = NULL, *v = NULL, *w; - if (PyBytes_Check(left) || PyBytes_Check(right)) - return PyBytes_Concat(left, right); - /* Coerce the two arguments */ u = (PyUnicodeObject *)PyUnicode_FromObject(left); if (u == NULL) @@ -6515,7 +6484,7 @@ unicode_encode(PyUnicodeObject *self, PyObject *args) v = PyUnicode_AsEncodedObject((PyObject *)self, encoding, errors); if (v == NULL) goto onError; - if (!PyBytes_Check(v)) { + if (!PyString_Check(v)) { PyErr_Format(PyExc_TypeError, "encoder did not return a bytes object " "(type=%.400s)", @@ -8232,12 +8201,6 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) return NULL; } -#define F_LJUST (1<<0) -#define F_SIGN (1<<1) -#define F_BLANK (1<<2) -#define F_ALT (1<<3) -#define F_ZERO (1<<4) - static Py_ssize_t strtounicode(Py_UNICODE *buffer, const char *charbuffer) { |