summary | refs | log | tree | commit | diff | stats
path: root/Objects
diff options
context:
space:
mode:
author Guido van Rossum <guido@python.org> 2007-11-06 21:34:58 (GMT)
committer Guido van Rossum <guido@python.org> 2007-11-06 21:34:58 (GMT)
commit 98297ee7815939b124156e438b22bd652d67b5db (patch)
tree a9d239ebd87c73af2571ab48003984c4e18e27e5 /Objects
parent a19f80c6df2df5e8a5d0cff37131097835ef971e (diff)
download cpython-98297ee7815939b124156e438b22bd652d67b5db.zip
cpython-98297ee7815939b124156e438b22bd652d67b5db.tar.gz
cpython-98297ee7815939b124156e438b22bd652d67b5db.tar.bz2
Merging the py3k-pep3137 branch back into the py3k branch.
No detailed change log; just check out the change log for the py3k-pep3137 branch. The most obvious changes:

- str8 renamed to bytes (PyString at the C level);
- bytes renamed to buffer (PyBytes at the C level);
- PyString and PyUnicode are no longer compatible.

I.e. we now have an immutable bytes type and a mutable bytes type.

The behavior of PyString was modified quite a bit, to make it more bytes-like. Some changes are still on the to-do list.
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/abstract.c 105
-rw-r--r-- Objects/bytesobject.c 396
-rw-r--r-- Objects/codeobject.c 12
-rw-r--r-- Objects/exceptions.c 115
-rw-r--r-- Objects/fileobject.c 2
-rw-r--r-- Objects/longobject.c 20
-rw-r--r-- Objects/moduleobject.c 2
-rw-r--r-- Objects/object.c 168
-rw-r--r-- Objects/stringlib/transmogrify.h 14
-rw-r--r-- Objects/stringobject.c 1604
-rw-r--r-- Objects/typeobject.c 4
-rw-r--r-- Objects/unicodeobject.c 225
12 files changed, 862 insertions, 1805 deletions
diff --git a/Objects/abstract.c b/Objects/abstract.c
index e848f8f..01fbcbf 100644
--- a/Objects/abstract.c
+++ b/Objects/abstract.c
@@ -216,7 +216,7 @@ PyObject_DelItemString(PyObject *o, char *key)
}
/* We release the buffer right after use of this function which could
- cause issues later on. Don't use these functions in new code.
+ cause issues later on. Don't use these functions in new code.
*/
int
PyObject_AsCharBuffer(PyObject *obj,
@@ -248,7 +248,7 @@ PyObject_AsCharBuffer(PyObject *obj,
int
PyObject_CheckReadBuffer(PyObject *obj)
{
- PyBufferProcs *pb = obj->ob_type->tp_as_buffer;
+ PyBufferProcs *pb = obj->ob_type->tp_as_buffer;
if (pb == NULL ||
pb->bf_getbuffer == NULL)
@@ -305,7 +305,7 @@ int PyObject_AsWriteBuffer(PyObject *obj,
if (pb == NULL ||
pb->bf_getbuffer == NULL ||
((*pb->bf_getbuffer)(obj, &view, PyBUF_WRITABLE) != 0)) {
- PyErr_SetString(PyExc_TypeError,
+ PyErr_SetString(PyExc_TypeError,
"expected an object with a writable buffer interface");
return -1;
}
@@ -323,8 +323,9 @@ int
PyObject_GetBuffer(PyObject *obj, Py_buffer *view, int flags)
{
if (!PyObject_CheckBuffer(obj)) {
- PyErr_SetString(PyExc_TypeError,
- "object does not have the buffer interface");
+ PyErr_Format(PyExc_TypeError,
+ "'%100s' does not have the buffer interface",
+ Py_Type(obj)->tp_name);
return -1;
}
return (*(obj->ob_type->tp_as_buffer->bf_getbuffer))(obj, view, flags);
@@ -333,7 +334,7 @@ PyObject_GetBuffer(PyObject *obj, Py_buffer *view, int flags)
void
PyObject_ReleaseBuffer(PyObject *obj, Py_buffer *view)
{
- if (obj->ob_type->tp_as_buffer != NULL &&
+ if (obj->ob_type->tp_as_buffer != NULL &&
obj->ob_type->tp_as_buffer->bf_releasebuffer != NULL) {
(*(obj->ob_type->tp_as_buffer->bf_releasebuffer))(obj, view);
}
@@ -345,7 +346,7 @@ _IsFortranContiguous(Py_buffer *view)
{
Py_ssize_t sd, dim;
int i;
-
+
if (view->ndim == 0) return 1;
if (view->strides == NULL) return (view->ndim == 1);
@@ -366,7 +367,7 @@ _IsCContiguous(Py_buffer *view)
{
Py_ssize_t sd, dim;
int i;
-
+
if (view->ndim == 0) return 1;
if (view->strides == NULL) return 1;
@@ -379,7 +380,7 @@ _IsCContiguous(Py_buffer *view)
if (view->strides[i] != sd) return 0;
sd *= dim;
}
- return 1;
+ return 1;
}
int
@@ -390,7 +391,7 @@ PyBuffer_IsContiguous(Py_buffer *view, char fort)
if (fort == 'C')
return _IsCContiguous(view);
- else if (fort == 'F')
+ else if (fort == 'F')
return _IsFortranContiguous(view);
else if (fort == 'A')
return (_IsCContiguous(view) || _IsFortranContiguous(view));
@@ -398,7 +399,7 @@ PyBuffer_IsContiguous(Py_buffer *view, char fort)
}
-void*
+void*
PyBuffer_GetPointer(Py_buffer *view, Py_ssize_t *indices)
{
char* pointer;
@@ -414,11 +415,11 @@ PyBuffer_GetPointer(Py_buffer *view, Py_ssize_t *indices)
}
-void
+void
_add_one_to_index_F(int nd, Py_ssize_t *index, Py_ssize_t *shape)
{
int k;
-
+
for (k=0; k<nd; k++) {
if (index[k] < shape[k]-1) {
index[k]++;
@@ -430,7 +431,7 @@ _add_one_to_index_F(int nd, Py_ssize_t *index, Py_ssize_t *shape)
}
}
-void
+void
_add_one_to_index_C(int nd, Py_ssize_t *index, Py_ssize_t *shape)
{
int k;
@@ -447,11 +448,11 @@ _add_one_to_index_C(int nd, Py_ssize_t *index, Py_ssize_t *shape)
}
/* view is not checked for consistency in either of these. It is
- assumed that the size of the buffer is view->len in
+ assumed that the size of the buffer is view->len in
view->len / view->itemsize elements.
*/
-int
+int
PyBuffer_ToContiguous(void *buf, Py_buffer *view, Py_ssize_t len, char fort)
{
int k;
@@ -462,7 +463,7 @@ PyBuffer_ToContiguous(void *buf, Py_buffer *view, Py_ssize_t len, char fort)
if (len > view->len) {
len = view->len;
}
-
+
if (PyBuffer_IsContiguous(view, fort)) {
/* simplest copy is all that is needed */
memcpy(buf, view->buf, len);
@@ -470,7 +471,7 @@ PyBuffer_ToContiguous(void *buf, Py_buffer *view, Py_ssize_t len, char fort)
}
/* Otherwise a more elaborate scheme is needed */
-
+
/* XXX(nnorwitz): need to check for overflow! */
indices = (Py_ssize_t *)PyMem_Malloc(sizeof(Py_ssize_t)*(view->ndim));
if (indices == NULL) {
@@ -480,7 +481,7 @@ PyBuffer_ToContiguous(void *buf, Py_buffer *view, Py_ssize_t len, char fort)
for (k=0; k<view->ndim;k++) {
indices[k] = 0;
}
-
+
if (fort == 'F') {
addone = _add_one_to_index_F;
}
@@ -489,7 +490,7 @@ PyBuffer_ToContiguous(void *buf, Py_buffer *view, Py_ssize_t len, char fort)
}
dest = buf;
/* XXX : This is not going to be the fastest code in the world
- several optimizations are possible.
+ several optimizations are possible.
*/
elements = len / view->itemsize;
while (elements--) {
@@ -497,7 +498,7 @@ PyBuffer_ToContiguous(void *buf, Py_buffer *view, Py_ssize_t len, char fort)
ptr = PyBuffer_GetPointer(view, indices);
memcpy(dest, ptr, view->itemsize);
dest += view->itemsize;
- }
+ }
PyMem_Free(indices);
return 0;
}
@@ -521,7 +522,7 @@ PyBuffer_FromContiguous(Py_buffer *view, void *buf, Py_ssize_t len, char fort)
}
/* Otherwise a more elaborate scheme is needed */
-
+
/* XXX(nnorwitz): need to check for overflow! */
indices = (Py_ssize_t *)PyMem_Malloc(sizeof(Py_ssize_t)*(view->ndim));
if (indices == NULL) {
@@ -531,7 +532,7 @@ PyBuffer_FromContiguous(Py_buffer *view, void *buf, Py_ssize_t len, char fort)
for (k=0; k<view->ndim;k++) {
indices[k] = 0;
}
-
+
if (fort == 'F') {
addone = _add_one_to_index_F;
}
@@ -540,7 +541,7 @@ PyBuffer_FromContiguous(Py_buffer *view, void *buf, Py_ssize_t len, char fort)
}
src = buf;
/* XXX : This is not going to be the fastest code in the world
- several optimizations are possible.
+ several optimizations are possible.
*/
elements = len / view->itemsize;
while (elements--) {
@@ -549,12 +550,12 @@ PyBuffer_FromContiguous(Py_buffer *view, void *buf, Py_ssize_t len, char fort)
memcpy(ptr, src, view->itemsize);
src += view->itemsize;
}
-
+
PyMem_Free(indices);
return 0;
}
-int PyObject_CopyData(PyObject *dest, PyObject *src)
+int PyObject_CopyData(PyObject *dest, PyObject *src)
{
Py_buffer view_dest, view_src;
int k;
@@ -576,16 +577,16 @@ int PyObject_CopyData(PyObject *dest, PyObject *src)
}
if (view_dest.len < view_src.len) {
- PyErr_SetString(PyExc_BufferError,
+ PyErr_SetString(PyExc_BufferError,
"destination is too small to receive data from source");
PyObject_ReleaseBuffer(dest, &view_dest);
PyObject_ReleaseBuffer(src, &view_src);
return -1;
}
- if ((PyBuffer_IsContiguous(&view_dest, 'C') &&
+ if ((PyBuffer_IsContiguous(&view_dest, 'C') &&
PyBuffer_IsContiguous(&view_src, 'C')) ||
- (PyBuffer_IsContiguous(&view_dest, 'F') &&
+ (PyBuffer_IsContiguous(&view_dest, 'F') &&
PyBuffer_IsContiguous(&view_src, 'F'))) {
/* simplest copy is all that is needed */
memcpy(view_dest.buf, view_src.buf, view_src.len);
@@ -595,7 +596,7 @@ int PyObject_CopyData(PyObject *dest, PyObject *src)
}
/* Otherwise a more elaborate copy scheme is needed */
-
+
/* XXX(nnorwitz): need to check for overflow! */
indices = (Py_ssize_t *)PyMem_Malloc(sizeof(Py_ssize_t)*view_src.ndim);
if (indices == NULL) {
@@ -606,7 +607,7 @@ int PyObject_CopyData(PyObject *dest, PyObject *src)
}
for (k=0; k<view_src.ndim;k++) {
indices[k] = 0;
- }
+ }
elements = 1;
for (k=0; k<view_src.ndim; k++) {
/* XXX(nnorwitz): can this overflow? */
@@ -617,7 +618,7 @@ int PyObject_CopyData(PyObject *dest, PyObject *src)
dptr = PyBuffer_GetPointer(&view_dest, indices);
sptr = PyBuffer_GetPointer(&view_src, indices);
memcpy(dptr, sptr, view_src.itemsize);
- }
+ }
PyMem_Free(indices);
PyObject_ReleaseBuffer(dest, &view_dest);
PyObject_ReleaseBuffer(src, &view_src);
@@ -631,13 +632,13 @@ PyBuffer_FillContiguousStrides(int nd, Py_ssize_t *shape,
{
int k;
Py_ssize_t sd;
-
+
sd = itemsize;
if (fort == 'F') {
for (k=0; k<nd; k++) {
strides[k] = sd;
sd *= shape[k];
- }
+ }
}
else {
for (k=nd-1; k>=0; k--) {
@@ -651,11 +652,11 @@ PyBuffer_FillContiguousStrides(int nd, Py_ssize_t *shape,
int
PyBuffer_FillInfo(Py_buffer *view, void *buf, Py_ssize_t len,
int readonly, int flags)
-{
+{
if (view == NULL) return 0;
- if (((flags & PyBUF_LOCK) == PyBUF_LOCK) &&
+ if (((flags & PyBUF_LOCK) == PyBUF_LOCK) &&
readonly >= 0) {
- PyErr_SetString(PyExc_BufferError,
+ PyErr_SetString(PyExc_BufferError,
"Cannot lock this object.");
return -1;
}
@@ -665,13 +666,13 @@ PyBuffer_FillInfo(Py_buffer *view, void *buf, Py_ssize_t len,
"Object is not writable.");
return -1;
}
-
+
view->buf = buf;
view->len = len;
view->readonly = readonly;
view->itemsize = 1;
view->format = NULL;
- if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT)
+ if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT)
view->format = "B";
view->ndim = 1;
view->shape = NULL;
@@ -1143,9 +1144,9 @@ PyNumber_Absolute(PyObject *o)
return type_error("bad operand type for abs(): '%.200s'", o);
}
-/* Return a Python Int or Long from the object item
+/* Return a Python Int or Long from the object item
Raise TypeError if the result is not an int-or-long
- or if the object cannot be interpreted as an index.
+ or if the object cannot be interpreted as an index.
*/
PyObject *
PyNumber_Index(PyObject *item)
@@ -1193,19 +1194,19 @@ PyNumber_AsSsize_t(PyObject *item, PyObject *err)
goto finish;
/* Error handling code -- only manage OverflowError differently */
- if (!PyErr_GivenExceptionMatches(runerr, PyExc_OverflowError))
+ if (!PyErr_GivenExceptionMatches(runerr, PyExc_OverflowError))
goto finish;
PyErr_Clear();
- /* If no error-handling desired then the default clipping
+ /* If no error-handling desired then the default clipping
is sufficient.
*/
if (!err) {
assert(PyLong_Check(value));
- /* Whether or not it is less than or equal to
+ /* Whether or not it is less than or equal to
zero is determined by the sign of ob_size
*/
- if (_PyLong_Sign(value) < 0)
+ if (_PyLong_Sign(value) < 0)
result = PY_SSIZE_T_MIN;
else
result = PY_SSIZE_T_MAX;
@@ -1213,10 +1214,10 @@ PyNumber_AsSsize_t(PyObject *item, PyObject *err)
else {
/* Otherwise replace the error with caller's error object. */
PyErr_Format(err,
- "cannot fit '%.200s' into an index-sized integer",
- item->ob_type->tp_name);
+ "cannot fit '%.200s' into an index-sized integer",
+ item->ob_type->tp_name);
}
-
+
finish:
Py_DECREF(value);
return result;
@@ -1679,7 +1680,7 @@ PySequence_Tuple(PyObject *v)
if (j >= n) {
Py_ssize_t oldn = n;
/* The over-allocation strategy can grow a bit faster
- than for lists because unlike lists the
+ than for lists because unlike lists the
over-allocation isn't permanent -- we reclaim
the excess before the end of this routine.
So, grow by ten and then add 25%.
@@ -1690,7 +1691,7 @@ PySequence_Tuple(PyObject *v)
/* Check for overflow */
PyErr_NoMemory();
Py_DECREF(item);
- goto Fail;
+ goto Fail;
}
if (_PyTuple_Resize(&result, n) != 0) {
Py_DECREF(item);
@@ -2147,7 +2148,7 @@ PyObject_CallMethod(PyObject *o, char *name, char *format, ...)
}
if (!PyCallable_Check(func)) {
- type_error("attribute of type '%.200s' is not callable", func);
+ type_error("attribute of type '%.200s' is not callable", func);
goto exit;
}
@@ -2186,7 +2187,7 @@ _PyObject_CallMethod_SizeT(PyObject *o, char *name, char *format, ...)
}
if (!PyCallable_Check(func)) {
- type_error("attribute of type '%.200s' is not callable", func);
+ type_error("attribute of type '%.200s' is not callable", func);
goto exit;
}
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 3f2dbc2..b28cacf 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -1,7 +1,5 @@
/* Bytes object implementation */
-/* XXX TO DO: optimizations */
-
#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include "structmember.h"
@@ -214,26 +212,21 @@ PyBytes_Concat(PyObject *a, PyObject *b)
{
Py_ssize_t size;
Py_buffer va, vb;
- PyBytesObject *result;
+ PyBytesObject *result = NULL;
va.len = -1;
vb.len = -1;
if (_getbuffer(a, &va) < 0 ||
_getbuffer(b, &vb) < 0) {
- if (va.len != -1)
- PyObject_ReleaseBuffer(a, &va);
- if (vb.len != -1)
- PyObject_ReleaseBuffer(b, &vb);
PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Py_Type(a)->tp_name, Py_Type(b)->tp_name);
- return NULL;
+ goto done;
}
size = va.len + vb.len;
if (size < 0) {
- PyObject_ReleaseBuffer(a, &va);
- PyObject_ReleaseBuffer(b, &vb);
return PyErr_NoMemory();
+ goto done;
}
result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
@@ -242,8 +235,11 @@ PyBytes_Concat(PyObject *a, PyObject *b)
memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
}
- PyObject_ReleaseBuffer(a, &va);
- PyObject_ReleaseBuffer(b, &vb);
+ done:
+ if (va.len != -1)
+ PyObject_ReleaseBuffer(a, &va);
+ if (vb.len != -1)
+ PyObject_ReleaseBuffer(b, &vb);
return (PyObject *)result;
}
@@ -256,12 +252,6 @@ bytes_length(PyBytesObject *self)
}
static PyObject *
-bytes_concat(PyBytesObject *self, PyObject *other)
-{
- return PyBytes_Concat((PyObject *)self, other);
-}
-
-static PyObject *
bytes_iconcat(PyBytesObject *self, PyObject *other)
{
Py_ssize_t mysize;
@@ -351,51 +341,13 @@ bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
return (PyObject *)self;
}
-static int
-bytes_substring(PyBytesObject *self, PyBytesObject *other)
-{
- Py_ssize_t i;
-
- if (Py_Size(other) == 1) {
- return memchr(self->ob_bytes, other->ob_bytes[0],
- Py_Size(self)) != NULL;
- }
- if (Py_Size(other) == 0)
- return 1; /* Edge case */
- for (i = 0; i + Py_Size(other) <= Py_Size(self); i++) {
- /* XXX Yeah, yeah, lots of optimizations possible... */
- if (memcmp(self->ob_bytes + i, other->ob_bytes, Py_Size(other)) == 0)
- return 1;
- }
- return 0;
-}
-
-static int
-bytes_contains(PyBytesObject *self, PyObject *value)
-{
- Py_ssize_t ival;
-
- if (PyBytes_Check(value))
- return bytes_substring(self, (PyBytesObject *)value);
-
- ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
- if (ival == -1 && PyErr_Occurred())
- return -1;
- if (ival < 0 || ival >= 256) {
- PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
- return -1;
- }
-
- return memchr(self->ob_bytes, ival, Py_Size(self)) != NULL;
-}
-
static PyObject *
bytes_getitem(PyBytesObject *self, Py_ssize_t i)
{
if (i < 0)
i += Py_Size(self);
if (i < 0 || i >= Py_Size(self)) {
- PyErr_SetString(PyExc_IndexError, "bytes index out of range");
+ PyErr_SetString(PyExc_IndexError, "buffer index out of range");
return NULL;
}
return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
@@ -414,7 +366,7 @@ bytes_subscript(PyBytesObject *self, PyObject *item)
i += PyBytes_GET_SIZE(self);
if (i < 0 || i >= Py_Size(self)) {
- PyErr_SetString(PyExc_IndexError, "bytes index out of range");
+ PyErr_SetString(PyExc_IndexError, "buffer index out of range");
return NULL;
}
return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
@@ -451,7 +403,7 @@ bytes_subscript(PyBytesObject *self, PyObject *item)
}
}
else {
- PyErr_SetString(PyExc_TypeError, "bytes indices must be integers");
+ PyErr_SetString(PyExc_TypeError, "buffer indices must be integers");
return NULL;
}
}
@@ -551,7 +503,7 @@ bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
i += Py_Size(self);
if (i < 0 || i >= Py_Size(self)) {
- PyErr_SetString(PyExc_IndexError, "bytes index out of range");
+ PyErr_SetString(PyExc_IndexError, "buffer index out of range");
return -1;
}
@@ -587,7 +539,7 @@ bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
i += PyBytes_GET_SIZE(self);
if (i < 0 || i >= Py_Size(self)) {
- PyErr_SetString(PyExc_IndexError, "bytes index out of range");
+ PyErr_SetString(PyExc_IndexError, "buffer index out of range");
return -1;
}
@@ -619,7 +571,7 @@ bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
}
}
else {
- PyErr_SetString(PyExc_TypeError, "bytes indices must be integer");
+ PyErr_SetString(PyExc_TypeError, "buffer indices must be integer");
return -1;
}
@@ -772,13 +724,7 @@ bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
encoded = PyCodec_Encode(arg, encoding, errors);
if (encoded == NULL)
return -1;
- if (!PyBytes_Check(encoded) && !PyString_Check(encoded)) {
- PyErr_Format(PyExc_TypeError,
- "encoder did not return a str8 or bytes object (type=%.400s)",
- Py_Type(encoded)->tp_name);
- Py_DECREF(encoded);
- return -1;
- }
+ assert(PyString_Check(encoded));
new = bytes_iconcat(self, encoded);
Py_DECREF(encoded);
if (new == NULL)
@@ -889,11 +835,15 @@ static PyObject *
bytes_repr(PyBytesObject *self)
{
static const char *hexdigits = "0123456789abcdef";
- size_t newsize = 3 + 4 * Py_Size(self);
+ const char *quote_prefix = "buffer(b";
+ const char *quote_postfix = ")";
+ Py_ssize_t length = Py_Size(self);
+ /* 9 prefix + 2 postfix */
+ size_t newsize = 11 + 4 * length;
PyObject *v;
- if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(self)) {
+ if (newsize > PY_SSIZE_T_MAX || newsize / 4 - 2 != length) {
PyErr_SetString(PyExc_OverflowError,
- "bytes object is too large to make repr");
+ "buffer object is too large to make repr");
return NULL;
}
v = PyUnicode_FromUnicode(NULL, newsize);
@@ -904,17 +854,36 @@ bytes_repr(PyBytesObject *self)
register Py_ssize_t i;
register Py_UNICODE c;
register Py_UNICODE *p;
- int quote = '\'';
+ int quote;
+
+ /* Figure out which quote to use; single is preferred */
+ quote = '\'';
+ {
+ char *test, *start;
+ start = PyBytes_AS_STRING(self);
+ for (test = start; test < start+length; ++test) {
+ if (*test == '"') {
+ quote = '\''; /* back to single */
+ goto decided;
+ }
+ else if (*test == '\'')
+ quote = '"';
+ }
+ decided:
+ ;
+ }
p = PyUnicode_AS_UNICODE(v);
- *p++ = 'b';
+ while (*quote_prefix)
+ *p++ = *quote_prefix++;
*p++ = quote;
- for (i = 0; i < Py_Size(self); i++) {
+
+ for (i = 0; i < length; i++) {
/* There's at least enough room for a hex escape
and a closing quote. */
assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
c = self->ob_bytes[i];
- if (c == quote || c == '\\')
+ if (c == '\'' || c == '\\')
*p++ = '\\', *p++ = c;
else if (c == '\t')
*p++ = '\\', *p++ = 't';
@@ -935,6 +904,9 @@ bytes_repr(PyBytesObject *self)
}
assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
*p++ = quote;
+ while (*quote_postfix) {
+ *p++ = *quote_postfix++;
+ }
*p = '\0';
if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
Py_DECREF(v);
@@ -945,9 +917,14 @@ bytes_repr(PyBytesObject *self)
}
static PyObject *
-bytes_str(PyBytesObject *self)
+bytes_str(PyObject *op)
{
- return PyString_FromStringAndSize(self->ob_bytes, Py_Size(self));
+ if (Py_BytesWarningFlag) {
+ if (PyErr_WarnEx(PyExc_BytesWarning,
+ "str() on a buffer instance", 1))
+ return NULL;
+ }
+ return bytes_repr((PyBytesObject*)op);
}
static PyObject *
@@ -964,6 +941,12 @@ bytes_richcompare(PyObject *self, PyObject *other, int op)
error, even if the comparison is for equality. */
if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
+ if (Py_BytesWarningFlag && op == Py_EQ) {
+ if (PyErr_WarnEx(PyExc_BytesWarning,
+ "Comparsion between buffer and string", 1))
+ return NULL;
+ }
+
Py_INCREF(Py_NotImplemented);
return Py_NotImplemented;
}
@@ -1112,7 +1095,7 @@ bytes_find(PyBytesObject *self, PyObject *args)
}
PyDoc_STRVAR(count__doc__,
-"B.count(sub[, start[, end]]) -> int\n\
+"B.count(sub [,start [,end]]) -> int\n\
\n\
Return the number of non-overlapping occurrences of subsection sub in\n\
bytes B[start:end]. Optional arguments start and end are interpreted\n\
@@ -1203,6 +1186,30 @@ bytes_rindex(PyBytesObject *self, PyObject *args)
}
+static int
+bytes_contains(PyObject *self, PyObject *arg)
+{
+ Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
+ if (ival == -1 && PyErr_Occurred()) {
+ Py_buffer varg;
+ int pos;
+ PyErr_Clear();
+ if (_getbuffer(arg, &varg) < 0)
+ return -1;
+ pos = stringlib_find(PyBytes_AS_STRING(self), Py_Size(self),
+ varg.buf, varg.len, 0);
+ PyObject_ReleaseBuffer(arg, &varg);
+ return pos >= 0;
+ }
+ if (ival < 0 || ival >= 256) {
+ PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
+ return -1;
+ }
+
+ return memchr(PyBytes_AS_STRING(self), ival, Py_Size(self)) != NULL;
+}
+
+
/* Matches the end (direction >= 0) or start (direction < 0) of self
* against substr, using the start and end arguments. Returns
* -1 on error, 0 if not found and 1 if found.
@@ -1247,7 +1254,7 @@ done:
PyDoc_STRVAR(startswith__doc__,
-"B.startswith(prefix[, start[, end]]) -> bool\n\
+"B.startswith(prefix [,start [,end]]) -> bool\n\
\n\
Return True if B starts with the specified prefix, False otherwise.\n\
With optional start, test B beginning at that position.\n\
@@ -1287,7 +1294,7 @@ bytes_startswith(PyBytesObject *self, PyObject *args)
}
PyDoc_STRVAR(endswith__doc__,
-"B.endswith(suffix[, start[, end]]) -> bool\n\
+"B.endswith(suffix [,start [,end]]) -> bool\n\
\n\
Return True if B ends with the specified suffix, False otherwise.\n\
With optional start, test B beginning at that position.\n\
@@ -1328,12 +1335,12 @@ bytes_endswith(PyBytesObject *self, PyObject *args)
PyDoc_STRVAR(translate__doc__,
-"B.translate(table [,deletechars]) -> bytes\n\
+"B.translate(table[, deletechars]) -> buffer\n\
\n\
-Return a copy of the bytes B, where all characters occurring\n\
-in the optional argument deletechars are removed, and the\n\
-remaining characters have been mapped through the given\n\
-translation table, which must be a bytes of length 256.");
+Return a copy of B, where all characters occurring in the\n\
+optional argument deletechars are removed, and the remaining\n\
+characters have been mapped through the given translation\n\
+table, which must be a bytes object of length 256.");
static PyObject *
bytes_translate(PyBytesObject *self, PyObject *args)
@@ -2026,9 +2033,9 @@ replace(PyBytesObject *self,
PyDoc_STRVAR(replace__doc__,
-"B.replace (old, new[, count]) -> bytes\n\
+"B.replace(old, new[, count]) -> bytes\n\
\n\
-Return a copy of bytes B with all occurrences of subsection\n\
+Return a copy of B with all occurrences of subsection\n\
old replaced by new. If the optional argument count is\n\
given, only the first count occurrences are replaced.");
@@ -2149,23 +2156,23 @@ split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
return NULL;
for (i = j = 0; i < len; ) {
- /* find a token */
- while (i < len && ISSPACE(s[i]))
- i++;
- j = i;
- while (i < len && !ISSPACE(s[i]))
- i++;
- if (j < i) {
- if (maxcount-- <= 0)
- break;
- SPLIT_ADD(s, j, i);
- while (i < len && ISSPACE(s[i]))
- i++;
- j = i;
- }
+ /* find a token */
+ while (i < len && ISSPACE(s[i]))
+ i++;
+ j = i;
+ while (i < len && !ISSPACE(s[i]))
+ i++;
+ if (j < i) {
+ if (maxcount-- <= 0)
+ break;
+ SPLIT_ADD(s, j, i);
+ while (i < len && ISSPACE(s[i]))
+ i++;
+ j = i;
+ }
}
if (j < len) {
- SPLIT_ADD(s, j, len);
+ SPLIT_ADD(s, j, len);
}
FIX_PREALLOC_SIZE(list);
return list;
@@ -2176,10 +2183,10 @@ split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
}
PyDoc_STRVAR(split__doc__,
-"B.split([sep [, maxsplit]]) -> list of bytes\n\
+"B.split([sep[, maxsplit]]) -> list of buffer\n\
\n\
-Return a list of the bytes in the string B, using sep as the delimiter.\n\
-If sep is not given, B is split on ASCII whitespace charcters\n\
+Return a list of the sections in B, using sep as the delimiter.\n\
+If sep is not given, B is split on ASCII whitespace characters\n\
(space, tab, return, newline, formfeed, vertical tab).\n\
If maxsplit is given, at most maxsplit splits are done.");
@@ -2255,12 +2262,37 @@ bytes_split(PyBytesObject *self, PyObject *args)
return NULL;
}
+/* stringlib's partition shares nullbytes in some cases.
+ undo this, we don't want the nullbytes to be shared. */
+static PyObject *
+make_nullbytes_unique(PyObject *result)
+{
+ if (result != NULL) {
+ int i;
+ assert(PyTuple_Check(result));
+ assert(PyTuple_GET_SIZE(result) == 3);
+ for (i = 0; i < 3; i++) {
+ if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) {
+ PyObject *new = PyBytes_FromStringAndSize(NULL, 0);
+ if (new == NULL) {
+ Py_DECREF(result);
+ result = NULL;
+ break;
+ }
+ Py_DECREF(nullbytes);
+ PyTuple_SET_ITEM(result, i, new);
+ }
+ }
+ }
+ return result;
+}
+
PyDoc_STRVAR(partition__doc__,
"B.partition(sep) -> (head, sep, tail)\n\
\n\
Searches for the separator sep in B, and returns the part before it,\n\
the separator itself, and the part after it. If the separator is not\n\
-found, returns B and two empty bytes.");
+found, returns B and two empty buffer.");
static PyObject *
bytes_partition(PyBytesObject *self, PyObject *sep_obj)
@@ -2279,15 +2311,16 @@ bytes_partition(PyBytesObject *self, PyObject *sep_obj)
);
Py_DECREF(bytesep);
- return result;
+ return make_nullbytes_unique(result);
}
PyDoc_STRVAR(rpartition__doc__,
"B.rpartition(sep) -> (tail, sep, head)\n\
\n\
-Searches for the separator sep in B, starting at the end of B, and returns\n\
-the part before it, the separator itself, and the part after it. If the\n\
-separator is not found, returns two empty bytes and B.");
+Searches for the separator sep in B, starting at the end of B,\n\
+and returns the part before it, the separator itself, and the\n\
+part after it. If the separator is not found, returns two empty\n\
+buffer objects and B.");
static PyObject *
bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
@@ -2306,7 +2339,7 @@ bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
);
Py_DECREF(bytesep);
- return result;
+ return make_nullbytes_unique(result);
}
Py_LOCAL_INLINE(PyObject *)
@@ -2354,23 +2387,23 @@ rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
return NULL;
for (i = j = len - 1; i >= 0; ) {
- /* find a token */
- while (i >= 0 && Py_UNICODE_ISSPACE(s[i]))
- i--;
- j = i;
- while (i >= 0 && !Py_UNICODE_ISSPACE(s[i]))
- i--;
- if (j > i) {
- if (maxcount-- <= 0)
- break;
- SPLIT_ADD(s, i + 1, j + 1);
- while (i >= 0 && Py_UNICODE_ISSPACE(s[i]))
- i--;
- j = i;
- }
+ /* find a token */
+ while (i >= 0 && Py_UNICODE_ISSPACE(s[i]))
+ i--;
+ j = i;
+ while (i >= 0 && !Py_UNICODE_ISSPACE(s[i]))
+ i--;
+ if (j > i) {
+ if (maxcount-- <= 0)
+ break;
+ SPLIT_ADD(s, i + 1, j + 1);
+ while (i >= 0 && Py_UNICODE_ISSPACE(s[i]))
+ i--;
+ j = i;
+ }
}
if (j >= 0) {
- SPLIT_ADD(s, 0, j + 1);
+ SPLIT_ADD(s, 0, j + 1);
}
FIX_PREALLOC_SIZE(list);
if (PyList_Reverse(list) < 0)
@@ -2384,10 +2417,10 @@ rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
}
PyDoc_STRVAR(rsplit__doc__,
-"B.rsplit(sep [,maxsplit]) -> list of bytes\n\
+"B.rsplit(sep[, maxsplit]) -> list of buffer\n\
\n\
-Return a list of the sections in the byte B, using sep as the delimiter,\n\
-starting at the end of the bytes and working to the front.\n\
+Return a list of the sections in B, using sep as the delimiter,\n\
+starting at the end of B and working to the front.\n\
If sep is not given, B is split on ASCII whitespace characters\n\
(space, tab, return, newline, formfeed, vertical tab).\n\
If maxsplit is given, at most maxsplit splits are done.");
@@ -2458,7 +2491,7 @@ PyDoc_STRVAR(extend__doc__,
"B.extend(iterable int) -> None\n\
\n\
Append all the elements from the iterator or sequence to the\n\
-end of the bytes.");
+end of B.");
static PyObject *
bytes_extend(PyBytesObject *self, PyObject *arg)
{
@@ -2475,7 +2508,7 @@ bytes_extend(PyBytesObject *self, PyObject *arg)
PyDoc_STRVAR(reverse__doc__,
"B.reverse() -> None\n\
\n\
-Reverse the order of the values in bytes in place.");
+Reverse the order of the values in B in place.");
static PyObject *
bytes_reverse(PyBytesObject *self, PyObject *unused)
{
@@ -2497,7 +2530,7 @@ bytes_reverse(PyBytesObject *self, PyObject *unused)
PyDoc_STRVAR(insert__doc__,
"B.insert(index, int) -> None\n\
\n\
-Insert a single item into the bytes before the given index.");
+Insert a single item into the buffer before the given index.");
static PyObject *
bytes_insert(PyBytesObject *self, PyObject *args)
{
@@ -2536,7 +2569,7 @@ bytes_insert(PyBytesObject *self, PyObject *args)
PyDoc_STRVAR(append__doc__,
"B.append(int) -> None\n\
\n\
-Append a single item to the end of the bytes.");
+Append a single item to the end of B.");
static PyObject *
bytes_append(PyBytesObject *self, PyObject *arg)
{
@@ -2561,7 +2594,7 @@ bytes_append(PyBytesObject *self, PyObject *arg)
PyDoc_STRVAR(pop__doc__,
"B.pop([index]) -> int\n\
\n\
-Remove and return a single item from the bytes. If no index\n\
+Remove and return a single item from B. If no index\n\
argument is give, will pop the last value.");
static PyObject *
bytes_pop(PyBytesObject *self, PyObject *args)
@@ -2595,7 +2628,7 @@ bytes_pop(PyBytesObject *self, PyObject *args)
PyDoc_STRVAR(remove__doc__,
"B.remove(int) -> None\n\
\n\
-Remove the first occurance of a value in bytes");
+Remove the first occurance of a value in B.");
static PyObject *
bytes_remove(PyBytesObject *self, PyObject *arg)
{
@@ -2644,7 +2677,7 @@ rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
}
PyDoc_STRVAR(strip__doc__,
-"B.strip([bytes]) -> bytes\n\
+"B.strip([bytes]) -> buffer\n\
\n\
Strip leading and trailing bytes contained in the argument.\n\
If the argument is omitted, strip ASCII whitespace.");
@@ -2662,10 +2695,10 @@ bytes_strip(PyBytesObject *self, PyObject *args)
argsize = 6;
}
else {
- if (_getbuffer(arg, &varg) < 0)
- return NULL;
- argptr = varg.buf;
- argsize = varg.len;
+ if (_getbuffer(arg, &varg) < 0)
+ return NULL;
+ argptr = varg.buf;
+ argsize = varg.len;
}
myptr = self->ob_bytes;
mysize = Py_Size(self);
@@ -2675,12 +2708,12 @@ bytes_strip(PyBytesObject *self, PyObject *args)
else
right = rstrip_helper(myptr, mysize, argptr, argsize);
if (arg != Py_None)
- PyObject_ReleaseBuffer(arg, &varg);
+ PyObject_ReleaseBuffer(arg, &varg);
return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
}
PyDoc_STRVAR(lstrip__doc__,
-"B.lstrip([bytes]) -> bytes\n\
+"B.lstrip([bytes]) -> buffer\n\
\n\
Strip leading bytes contained in the argument.\n\
If the argument is omitted, strip leading ASCII whitespace.");
@@ -2698,22 +2731,22 @@ bytes_lstrip(PyBytesObject *self, PyObject *args)
argsize = 6;
}
else {
- if (_getbuffer(arg, &varg) < 0)
- return NULL;
- argptr = varg.buf;
- argsize = varg.len;
+ if (_getbuffer(arg, &varg) < 0)
+ return NULL;
+ argptr = varg.buf;
+ argsize = varg.len;
}
myptr = self->ob_bytes;
mysize = Py_Size(self);
left = lstrip_helper(myptr, mysize, argptr, argsize);
right = mysize;
if (arg != Py_None)
- PyObject_ReleaseBuffer(arg, &varg);
+ PyObject_ReleaseBuffer(arg, &varg);
return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
}
PyDoc_STRVAR(rstrip__doc__,
-"B.rstrip([bytes]) -> bytes\n\
+"B.rstrip([bytes]) -> buffer\n\
\n\
Strip trailing bytes contained in the argument.\n\
If the argument is omitted, strip trailing ASCII whitespace.");
@@ -2731,27 +2764,27 @@ bytes_rstrip(PyBytesObject *self, PyObject *args)
argsize = 6;
}
else {
- if (_getbuffer(arg, &varg) < 0)
- return NULL;
- argptr = varg.buf;
- argsize = varg.len;
+ if (_getbuffer(arg, &varg) < 0)
+ return NULL;
+ argptr = varg.buf;
+ argsize = varg.len;
}
myptr = self->ob_bytes;
mysize = Py_Size(self);
left = 0;
right = rstrip_helper(myptr, mysize, argptr, argsize);
if (arg != Py_None)
- PyObject_ReleaseBuffer(arg, &varg);
+ PyObject_ReleaseBuffer(arg, &varg);
return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
}
PyDoc_STRVAR(decode_doc,
-"B.decode([encoding[,errors]]) -> unicode obect.\n\
+"B.decode([encoding[, errors]]) -> unicode object.\n\
\n\
Decodes B using the codec registered for encoding. encoding defaults\n\
to the default encoding. errors may be given to set a different error\n\
-handling scheme. Default is 'strict' meaning that encoding errors raise\n\
-a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
+handling scheme. Default is 'strict' meaning that encoding errors raise\n\
+a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
as well as any other name registerd with codecs.register_error that is\n\
able to handle UnicodeDecodeErrors.");
@@ -2782,8 +2815,7 @@ bytes_alloc(PyBytesObject *self)
PyDoc_STRVAR(join_doc,
"B.join(iterable_of_bytes) -> bytes\n\
\n\
-Concatenates any number of bytes objects, with B in between each pair.\n\
-Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
+Concatenates any number of buffer objects, with B in between each pair.");
static PyObject *
bytes_join(PyBytesObject *self, PyObject *it)
@@ -2804,9 +2836,10 @@ bytes_join(PyBytesObject *self, PyObject *it)
items = PySequence_Fast_ITEMS(seq);
/* Compute the total size, and check that they are all bytes */
+ /* XXX Shouldn't we use _getbuffer() on these items instead? */
for (i = 0; i < n; i++) {
PyObject *obj = items[i];
- if (!PyBytes_Check(obj)) {
+ if (!PyBytes_Check(obj) && !PyString_Check(obj)) {
PyErr_Format(PyExc_TypeError,
"can only join an iterable of bytes "
"(item %ld has type '%.100s')",
@@ -2816,7 +2849,7 @@ bytes_join(PyBytesObject *self, PyObject *it)
}
if (i > 0)
totalsize += mysize;
- totalsize += PyBytes_GET_SIZE(obj);
+ totalsize += Py_Size(obj);
if (totalsize < 0) {
PyErr_NoMemory();
goto error;
@@ -2830,12 +2863,17 @@ bytes_join(PyBytesObject *self, PyObject *it)
dest = PyBytes_AS_STRING(result);
for (i = 0; i < n; i++) {
PyObject *obj = items[i];
- Py_ssize_t size = PyBytes_GET_SIZE(obj);
- if (i > 0) {
+ Py_ssize_t size = Py_Size(obj);
+ char *buf;
+ if (PyBytes_Check(obj))
+ buf = PyBytes_AS_STRING(obj);
+ else
+ buf = PyString_AS_STRING(obj);
+ if (i) {
memcpy(dest, self->ob_bytes, mysize);
dest += mysize;
}
- memcpy(dest, PyBytes_AS_STRING(obj), size);
+ memcpy(dest, buf, size);
dest += size;
}
@@ -2850,11 +2888,11 @@ bytes_join(PyBytesObject *self, PyObject *it)
}
PyDoc_STRVAR(fromhex_doc,
-"bytes.fromhex(string) -> bytes\n\
+"buffer.fromhex(string) -> buffer\n\
\n\
-Create a bytes object from a string of hexadecimal numbers.\n\
-Spaces between two numbers are accepted. Example:\n\
-bytes.fromhex('10 1112') -> b'\\x10\\x11\\x12'.");
+Create a buffer object from a string of hexadecimal numbers.\n\
+Spaces between two numbers are accepted.\n\
+Example: buffer.fromhex('B9 01EF') -> buffer(b'\\xb9\\x01\\xef').");
static int
hex_digit_to_int(Py_UNICODE c)
@@ -2940,7 +2978,7 @@ bytes_reduce(PyBytesObject *self)
static PySequenceMethods bytes_as_sequence = {
(lenfunc)bytes_length, /* sq_length */
- (binaryfunc)bytes_concat, /* sq_concat */
+ (binaryfunc)PyBytes_Concat, /* sq_concat */
(ssizeargfunc)bytes_repeat, /* sq_repeat */
(ssizeargfunc)bytes_getitem, /* sq_item */
0, /* sq_slice */
@@ -3027,15 +3065,27 @@ bytes_methods[] = {
};
PyDoc_STRVAR(bytes_doc,
-"bytes([iterable]) -> new array of bytes.\n\
+"buffer(iterable_of_ints) -> buffer.\n\
+buffer(string, encoding[, errors]) -> buffer.\n\
+buffer(bytes_or_buffer) -> mutable copy of bytes_or_buffer.\n\
+buffer(memory_view) -> buffer.\n\
+\n\
+Construct a mutable buffer object from:\n\
+ - an iterable yielding integers in range(256)\n\
+ - a text string encoded using the specified encoding\n\
+ - a bytes or a buffer object\n\
+ - any object implementing the buffer API.\n\
\n\
-If an argument is given it must be an iterable yielding ints in range(256).");
+buffer(int) -> buffer.\n\
+\n\
+Construct a zero-initialized buffer of the given length.");
+
static PyObject *bytes_iter(PyObject *seq);
PyTypeObject PyBytes_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
- "bytes",
+ "buffer",
sizeof(PyBytesObject),
0,
(destructor)bytes_dealloc, /* tp_dealloc */
@@ -3049,7 +3099,7 @@ PyTypeObject PyBytes_Type = {
&bytes_as_mapping, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
- (reprfunc)bytes_str, /* tp_str */
+ bytes_str, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */
&bytes_as_buffer, /* tp_as_buffer */
diff --git a/Objects/codeobject.c b/Objects/codeobject.c
index b9a26ba..80c2df9 100644
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -8,7 +8,7 @@
/* all_name_chars(s): true iff all chars in s are valid NAME_CHARS */
static int
-all_name_chars(unsigned char *s)
+all_name_chars(Py_UNICODE *s)
{
static char ok_name_char[256];
static unsigned char *name_chars = (unsigned char *)NAME_CHARS;
@@ -19,6 +19,8 @@ all_name_chars(unsigned char *s)
ok_name_char[*p] = 1;
}
while (*s) {
+ if (*s >= 128)
+ return 0;
if (ok_name_char[*s++] == 0)
return 0;
}
@@ -73,11 +75,11 @@ PyCode_New(int argcount, int kwonlyargcount,
/* Intern selected string constants */
for (i = PyTuple_Size(consts); --i >= 0; ) {
PyObject *v = PyTuple_GetItem(consts, i);
- if (!PyString_Check(v))
+ if (!PyUnicode_Check(v))
continue;
- if (!all_name_chars((unsigned char *)PyString_AS_STRING(v)))
+ if (!all_name_chars(PyUnicode_AS_UNICODE(v)))
continue;
- PyString_InternInPlace(&PyTuple_GET_ITEM(consts, i));
+ PyUnicode_InternInPlace(&PyTuple_GET_ITEM(consts, i));
}
co = PyObject_NEW(PyCodeObject, &PyCode_Type);
if (co != NULL) {
@@ -202,7 +204,7 @@ code_new(PyTypeObject *type, PyObject *args, PyObject *kw)
int firstlineno;
PyObject *lnotab;
- if (!PyArg_ParseTuple(args, "iiiiiSO!O!O!SSiS|O!O!:code",
+ if (!PyArg_ParseTuple(args, "iiiiiSO!O!O!UUiS|O!O!:code",
&argcount, &kwonlyargcount,
&nlocals, &stacksize, &flags,
&code,
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index abe4bde..6ef765b 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -1045,14 +1045,14 @@ SimpleExtendsException(PyExc_ValueError, UnicodeError,
"Unicode related error.");
static PyObject *
-get_bytes(PyObject *attr, const char *name)
+get_string(PyObject *attr, const char *name)
{
if (!attr) {
PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name);
return NULL;
}
- if (!PyBytes_Check(attr)) {
+ if (!PyString_Check(attr)) {
PyErr_Format(PyExc_TypeError, "%.200s attribute must be bytes", name);
return NULL;
}
@@ -1109,7 +1109,7 @@ PyUnicodeEncodeError_GetObject(PyObject *exc)
PyObject *
PyUnicodeDecodeError_GetObject(PyObject *exc)
{
- return get_bytes(((PyUnicodeErrorObject *)exc)->object, "object");
+ return get_string(((PyUnicodeErrorObject *)exc)->object, "object");
}
PyObject *
@@ -1141,10 +1141,10 @@ int
PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start)
{
Py_ssize_t size;
- PyObject *obj = get_bytes(((PyUnicodeErrorObject *)exc)->object, "object");
+ PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object, "object");
if (!obj)
return -1;
- size = PyBytes_GET_SIZE(obj);
+ size = PyString_GET_SIZE(obj);
*start = ((PyUnicodeErrorObject *)exc)->start;
if (*start<0)
*start = 0;
@@ -1209,10 +1209,10 @@ int
PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
{
Py_ssize_t size;
- PyObject *obj = get_bytes(((PyUnicodeErrorObject *)exc)->object, "object");
+ PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object, "object");
if (!obj)
return -1;
- size = PyBytes_GET_SIZE(obj);
+ size = PyString_GET_SIZE(obj);
*end = ((PyUnicodeErrorObject *)exc)->end;
if (*end<1)
*end = 1;
@@ -1299,31 +1299,6 @@ PyUnicodeTranslateError_SetReason(PyObject *exc, const char *reason)
static int
-UnicodeError_init(PyUnicodeErrorObject *self, PyObject *args, PyObject *kwds,
- PyTypeObject *objecttype)
-{
- Py_CLEAR(self->encoding);
- Py_CLEAR(self->object);
- Py_CLEAR(self->reason);
-
- if (!PyArg_ParseTuple(args, "O!O!nnO!",
- &PyUnicode_Type, &self->encoding,
- objecttype, &self->object,
- &self->start,
- &self->end,
- &PyUnicode_Type, &self->reason)) {
- self->encoding = self->object = self->reason = NULL;
- return -1;
- }
-
- Py_INCREF(self->encoding);
- Py_INCREF(self->object);
- Py_INCREF(self->reason);
-
- return 0;
-}
-
-static int
UnicodeError_clear(PyUnicodeErrorObject *self)
{
Py_CLEAR(self->encoding);
@@ -1371,10 +1346,32 @@ static PyMemberDef UnicodeError_members[] = {
static int
UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
{
+ PyUnicodeErrorObject *err;
+
if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1)
return -1;
- return UnicodeError_init((PyUnicodeErrorObject *)self, args,
- kwds, &PyUnicode_Type);
+
+ err = (PyUnicodeErrorObject *)self;
+
+ Py_CLEAR(err->encoding);
+ Py_CLEAR(err->object);
+ Py_CLEAR(err->reason);
+
+ if (!PyArg_ParseTuple(args, "O!O!nnO!",
+ &PyUnicode_Type, &err->encoding,
+ &PyUnicode_Type, &err->object,
+ &err->start,
+ &err->end,
+ &PyUnicode_Type, &err->reason)) {
+ err->encoding = err->object = err->reason = NULL;
+ return -1;
+ }
+
+ Py_INCREF(err->encoding);
+ Py_INCREF(err->object);
+ Py_INCREF(err->reason);
+
+ return 0;
}
static PyObject *
@@ -1439,10 +1436,44 @@ PyUnicodeEncodeError_Create(
static int
UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
{
+ PyUnicodeErrorObject *ude;
+ const char *data;
+ Py_ssize_t size;
+
if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1)
return -1;
- return UnicodeError_init((PyUnicodeErrorObject *)self, args,
- kwds, &PyBytes_Type);
+
+ ude = (PyUnicodeErrorObject *)self;
+
+ Py_CLEAR(ude->encoding);
+ Py_CLEAR(ude->object);
+ Py_CLEAR(ude->reason);
+
+ if (!PyArg_ParseTuple(args, "O!OnnO!",
+ &PyUnicode_Type, &ude->encoding,
+ &ude->object,
+ &ude->start,
+ &ude->end,
+ &PyUnicode_Type, &ude->reason)) {
+ ude->encoding = ude->object = ude->reason = NULL;
+ return -1;
+ }
+
+ if (!PyString_Check(ude->object)) {
+ if (PyObject_AsReadBuffer(ude->object, (const void **)&data, &size)) {
+ ude->encoding = ude->object = ude->reason = NULL;
+ return -1;
+ }
+ ude->object = PyString_FromStringAndSize(data, size);
+ }
+ else {
+ Py_INCREF(ude->object);
+ }
+
+ Py_INCREF(ude->encoding);
+ Py_INCREF(ude->reason);
+
+ return 0;
}
static PyObject *
@@ -1451,7 +1482,7 @@ UnicodeDecodeError_str(PyObject *self)
PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
if (uself->end==uself->start+1) {
- int byte = (int)(PyBytes_AS_STRING(((PyUnicodeErrorObject *)self)->object)[uself->start]&0xff);
+ int byte = (int)(PyString_AS_STRING(((PyUnicodeErrorObject *)self)->object)[uself->start]&0xff);
return PyUnicode_FromFormat(
"'%U' codec can't decode byte 0x%02x in position %zd: %U",
((PyUnicodeErrorObject *)self)->encoding,
@@ -1709,6 +1740,14 @@ SimpleExtendsException(PyExc_Warning, UnicodeWarning,
"Base class for warnings about Unicode related problems, mostly\n"
"related to conversion problems.");
+/*
+ * BytesWarning extends Warning
+ */
+SimpleExtendsException(PyExc_Warning, BytesWarning,
+ "Base class for warnings about bytes and buffer related problems, mostly\n"
+ "related to conversion from str or comparing to str.");
+
+
/* Pre-computed MemoryError instance. Best to create this as early as
* possible and not wait until a MemoryError is actually raised!
@@ -1808,6 +1847,7 @@ _PyExc_Init(void)
PRE_INIT(FutureWarning)
PRE_INIT(ImportWarning)
PRE_INIT(UnicodeWarning)
+ PRE_INIT(BytesWarning)
bltinmod = PyImport_ImportModule("__builtin__");
if (bltinmod == NULL)
@@ -1868,6 +1908,7 @@ _PyExc_Init(void)
POST_INIT(FutureWarning)
POST_INIT(ImportWarning)
POST_INIT(UnicodeWarning)
+ POST_INIT(BytesWarning)
PyExc_MemoryErrorInst = BaseException_new(&_PyExc_MemoryError, NULL, NULL);
if (!PyExc_MemoryErrorInst)
diff --git a/Objects/fileobject.c b/Objects/fileobject.c
index 97c2756..c6c7d8e 100644
--- a/Objects/fileobject.c
+++ b/Objects/fileobject.c
@@ -146,7 +146,7 @@ PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
if (writer == NULL)
return -1;
if (flags & Py_PRINT_RAW) {
- value = _PyObject_Str(v);
+ value = PyObject_Str(v);
}
else
value = PyObject_Repr(v);
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 8ebc31c..d827e7e 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -3462,14 +3462,22 @@ long_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return PyLong_FromLong(0L);
if (base == -909)
return PyNumber_Long(x);
- else if (PyBytes_Check(x)) {
+ else if (PyUnicode_Check(x))
+ return PyLong_FromUnicode(PyUnicode_AS_UNICODE(x),
+ PyUnicode_GET_SIZE(x),
+ base);
+ else if (PyBytes_Check(x) || PyString_Check(x)) {
/* Since PyLong_FromString doesn't have a length parameter,
* check here for possible NULs in the string. */
- char *string = PyBytes_AS_STRING(x);
- int size = PyBytes_GET_SIZE(x);
+ char *string;
+ int size = Py_Size(x);
+ if (PyBytes_Check(x))
+ string = PyBytes_AS_STRING(x);
+ else
+ string = PyString_AS_STRING(x);
if (strlen(string) != size) {
/* We only see this if there's a null byte in x,
- x is a str8 or a bytes, *and* a base is given. */
+ x is a bytes or buffer, *and* a base is given. */
PyErr_Format(PyExc_ValueError,
"invalid literal for int() with base %d: %R",
base, x);
@@ -3477,10 +3485,6 @@ long_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
}
return PyLong_FromString(string, NULL, base);
}
- else if (PyUnicode_Check(x))
- return PyLong_FromUnicode(PyUnicode_AS_UNICODE(x),
- PyUnicode_GET_SIZE(x),
- base);
else {
PyErr_SetString(PyExc_TypeError,
"int() can't convert non-string with explicit base");
diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c
index 13c1ab4..b8b2b8e 100644
--- a/Objects/moduleobject.c
+++ b/Objects/moduleobject.c
@@ -151,7 +151,7 @@ module_init(PyModuleObject *m, PyObject *args, PyObject *kwds)
{
static char *kwlist[] = {"name", "doc", NULL};
PyObject *dict, *name = Py_None, *doc = Py_None;
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "S|O:module.__init__",
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "U|O:module.__init__",
kwlist, &name, &doc))
return -1;
dict = m->md_dict;
diff --git a/Objects/object.c b/Objects/object.c
index 40b8b42..df93a19 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -372,50 +372,34 @@ PyObject_Repr(PyObject *v)
#endif
if (v == NULL)
return PyUnicode_FromString("<NULL>");
- else if (Py_Type(v)->tp_repr == NULL)
- return PyUnicode_FromFormat("<%s object at %p>", v->ob_type->tp_name, v);
- else {
- res = (*v->ob_type->tp_repr)(v);
- if (res != NULL && !PyUnicode_Check(res)) {
- PyErr_Format(PyExc_TypeError,
- "__repr__ returned non-string (type %.200s)",
- res->ob_type->tp_name);
- Py_DECREF(res);
- return NULL;
- }
- return res;
- }
-}
-
-PyObject *
-PyObject_ReprStr8(PyObject *v)
-{
- PyObject *resu = PyObject_Repr(v);
- if (resu) {
- PyObject *resb = PyUnicode_AsEncodedString(resu, NULL, NULL);
- Py_DECREF(resu);
- if (resb) {
- PyObject *ress = PyString_FromStringAndSize(
- PyBytes_AS_STRING(resb),
- PyBytes_GET_SIZE(resb)
- );
- Py_DECREF(resb);
- return ress;
- }
- }
- return NULL;
+ if (Py_Type(v)->tp_repr == NULL)
+ return PyUnicode_FromFormat("<%s object at %p>",
+ v->ob_type->tp_name, v);
+ res = (*v->ob_type->tp_repr)(v);
+ if (res != NULL && !PyUnicode_Check(res)) {
+ PyErr_Format(PyExc_TypeError,
+ "__repr__ returned non-string (type %.200s)",
+ res->ob_type->tp_name);
+ Py_DECREF(res);
+ return NULL;
+ }
+ return res;
}
PyObject *
-_PyObject_Str(PyObject *v)
+PyObject_Str(PyObject *v)
{
PyObject *res;
+ if (PyErr_CheckSignals())
+ return NULL;
+#ifdef USE_STACKCHECK
+ if (PyOS_CheckStack()) {
+ PyErr_SetString(PyExc_MemoryError, "stack overflow");
+ return NULL;
+ }
+#endif
if (v == NULL)
return PyUnicode_FromString("<NULL>");
- if (PyString_CheckExact(v)) {
- Py_INCREF(v);
- return v;
- }
if (PyUnicode_CheckExact(v)) {
Py_INCREF(v);
return v;
@@ -431,7 +415,7 @@ _PyObject_Str(PyObject *v)
Py_LeaveRecursiveCall();
if (res == NULL)
return NULL;
- if (!(PyString_Check(res) || PyUnicode_Check(res))) {
+ if (!PyUnicode_Check(res)) {
PyErr_Format(PyExc_TypeError,
"__str__ returned non-string (type %.200s)",
Py_Type(res)->tp_name);
@@ -441,90 +425,12 @@ _PyObject_Str(PyObject *v)
return res;
}
-PyObject *
-PyObject_Str(PyObject *v)
-{
- PyObject *res = _PyObject_Str(v);
- if (res == NULL)
- return NULL;
- if (PyUnicode_Check(res)) {
- PyObject* str;
- str = _PyUnicode_AsDefaultEncodedString(res, NULL);
- Py_XINCREF(str);
- Py_DECREF(res);
- if (str)
- res = str;
- else
- return NULL;
- }
- assert(PyString_Check(res));
- return res;
-}
-
-PyObject *
-PyObject_Unicode(PyObject *v)
-{
- PyObject *res;
- PyObject *func;
- PyObject *str;
- static PyObject *unicodestr;
-
- if (v == NULL)
- return PyUnicode_FromString("<NULL>");
- else if (PyUnicode_CheckExact(v)) {
- Py_INCREF(v);
- return v;
- }
- /* XXX As soon as we have a tp_unicode slot, we should
- check this before trying the __unicode__
- method. */
- if (unicodestr == NULL) {
- unicodestr= PyUnicode_InternFromString("__unicode__");
- if (unicodestr == NULL)
- return NULL;
- }
- func = PyObject_GetAttr(v, unicodestr);
- if (func != NULL) {
- res = PyEval_CallObject(func, (PyObject *)NULL);
- Py_DECREF(func);
- }
- else {
- PyErr_Clear();
- if (PyUnicode_Check(v) &&
- v->ob_type->tp_str == PyUnicode_Type.tp_str) {
- /* For a Unicode subtype that's didn't overwrite
- __unicode__ or __str__,
- return a true Unicode object with the same data. */
- return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(v),
- PyUnicode_GET_SIZE(v));
- }
- if (PyString_CheckExact(v)) {
- Py_INCREF(v);
- res = v;
- }
- else {
- if (Py_Type(v)->tp_str != NULL)
- res = (*Py_Type(v)->tp_str)(v);
- else
- res = PyObject_Repr(v);
- }
- }
- if (res == NULL)
- return NULL;
- if (!PyUnicode_Check(res)) {
- str = PyUnicode_FromEncodedObject(res, NULL, "strict");
- Py_DECREF(res);
- res = str;
- }
- return res;
-}
-
/* The new comparison philosophy is: we completely separate three-way
comparison from rich comparison. That is, PyObject_Compare() and
PyObject_Cmp() *just* use the tp_compare slot. And PyObject_RichCompare()
and PyObject_RichCompareBool() *just* use the tp_richcompare slot.
-
+
See (*) below for practical amendments.
IOW, only cmp() uses tp_compare; the comparison operators (==, !=, <=, <,
@@ -580,7 +486,7 @@ do_compare(PyObject *v, PyObject *w)
cmpfunc f;
int ok;
- if (v->ob_type == w->ob_type &&
+ if (v->ob_type == w->ob_type &&
(f = v->ob_type->tp_compare) != NULL) {
return (*f)(v, w);
}
@@ -738,25 +644,25 @@ Py_CmpToRich(int op, int cmp)
return NULL;
switch (op) {
case Py_LT:
- ok = cmp < 0;
+ ok = cmp < 0;
break;
case Py_LE:
- ok = cmp <= 0;
+ ok = cmp <= 0;
break;
case Py_EQ:
- ok = cmp == 0;
+ ok = cmp == 0;
break;
case Py_NE:
- ok = cmp != 0;
+ ok = cmp != 0;
break;
- case Py_GT:
- ok = cmp > 0;
+ case Py_GT:
+ ok = cmp > 0;
break;
case Py_GE:
- ok = cmp >= 0;
+ ok = cmp >= 0;
break;
default:
- PyErr_BadArgument();
+ PyErr_BadArgument();
return NULL;
}
res = ok ? Py_True : Py_False;
@@ -1335,10 +1241,10 @@ _dir_locals(void)
}
/* Helper for PyObject_Dir of type objects: returns __dict__ and __bases__.
- We deliberately don't suck up its __class__, as methods belonging to the
- metaclass would probably be more confusing than helpful.
+ We deliberately don't suck up its __class__, as methods belonging to the
+ metaclass would probably be more confusing than helpful.
*/
-static PyObject *
+static PyObject *
_specialized_dir_type(PyObject *obj)
{
PyObject *result = NULL;
@@ -1381,7 +1287,7 @@ _generic_dir(PyObject *obj)
PyObject *result = NULL;
PyObject *dict = NULL;
PyObject *itsclass = NULL;
-
+
/* Get __dict__ (which may or may not be a real dict...) */
dict = PyObject_GetAttrString(obj, "__dict__");
if (dict == NULL) {
@@ -1486,7 +1392,7 @@ PyObject_Dir(PyObject *obj)
Py_DECREF(result);
result = NULL;
}
-
+
return result;
}
diff --git a/Objects/stringlib/transmogrify.h b/Objects/stringlib/transmogrify.h
index 1ee8e75..fe478c3 100644
--- a/Objects/stringlib/transmogrify.h
+++ b/Objects/stringlib/transmogrify.h
@@ -12,7 +12,7 @@
shared code in bytes_methods.c to cut down on duplicate code bloat. */
PyDoc_STRVAR(expandtabs__doc__,
-"B.expandtabs([tabsize]) -> modified copy of B\n\
+"B.expandtabs([tabsize]) -> copy of B\n\
\n\
Return a copy of B where all tab characters are expanded using spaces.\n\
If tabsize is not given, a tab size of 8 characters is assumed.");
@@ -133,7 +133,7 @@ pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
}
PyDoc_STRVAR(ljust__doc__,
-"B.ljust(width[, fillchar]) -> modified copy of B\n"
+"B.ljust(width[, fillchar]) -> copy of B\n"
"\n"
"Return B left justified in a string of length width. Padding is\n"
"done using the specified fill character (default is a space).");
@@ -163,7 +163,7 @@ stringlib_ljust(PyObject *self, PyObject *args)
PyDoc_STRVAR(rjust__doc__,
-"B.rjust(width[, fillchar]) -> modified copy of B\n"
+"B.rjust(width[, fillchar]) -> copy of B\n"
"\n"
"Return B right justified in a string of length width. Padding is\n"
"done using the specified fill character (default is a space)");
@@ -193,10 +193,10 @@ stringlib_rjust(PyObject *self, PyObject *args)
PyDoc_STRVAR(center__doc__,
-"B.center(width[, fillchar]) -> modified copy of B\n"
+"B.center(width[, fillchar]) -> copy of B\n"
"\n"
-"Return B centered in a string of length width. Padding is\n"
-"done using the specified fill character (default is a space)");
+"Return B centered in a string of length width. Padding is\n"
+"done using the specified fill character (default is a space).");
static PyObject *
stringlib_center(PyObject *self, PyObject *args)
@@ -226,7 +226,7 @@ stringlib_center(PyObject *self, PyObject *args)
}
PyDoc_STRVAR(zfill__doc__,
-"B.zfill(width) -> modified copy of B\n"
+"B.zfill(width) -> copy of B\n"
"\n"
"Pad a numeric string B with zeros on the left, to fill a field\n"
"of the specified width. B is never truncated.");
diff --git a/Objects/stringobject.c b/Objects/stringobject.c
index 3dd1051..8761477 100644
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -1,11 +1,32 @@
/* String object implementation */
+/* XXX This is now called 'bytes' as far as the user is concerned.
+ Many docstrings and error messages need to be cleaned up. */
+
#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include "bytes_methods.h"
+static Py_ssize_t
+_getbuffer(PyObject *obj, Py_buffer *view)
+{
+ PyBufferProcs *buffer = Py_Type(obj)->tp_as_buffer;
+
+ if (buffer == NULL || buffer->bf_getbuffer == NULL)
+ {
+ PyErr_Format(PyExc_TypeError,
+ "Type %.100s doesn't support the buffer API",
+ Py_Type(obj)->tp_name);
+ return -1;
+ }
+
+ if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
+ return -1;
+ return view->len;
+}
+
#ifdef COUNT_ALLOCS
int null_strings, one_strings;
#endif
@@ -13,16 +34,6 @@ int null_strings, one_strings;
static PyStringObject *characters[UCHAR_MAX + 1];
static PyStringObject *nullstring;
-/* This dictionary holds all interned strings. Note that references to
- strings in this dictionary are *not* counted in the string's ob_refcnt.
- When the interned string reaches a refcnt of 0 the string deallocation
- function will delete the reference from this dictionary.
-
- Another way to look at this is that to say that the actual reference
- count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
-*/
-static PyObject *interned;
-
/*
For both PyString_FromString() and PyString_FromStringAndSize(), the
parameter `size' denotes number of characters to allocate, not counting any
@@ -77,21 +88,14 @@ PyString_FromStringAndSize(const char *str, Py_ssize_t size)
return PyErr_NoMemory();
PyObject_INIT_VAR(op, &PyString_Type, size);
op->ob_shash = -1;
- op->ob_sstate = SSTATE_NOT_INTERNED;
if (str != NULL)
Py_MEMCPY(op->ob_sval, str, size);
op->ob_sval[size] = '\0';
/* share short strings */
if (size == 0) {
- PyObject *t = (PyObject *)op;
- PyString_InternInPlace(&t);
- op = (PyStringObject *)t;
nullstring = op;
Py_INCREF(op);
} else if (size == 1 && str != NULL) {
- PyObject *t = (PyObject *)op;
- PyString_InternInPlace(&t);
- op = (PyStringObject *)t;
characters[*str & UCHAR_MAX] = op;
Py_INCREF(op);
}
@@ -132,19 +136,12 @@ PyString_FromString(const char *str)
return PyErr_NoMemory();
PyObject_INIT_VAR(op, &PyString_Type, size);
op->ob_shash = -1;
- op->ob_sstate = SSTATE_NOT_INTERNED;
Py_MEMCPY(op->ob_sval, str, size+1);
/* share short strings */
if (size == 0) {
- PyObject *t = (PyObject *)op;
- PyString_InternInPlace(&t);
- op = (PyStringObject *)t;
nullstring = op;
Py_INCREF(op);
} else if (size == 1) {
- PyObject *t = (PyObject *)op;
- PyString_InternInPlace(&t);
- op = (PyStringObject *)t;
characters[*str & UCHAR_MAX] = op;
Py_INCREF(op);
}
@@ -351,174 +348,9 @@ PyString_FromFormat(const char *format, ...)
return ret;
}
-
-PyObject *PyString_Decode(const char *s,
- Py_ssize_t size,
- const char *encoding,
- const char *errors)
-{
- PyObject *v, *str;
-
- str = PyString_FromStringAndSize(s, size);
- if (str == NULL)
- return NULL;
- v = PyString_AsDecodedString(str, encoding, errors);
- Py_DECREF(str);
- return v;
-}
-
-PyObject *PyString_AsDecodedObject(PyObject *str,
- const char *encoding,
- const char *errors)
-{
- PyObject *v;
-
- if (!PyString_Check(str)) {
- PyErr_BadArgument();
- goto onError;
- }
-
- if (encoding == NULL) {
- encoding = PyUnicode_GetDefaultEncoding();
- }
-
- /* Decode via the codec registry */
- v = PyCodec_Decode(str, encoding, errors);
- if (v == NULL)
- goto onError;
-
- return v;
-
- onError:
- return NULL;
-}
-
-PyObject *PyString_AsDecodedString(PyObject *str,
- const char *encoding,
- const char *errors)
-{
- PyObject *v;
-
- v = PyString_AsDecodedObject(str, encoding, errors);
- if (v == NULL)
- goto onError;
-
- /* Convert Unicode to a string using the default encoding */
- if (PyUnicode_Check(v)) {
- PyObject *temp = v;
- v = PyUnicode_AsEncodedString(v, NULL, NULL);
- Py_DECREF(temp);
- if (v == NULL)
- goto onError;
- }
- if (!PyString_Check(v)) {
- PyErr_Format(PyExc_TypeError,
- "decoder did not return a string object (type=%.400s)",
- Py_Type(v)->tp_name);
- Py_DECREF(v);
- goto onError;
- }
-
- return v;
-
- onError:
- return NULL;
-}
-
-PyObject *PyString_Encode(const char *s,
- Py_ssize_t size,
- const char *encoding,
- const char *errors)
-{
- PyObject *v, *str;
-
- str = PyString_FromStringAndSize(s, size);
- if (str == NULL)
- return NULL;
- v = PyString_AsEncodedString(str, encoding, errors);
- Py_DECREF(str);
- return v;
-}
-
-PyObject *PyString_AsEncodedObject(PyObject *str,
- const char *encoding,
- const char *errors)
-{
- PyObject *v;
-
- if (!PyString_Check(str)) {
- PyErr_BadArgument();
- goto onError;
- }
-
- if (encoding == NULL) {
- encoding = PyUnicode_GetDefaultEncoding();
- }
-
- /* Encode via the codec registry */
- v = PyCodec_Encode(str, encoding, errors);
- if (v == NULL)
- goto onError;
-
- return v;
-
- onError:
- return NULL;
-}
-
-PyObject *PyString_AsEncodedString(PyObject *str,
- const char *encoding,
- const char *errors)
-{
- PyObject *v;
-
- v = PyString_AsEncodedObject(str, encoding, errors);
- if (v == NULL)
- goto onError;
-
- /* Convert Unicode to a string using the default encoding */
- if (PyUnicode_Check(v)) {
- PyObject *temp = v;
- v = PyUnicode_AsEncodedString(v, NULL, NULL);
- Py_DECREF(temp);
- if (v == NULL)
- goto onError;
- }
- if (!PyString_Check(v)) {
- PyErr_Format(PyExc_TypeError,
- "encoder did not return a string object (type=%.400s)",
- Py_Type(v)->tp_name);
- Py_DECREF(v);
- goto onError;
- }
-
- return v;
-
- onError:
- return NULL;
-}
-
static void
string_dealloc(PyObject *op)
{
- switch (PyString_CHECK_INTERNED(op)) {
- case SSTATE_NOT_INTERNED:
- break;
-
- case SSTATE_INTERNED_MORTAL:
- /* revive dead object temporarily for DelItem */
- Py_Refcnt(op) = 3;
- if (PyDict_DelItem(interned, op) != 0)
- Py_FatalError(
- "deletion of interned string failed");
- break;
-
- case SSTATE_INTERNED_IMMORTAL:
- Py_FatalError("Immortal interned string died.");
-
- default:
- Py_FatalError("Inconsistent interned string state.");
- }
Py_Type(op)->tp_free(op);
}
@@ -577,7 +409,7 @@ PyObject *PyString_DecodeEscape(const char *s,
continue;
}
s++;
- if (s==end) {
+ if (s==end) {
PyErr_SetString(PyExc_ValueError,
"Trailing \\ in string");
goto failed;
@@ -639,8 +471,8 @@ PyObject *PyString_DecodeEscape(const char *s,
/* do nothing */;
else {
PyErr_Format(PyExc_ValueError,
- "decoding error; "
- "unknown error handling code: %.400s",
+ "decoding error; unknown "
+ "error handling code: %.400s",
errors);
goto failed;
}
@@ -665,8 +497,8 @@ PyObject *PyString_DecodeEscape(const char *s,
static Py_ssize_t
string_getsize(register PyObject *op)
{
- char *s;
- Py_ssize_t len;
+ char *s;
+ Py_ssize_t len;
if (PyString_AsStringAndSize(op, &s, &len))
return -1;
return len;
@@ -675,8 +507,8 @@ string_getsize(register PyObject *op)
static /*const*/ char *
string_getbuffer(register PyObject *op)
{
- char *s;
- Py_ssize_t len;
+ char *s;
+ Py_ssize_t len;
if (PyString_AsStringAndSize(op, &s, &len))
return NULL;
return s;
@@ -753,7 +585,7 @@ PyString_AsStringAndSize(register PyObject *obj,
#define STRINGLIB_LEN PyString_GET_SIZE
#define STRINGLIB_NEW PyString_FromStringAndSize
#define STRINGLIB_STR PyString_AS_STRING
-#define STRINGLIB_WANT_CONTAINS_OBJ 1
+/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */
#define STRINGLIB_EMPTY nullstring
#define STRINGLIB_CHECK_EXACT PyString_CheckExact
@@ -773,12 +605,12 @@ PyString_Repr(PyObject *obj, int smartquotes)
{
static const char *hexdigits = "0123456789abcdef";
register PyStringObject* op = (PyStringObject*) obj;
- Py_ssize_t length = PyString_GET_SIZE(op);
- size_t newsize = 3 + 4 * Py_Size(op);
+ Py_ssize_t length = Py_Size(op);
+ size_t newsize = 3 + 4 * length;
PyObject *v;
- if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(op)) {
+ if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
PyErr_SetString(PyExc_OverflowError,
- "string is too large to make repr");
+ "bytes object is too large to make repr");
}
v = PyUnicode_FromUnicode(NULL, newsize);
if (v == NULL) {
@@ -790,14 +622,14 @@ PyString_Repr(PyObject *obj, int smartquotes)
register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
int quote;
- /* figure out which quote to use; single is preferred */
+ /* Figure out which quote to use; single is preferred */
quote = '\'';
if (smartquotes) {
char *test, *start;
start = PyString_AS_STRING(op);
for (test = start; test < start+length; ++test) {
if (*test == '"') {
- quote = '\''; /* switch back to single quote */
+ quote = '\''; /* back to single */
goto decided;
}
else if (*test == '\'')
@@ -807,8 +639,8 @@ PyString_Repr(PyObject *obj, int smartquotes)
;
}
- *p++ = 's', *p++ = quote;
- for (i = 0; i < Py_Size(op); i++) {
+ *p++ = 'b', *p++ = quote;
+ for (i = 0; i < length; i++) {
/* There's at least enough room for a hex escape
and a closing quote. */
assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
@@ -848,18 +680,14 @@ string_repr(PyObject *op)
}
static PyObject *
-string_str(PyObject *s)
+string_str(PyObject *op)
{
- assert(PyString_Check(s));
- if (PyString_CheckExact(s)) {
- Py_INCREF(s);
- return s;
- }
- else {
- /* Subtype -- return genuine string with the same value. */
- PyStringObject *t = (PyStringObject *) s;
- return PyString_FromStringAndSize(t->ob_sval, Py_Size(t));
+ if (Py_BytesWarningFlag) {
+ if (PyErr_WarnEx(PyExc_BytesWarning,
+ "str() on a bytes instance", 1))
+ return NULL;
}
+ return string_repr(op);
}
static Py_ssize_t
@@ -868,51 +696,53 @@ string_length(PyStringObject *a)
return Py_Size(a);
}
+/* This is also used by PyString_Concat() */
static PyObject *
-string_concat(register PyStringObject *a, register PyObject *bb)
+string_concat(PyObject *a, PyObject *b)
{
- register Py_ssize_t size;
- register PyStringObject *op;
- if (!PyString_Check(bb)) {
- if (PyUnicode_Check(bb))
- return PyUnicode_Concat((PyObject *)a, bb);
- if (PyBytes_Check(bb))
- return PyBytes_Concat((PyObject *)a, bb);
- PyErr_Format(PyExc_TypeError,
- "cannot concatenate 'str8' and '%.200s' objects",
- Py_Type(bb)->tp_name);
- return NULL;
+ Py_ssize_t size;
+ Py_buffer va, vb;
+ PyObject *result = NULL;
+
+ va.len = -1;
+ vb.len = -1;
+ if (_getbuffer(a, &va) < 0 ||
+ _getbuffer(b, &vb) < 0) {
+ PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
+ Py_Type(a)->tp_name, Py_Type(b)->tp_name);
+ goto done;
}
-#define b ((PyStringObject *)bb)
- /* Optimize cases with empty left or right operand */
- if ((Py_Size(a) == 0 || Py_Size(b) == 0) &&
- PyString_CheckExact(a) && PyString_CheckExact(b)) {
- if (Py_Size(a) == 0) {
- Py_INCREF(bb);
- return bb;
- }
- Py_INCREF(a);
- return (PyObject *)a;
+
+ /* Optimize end cases */
+ if (va.len == 0 && PyString_CheckExact(b)) {
+ result = b;
+ Py_INCREF(result);
+ goto done;
+ }
+ if (vb.len == 0 && PyString_CheckExact(a)) {
+ result = a;
+ Py_INCREF(result);
+ goto done;
}
- size = Py_Size(a) + Py_Size(b);
+
+ size = va.len + vb.len;
if (size < 0) {
- PyErr_SetString(PyExc_OverflowError,
- "strings are too large to concat");
- return NULL;
+ PyErr_NoMemory();
+ goto done;
}
- /* Inline PyObject_NewVar */
- op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
- if (op == NULL)
- return PyErr_NoMemory();
- PyObject_INIT_VAR(op, &PyString_Type, size);
- op->ob_shash = -1;
- op->ob_sstate = SSTATE_NOT_INTERNED;
- Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
- Py_MEMCPY(op->ob_sval + Py_Size(a), b->ob_sval, Py_Size(b));
- op->ob_sval[size] = '\0';
- return (PyObject *) op;
-#undef b
+ result = PyString_FromStringAndSize(NULL, size);
+ if (result != NULL) {
+ memcpy(PyString_AS_STRING(result), va.buf, va.len);
+ memcpy(PyString_AS_STRING(result) + va.len, vb.buf, vb.len);
+ }
+
+ done:
+ if (va.len != -1)
+ PyObject_ReleaseBuffer(a, &va);
+ if (vb.len != -1)
+ PyObject_ReleaseBuffer(b, &vb);
+ return result;
}
static PyObject *
@@ -950,7 +780,6 @@ string_repeat(register PyStringObject *a, register Py_ssize_t n)
return PyErr_NoMemory();
PyObject_INIT_VAR(op, &PyString_Type, size);
op->ob_shash = -1;
- op->ob_sstate = SSTATE_NOT_INTERNED;
op->ob_sval[size] = '\0';
if (Py_Size(a) == 1 && n > 0) {
memset(op->ob_sval, a->ob_sval[0] , n);
@@ -970,20 +799,36 @@ string_repeat(register PyStringObject *a, register Py_ssize_t n)
}
static int
-string_contains(PyObject *str_obj, PyObject *sub_obj)
+string_contains(PyObject *self, PyObject *arg)
+{
+ Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
+ if (ival == -1 && PyErr_Occurred()) {
+ Py_buffer varg;
+ int pos;
+ PyErr_Clear();
+ if (_getbuffer(arg, &varg) < 0)
+ return -1;
+ pos = stringlib_find(PyString_AS_STRING(self), Py_Size(self),
+ varg.buf, varg.len, 0);
+ PyObject_ReleaseBuffer(arg, &varg);
+ return pos >= 0;
+ }
+ if (ival < 0 || ival >= 256) {
+ PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
+ return -1;
+ }
+
+ return memchr(PyString_AS_STRING(self), ival, Py_Size(self)) != NULL;
+}
+
+static PyObject *
+string_item(PyStringObject *a, register Py_ssize_t i)
{
- if (!PyString_CheckExact(sub_obj)) {
- if (PyUnicode_Check(sub_obj))
- return PyUnicode_Contains(str_obj, sub_obj);
- if (!PyString_Check(sub_obj)) {
- PyErr_Format(PyExc_TypeError,
- "'in <string>' requires string as left operand, "
- "not %.200s", Py_Type(sub_obj)->tp_name);
- return -1;
- }
+ if (i < 0 || i >= Py_Size(a)) {
+ PyErr_SetString(PyExc_IndexError, "string index out of range");
+ return NULL;
}
-
- return stringlib_contains_obj(str_obj, sub_obj);
+ return PyInt_FromLong((unsigned char)a->ob_sval[i]);
}
static PyObject*
@@ -996,6 +841,15 @@ string_richcompare(PyStringObject *a, PyStringObject *b, int op)
/* Make sure both arguments are strings. */
if (!(PyString_Check(a) && PyString_Check(b))) {
+ if (Py_BytesWarningFlag && (op == Py_EQ) &&
+ (PyObject_IsInstance((PyObject*)a,
+ (PyObject*)&PyUnicode_Type) ||
+ PyObject_IsInstance((PyObject*)b,
+ (PyObject*)&PyUnicode_Type))) {
+ if (PyErr_WarnEx(PyExc_BytesWarning,
+ "Comparsion between bytes and string", 1))
+ return NULL;
+ }
result = Py_NotImplemented;
goto out;
}
@@ -1053,9 +907,9 @@ _PyString_Eq(PyObject *o1, PyObject *o2)
{
PyStringObject *a = (PyStringObject*) o1;
PyStringObject *b = (PyStringObject*) o2;
- return Py_Size(a) == Py_Size(b)
- && *a->ob_sval == *b->ob_sval
- && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
+ return Py_Size(a) == Py_Size(b)
+ && *a->ob_sval == *b->ob_sval
+ && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
}
static long
@@ -1088,12 +942,12 @@ string_subscript(PyStringObject* self, PyObject* item)
return NULL;
if (i < 0)
i += PyString_GET_SIZE(self);
- if (i < 0 || i >= PyString_GET_SIZE(self)) {
+ if (i < 0 || i >= PyString_GET_SIZE(self)) {
PyErr_SetString(PyExc_IndexError,
"string index out of range");
return NULL;
- }
- return PyInt_FromLong((unsigned char)self->ob_sval[i]);
+ }
+ return PyInt_FromLong((unsigned char)self->ob_sval[i]);
}
else if (PySlice_Check(item)) {
Py_ssize_t start, stop, step, slicelength, cur, i;
@@ -1149,14 +1003,15 @@ string_subscript(PyStringObject* self, PyObject* item)
static int
string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
{
- return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_Size(self), 0, flags);
+ return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_Size(self),
+ 0, flags);
}
static PySequenceMethods string_as_sequence = {
(lenfunc)string_length, /*sq_length*/
(binaryfunc)string_concat, /*sq_concat*/
(ssizeargfunc)string_repeat, /*sq_repeat*/
- 0, /*sq_item*/
+ (ssizeargfunc)string_item, /*sq_item*/
0, /*sq_slice*/
0, /*sq_ass_item*/
0, /*sq_ass_slice*/
@@ -1171,7 +1026,7 @@ static PyMappingMethods string_as_mapping = {
static PyBufferProcs string_as_buffer = {
(getbufferproc)string_buffer_getbuffer,
- NULL,
+ NULL,
};
@@ -1297,12 +1152,12 @@ split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
}
PyDoc_STRVAR(split__doc__,
-"S.split([sep [,maxsplit]]) -> list of strings\n\
+"B.split([sep[, maxsplit]]) -> list of bytes\n\
\n\
-Return a list of the words in the string S, using sep as the\n\
-delimiter string. If maxsplit is given, at most maxsplit\n\
-splits are done. If sep is not specified or is None, any\n\
-whitespace string is a separator.");
+Return a list of the sections in B, using sep as the delimiter.\n\
+If sep is not given, B is split on ASCII whitespace characters\n\
+(space, tab, return, newline, formfeed, vertical tab).\n\
+If maxsplit is given, at most maxsplit splits are done.");
static PyObject *
string_split(PyStringObject *self, PyObject *args)
@@ -1310,6 +1165,7 @@ string_split(PyStringObject *self, PyObject *args)
Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Py_ssize_t maxsplit = -1, count=0;
const char *s = PyString_AS_STRING(self), *sub;
+ Py_buffer vsub;
PyObject *list, *str, *subobj = Py_None;
#ifdef USE_FAST
Py_ssize_t pos;
@@ -1321,25 +1177,27 @@ string_split(PyStringObject *self, PyObject *args)
maxsplit = PY_SSIZE_T_MAX;
if (subobj == Py_None)
return split_whitespace(s, len, maxsplit);
- if (PyString_Check(subobj)) {
- sub = PyString_AS_STRING(subobj);
- n = PyString_GET_SIZE(subobj);
- }
- else if (PyUnicode_Check(subobj))
- return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
- else if (PyObject_AsCharBuffer(subobj, &sub, &n))
+ if (_getbuffer(subobj, &vsub) < 0)
return NULL;
+ sub = vsub.buf;
+ n = vsub.len;
if (n == 0) {
PyErr_SetString(PyExc_ValueError, "empty separator");
+ PyObject_ReleaseBuffer(subobj, &vsub);
return NULL;
}
- else if (n == 1)
- return split_char(s, len, sub[0], maxsplit);
+ else if (n == 1) {
+ char ch = sub[0];
+ PyObject_ReleaseBuffer(subobj, &vsub);
+ return split_char(s, len, ch, maxsplit);
+ }
list = PyList_New(PREALLOC_SIZE(maxsplit));
- if (list == NULL)
+ if (list == NULL) {
+ PyObject_ReleaseBuffer(subobj, &vsub);
return NULL;
+ }
#ifdef USE_FAST
i = j = 0;
@@ -1365,19 +1223,21 @@ string_split(PyStringObject *self, PyObject *args)
#endif
SPLIT_ADD(s, i, len);
FIX_PREALLOC_SIZE(list);
+ PyObject_ReleaseBuffer(subobj, &vsub);
return list;
onError:
Py_DECREF(list);
+ PyObject_ReleaseBuffer(subobj, &vsub);
return NULL;
}
PyDoc_STRVAR(partition__doc__,
-"S.partition(sep) -> (head, sep, tail)\n\
+"B.partition(sep) -> (head, sep, tail)\n\
\n\
-Searches for the separator sep in S, and returns the part before it,\n\
+Searches for the separator sep in B, and returns the part before it,\n\
the separator itself, and the part after it. If the separator is not\n\
-found, returns S and two empty strings.");
+found, returns B and two empty bytes objects.");
static PyObject *
string_partition(PyStringObject *self, PyObject *sep_obj)
@@ -1402,11 +1262,12 @@ string_partition(PyStringObject *self, PyObject *sep_obj)
}
PyDoc_STRVAR(rpartition__doc__,
-"S.rpartition(sep) -> (tail, sep, head)\n\
+"B.rpartition(sep) -> (tail, sep, head)\n\
\n\
-Searches for the separator sep in S, starting at the end of S, and returns\n\
-the part before it, the separator itself, and the part after it. If the\n\
-separator is not found, returns two empty strings and S.");
+Searches for the separator sep in B, starting at the end of B,\n\
+and returns the part before it, the separator itself, and the\n\
+part after it. If the separator is not found, returns two empty\n\
+bytes objects and B.");
static PyObject *
string_rpartition(PyStringObject *self, PyObject *sep_obj)
@@ -1450,8 +1311,8 @@ rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
SPLIT_ADD(s, i + 1, j + 1);
}
if (i >= 0) {
- /* Only occurs when maxsplit was reached */
- /* Skip any remaining whitespace and copy to beginning of string */
+ /* Only occurs when maxsplit was reached. Skip any remaining
+ whitespace and copy to beginning of string. */
RSKIP_SPACE(s, i);
if (i >= 0)
SPLIT_ADD(s, 0, i + 1);
@@ -1500,13 +1361,14 @@ rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
}
PyDoc_STRVAR(rsplit__doc__,
-"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
+"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
\n\
-Return a list of the words in the string S, using sep as the\n\
-delimiter string, starting at the end of the string and working\n\
-to the front. If maxsplit is given, at most maxsplit splits are\n\
-done. If sep is not specified or is None, any whitespace string\n\
-is a separator.");
+Return a list of the sections in B, using sep as the delimiter,\n\
+starting at the end of B and working to the front.\n\
+If sep is not given, B is split on ASCII whitespace characters\n\
+(space, tab, return, newline, formfeed, vertical tab).\n\
+If maxsplit is given, at most maxsplit splits are done.");
+
static PyObject *
string_rsplit(PyStringObject *self, PyObject *args)
@@ -1514,6 +1376,7 @@ string_rsplit(PyStringObject *self, PyObject *args)
Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Py_ssize_t maxsplit = -1, count=0;
const char *s = PyString_AS_STRING(self), *sub;
+ Py_buffer vsub;
PyObject *list, *str, *subobj = Py_None;
if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
@@ -1522,25 +1385,27 @@ string_rsplit(PyStringObject *self, PyObject *args)
maxsplit = PY_SSIZE_T_MAX;
if (subobj == Py_None)
return rsplit_whitespace(s, len, maxsplit);
- if (PyString_Check(subobj)) {
- sub = PyString_AS_STRING(subobj);
- n = PyString_GET_SIZE(subobj);
- }
- else if (PyUnicode_Check(subobj))
- return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
- else if (PyObject_AsCharBuffer(subobj, &sub, &n))
+ if (_getbuffer(subobj, &vsub) < 0)
return NULL;
+ sub = vsub.buf;
+ n = vsub.len;
if (n == 0) {
PyErr_SetString(PyExc_ValueError, "empty separator");
+ PyObject_ReleaseBuffer(subobj, &vsub);
return NULL;
}
- else if (n == 1)
- return rsplit_char(s, len, sub[0], maxsplit);
+ else if (n == 1) {
+ char ch = sub[0];
+ PyObject_ReleaseBuffer(subobj, &vsub);
+ return rsplit_char(s, len, ch, maxsplit);
+ }
list = PyList_New(PREALLOC_SIZE(maxsplit));
- if (list == NULL)
+ if (list == NULL) {
+ PyObject_ReleaseBuffer(subobj, &vsub);
return NULL;
+ }
j = len;
i = j - n;
@@ -1559,10 +1424,12 @@ string_rsplit(PyStringObject *self, PyObject *args)
FIX_PREALLOC_SIZE(list);
if (PyList_Reverse(list) < 0)
goto onError;
+ PyObject_ReleaseBuffer(subobj, &vsub);
return list;
onError:
Py_DECREF(list);
+ PyObject_ReleaseBuffer(subobj, &vsub);
return NULL;
}
@@ -1572,13 +1439,13 @@ onError:
PyDoc_STRVAR(join__doc__,
-"S.join(sequence) -> string\n\
+"B.join(iterable_of_bytes) -> bytes\n\
\n\
-Return a string which is the concatenation of the strings in the\n\
-sequence. The separator between elements is S.");
+Concatenates any number of bytes objects, with B in between each pair.\n\
+Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
static PyObject *
-string_join(PyStringObject *self, PyObject *orig)
+string_join(PyObject *self, PyObject *orig)
{
char *sep = PyString_AS_STRING(self);
const Py_ssize_t seplen = PyString_GET_SIZE(self);
@@ -1601,7 +1468,7 @@ string_join(PyStringObject *self, PyObject *orig)
}
if (seqlen == 1) {
item = PySequence_Fast_GET_ITEM(seq, 0);
- if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
+ if (PyString_CheckExact(item)) {
Py_INCREF(item);
Py_DECREF(seq);
return item;
@@ -1611,37 +1478,26 @@ string_join(PyStringObject *self, PyObject *orig)
/* There are at least two things to join, or else we have a subclass
* of the builtin types in the sequence.
* Do a pre-pass to figure out the total amount of space we'll
- * need (sz), see whether any argument is absurd, and defer to
- * the Unicode join if appropriate.
+ * need (sz), and see whether all arguments are bytes.
*/
+ /* XXX Shouldn't we use _getbuffer() on these items instead? */
for (i = 0; i < seqlen; i++) {
const size_t old_sz = sz;
item = PySequence_Fast_GET_ITEM(seq, i);
- if (!PyString_Check(item)){
- if (PyUnicode_Check(item)) {
- /* Defer to Unicode join.
- * CAUTION: There's no gurantee that the
- * original sequence can be iterated over
- * again, so we must pass seq here.
- */
- PyObject *result;
- result = PyUnicode_Join((PyObject *)self, seq);
- Py_DECREF(seq);
- return result;
- }
+ if (!PyString_Check(item) && !PyBytes_Check(item)) {
PyErr_Format(PyExc_TypeError,
- "sequence item %zd: expected string,"
+ "sequence item %zd: expected bytes,"
" %.80s found",
i, Py_Type(item)->tp_name);
Py_DECREF(seq);
return NULL;
}
- sz += PyString_GET_SIZE(item);
+ sz += Py_Size(item);
if (i != 0)
sz += seplen;
if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
PyErr_SetString(PyExc_OverflowError,
- "join() result is too long for a Python string");
+ "join() result is too long for a Python string");
Py_DECREF(seq);
return NULL;
}
@@ -1655,17 +1511,24 @@ string_join(PyStringObject *self, PyObject *orig)
}
/* Catenate everything. */
+ /* I'm not worried about a PyBytes item growing because there's
+ nowhere in this function where we release the GIL. */
p = PyString_AS_STRING(res);
for (i = 0; i < seqlen; ++i) {
size_t n;
- item = PySequence_Fast_GET_ITEM(seq, i);
- n = PyString_GET_SIZE(item);
- Py_MEMCPY(p, PyString_AS_STRING(item), n);
- p += n;
- if (i < seqlen - 1) {
+ char *q;
+ if (i) {
Py_MEMCPY(p, sep, seplen);
p += seplen;
}
+ item = PySequence_Fast_GET_ITEM(seq, i);
+ n = Py_Size(item);
+ if (PyString_Check(item))
+ q = PyString_AS_STRING(item);
+ else
+ q = PyBytes_AS_STRING(item);
+ Py_MEMCPY(p, q, n);
+ p += n;
}
Py_DECREF(seq);
@@ -1677,7 +1540,7 @@ _PyString_Join(PyObject *sep, PyObject *x)
{
assert(sep != NULL && PyString_Check(sep));
assert(x != NULL);
- return string_join((PyStringObject *)sep, x);
+ return string_join(sep, x);
}
Py_LOCAL_INLINE(void)
@@ -1730,7 +1593,7 @@ string_find_internal(PyStringObject *self, PyObject *args, int dir)
PyDoc_STRVAR(find__doc__,
-"S.find(sub [,start [,end]]) -> int\n\
+"B.find(sub [,start [,end]]) -> int\n\
\n\
Return the lowest index in S where substring sub is found,\n\
such that sub is contained within s[start:end]. Optional\n\
@@ -1749,9 +1612,9 @@ string_find(PyStringObject *self, PyObject *args)
PyDoc_STRVAR(index__doc__,
-"S.index(sub [,start [,end]]) -> int\n\
+"B.index(sub [,start [,end]]) -> int\n\
\n\
-Like S.find() but raise ValueError when the substring is not found.");
+Like B.find() but raise ValueError when the substring is not found.");
static PyObject *
string_index(PyStringObject *self, PyObject *args)
@@ -1769,9 +1632,9 @@ string_index(PyStringObject *self, PyObject *args)
PyDoc_STRVAR(rfind__doc__,
-"S.rfind(sub [,start [,end]]) -> int\n\
+"B.rfind(sub [,start [,end]]) -> int\n\
\n\
-Return the highest index in S where substring sub is found,\n\
+Return the highest index in B where substring sub is found,\n\
such that sub is contained within s[start:end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
@@ -1788,9 +1651,9 @@ string_rfind(PyStringObject *self, PyObject *args)
PyDoc_STRVAR(rindex__doc__,
-"S.rindex(sub [,start [,end]]) -> int\n\
+"B.rindex(sub [,start [,end]]) -> int\n\
\n\
-Like S.rfind() but raise ValueError when the substring is not found.");
+Like B.rfind() but raise ValueError when the substring is not found.");
static PyObject *
string_rindex(PyStringObject *self, PyObject *args)
@@ -1810,12 +1673,18 @@ string_rindex(PyStringObject *self, PyObject *args)
Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
{
+ Py_buffer vsep;
char *s = PyString_AS_STRING(self);
Py_ssize_t len = PyString_GET_SIZE(self);
- char *sep = PyString_AS_STRING(sepobj);
- Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
+ char *sep;
+ Py_ssize_t seplen;
Py_ssize_t i, j;
+ if (_getbuffer(sepobj, &vsep) < 0)
+ return NULL;
+ sep = vsep.buf;
+ seplen = vsep.len;
+
i = 0;
if (striptype != RIGHTSTRIP) {
while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
@@ -1831,6 +1700,8 @@ do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
j++;
}
+ PyObject_ReleaseBuffer(sepobj, &vsep);
+
if (i == 0 && j == len && PyString_CheckExact(self)) {
Py_INCREF(self);
return (PyObject*)self;
@@ -1879,36 +1750,17 @@ do_argstrip(PyStringObject *self, int striptype, PyObject *args)
return NULL;
if (sep != NULL && sep != Py_None) {
- if (PyString_Check(sep))
- return do_xstrip(self, striptype, sep);
- else if (PyUnicode_Check(sep)) {
- PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
- PyObject *res;
- if (uniself==NULL)
- return NULL;
- res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
- striptype, sep);
- Py_DECREF(uniself);
- return res;
- }
- PyErr_Format(PyExc_TypeError,
- "%s arg must be None or string",
- STRIPNAME(striptype));
- return NULL;
+ return do_xstrip(self, striptype, sep);
}
-
return do_strip(self, striptype);
}
PyDoc_STRVAR(strip__doc__,
-"S.strip([chars]) -> string\n\
+"B.strip([bytes]) -> bytes\n\
\n\
-Return a copy of the string S with leading and trailing\n\
-whitespace removed.\n\
-If chars is given and not None, remove characters in chars instead.\n\
-If chars is unicode, S will be converted to unicode before stripping");
-
+Strip leading and trailing bytes contained in the argument.\n\
+If the argument is omitted, strip leading and trailing ASCII whitespace.");
static PyObject *
string_strip(PyStringObject *self, PyObject *args)
{
@@ -1920,12 +1772,10 @@ string_strip(PyStringObject *self, PyObject *args)
PyDoc_STRVAR(lstrip__doc__,
-"S.lstrip([chars]) -> string\n\
+"B.lstrip([bytes]) -> bytes\n\
\n\
-Return a copy of the string S with leading whitespace removed.\n\
-If chars is given and not None, remove characters in chars instead.\n\
-If chars is unicode, S will be converted to unicode before stripping");
-
+Strip leading bytes contained in the argument.\n\
+If the argument is omitted, strip leading ASCII whitespace.");
static PyObject *
string_lstrip(PyStringObject *self, PyObject *args)
{
@@ -1937,12 +1787,10 @@ string_lstrip(PyStringObject *self, PyObject *args)
PyDoc_STRVAR(rstrip__doc__,
-"S.rstrip([chars]) -> string\n\
+"B.rstrip([bytes]) -> bytes\n\
\n\
-Return a copy of the string S with trailing whitespace removed.\n\
-If chars is given and not None, remove characters in chars instead.\n\
-If chars is unicode, S will be converted to unicode before stripping");
-
+Strip trailing bytes contained in the argument.\n\
+If the argument is omitted, strip trailing ASCII whitespace.");
static PyObject *
string_rstrip(PyStringObject *self, PyObject *args)
{
@@ -1954,7 +1802,7 @@ string_rstrip(PyStringObject *self, PyObject *args)
PyDoc_STRVAR(count__doc__,
-"S.count(sub[, start[, end]]) -> int\n\
+"B.count(sub [,start [,end]]) -> int\n\
\n\
Return the number of non-overlapping occurrences of substring sub in\n\
string S[start:end]. Optional arguments start and end are interpreted\n\
@@ -1996,12 +1844,12 @@ string_count(PyStringObject *self, PyObject *args)
PyDoc_STRVAR(translate__doc__,
-"S.translate(table [,deletechars]) -> string\n\
+"B.translate(table[, deletechars]) -> bytes\n\
\n\
-Return a copy of the string S, where all characters occurring\n\
-in the optional argument deletechars are removed, and the\n\
-remaining characters have been mapped through the given\n\
-translation table, which must be a string of length 256.");
+Return a copy of B, where all characters occurring in the\n\
+optional argument deletechars are removed, and the remaining\n\
+characters have been mapped through the given translation\n\
+table, which must be a bytes object of length 256.");
static PyObject *
string_translate(PyStringObject *self, PyObject *args)
@@ -2187,7 +2035,7 @@ findstring(const char *target, Py_ssize_t target_len,
return end;
} else {
for (; start <= end; start++)
- if (Py_STRING_MATCH(target, start, pattern, pattern_len))
+ if (Py_STRING_MATCH(target, start,pattern,pattern_len))
return start;
}
return -1;
@@ -2225,14 +2073,15 @@ countstring(const char *target, Py_ssize_t target_len,
end -= pattern_len;
if (direction < 0) {
for (; (end >= start); end--)
- if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
+ if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
count++;
if (--maxcount <= 0) break;
end -= pattern_len-1;
}
} else {
for (; (start <= end); start++)
- if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
+ if (Py_STRING_MATCH(target, start,
+ pattern, pattern_len)) {
count++;
if (--maxcount <= 0)
break;
@@ -2522,12 +2371,14 @@ replace_single_character(PyStringObject *self,
/* result_len = self_len + count * (to_len-1) */
product = count * (to_len-1);
if (product / (to_len-1) != count) {
- PyErr_SetString(PyExc_OverflowError, "replace string is too long");
+ PyErr_SetString(PyExc_OverflowError,
+ "replace string is too long");
return NULL;
}
result_len = self_len + product;
if (result_len < 0) {
- PyErr_SetString(PyExc_OverflowError, "replace string is too long");
+ PyErr_SetString(PyExc_OverflowError,
+ "replace string is too long");
return NULL;
}
@@ -2590,12 +2441,14 @@ replace_substring(PyStringObject *self,
/* result_len = self_len + count * (to_len-from_len) */
product = count * (to_len-from_len);
if (product / (to_len-from_len) != count) {
- PyErr_SetString(PyExc_OverflowError, "replace string is too long");
+ PyErr_SetString(PyExc_OverflowError,
+ "replace string is too long");
return NULL;
}
result_len = self_len + product;
if (result_len < 0) {
- PyErr_SetString(PyExc_OverflowError, "replace string is too long");
+ PyErr_SetString(PyExc_OverflowError,
+ "replace string is too long");
return NULL;
}
@@ -2675,7 +2528,8 @@ replace(PyStringObject *self,
return replace_delete_single_character(
self, from_s[0], maxcount);
} else {
- return replace_delete_substring(self, from_s, from_len, maxcount);
+ return replace_delete_substring(self, from_s,
+ from_len, maxcount);
}
}
@@ -2690,7 +2544,8 @@ replace(PyStringObject *self,
maxcount);
} else {
return replace_substring_in_place(
- self, from_s, from_len, to_s, to_len, maxcount);
+ self, from_s, from_len, to_s, to_len,
+ maxcount);
}
}
@@ -2700,14 +2555,15 @@ replace(PyStringObject *self,
to_s, to_len, maxcount);
} else {
/* len('from')>=2, len('to')>=1 */
- return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
+ return replace_substring(self, from_s, from_len, to_s, to_len,
+ maxcount);
}
}
PyDoc_STRVAR(replace__doc__,
-"S.replace (old, new[, count]) -> string\n\
+"B.replace(old, new[, count]) -> bytes\n\
\n\
-Return a copy of string S with all occurrences of substring\n\
+Return a copy of B with all occurrences of subsection\n\
old replaced by new. If the optional argument count is\n\
given, only the first count occurrences are replaced.");
@@ -2794,11 +2650,11 @@ _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
PyDoc_STRVAR(startswith__doc__,
-"S.startswith(prefix[, start[, end]]) -> bool\n\
+"B.startswith(prefix [,start [,end]]) -> bool\n\
\n\
-Return True if S starts with the specified prefix, False otherwise.\n\
-With optional start, test S beginning at that position.\n\
-With optional end, stop comparing S at that position.\n\
+Return True if B starts with the specified prefix, False otherwise.\n\
+With optional start, test B beginning at that position.\n\
+With optional end, stop comparing B at that position.\n\
prefix can also be a tuple of strings to try.");
static PyObject *
@@ -2835,11 +2691,11 @@ string_startswith(PyStringObject *self, PyObject *args)
PyDoc_STRVAR(endswith__doc__,
-"S.endswith(suffix[, start[, end]]) -> bool\n\
+"B.endswith(suffix [,start [,end]]) -> bool\n\
\n\
-Return True if S ends with the specified suffix, False otherwise.\n\
-With optional start, test S beginning at that position.\n\
-With optional end, stop comparing S at that position.\n\
+Return True if B ends with the specified suffix, False otherwise.\n\
+With optional start, test B beginning at that position.\n\
+With optional end, stop comparing B at that position.\n\
suffix can also be a tuple of strings to try.");
static PyObject *
@@ -2876,63 +2732,50 @@ string_endswith(PyStringObject *self, PyObject *args)
PyDoc_STRVAR(decode__doc__,
-"S.decode([encoding[,errors]]) -> object\n\
+"B.decode([encoding[, errors]]) -> object\n\
\n\
Decodes S using the codec registered for encoding. encoding defaults\n\
to the default encoding. errors may be given to set a different error\n\
-handling scheme. Default is 'strict' meaning that encoding errors raise\n\
-a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
+handling scheme. Default is 'strict' meaning that encoding errors raise\n\
+a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
as well as any other name registerd with codecs.register_error that is\n\
able to handle UnicodeDecodeErrors.");
static PyObject *
-string_decode(PyStringObject *self, PyObject *args)
+string_decode(PyObject *self, PyObject *args)
{
- char *encoding = NULL;
- char *errors = NULL;
- PyObject *v;
-
- if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
- return NULL;
- v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
- if (v == NULL)
- goto onError;
- if (!PyString_Check(v) && !PyUnicode_Check(v)) {
- PyErr_Format(PyExc_TypeError,
- "decoder did not return a string/unicode object "
- "(type=%.400s)",
- Py_Type(v)->tp_name);
- Py_DECREF(v);
- return NULL;
- }
- return v;
+ const char *encoding = NULL;
+ const char *errors = NULL;
- onError:
- return NULL;
+ if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
+ return NULL;
+ if (encoding == NULL)
+ encoding = PyUnicode_GetDefaultEncoding();
+ return PyCodec_Decode(self, encoding, errors);
}
PyDoc_STRVAR(fromhex_doc,
-"str8.fromhex(string) -> str8\n\
+"bytes.fromhex(string) -> bytes\n\
\n\
-Create a str8 object from a string of hexadecimal numbers.\n\
-Spaces between two numbers are accepted. Example:\n\
-str8.fromhex('10 1112') -> s'\\x10\\x11\\x12'.");
+Create a bytes object from a string of hexadecimal numbers.\n\
+Spaces between two numbers are accepted.\n\
+Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
static int
hex_digit_to_int(Py_UNICODE c)
{
- if (c >= 128)
- return -1;
- if (ISDIGIT(c))
- return c - '0';
- else {
- if (ISUPPER(c))
- c = TOLOWER(c);
- if (c >= 'a' && c <= 'f')
- return c - 'a' + 10;
- }
- return -1;
+ if (c >= 128)
+ return -1;
+ if (ISDIGIT(c))
+ return c - '0';
+ else {
+ if (ISUPPER(c))
+ c = TOLOWER(c);
+ if (c >= 'a' && c <= 'f')
+ return c - 'a' + 10;
+ }
+ return -1;
}
static PyObject *
@@ -2975,7 +2818,7 @@ string_fromhex(PyObject *cls, PyObject *args)
return newstring;
error:
- Py_DECREF(newstring);
+ Py_XDECREF(newstring);
return NULL;
}
@@ -3058,11 +2901,11 @@ string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
const char *errors = NULL;
PyObject *new = NULL;
Py_ssize_t i, size;
- static char *kwlist[] = {"object", "encoding", "errors", 0};
+ static char *kwlist[] = {"source", "encoding", "errors", 0};
if (type != &PyString_Type)
return str_subtype_new(type, args, kwds);
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str8", kwlist, &x,
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
&encoding, &errors))
return NULL;
if (x == NULL) {
@@ -3085,34 +2928,37 @@ string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
new = PyCodec_Encode(x, encoding, errors);
if (new == NULL)
return NULL;
- /* XXX(gb): must accept bytes here since codecs output bytes
- at the moment */
- if (PyBytes_Check(new)) {
- PyObject *str;
- str = PyString_FromString(PyBytes_AsString(new));
- Py_DECREF(new);
- if (!str)
- return NULL;
- return str;
- }
- if (!PyString_Check(new)) {
- PyErr_Format(PyExc_TypeError,
- "encoder did not return a str8 "
- "object (type=%.400s)",
- Py_Type(new)->tp_name);
- Py_DECREF(new);
- return NULL;
- }
+ assert(PyString_Check(new));
return new;
}
/* If it's not unicode, there can't be encoding or errors */
if (encoding != NULL || errors != NULL) {
PyErr_SetString(PyExc_TypeError,
- "encoding or errors without a string argument");
+ "encoding or errors without a string argument");
return NULL;
}
+ /* Is it an int? */
+ size = PyNumber_AsSsize_t(x, PyExc_ValueError);
+ if (size == -1 && PyErr_Occurred()) {
+ PyErr_Clear();
+ }
+ else {
+ if (size < 0) {
+ PyErr_SetString(PyExc_ValueError, "negative count");
+ return NULL;
+ }
+ new = PyString_FromStringAndSize(NULL, size);
+ if (new == NULL) {
+ return NULL;
+ }
+ if (size > 0) {
+ memset(((PyStringObject*)new)->ob_sval, 0, size);
+ }
+ return new;
+ }
+
/* Use the modern buffer interface */
if (PyObject_CheckBuffer(x)) {
Py_buffer view;
@@ -3133,8 +2979,10 @@ string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return NULL;
}
- /* For the iterator version, create a string object and resize as needed. */
- /* XXX(gb): is 64 a good value? also, optimize this if length is known */
+ /* For iterator version, create a string object and resize as needed */
+ /* XXX(gb): is 64 a good value? also, optimize if length is known */
+ /* XXX(guido): perhaps use PySequence_Fast() -- I can't imagine the
+ input being a truly long iterator. */
size = 64;
new = PyString_FromStringAndSize(NULL, size);
if (new == NULL)
@@ -3158,9 +3006,9 @@ string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
item = iternext(it);
if (item == NULL) {
if (PyErr_Occurred()) {
- if (!PyErr_ExceptionMatches(PyExc_StopIteration))
- goto error;
- PyErr_Clear();
+ if (!PyErr_ExceptionMatches(PyExc_StopIteration))
+ goto error;
+ PyErr_Clear();
}
break;
}
@@ -3193,7 +3041,7 @@ string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return new;
error:
- /* Error handling when it != NULL */
+ /* Error handling when new != NULL */
Py_XDECREF(it);
Py_DECREF(new);
return NULL;
@@ -3213,43 +3061,32 @@ str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
n = PyString_GET_SIZE(tmp);
pnew = type->tp_alloc(type, n);
if (pnew != NULL) {
- Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
+ Py_MEMCPY(PyString_AS_STRING(pnew),
+ PyString_AS_STRING(tmp), n+1);
((PyStringObject *)pnew)->ob_shash =
((PyStringObject *)tmp)->ob_shash;
- ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
}
Py_DECREF(tmp);
return pnew;
}
-static PyObject *
-string_mod(PyObject *v, PyObject *w)
-{
- if (!PyString_Check(v)) {
- Py_INCREF(Py_NotImplemented);
- return Py_NotImplemented;
- }
- return PyString_Format(v, w);
-}
-
-static PyNumberMethods string_as_number = {
- 0, /*nb_add*/
- 0, /*nb_subtract*/
- 0, /*nb_multiply*/
- string_mod, /*nb_remainder*/
-};
-
PyDoc_STRVAR(string_doc,
-"str(object) -> string\n\
+"bytes(iterable_of_ints) -> bytes.\n\
+bytes(string, encoding[, errors]) -> bytes\n\
+bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer.\n\
+bytes(memory_view) -> bytes.\n\
\n\
-Return a nice string representation of the object.\n\
-If the argument is a string, the return value is the same object.");
+Construct an immutable array of bytes from:\n\
+ - an iterable yielding integers in range(256)\n\
+ - a text string encoded using the specified encoding\n\
+ - a bytes or a buffer object\n\
+ - any object implementing the buffer API.");
static PyObject *str_iter(PyObject *seq);
PyTypeObject PyString_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
- "str8",
+ "bytes",
sizeof(PyStringObject),
sizeof(char),
string_dealloc, /* tp_dealloc */
@@ -3257,8 +3094,8 @@ PyTypeObject PyString_Type = {
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
- string_repr, /* tp_repr */
- &string_as_number, /* tp_as_number */
+ (reprfunc)string_repr, /* tp_repr */
+ 0, /* tp_as_number */
&string_as_sequence, /* tp_as_sequence */
&string_as_mapping, /* tp_as_mapping */
(hashfunc)string_hash, /* tp_hash */
@@ -3294,14 +3131,15 @@ void
PyString_Concat(register PyObject **pv, register PyObject *w)
{
register PyObject *v;
+ assert(pv != NULL);
if (*pv == NULL)
return;
- if (w == NULL || !PyString_Check(*pv)) {
+ if (w == NULL) {
Py_DECREF(*pv);
*pv = NULL;
return;
}
- v = string_concat((PyStringObject *) *pv, w);
+ v = string_concat(*pv, w);
Py_DECREF(*pv);
*pv = v;
}
@@ -3334,8 +3172,7 @@ _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
register PyObject *v;
register PyStringObject *sv;
v = *pv;
- if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0 ||
- PyString_CHECK_INTERNED(v)) {
+ if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0) {
*pv = 0;
Py_DECREF(v);
PyErr_BadInternalCall();
@@ -3359,85 +3196,6 @@ _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
return 0;
}
-/* Helpers for formatstring */
-
-Py_LOCAL_INLINE(PyObject *)
-getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
-{
- Py_ssize_t argidx = *p_argidx;
- if (argidx < arglen) {
- (*p_argidx)++;
- if (arglen < 0)
- return args;
- else
- return PyTuple_GetItem(args, argidx);
- }
- PyErr_SetString(PyExc_TypeError,
- "not enough arguments for format string");
- return NULL;
-}
-
-/* Format codes
- * F_LJUST '-'
- * F_SIGN '+'
- * F_BLANK ' '
- * F_ALT '#'
- * F_ZERO '0'
- */
-#define F_LJUST (1<<0)
-#define F_SIGN (1<<1)
-#define F_BLANK (1<<2)
-#define F_ALT (1<<3)
-#define F_ZERO (1<<4)
-
-Py_LOCAL_INLINE(int)
-formatfloat(char *buf, size_t buflen, int flags,
- int prec, int type, PyObject *v)
-{
- /* fmt = '%#.' + `prec` + `type`
- worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
- char fmt[20];
- double x;
- x = PyFloat_AsDouble(v);
- if (x == -1.0 && PyErr_Occurred()) {
- PyErr_Format(PyExc_TypeError, "float argument required, "
- "not %.200s", Py_Type(v)->tp_name);
- return -1;
- }
- if (prec < 0)
- prec = 6;
- if (type == 'f' && fabs(x)/1e25 >= 1e25)
- type = 'g';
- /* Worst case length calc to ensure no buffer overrun:
-
- 'g' formats:
- fmt = %#.<prec>g
- buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
- for any double rep.)
- len = 1 + prec + 1 + 2 + 5 = 9 + prec
-
- 'f' formats:
- buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
- len = 1 + 50 + 1 + prec = 52 + prec
-
- If prec=0 the effective precision is 1 (the leading digit is
- always given), therefore increase the length by one.
-
- */
- if (((type == 'g' || type == 'G') &&
- buflen <= (size_t)10 + (size_t)prec) ||
- (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
- PyErr_SetString(PyExc_OverflowError,
- "formatted float is too long (precision too large?)");
- return -1;
- }
- PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
- (flags&F_ALT) ? "#" : "",
- prec, type);
- PyOS_ascii_formatd(buf, buflen, fmt, x);
- return (int)strlen(buf);
-}
-
/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
* the F_ALT flag, for Python's long (unbounded) ints. It's not used for
* Python's regular ints.
@@ -3516,7 +3274,8 @@ _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
}
llen = PyString_Size(result);
if (llen > INT_MAX) {
- PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
+ PyErr_SetString(PyExc_ValueError,
+ "string too large in _PyString_FormatLong");
return NULL;
}
len = (int)llen;
@@ -3534,7 +3293,7 @@ _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
(type == 'o' || type == 'x' || type == 'X'))) {
assert(buf[sign] == '0');
assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
- buf[sign+1] == 'o');
+ buf[sign+1] == 'o');
numnondigits -= 2;
buf += 2;
len -= 2;
@@ -3580,623 +3339,6 @@ _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
return result;
}
-Py_LOCAL_INLINE(int)
-formatint(char *buf, size_t buflen, int flags,
- int prec, int type, PyObject *v)
-{
- /* fmt = '%#.' + `prec` + 'l' + `type`
- worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
- + 1 + 1 = 24 */
- char fmt[64]; /* plenty big enough! */
- char *sign;
- long x;
-
- x = PyInt_AsLong(v);
- if (x == -1 && PyErr_Occurred()) {
- PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
- Py_Type(v)->tp_name);
- return -1;
- }
- if (x < 0 && type == 'u') {
- type = 'd';
- }
- if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
- sign = "-";
- else
- sign = "";
- if (prec < 0)
- prec = 1;
-
- if ((flags & F_ALT) &&
- (type == 'x' || type == 'X' || type == 'o')) {
- /* When converting under %#o, %#x or %#X, there are a number
- * of issues that cause pain:
- * - for %#o, we want a different base marker than C
- * - when 0 is being converted, the C standard leaves off
- * the '0x' or '0X', which is inconsistent with other
- * %#x/%#X conversions and inconsistent with Python's
- * hex() function
- * - there are platforms that violate the standard and
- * convert 0 with the '0x' or '0X'
- * (Metrowerks, Compaq Tru64)
- * - there are platforms that give '0x' when converting
- * under %#X, but convert 0 in accordance with the
- * standard (OS/2 EMX)
- *
- * We can achieve the desired consistency by inserting our
- * own '0x' or '0X' prefix, and substituting %x/%X in place
- * of %#x/%#X.
- *
- * Note that this is the same approach as used in
- * formatint() in unicodeobject.c
- */
- PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
- sign, type, prec, type);
- }
- else {
- PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
- sign, (flags&F_ALT) ? "#" : "",
- prec, type);
- }
-
- /* buf = '+'/'-'/'' + '0o'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
- * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
- */
- if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
- PyErr_SetString(PyExc_OverflowError,
- "formatted integer is too long (precision too large?)");
- return -1;
- }
- if (sign[0])
- PyOS_snprintf(buf, buflen, fmt, -x);
- else
- PyOS_snprintf(buf, buflen, fmt, x);
- return (int)strlen(buf);
-}
-
-Py_LOCAL_INLINE(int)
-formatchar(char *buf, size_t buflen, PyObject *v)
-{
- /* presume that the buffer is at least 2 characters long */
- if (PyString_Check(v)) {
- if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
- return -1;
- }
- else {
- if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
- return -1;
- }
- buf[1] = '\0';
- return 1;
-}
-
-/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
-
- FORMATBUFLEN is the length of the buffer in which the floats, ints, &
- chars are formatted. XXX This is a magic number. Each formatting
- routine does bounds checking to ensure no overflow, but a better
- solution may be to malloc a buffer of appropriate size for each
- format. For now, the current solution is sufficient.
-*/
-#define FORMATBUFLEN (size_t)120
-
-PyObject *
-PyString_Format(PyObject *format, PyObject *args)
-{
- char *fmt, *res;
- Py_ssize_t arglen, argidx;
- Py_ssize_t reslen, rescnt, fmtcnt;
- int args_owned = 0;
- PyObject *result, *orig_args;
- PyObject *v, *w;
- PyObject *dict = NULL;
- if (format == NULL || !PyString_Check(format) || args == NULL) {
- PyErr_BadInternalCall();
- return NULL;
- }
- orig_args = args;
- fmt = PyString_AS_STRING(format);
- fmtcnt = PyString_GET_SIZE(format);
- reslen = rescnt = fmtcnt + 100;
- result = PyString_FromStringAndSize((char *)NULL, reslen);
- if (result == NULL)
- return NULL;
- res = PyString_AsString(result);
- if (PyTuple_Check(args)) {
- arglen = PyTuple_GET_SIZE(args);
- argidx = 0;
- }
- else {
- arglen = -1;
- argidx = -2;
- }
- if (Py_Type(args)->tp_as_mapping && !PyTuple_Check(args) &&
- !PyString_Check(args) && !PyUnicode_Check(args))
- dict = args;
- while (--fmtcnt >= 0) {
- if (*fmt != '%') {
- if (--rescnt < 0) {
- rescnt = fmtcnt + 100;
- reslen += rescnt;
- if (_PyString_Resize(&result, reslen) < 0)
- return NULL;
- res = PyString_AS_STRING(result)
- + reslen - rescnt;
- --rescnt;
- }
- *res++ = *fmt++;
- }
- else {
- /* Got a format specifier */
- int flags = 0;
- Py_ssize_t width = -1;
- int prec = -1;
- int c = '\0';
- int fill;
- PyObject *v = NULL;
- PyObject *temp = NULL;
- char *pbuf;
- int sign;
- Py_ssize_t len;
- char formatbuf[FORMATBUFLEN];
- /* For format{float,int,char}() */
- char *fmt_start = fmt;
- Py_ssize_t argidx_start = argidx;
-
- fmt++;
- if (*fmt == '(') {
- char *keystart;
- Py_ssize_t keylen;
- PyObject *key;
- int pcount = 1;
-
- if (dict == NULL) {
- PyErr_SetString(PyExc_TypeError,
- "format requires a mapping");
- goto error;
- }
- ++fmt;
- --fmtcnt;
- keystart = fmt;
- /* Skip over balanced parentheses */
- while (pcount > 0 && --fmtcnt >= 0) {
- if (*fmt == ')')
- --pcount;
- else if (*fmt == '(')
- ++pcount;
- fmt++;
- }
- keylen = fmt - keystart - 1;
- if (fmtcnt < 0 || pcount > 0) {
- PyErr_SetString(PyExc_ValueError,
- "incomplete format key");
- goto error;
- }
- key = PyString_FromStringAndSize(keystart,
- keylen);
- if (key == NULL)
- goto error;
- if (args_owned) {
- Py_DECREF(args);
- args_owned = 0;
- }
- args = PyObject_GetItem(dict, key);
- Py_DECREF(key);
- if (args == NULL) {
- goto error;
- }
- args_owned = 1;
- arglen = -1;
- argidx = -2;
- }
- while (--fmtcnt >= 0) {
- switch (c = *fmt++) {
- case '-': flags |= F_LJUST; continue;
- case '+': flags |= F_SIGN; continue;
- case ' ': flags |= F_BLANK; continue;
- case '#': flags |= F_ALT; continue;
- case '0': flags |= F_ZERO; continue;
- }
- break;
- }
- if (c == '*') {
- v = getnextarg(args, arglen, &argidx);
- if (v == NULL)
- goto error;
- if (!PyInt_Check(v)) {
- PyErr_SetString(PyExc_TypeError,
- "* wants int");
- goto error;
- }
- width = PyInt_AsLong(v);
- if (width == -1 && PyErr_Occurred())
- goto error;
- if (width < 0) {
- flags |= F_LJUST;
- width = -width;
- }
- if (--fmtcnt >= 0)
- c = *fmt++;
- }
- else if (c >= 0 && ISDIGIT(c)) {
- width = c - '0';
- while (--fmtcnt >= 0) {
- c = Py_CHARMASK(*fmt++);
- if (!ISDIGIT(c))
- break;
- if ((width*10) / 10 != width) {
- PyErr_SetString(
- PyExc_ValueError,
- "width too big");
- goto error;
- }
- width = width*10 + (c - '0');
- }
- }
- if (c == '.') {
- prec = 0;
- if (--fmtcnt >= 0)
- c = *fmt++;
- if (c == '*') {
- v = getnextarg(args, arglen, &argidx);
- if (v == NULL)
- goto error;
- if (!PyInt_Check(v)) {
- PyErr_SetString(
- PyExc_TypeError,
- "* wants int");
- goto error;
- }
- prec = PyInt_AsLong(v);
- if (prec == -1 && PyErr_Occurred())
- goto error;
- if (prec < 0)
- prec = 0;
- if (--fmtcnt >= 0)
- c = *fmt++;
- }
- else if (c >= 0 && ISDIGIT(c)) {
- prec = c - '0';
- while (--fmtcnt >= 0) {
- c = Py_CHARMASK(*fmt++);
- if (!ISDIGIT(c))
- break;
- if ((prec*10) / 10 != prec) {
- PyErr_SetString(
- PyExc_ValueError,
- "prec too big");
- goto error;
- }
- prec = prec*10 + (c - '0');
- }
- }
- } /* prec */
- if (fmtcnt >= 0) {
- if (c == 'h' || c == 'l' || c == 'L') {
- if (--fmtcnt >= 0)
- c = *fmt++;
- }
- }
- if (fmtcnt < 0) {
- PyErr_SetString(PyExc_ValueError,
- "incomplete format");
- goto error;
- }
- if (c != '%') {
- v = getnextarg(args, arglen, &argidx);
- if (v == NULL)
- goto error;
- }
- sign = 0;
- fill = ' ';
- switch (c) {
- case '%':
- pbuf = "%";
- len = 1;
- break;
- case 's':
- if (PyUnicode_Check(v)) {
- fmt = fmt_start;
- argidx = argidx_start;
- goto unicode;
- }
- temp = _PyObject_Str(v);
- if (temp != NULL && PyUnicode_Check(temp)) {
- Py_DECREF(temp);
- fmt = fmt_start;
- argidx = argidx_start;
- goto unicode;
- }
- /* Fall through */
- case 'r':
- if (c == 'r')
- temp = PyObject_ReprStr8(v);
- if (temp == NULL)
- goto error;
- if (!PyString_Check(temp)) {
- PyErr_SetString(PyExc_TypeError,
- "%s argument has non-string str()/repr()");
- Py_DECREF(temp);
- goto error;
- }
- pbuf = PyString_AS_STRING(temp);
- len = PyString_GET_SIZE(temp);
- if (prec >= 0 && len > prec)
- len = prec;
- break;
- case 'i':
- case 'd':
- case 'u':
- case 'o':
- case 'x':
- case 'X':
- if (c == 'i')
- c = 'd';
- if (PyLong_Check(v)) {
- int ilen;
- temp = _PyString_FormatLong(v, flags,
- prec, c, &pbuf, &ilen);
- len = ilen;
- if (!temp)
- goto error;
- sign = 1;
- }
- else {
- pbuf = formatbuf;
- len = formatint(pbuf,
- sizeof(formatbuf),
- flags, prec, c, v);
- if (len < 0)
- goto error;
- sign = 1;
- }
- if (flags & F_ZERO)
- fill = '0';
- break;
- case 'e':
- case 'E':
- case 'f':
- case 'F':
- case 'g':
- case 'G':
- if (c == 'F')
- c = 'f';
- pbuf = formatbuf;
- len = formatfloat(pbuf, sizeof(formatbuf),
- flags, prec, c, v);
- if (len < 0)
- goto error;
- sign = 1;
- if (flags & F_ZERO)
- fill = '0';
- break;
- case 'c':
- if (PyUnicode_Check(v)) {
- fmt = fmt_start;
- argidx = argidx_start;
- goto unicode;
- }
- pbuf = formatbuf;
- len = formatchar(pbuf, sizeof(formatbuf), v);
- if (len < 0)
- goto error;
- break;
- default:
- PyErr_Format(PyExc_ValueError,
- "unsupported format character '%c' (0x%x) "
- "at index %zd",
- c, c,
- (Py_ssize_t)(fmt - 1 -
- PyString_AsString(format)));
- goto error;
- }
- if (sign) {
- if (*pbuf == '-' || *pbuf == '+') {
- sign = *pbuf++;
- len--;
- }
- else if (flags & F_SIGN)
- sign = '+';
- else if (flags & F_BLANK)
- sign = ' ';
- else
- sign = 0;
- }
- if (width < len)
- width = len;
- if (rescnt - (sign != 0) < width) {
- reslen -= rescnt;
- rescnt = width + fmtcnt + 100;
- reslen += rescnt;
- if (reslen < 0) {
- Py_DECREF(result);
- Py_XDECREF(temp);
- return PyErr_NoMemory();
- }
- if (_PyString_Resize(&result, reslen) < 0) {
- Py_XDECREF(temp);
- return NULL;
- }
- res = PyString_AS_STRING(result)
- + reslen - rescnt;
- }
- if (sign) {
- if (fill != ' ')
- *res++ = sign;
- rescnt--;
- if (width > len)
- width--;
- }
- if ((flags & F_ALT) &&
- (c == 'x' || c == 'X' || c == 'o')) {
- assert(pbuf[0] == '0');
- assert(pbuf[1] == c);
- if (fill != ' ') {
- *res++ = *pbuf++;
- *res++ = *pbuf++;
- }
- rescnt -= 2;
- width -= 2;
- if (width < 0)
- width = 0;
- len -= 2;
- }
- if (width > len && !(flags & F_LJUST)) {
- do {
- --rescnt;
- *res++ = fill;
- } while (--width > len);
- }
- if (fill == ' ') {
- if (sign)
- *res++ = sign;
- if ((flags & F_ALT) &&
- (c == 'x' || c == 'X' || c == 'o')) {
- assert(pbuf[0] == '0');
- assert(pbuf[1] == c);
- *res++ = *pbuf++;
- *res++ = *pbuf++;
- }
- }
- Py_MEMCPY(res, pbuf, len);
- res += len;
- rescnt -= len;
- while (--width >= len) {
- --rescnt;
- *res++ = ' ';
- }
- if (dict && (argidx < arglen) && c != '%') {
- PyErr_SetString(PyExc_TypeError,
- "not all arguments converted during string formatting");
- Py_XDECREF(temp);
- goto error;
- }
- Py_XDECREF(temp);
- } /* '%' */
- } /* until end */
- if (argidx < arglen && !dict) {
- PyErr_SetString(PyExc_TypeError,
- "not all arguments converted during string formatting");
- goto error;
- }
- if (args_owned) {
- Py_DECREF(args);
- }
- _PyString_Resize(&result, reslen - rescnt);
- return result;
-
- unicode:
- if (args_owned) {
- Py_DECREF(args);
- args_owned = 0;
- }
- /* Fiddle args right (remove the first argidx arguments) */
- if (PyTuple_Check(orig_args) && argidx > 0) {
- PyObject *v;
- Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
- v = PyTuple_New(n);
- if (v == NULL)
- goto error;
- while (--n >= 0) {
- PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
- Py_INCREF(w);
- PyTuple_SET_ITEM(v, n, w);
- }
- args = v;
- } else {
- Py_INCREF(orig_args);
- args = orig_args;
- }
- args_owned = 1;
- /* Take what we have of the result and let the Unicode formatting
- function format the rest of the input. */
- rescnt = res - PyString_AS_STRING(result);
- if (_PyString_Resize(&result, rescnt))
- goto error;
- fmtcnt = PyString_GET_SIZE(format) - \
- (fmt - PyString_AS_STRING(format));
- format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
- if (format == NULL)
- goto error;
- v = PyUnicode_Format(format, args);
- Py_DECREF(format);
- if (v == NULL)
- goto error;
- /* Paste what we have (result) to what the Unicode formatting
- function returned (v) and return the result (or error) */
- w = PyUnicode_Concat(result, v);
- Py_DECREF(result);
- Py_DECREF(v);
- Py_DECREF(args);
- return w;
-
- error:
- Py_DECREF(result);
- if (args_owned) {
- Py_DECREF(args);
- }
- return NULL;
-}
-
-void
-PyString_InternInPlace(PyObject **p)
-{
- register PyStringObject *s = (PyStringObject *)(*p);
- PyObject *t;
- if (s == NULL || !PyString_Check(s))
- Py_FatalError("PyString_InternInPlace: strings only please!");
- /* If it's a string subclass, we don't really know what putting
- it in the interned dict might do. */
- if (!PyString_CheckExact(s))
- return;
- if (PyString_CHECK_INTERNED(s))
- return;
- if (interned == NULL) {
- interned = PyDict_New();
- if (interned == NULL) {
- PyErr_Clear(); /* Don't leave an exception */
- return;
- }
- }
- t = PyDict_GetItem(interned, (PyObject *)s);
- if (t) {
- Py_INCREF(t);
- Py_DECREF(*p);
- *p = t;
- return;
- }
-
- if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
- PyErr_Clear();
- return;
- }
- /* The two references in interned are not counted by refcnt.
- The string deallocator will take care of this */
- Py_Refcnt(s) -= 2;
- PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
-}
-
-void
-PyString_InternImmortal(PyObject **p)
-{
- PyString_InternInPlace(p);
- if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
- PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
- Py_INCREF(*p);
- }
-}
-
-
-PyObject *
-PyString_InternFromString(const char *cp)
-{
- PyObject *s = PyString_FromString(cp);
- if (s == NULL)
- return NULL;
- PyString_InternInPlace(&s);
- return s;
-}
-
void
PyString_Fini(void)
{
@@ -4209,58 +3351,6 @@ PyString_Fini(void)
nullstring = NULL;
}
-void _Py_ReleaseInternedStrings(void)
-{
- PyObject *keys;
- PyStringObject *s;
- Py_ssize_t i, n;
- Py_ssize_t immortal_size = 0, mortal_size = 0;
-
- if (interned == NULL || !PyDict_Check(interned))
- return;
- keys = PyDict_Keys(interned);
- if (keys == NULL || !PyList_Check(keys)) {
- PyErr_Clear();
- return;
- }
-
- /* Since _Py_ReleaseInternedStrings() is intended to help a leak
- detector, interned strings are not forcibly deallocated; rather, we
- give them their stolen references back, and then clear and DECREF
- the interned dict. */
-
- n = PyList_GET_SIZE(keys);
- fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
- n);
- for (i = 0; i < n; i++) {
- s = (PyStringObject *) PyList_GET_ITEM(keys, i);
- switch (s->ob_sstate) {
- case SSTATE_NOT_INTERNED:
- /* XXX Shouldn't happen */
- break;
- case SSTATE_INTERNED_IMMORTAL:
- Py_Refcnt(s) += 1;
- immortal_size += Py_Size(s);
- break;
- case SSTATE_INTERNED_MORTAL:
- Py_Refcnt(s) += 2;
- mortal_size += Py_Size(s);
- break;
- default:
- Py_FatalError("Inconsistent interned string state.");
- }
- s->ob_sstate = SSTATE_NOT_INTERNED;
- }
- fprintf(stderr, "total size of all interned strings: "
- "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
- "mortal/immortal\n", mortal_size, immortal_size);
- Py_DECREF(keys);
- PyDict_Clear(interned);
- Py_DECREF(interned);
- interned = NULL;
-}
-
-
/*********************** Str Iterator ****************************/
typedef struct {
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 44cf5f1..4266a7c 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -1015,7 +1015,7 @@ class_name(PyObject *cls)
if (name == NULL) {
PyErr_Clear();
Py_XDECREF(name);
- name = PyObject_ReprStr8(cls);
+ name = PyObject_Repr(cls);
}
if (name == NULL)
return NULL;
@@ -1654,7 +1654,7 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds)
}
/* Check arguments: (name, bases, dict) */
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "SO!O!:type", kwlist,
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "UO!O!:type", kwlist,
&name,
&PyTuple_Type, &bases,
&PyDict_Type, &dict))
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index c568a8e..ae34c9e 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -101,7 +101,7 @@ extern "C" {
function will delete the reference from this dictionary.
Another way to look at this is that to say that the actual reference
- count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
+ count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
*/
static PyObject *interned;
@@ -998,7 +998,10 @@ PyObject *PyUnicode_FromObject(register PyObject *obj)
return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(obj),
PyUnicode_GET_SIZE(obj));
}
- return PyUnicode_FromEncodedObject(obj, NULL, "strict");
+ PyErr_Format(PyExc_TypeError,
+ "Can't convert '%.100s' object to str implicitly",
+ Py_Type(obj)->tp_name);
+ return NULL;
}
PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
@@ -1219,22 +1222,7 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
v = PyCodec_Encode(unicode, encoding, errors);
if (v == NULL)
goto onError;
- if (!PyBytes_Check(v)) {
- if (PyString_Check(v)) {
- /* Old codec, turn it into bytes */
- PyObject *b = PyBytes_FromObject(v);
- Py_DECREF(v);
- return b;
- }
- PyErr_Format(PyExc_TypeError,
- "encoder did not return a bytes object "
- "(type=%.400s, encoding=%.20s, errors=%.20s)",
- v->ob_type->tp_name,
- encoding ? encoding : "NULL",
- errors ? errors : "NULL");
- Py_DECREF(v);
- goto onError;
- }
+ assert(PyString_Check(v));
return v;
onError:
@@ -1245,19 +1233,15 @@ PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
const char *errors)
{
PyObject *v = ((PyUnicodeObject *)unicode)->defenc;
- PyObject *b;
if (v)
return v;
if (errors != NULL)
Py_FatalError("non-NULL encoding in _PyUnicode_AsDefaultEncodedString");
- b = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
+ v = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode),
NULL);
- if (!b)
+ if (!v)
return NULL;
- v = PyString_FromStringAndSize(PyBytes_AsString(b),
- PyBytes_Size(b));
- Py_DECREF(b);
((PyUnicodeObject *)unicode)->defenc = v;
return v;
}
@@ -1420,11 +1404,11 @@ int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler
inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject);
if (!inputobj)
goto onError;
- if (!PyBytes_Check(inputobj)) {
+ if (!PyString_Check(inputobj)) {
PyErr_Format(PyExc_TypeError, "exception attribute object must be bytes");
}
- *input = PyBytes_AS_STRING(inputobj);
- insize = PyBytes_GET_SIZE(inputobj);
+ *input = PyString_AS_STRING(inputobj);
+ insize = PyString_GET_SIZE(inputobj);
*inend = *input + insize;
/* we can DECREF safely, as the exception has another reference,
so the object won't go away. */
@@ -1674,7 +1658,7 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s,
int encodeWhiteSpace,
const char *errors)
{
- PyObject *v;
+ PyObject *v, *result;
/* It might be possible to tighten this worst case */
Py_ssize_t cbAllocated = 5 * size;
int inShift = 0;
@@ -1685,7 +1669,7 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s,
char * start;
if (size == 0)
- return PyBytes_FromStringAndSize(NULL, 0);
+ return PyString_FromStringAndSize(NULL, 0);
v = PyBytes_FromStringAndSize(NULL, cbAllocated);
if (v == NULL)
@@ -1757,11 +1741,9 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s,
*out++ = '-';
}
- if (PyBytes_Resize(v, out - start)) {
- Py_DECREF(v);
- return NULL;
- }
- return v;
+ result = PyString_FromStringAndSize(PyBytes_AS_STRING(v), out - start);
+ Py_DECREF(v);
+ return result;
}
#undef SPECIAL
@@ -2001,11 +1983,11 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s,
{
#define MAX_SHORT_UNICHARS 300 /* largest size we'll do on the stack */
- Py_ssize_t i; /* index into s of next input byte */
- PyObject *v; /* result string object */
- char *p; /* next free byte in output buffer */
- Py_ssize_t nallocated; /* number of result bytes allocated */
- Py_ssize_t nneeded; /* number of result bytes needed */
+ Py_ssize_t i; /* index into s of next input byte */
+ PyObject *result; /* result string object */
+ char *p; /* next free byte in output buffer */
+ Py_ssize_t nallocated; /* number of result bytes allocated */
+ Py_ssize_t nneeded; /* number of result bytes needed */
char stackbuf[MAX_SHORT_UNICHARS * 4];
assert(s != NULL);
@@ -2017,7 +1999,7 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s,
* turns out we need.
*/
nallocated = Py_SAFE_DOWNCAST(sizeof(stackbuf), size_t, int);
- v = NULL; /* will allocate after we're done */
+ result = NULL; /* will allocate after we're done */
p = stackbuf;
}
else {
@@ -2025,10 +2007,10 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s,
nallocated = size * 4;
if (nallocated / 4 != size) /* overflow! */
return PyErr_NoMemory();
- v = PyBytes_FromStringAndSize(NULL, nallocated);
- if (v == NULL)
+ result = PyString_FromStringAndSize(NULL, nallocated);
+ if (result == NULL)
return NULL;
- p = PyBytes_AS_STRING(v);
+ p = PyString_AS_STRING(result);
}
for (i = 0; i < size;) {
@@ -2072,19 +2054,19 @@ encodeUCS4:
}
}
- if (v == NULL) {
+ if (result == NULL) {
/* This was stack allocated. */
nneeded = p - stackbuf;
assert(nneeded <= nallocated);
- v = PyBytes_FromStringAndSize(stackbuf, nneeded);
+ result = PyString_FromStringAndSize(stackbuf, nneeded);
}
else {
/* Cut back to size actually needed. */
- nneeded = p - PyBytes_AS_STRING(v);
+ nneeded = p - PyString_AS_STRING(result);
assert(nneeded <= nallocated);
- PyBytes_Resize(v, nneeded);
+ _PyString_Resize(&result, nneeded);
}
- return v;
+ return result;
#undef MAX_SHORT_UNICHARS
}
@@ -2279,7 +2261,7 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s,
const char *errors,
int byteorder)
{
- PyObject *v;
+ PyObject *v, *result;
unsigned char *p;
#ifndef Py_UNICODE_WIDE
int i, pairs;
@@ -2319,7 +2301,7 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s,
if (byteorder == 0)
STORECHAR(0xFEFF);
if (size == 0)
- return v;
+ goto done;
if (byteorder == -1) {
/* force LE */
@@ -2350,7 +2332,11 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s,
#endif
STORECHAR(ch);
}
- return v;
+
+ done:
+ result = PyString_FromStringAndSize(PyBytes_AS_STRING(v), Py_Size(v));
+ Py_DECREF(v);
+ return result;
#undef STORECHAR
}
@@ -2549,7 +2535,7 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s,
const char *errors,
int byteorder)
{
- PyObject *v;
+ PyObject *v, *result;
unsigned char *p;
#ifdef Py_UNICODE_WIDE
int i, pairs;
@@ -2584,7 +2570,7 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s,
if (byteorder == 0)
STORECHAR(0xFEFF);
if (size == 0)
- return v;
+ goto done;
if (byteorder == -1) {
/* force LE */
@@ -2610,7 +2596,11 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s,
if (ch2)
STORECHAR(ch2);
}
- return v;
+
+ done:
+ result = PyString_FromStringAndSize(PyBytes_AS_STRING(v), Py_Size(v));
+ Py_DECREF(v);
+ return result;
#undef STORECHAR
}
@@ -2900,7 +2890,7 @@ static const char *hexdigits = "0123456789abcdef";
PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
Py_ssize_t size)
{
- PyObject *repr;
+ PyObject *repr, *result;
char *p;
/* XXX(nnorwitz): rather than over-allocating, it would be
@@ -3023,12 +3013,10 @@ PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
*p++ = (char) ch;
}
- *p = '\0';
- if (PyBytes_Resize(repr, p - PyBytes_AS_STRING(repr))) {
- Py_DECREF(repr);
- return NULL;
- }
- return repr;
+ result = PyString_FromStringAndSize(PyBytes_AS_STRING(repr),
+ p - PyBytes_AS_STRING(repr));
+ Py_DECREF(repr);
+ return result;
}
PyObject *PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
@@ -3159,7 +3147,7 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
Py_ssize_t size)
{
- PyObject *repr;
+ PyObject *repr, *result;
char *p;
char *q;
@@ -3171,7 +3159,7 @@ PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
if (repr == NULL)
return NULL;
if (size == 0)
- return repr;
+ goto done;
p = q = PyBytes_AS_STRING(repr);
while (size-- > 0) {
@@ -3205,12 +3193,12 @@ PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
else
*p++ = (char) ch;
}
- *p = '\0';
- if (PyBytes_Resize(repr, p - q)) {
- Py_DECREF(repr);
- return NULL;
- }
- return repr;
+ size = p - q;
+
+ done:
+ result = PyString_FromStringAndSize(PyBytes_AS_STRING(repr), size);
+ Py_DECREF(repr);
+ return result;
}
PyObject *PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
@@ -3445,23 +3433,23 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
/* pointer into the output */
char *str;
/* current output position */
- Py_ssize_t respos = 0;
Py_ssize_t ressize;
const char *encoding = (limit == 256) ? "latin-1" : "ascii";
const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)";
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
+ PyObject *result = NULL;
/* the following variable is used for caching string comparisons
* -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
int known_errorHandler = -1;
/* allocate enough for a simple encoding without
replacements, if we need more, we'll resize */
+ if (size == 0)
+ return PyString_FromStringAndSize(NULL, 0);
res = PyBytes_FromStringAndSize(NULL, size);
if (res == NULL)
- goto onError;
- if (size == 0)
- return res;
+ return NULL;
str = PyBytes_AS_STRING(res);
ressize = size;
@@ -3589,20 +3577,13 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
}
}
}
- /* Resize if we allocated to much */
- respos = str - PyBytes_AS_STRING(res);
- if (respos<ressize)
- /* If this falls res will be NULL */
- PyBytes_Resize(res, respos);
- Py_XDECREF(errorHandler);
- Py_XDECREF(exc);
- return res;
-
- onError:
- Py_XDECREF(res);
+ result = PyString_FromStringAndSize(PyBytes_AS_STRING(res),
+ str - PyBytes_AS_STRING(res));
+ onError:
+ Py_DECREF(res);
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
- return NULL;
+ return result;
}
PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p,
@@ -3848,20 +3829,20 @@ static int encode_mbcs(PyObject **repr,
if (*repr == NULL) {
/* Create string object */
- *repr = PyBytes_FromStringAndSize(NULL, mbcssize);
+ *repr = PyString_FromStringAndSize(NULL, mbcssize);
if (*repr == NULL)
return -1;
}
else {
/* Extend string object */
- n = PyBytes_Size(*repr);
- if (PyBytes_Resize(*repr, n + mbcssize) < 0)
+ n = PyString_Size(*repr);
+ if (_PyString_Resize(repr, n + mbcssize) < 0)
return -1;
}
/* Do the conversion */
if (size > 0) {
- char *s = PyBytes_AS_STRING(*repr) + n;
+ char *s = PyString_AS_STRING(*repr) + n;
if (0 == WideCharToMultiByte(CP_ACP, 0, p, size, s, mbcssize, NULL, NULL)) {
PyErr_SetFromWindowsErrWithFilename(0, NULL);
return -1;
@@ -4341,16 +4322,14 @@ static PyObject *charmapencode_lookup(Py_UNICODE c, PyObject *mapping)
}
static int
-charmapencode_resize(PyObject *outobj, Py_ssize_t *outpos, Py_ssize_t requiredsize)
+charmapencode_resize(PyObject **outobj, Py_ssize_t *outpos, Py_ssize_t requiredsize)
{
- Py_ssize_t outsize = PyBytes_GET_SIZE( outobj);
+ Py_ssize_t outsize = PyString_GET_SIZE(*outobj);
/* exponentially overallocate to minimize reallocations */
if (requiredsize < 2*outsize)
requiredsize = 2*outsize;
- if (PyBytes_Resize(outobj, requiredsize)) {
- Py_DECREF(outobj);
+ if (_PyString_Resize(outobj, requiredsize))
return -1;
- }
return 0;
}
@@ -4365,21 +4344,21 @@ typedef enum charmapencode_result {
reallocation error occurred. The caller must decref the result */
static
charmapencode_result charmapencode_output(Py_UNICODE c, PyObject *mapping,
- PyObject *outobj, Py_ssize_t *outpos)
+ PyObject **outobj, Py_ssize_t *outpos)
{
PyObject *rep;
char *outstart;
- Py_ssize_t outsize = PyBytes_GET_SIZE(outobj);
+ Py_ssize_t outsize = PyString_GET_SIZE(*outobj);
if (Py_Type(mapping) == &EncodingMapType) {
int res = encoding_map_lookup(c, mapping);
Py_ssize_t requiredsize = *outpos+1;
if (res == -1)
return enc_FAILED;
- if (outsize<requiredsize)
+ if (outsize<requiredsize)
if (charmapencode_resize(outobj, outpos, requiredsize))
return enc_EXCEPTION;
- outstart = PyBytes_AS_STRING(outobj);
+ outstart = PyString_AS_STRING(*outobj);
outstart[(*outpos)++] = (char)res;
return enc_SUCCESS;
}
@@ -4398,7 +4377,7 @@ charmapencode_result charmapencode_output(Py_UNICODE c, PyObject *mapping,
Py_DECREF(rep);
return enc_EXCEPTION;
}
- outstart = PyBytes_AS_STRING(outobj);
+ outstart = PyString_AS_STRING(*outobj);
outstart[(*outpos)++] = (char)PyInt_AS_LONG(rep);
}
else {
@@ -4410,7 +4389,7 @@ charmapencode_result charmapencode_output(Py_UNICODE c, PyObject *mapping,
Py_DECREF(rep);
return enc_EXCEPTION;
}
- outstart = PyBytes_AS_STRING(outobj);
+ outstart = PyString_AS_STRING(*outobj);
memcpy(outstart + *outpos, repchars, repsize);
*outpos += repsize;
}
@@ -4426,7 +4405,7 @@ int charmap_encoding_error(
const Py_UNICODE *p, Py_ssize_t size, Py_ssize_t *inpos, PyObject *mapping,
PyObject **exceptionObject,
int *known_errorHandler, PyObject **errorHandler, const char *errors,
- PyObject *res, Py_ssize_t *respos)
+ PyObject **res, Py_ssize_t *respos)
{
PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
Py_ssize_t repsize;
@@ -4561,7 +4540,7 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
/* allocate enough for a simple encoding without
replacements, if we need more, we'll resize */
- res = PyBytes_FromStringAndSize(NULL, size);
+ res = PyString_FromStringAndSize(NULL, size);
if (res == NULL)
goto onError;
if (size == 0)
@@ -4569,14 +4548,14 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
while (inpos<size) {
/* try to encode it */
- charmapencode_result x = charmapencode_output(p[inpos], mapping, res, &respos);
+ charmapencode_result x = charmapencode_output(p[inpos], mapping, &res, &respos);
if (x==enc_EXCEPTION) /* error */
goto onError;
if (x==enc_FAILED) { /* unencodable character */
if (charmap_encoding_error(p, size, &inpos, mapping,
&exc,
&known_errorHandler, &errorHandler, errors,
- res, &respos)) {
+ &res, &respos)) {
goto onError;
}
}
@@ -4586,10 +4565,9 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
}
/* Resize if we allocated to much */
- if (respos<PyBytes_GET_SIZE(res)) {
- if (PyBytes_Resize(res, respos))
- goto onError;
- }
+ if (respos<PyString_GET_SIZE(res))
+ _PyString_Resize(&res, respos);
+
Py_XDECREF(exc);
Py_XDECREF(errorHandler);
return res;
@@ -5483,20 +5461,14 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
item = PySequence_Fast_GET_ITEM(fseq, i);
/* Convert item to Unicode. */
- if (!PyString_Check(item) && !PyUnicode_Check(item))
- {
- if (PyBytes_Check(item))
- {
- PyErr_Format(PyExc_TypeError,
- "sequence item %d: join() will not operate on "
- "bytes objects", i);
- goto onError;
- }
- item = PyObject_Unicode(item);
+ if (!PyUnicode_Check(item)) {
+ PyErr_Format(PyExc_TypeError,
+ "sequence item %zd: expected str instance,"
+ " %.80s found",
+ i, Py_Type(item)->tp_name);
+ goto onError;
}
- else
- item = PyUnicode_FromObject(item);
-
+ item = PyUnicode_FromObject(item);
if (item == NULL)
goto onError;
/* We own a reference to item from here on. */
@@ -6396,9 +6368,6 @@ PyObject *PyUnicode_Concat(PyObject *left,
{
PyUnicodeObject *u = NULL, *v = NULL, *w;
- if (PyBytes_Check(left) || PyBytes_Check(right))
- return PyBytes_Concat(left, right);
-
/* Coerce the two arguments */
u = (PyUnicodeObject *)PyUnicode_FromObject(left);
if (u == NULL)
@@ -6515,7 +6484,7 @@ unicode_encode(PyUnicodeObject *self, PyObject *args)
v = PyUnicode_AsEncodedObject((PyObject *)self, encoding, errors);
if (v == NULL)
goto onError;
- if (!PyBytes_Check(v)) {
+ if (!PyString_Check(v)) {
PyErr_Format(PyExc_TypeError,
"encoder did not return a bytes object "
"(type=%.400s)",
@@ -8232,12 +8201,6 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
return NULL;
}
-#define F_LJUST (1<<0)
-#define F_SIGN (1<<1)
-#define F_BLANK (1<<2)
-#define F_ALT (1<<3)
-#define F_ZERO (1<<4)
-
static Py_ssize_t
strtounicode(Py_UNICODE *buffer, const char *charbuffer)
{