summary | refs | log | tree | commit | diff | stats
path: root/Objects/stringobject.c
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org>, 2007-11-06 21:34:58 (GMT)
committer: Guido van Rossum <guido@python.org>, 2007-11-06 21:34:58 (GMT)
commit: 98297ee7815939b124156e438b22bd652d67b5db (patch)
tree: a9d239ebd87c73af2571ab48003984c4e18e27e5 /Objects/stringobject.c
parent: a19f80c6df2df5e8a5d0cff37131097835ef971e (diff)
download: cpython-98297ee7815939b124156e438b22bd652d67b5db.zip
cpython-98297ee7815939b124156e438b22bd652d67b5db.tar.gz
cpython-98297ee7815939b124156e438b22bd652d67b5db.tar.bz2
Merging the py3k-pep3137 branch back into the py3k branch.
No detailed change log; just check out the change log for the py3k-pep3137 branch. The most obvious changes:
- str8 renamed to bytes (PyString at the C level);
- bytes renamed to buffer (PyBytes at the C level);
- PyString and PyUnicode are no longer compatible.
I.e. we now have an immutable bytes type and a mutable bytes type.
The behavior of PyString was modified quite a bit, to make it more bytes-like. Some changes are still on the to-do list.
Diffstat (limited to 'Objects/stringobject.c')
-rw-r--r-- Objects/stringobject.c 1604
1 files changed, 347 insertions, 1257 deletions
diff --git a/Objects/stringobject.c b/Objects/stringobject.c
index 3dd1051..8761477 100644
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -1,11 +1,32 @@
/* String object implementation */
+/* XXX This is now called 'bytes' as far as the user is concerned.
+ Many docstrings and error messages need to be cleaned up. */
+
#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include "bytes_methods.h"
+static Py_ssize_t
+_getbuffer(PyObject *obj, Py_buffer *view)
+{
+ PyBufferProcs *buffer = Py_Type(obj)->tp_as_buffer;
+
+ if (buffer == NULL || buffer->bf_getbuffer == NULL)
+ {
+ PyErr_Format(PyExc_TypeError,
+ "Type %.100s doesn't support the buffer API",
+ Py_Type(obj)->tp_name);
+ return -1;
+ }
+
+ if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
+ return -1;
+ return view->len;
+}
+
#ifdef COUNT_ALLOCS
int null_strings, one_strings;
#endif
@@ -13,16 +34,6 @@ int null_strings, one_strings;
static PyStringObject *characters[UCHAR_MAX + 1];
static PyStringObject *nullstring;
-/* This dictionary holds all interned strings. Note that references to
- strings in this dictionary are *not* counted in the string's ob_refcnt.
- When the interned string reaches a refcnt of 0 the string deallocation
- function will delete the reference from this dictionary.
-
- Another way to look at this is that to say that the actual reference
- count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
-*/
-static PyObject *interned;
-
/*
For both PyString_FromString() and PyString_FromStringAndSize(), the
parameter `size' denotes number of characters to allocate, not counting any
@@ -77,21 +88,14 @@ PyString_FromStringAndSize(const char *str, Py_ssize_t size)
return PyErr_NoMemory();
PyObject_INIT_VAR(op, &PyString_Type, size);
op->ob_shash = -1;
- op->ob_sstate = SSTATE_NOT_INTERNED;
if (str != NULL)
Py_MEMCPY(op->ob_sval, str, size);
op->ob_sval[size] = '\0';
/* share short strings */
if (size == 0) {
- PyObject *t = (PyObject *)op;
- PyString_InternInPlace(&t);
- op = (PyStringObject *)t;
nullstring = op;
Py_INCREF(op);
} else if (size == 1 && str != NULL) {
- PyObject *t = (PyObject *)op;
- PyString_InternInPlace(&t);
- op = (PyStringObject *)t;
characters[*str & UCHAR_MAX] = op;
Py_INCREF(op);
}
@@ -132,19 +136,12 @@ PyString_FromString(const char *str)
return PyErr_NoMemory();
PyObject_INIT_VAR(op, &PyString_Type, size);
op->ob_shash = -1;
- op->ob_sstate = SSTATE_NOT_INTERNED;
Py_MEMCPY(op->ob_sval, str, size+1);
/* share short strings */
if (size == 0) {
- PyObject *t = (PyObject *)op;
- PyString_InternInPlace(&t);
- op = (PyStringObject *)t;
nullstring = op;
Py_INCREF(op);
} else if (size == 1) {
- PyObject *t = (PyObject *)op;
- PyString_InternInPlace(&t);
- op = (PyStringObject *)t;
characters[*str & UCHAR_MAX] = op;
Py_INCREF(op);
}
@@ -351,174 +348,9 @@ PyString_FromFormat(const char *format, ...)
return ret;
}
-
-PyObject *PyString_Decode(const char *s,
- Py_ssize_t size,
- const char *encoding,
- const char *errors)
-{
- PyObject *v, *str;
-
- str = PyString_FromStringAndSize(s, size);
- if (str == NULL)
- return NULL;
- v = PyString_AsDecodedString(str, encoding, errors);
- Py_DECREF(str);
- return v;
-}
-
-PyObject *PyString_AsDecodedObject(PyObject *str,
- const char *encoding,
- const char *errors)
-{
- PyObject *v;
-
- if (!PyString_Check(str)) {
- PyErr_BadArgument();
- goto onError;
- }
-
- if (encoding == NULL) {
- encoding = PyUnicode_GetDefaultEncoding();
- }
-
- /* Decode via the codec registry */
- v = PyCodec_Decode(str, encoding, errors);
- if (v == NULL)
- goto onError;
-
- return v;
-
- onError:
- return NULL;
-}
-
-PyObject *PyString_AsDecodedString(PyObject *str,
- const char *encoding,
- const char *errors)
-{
- PyObject *v;
-
- v = PyString_AsDecodedObject(str, encoding, errors);
- if (v == NULL)
- goto onError;
-
- /* Convert Unicode to a string using the default encoding */
- if (PyUnicode_Check(v)) {
- PyObject *temp = v;
- v = PyUnicode_AsEncodedString(v, NULL, NULL);
- Py_DECREF(temp);
- if (v == NULL)
- goto onError;
- }
- if (!PyString_Check(v)) {
- PyErr_Format(PyExc_TypeError,
- "decoder did not return a string object (type=%.400s)",
- Py_Type(v)->tp_name);
- Py_DECREF(v);
- goto onError;
- }
-
- return v;
-
- onError:
- return NULL;
-}
-
-PyObject *PyString_Encode(const char *s,
- Py_ssize_t size,
- const char *encoding,
- const char *errors)
-{
- PyObject *v, *str;
-
- str = PyString_FromStringAndSize(s, size);
- if (str == NULL)
- return NULL;
- v = PyString_AsEncodedString(str, encoding, errors);
- Py_DECREF(str);
- return v;
-}
-
-PyObject *PyString_AsEncodedObject(PyObject *str,
- const char *encoding,
- const char *errors)
-{
- PyObject *v;
-
- if (!PyString_Check(str)) {
- PyErr_BadArgument();
- goto onError;
- }
-
- if (encoding == NULL) {
- encoding = PyUnicode_GetDefaultEncoding();
- }
-
- /* Encode via the codec registry */
- v = PyCodec_Encode(str, encoding, errors);
- if (v == NULL)
- goto onError;
-
- return v;
-
- onError:
- return NULL;
-}
-
-PyObject *PyString_AsEncodedString(PyObject *str,
- const char *encoding,
- const char *errors)
-{
- PyObject *v;
-
- v = PyString_AsEncodedObject(str, encoding, errors);
- if (v == NULL)
- goto onError;
-
- /* Convert Unicode to a string using the default encoding */
- if (PyUnicode_Check(v)) {
- PyObject *temp = v;
- v = PyUnicode_AsEncodedString(v, NULL, NULL);
- Py_DECREF(temp);
- if (v == NULL)
- goto onError;
- }
- if (!PyString_Check(v)) {
- PyErr_Format(PyExc_TypeError,
- "encoder did not return a string object (type=%.400s)",
- Py_Type(v)->tp_name);
- Py_DECREF(v);
- goto onError;
- }
-
- return v;
-
- onError:
- return NULL;
-}
-
static void
string_dealloc(PyObject *op)
{
- switch (PyString_CHECK_INTERNED(op)) {
- case SSTATE_NOT_INTERNED:
- break;
-
- case SSTATE_INTERNED_MORTAL:
- /* revive dead object temporarily for DelItem */
- Py_Refcnt(op) = 3;
- if (PyDict_DelItem(interned, op) != 0)
- Py_FatalError(
- "deletion of interned string failed");
- break;
-
- case SSTATE_INTERNED_IMMORTAL:
- Py_FatalError("Immortal interned string died.");
-
- default:
- Py_FatalError("Inconsistent interned string state.");
- }
Py_Type(op)->tp_free(op);
}
@@ -577,7 +409,7 @@ PyObject *PyString_DecodeEscape(const char *s,
continue;
}
s++;
- if (s==end) {
+ if (s==end) {
PyErr_SetString(PyExc_ValueError,
"Trailing \\ in string");
goto failed;
@@ -639,8 +471,8 @@ PyObject *PyString_DecodeEscape(const char *s,
/* do nothing */;
else {
PyErr_Format(PyExc_ValueError,
- "decoding error; "
- "unknown error handling code: %.400s",
+ "decoding error; unknown "
+ "error handling code: %.400s",
errors);
goto failed;
}
@@ -665,8 +497,8 @@ PyObject *PyString_DecodeEscape(const char *s,
static Py_ssize_t
string_getsize(register PyObject *op)
{
- char *s;
- Py_ssize_t len;
+ char *s;
+ Py_ssize_t len;
if (PyString_AsStringAndSize(op, &s, &len))
return -1;
return len;
@@ -675,8 +507,8 @@ string_getsize(register PyObject *op)
static /*const*/ char *
string_getbuffer(register PyObject *op)
{
- char *s;
- Py_ssize_t len;
+ char *s;
+ Py_ssize_t len;
if (PyString_AsStringAndSize(op, &s, &len))
return NULL;
return s;
@@ -753,7 +585,7 @@ PyString_AsStringAndSize(register PyObject *obj,
#define STRINGLIB_LEN PyString_GET_SIZE
#define STRINGLIB_NEW PyString_FromStringAndSize
#define STRINGLIB_STR PyString_AS_STRING
-#define STRINGLIB_WANT_CONTAINS_OBJ 1
+/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */
#define STRINGLIB_EMPTY nullstring
#define STRINGLIB_CHECK_EXACT PyString_CheckExact
@@ -773,12 +605,12 @@ PyString_Repr(PyObject *obj, int smartquotes)
{
static const char *hexdigits = "0123456789abcdef";
register PyStringObject* op = (PyStringObject*) obj;
- Py_ssize_t length = PyString_GET_SIZE(op);
- size_t newsize = 3 + 4 * Py_Size(op);
+ Py_ssize_t length = Py_Size(op);
+ size_t newsize = 3 + 4 * length;
PyObject *v;
- if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(op)) {
+ if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
PyErr_SetString(PyExc_OverflowError,
- "string is too large to make repr");
+ "bytes object is too large to make repr");
}
v = PyUnicode_FromUnicode(NULL, newsize);
if (v == NULL) {
@@ -790,14 +622,14 @@ PyString_Repr(PyObject *obj, int smartquotes)
register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
int quote;
- /* figure out which quote to use; single is preferred */
+ /* Figure out which quote to use; single is preferred */
quote = '\'';
if (smartquotes) {
char *test, *start;
start = PyString_AS_STRING(op);
for (test = start; test < start+length; ++test) {
if (*test == '"') {
- quote = '\''; /* switch back to single quote */
+ quote = '\''; /* back to single */
goto decided;
}
else if (*test == '\'')
@@ -807,8 +639,8 @@ PyString_Repr(PyObject *obj, int smartquotes)
;
}
- *p++ = 's', *p++ = quote;
- for (i = 0; i < Py_Size(op); i++) {
+ *p++ = 'b', *p++ = quote;
+ for (i = 0; i < length; i++) {
/* There's at least enough room for a hex escape
and a closing quote. */
assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
@@ -848,18 +680,14 @@ string_repr(PyObject *op)
}
static PyObject *
-string_str(PyObject *s)
+string_str(PyObject *op)
{
- assert(PyString_Check(s));
- if (PyString_CheckExact(s)) {
- Py_INCREF(s);
- return s;
- }
- else {
- /* Subtype -- return genuine string with the same value. */
- PyStringObject *t = (PyStringObject *) s;
- return PyString_FromStringAndSize(t->ob_sval, Py_Size(t));
+ if (Py_BytesWarningFlag) {
+ if (PyErr_WarnEx(PyExc_BytesWarning,
+ "str() on a bytes instance", 1))
+ return NULL;
}
+ return string_repr(op);
}
static Py_ssize_t
@@ -868,51 +696,53 @@ string_length(PyStringObject *a)
return Py_Size(a);
}
+/* This is also used by PyString_Concat() */
static PyObject *
-string_concat(register PyStringObject *a, register PyObject *bb)
+string_concat(PyObject *a, PyObject *b)
{
- register Py_ssize_t size;
- register PyStringObject *op;
- if (!PyString_Check(bb)) {
- if (PyUnicode_Check(bb))
- return PyUnicode_Concat((PyObject *)a, bb);
- if (PyBytes_Check(bb))
- return PyBytes_Concat((PyObject *)a, bb);
- PyErr_Format(PyExc_TypeError,
- "cannot concatenate 'str8' and '%.200s' objects",
- Py_Type(bb)->tp_name);
- return NULL;
+ Py_ssize_t size;
+ Py_buffer va, vb;
+ PyObject *result = NULL;
+
+ va.len = -1;
+ vb.len = -1;
+ if (_getbuffer(a, &va) < 0 ||
+ _getbuffer(b, &vb) < 0) {
+ PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
+ Py_Type(a)->tp_name, Py_Type(b)->tp_name);
+ goto done;
}
-#define b ((PyStringObject *)bb)
- /* Optimize cases with empty left or right operand */
- if ((Py_Size(a) == 0 || Py_Size(b) == 0) &&
- PyString_CheckExact(a) && PyString_CheckExact(b)) {
- if (Py_Size(a) == 0) {
- Py_INCREF(bb);
- return bb;
- }
- Py_INCREF(a);
- return (PyObject *)a;
+
+ /* Optimize end cases */
+ if (va.len == 0 && PyString_CheckExact(b)) {
+ result = b;
+ Py_INCREF(result);
+ goto done;
+ }
+ if (vb.len == 0 && PyString_CheckExact(a)) {
+ result = a;
+ Py_INCREF(result);
+ goto done;
}
- size = Py_Size(a) + Py_Size(b);
+
+ size = va.len + vb.len;
if (size < 0) {
- PyErr_SetString(PyExc_OverflowError,
- "strings are too large to concat");
- return NULL;
+ PyErr_NoMemory();
+ goto done;
}
- /* Inline PyObject_NewVar */
- op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
- if (op == NULL)
- return PyErr_NoMemory();
- PyObject_INIT_VAR(op, &PyString_Type, size);
- op->ob_shash = -1;
- op->ob_sstate = SSTATE_NOT_INTERNED;
- Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
- Py_MEMCPY(op->ob_sval + Py_Size(a), b->ob_sval, Py_Size(b));
- op->ob_sval[size] = '\0';
- return (PyObject *) op;
-#undef b
+ result = PyString_FromStringAndSize(NULL, size);
+ if (result != NULL) {
+ memcpy(PyString_AS_STRING(result), va.buf, va.len);
+ memcpy(PyString_AS_STRING(result) + va.len, vb.buf, vb.len);
+ }
+
+ done:
+ if (va.len != -1)
+ PyObject_ReleaseBuffer(a, &va);
+ if (vb.len != -1)
+ PyObject_ReleaseBuffer(b, &vb);
+ return result;
}
static PyObject *
@@ -950,7 +780,6 @@ string_repeat(register PyStringObject *a, register Py_ssize_t n)
return PyErr_NoMemory();
PyObject_INIT_VAR(op, &PyString_Type, size);
op->ob_shash = -1;
- op->ob_sstate = SSTATE_NOT_INTERNED;
op->ob_sval[size] = '\0';
if (Py_Size(a) == 1 && n > 0) {
memset(op->ob_sval, a->ob_sval[0] , n);
@@ -970,20 +799,36 @@ string_repeat(register PyStringObject *a, register Py_ssize_t n)
}
static int
-string_contains(PyObject *str_obj, PyObject *sub_obj)
+string_contains(PyObject *self, PyObject *arg)
+{
+ Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
+ if (ival == -1 && PyErr_Occurred()) {
+ Py_buffer varg;
+ int pos;
+ PyErr_Clear();
+ if (_getbuffer(arg, &varg) < 0)
+ return -1;
+ pos = stringlib_find(PyString_AS_STRING(self), Py_Size(self),
+ varg.buf, varg.len, 0);
+ PyObject_ReleaseBuffer(arg, &varg);
+ return pos >= 0;
+ }
+ if (ival < 0 || ival >= 256) {
+ PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
+ return -1;
+ }
+
+ return memchr(PyString_AS_STRING(self), ival, Py_Size(self)) != NULL;
+}
+
+static PyObject *
+string_item(PyStringObject *a, register Py_ssize_t i)
{
- if (!PyString_CheckExact(sub_obj)) {
- if (PyUnicode_Check(sub_obj))
- return PyUnicode_Contains(str_obj, sub_obj);
- if (!PyString_Check(sub_obj)) {
- PyErr_Format(PyExc_TypeError,
- "'in <string>' requires string as left operand, "
- "not %.200s", Py_Type(sub_obj)->tp_name);
- return -1;
- }
+ if (i < 0 || i >= Py_Size(a)) {
+ PyErr_SetString(PyExc_IndexError, "string index out of range");
+ return NULL;
}
-
- return stringlib_contains_obj(str_obj, sub_obj);
+ return PyInt_FromLong((unsigned char)a->ob_sval[i]);
}
static PyObject*
@@ -996,6 +841,15 @@ string_richcompare(PyStringObject *a, PyStringObject *b, int op)
/* Make sure both arguments are strings. */
if (!(PyString_Check(a) && PyString_Check(b))) {
+ if (Py_BytesWarningFlag && (op == Py_EQ) &&
+ (PyObject_IsInstance((PyObject*)a,
+ (PyObject*)&PyUnicode_Type) ||
+ PyObject_IsInstance((PyObject*)b,
+ (PyObject*)&PyUnicode_Type))) {
+ if (PyErr_WarnEx(PyExc_BytesWarning,
+ "Comparsion between bytes and string", 1))
+ return NULL;
+ }
result = Py_NotImplemented;
goto out;
}
@@ -1053,9 +907,9 @@ _PyString_Eq(PyObject *o1, PyObject *o2)
{
PyStringObject *a = (PyStringObject*) o1;
PyStringObject *b = (PyStringObject*) o2;
- return Py_Size(a) == Py_Size(b)
- && *a->ob_sval == *b->ob_sval
- && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
+ return Py_Size(a) == Py_Size(b)
+ && *a->ob_sval == *b->ob_sval
+ && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
}
static long
@@ -1088,12 +942,12 @@ string_subscript(PyStringObject* self, PyObject* item)
return NULL;
if (i < 0)
i += PyString_GET_SIZE(self);
- if (i < 0 || i >= PyString_GET_SIZE(self)) {
+ if (i < 0 || i >= PyString_GET_SIZE(self)) {
PyErr_SetString(PyExc_IndexError,
"string index out of range");
return NULL;
- }
- return PyInt_FromLong((unsigned char)self->ob_sval[i]);
+ }
+ return PyInt_FromLong((unsigned char)self->ob_sval[i]);
}
else if (PySlice_Check(item)) {
Py_ssize_t start, stop, step, slicelength, cur, i;
@@ -1149,14 +1003,15 @@ string_subscript(PyStringObject* self, PyObject* item)
static int
string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
{
- return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_Size(self), 0, flags);
+ return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_Size(self),
+ 0, flags);
}
static PySequenceMethods string_as_sequence = {
(lenfunc)string_length, /*sq_length*/
(binaryfunc)string_concat, /*sq_concat*/
(ssizeargfunc)string_repeat, /*sq_repeat*/
- 0, /*sq_item*/
+ (ssizeargfunc)string_item, /*sq_item*/
0, /*sq_slice*/
0, /*sq_ass_item*/
0, /*sq_ass_slice*/
@@ -1171,7 +1026,7 @@ static PyMappingMethods string_as_mapping = {
static PyBufferProcs string_as_buffer = {
(getbufferproc)string_buffer_getbuffer,
- NULL,
+ NULL,
};
@@ -1297,12 +1152,12 @@ split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
}
PyDoc_STRVAR(split__doc__,
-"S.split([sep [,maxsplit]]) -> list of strings\n\
+"B.split([sep[, maxsplit]]) -> list of bytes\n\
\n\
-Return a list of the words in the string S, using sep as the\n\
-delimiter string. If maxsplit is given, at most maxsplit\n\
-splits are done. If sep is not specified or is None, any\n\
-whitespace string is a separator.");
+Return a list of the sections in B, using sep as the delimiter.\n\
+If sep is not given, B is split on ASCII whitespace characters\n\
+(space, tab, return, newline, formfeed, vertical tab).\n\
+If maxsplit is given, at most maxsplit splits are done.");
static PyObject *
string_split(PyStringObject *self, PyObject *args)
@@ -1310,6 +1165,7 @@ string_split(PyStringObject *self, PyObject *args)
Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Py_ssize_t maxsplit = -1, count=0;
const char *s = PyString_AS_STRING(self), *sub;
+ Py_buffer vsub;
PyObject *list, *str, *subobj = Py_None;
#ifdef USE_FAST
Py_ssize_t pos;
@@ -1321,25 +1177,27 @@ string_split(PyStringObject *self, PyObject *args)
maxsplit = PY_SSIZE_T_MAX;
if (subobj == Py_None)
return split_whitespace(s, len, maxsplit);
- if (PyString_Check(subobj)) {
- sub = PyString_AS_STRING(subobj);
- n = PyString_GET_SIZE(subobj);
- }
- else if (PyUnicode_Check(subobj))
- return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
- else if (PyObject_AsCharBuffer(subobj, &sub, &n))
+ if (_getbuffer(subobj, &vsub) < 0)
return NULL;
+ sub = vsub.buf;
+ n = vsub.len;
if (n == 0) {
PyErr_SetString(PyExc_ValueError, "empty separator");
+ PyObject_ReleaseBuffer(subobj, &vsub);
return NULL;
}
- else if (n == 1)
- return split_char(s, len, sub[0], maxsplit);
+ else if (n == 1) {
+ char ch = sub[0];
+ PyObject_ReleaseBuffer(subobj, &vsub);
+ return split_char(s, len, ch, maxsplit);
+ }
list = PyList_New(PREALLOC_SIZE(maxsplit));
- if (list == NULL)
+ if (list == NULL) {
+ PyObject_ReleaseBuffer(subobj, &vsub);
return NULL;
+ }
#ifdef USE_FAST
i = j = 0;
@@ -1365,19 +1223,21 @@ string_split(PyStringObject *self, PyObject *args)
#endif
SPLIT_ADD(s, i, len);
FIX_PREALLOC_SIZE(list);
+ PyObject_ReleaseBuffer(subobj, &vsub);
return list;
onError:
Py_DECREF(list);
+ PyObject_ReleaseBuffer(subobj, &vsub);
return NULL;
}
PyDoc_STRVAR(partition__doc__,
-"S.partition(sep) -> (head, sep, tail)\n\
+"B.partition(sep) -> (head, sep, tail)\n\
\n\
-Searches for the separator sep in S, and returns the part before it,\n\
+Searches for the separator sep in B, and returns the part before it,\n\
the separator itself, and the part after it. If the separator is not\n\
-found, returns S and two empty strings.");
+found, returns B and two empty bytes objects.");
static PyObject *
string_partition(PyStringObject *self, PyObject *sep_obj)
@@ -1402,11 +1262,12 @@ string_partition(PyStringObject *self, PyObject *sep_obj)
}
PyDoc_STRVAR(rpartition__doc__,
-"S.rpartition(sep) -> (tail, sep, head)\n\
+"B.rpartition(sep) -> (tail, sep, head)\n\
\n\
-Searches for the separator sep in S, starting at the end of S, and returns\n\
-the part before it, the separator itself, and the part after it. If the\n\
-separator is not found, returns two empty strings and S.");
+Searches for the separator sep in B, starting at the end of B,\n\
+and returns the part before it, the separator itself, and the\n\
+part after it. If the separator is not found, returns two empty\n\
+bytes objects and B.");
static PyObject *
string_rpartition(PyStringObject *self, PyObject *sep_obj)
@@ -1450,8 +1311,8 @@ rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
SPLIT_ADD(s, i + 1, j + 1);
}
if (i >= 0) {
- /* Only occurs when maxsplit was reached */
- /* Skip any remaining whitespace and copy to beginning of string */
+ /* Only occurs when maxsplit was reached. Skip any remaining
+ whitespace and copy to beginning of string. */
RSKIP_SPACE(s, i);
if (i >= 0)
SPLIT_ADD(s, 0, i + 1);
@@ -1500,13 +1361,14 @@ rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
}
PyDoc_STRVAR(rsplit__doc__,
-"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
+"B.rsplit([sep[, maxsplit]]) -> list of strings\n\
\n\
-Return a list of the words in the string S, using sep as the\n\
-delimiter string, starting at the end of the string and working\n\
-to the front. If maxsplit is given, at most maxsplit splits are\n\
-done. If sep is not specified or is None, any whitespace string\n\
-is a separator.");
+Return a list of the sections in B, using sep as the delimiter,\n\
+starting at the end of B and working to the front.\n\
+If sep is not given, B is split on ASCII whitespace characters\n\
+(space, tab, return, newline, formfeed, vertical tab).\n\
+If maxsplit is given, at most maxsplit splits are done.");
+
static PyObject *
string_rsplit(PyStringObject *self, PyObject *args)
@@ -1514,6 +1376,7 @@ string_rsplit(PyStringObject *self, PyObject *args)
Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Py_ssize_t maxsplit = -1, count=0;
const char *s = PyString_AS_STRING(self), *sub;
+ Py_buffer vsub;
PyObject *list, *str, *subobj = Py_None;
if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
@@ -1522,25 +1385,27 @@ string_rsplit(PyStringObject *self, PyObject *args)
maxsplit = PY_SSIZE_T_MAX;
if (subobj == Py_None)
return rsplit_whitespace(s, len, maxsplit);
- if (PyString_Check(subobj)) {
- sub = PyString_AS_STRING(subobj);
- n = PyString_GET_SIZE(subobj);
- }
- else if (PyUnicode_Check(subobj))
- return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
- else if (PyObject_AsCharBuffer(subobj, &sub, &n))
+ if (_getbuffer(subobj, &vsub) < 0)
return NULL;
+ sub = vsub.buf;
+ n = vsub.len;
if (n == 0) {
PyErr_SetString(PyExc_ValueError, "empty separator");
+ PyObject_ReleaseBuffer(subobj, &vsub);
return NULL;
}
- else if (n == 1)
- return rsplit_char(s, len, sub[0], maxsplit);
+ else if (n == 1) {
+ char ch = sub[0];
+ PyObject_ReleaseBuffer(subobj, &vsub);
+ return rsplit_char(s, len, ch, maxsplit);
+ }
list = PyList_New(PREALLOC_SIZE(maxsplit));
- if (list == NULL)
+ if (list == NULL) {
+ PyObject_ReleaseBuffer(subobj, &vsub);
return NULL;
+ }
j = len;
i = j - n;
@@ -1559,10 +1424,12 @@ string_rsplit(PyStringObject *self, PyObject *args)
FIX_PREALLOC_SIZE(list);
if (PyList_Reverse(list) < 0)
goto onError;
+ PyObject_ReleaseBuffer(subobj, &vsub);
return list;
onError:
Py_DECREF(list);
+ PyObject_ReleaseBuffer(subobj, &vsub);
return NULL;
}
@@ -1572,13 +1439,13 @@ onError:
PyDoc_STRVAR(join__doc__,
-"S.join(sequence) -> string\n\
+"B.join(iterable_of_bytes) -> bytes\n\
\n\
-Return a string which is the concatenation of the strings in the\n\
-sequence. The separator between elements is S.");
+Concatenates any number of bytes objects, with B in between each pair.\n\
+Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
static PyObject *
-string_join(PyStringObject *self, PyObject *orig)
+string_join(PyObject *self, PyObject *orig)
{
char *sep = PyString_AS_STRING(self);
const Py_ssize_t seplen = PyString_GET_SIZE(self);
@@ -1601,7 +1468,7 @@ string_join(PyStringObject *self, PyObject *orig)
}
if (seqlen == 1) {
item = PySequence_Fast_GET_ITEM(seq, 0);
- if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
+ if (PyString_CheckExact(item)) {
Py_INCREF(item);
Py_DECREF(seq);
return item;
@@ -1611,37 +1478,26 @@ string_join(PyStringObject *self, PyObject *orig)
/* There are at least two things to join, or else we have a subclass
* of the builtin types in the sequence.
* Do a pre-pass to figure out the total amount of space we'll
- * need (sz), see whether any argument is absurd, and defer to
- * the Unicode join if appropriate.
+ * need (sz), and see whether all argument are bytes.
*/
+ /* XXX Shouldn't we use _getbuffer() on these items instead? */
for (i = 0; i < seqlen; i++) {
const size_t old_sz = sz;
item = PySequence_Fast_GET_ITEM(seq, i);
- if (!PyString_Check(item)){
- if (PyUnicode_Check(item)) {
- /* Defer to Unicode join.
- * CAUTION: There's no gurantee that the
- * original sequence can be iterated over
- * again, so we must pass seq here.
- */
- PyObject *result;
- result = PyUnicode_Join((PyObject *)self, seq);
- Py_DECREF(seq);
- return result;
- }
+ if (!PyString_Check(item) && !PyBytes_Check(item)) {
PyErr_Format(PyExc_TypeError,
- "sequence item %zd: expected string,"
+ "sequence item %zd: expected bytes,"
" %.80s found",
i, Py_Type(item)->tp_name);
Py_DECREF(seq);
return NULL;
}
- sz += PyString_GET_SIZE(item);
+ sz += Py_Size(item);
if (i != 0)
sz += seplen;
if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
PyErr_SetString(PyExc_OverflowError,
- "join() result is too long for a Python string");
+ "join() result is too long for a Python string");
Py_DECREF(seq);
return NULL;
}
@@ -1655,17 +1511,24 @@ string_join(PyStringObject *self, PyObject *orig)
}
/* Catenate everything. */
+ /* I'm not worried about a PyBytes item growing because there's
+ nowhere in this function where we release the GIL. */
p = PyString_AS_STRING(res);
for (i = 0; i < seqlen; ++i) {
size_t n;
- item = PySequence_Fast_GET_ITEM(seq, i);
- n = PyString_GET_SIZE(item);
- Py_MEMCPY(p, PyString_AS_STRING(item), n);
- p += n;
- if (i < seqlen - 1) {
+ char *q;
+ if (i) {
Py_MEMCPY(p, sep, seplen);
p += seplen;
}
+ item = PySequence_Fast_GET_ITEM(seq, i);
+ n = Py_Size(item);
+ if (PyString_Check(item))
+ q = PyString_AS_STRING(item);
+ else
+ q = PyBytes_AS_STRING(item);
+ Py_MEMCPY(p, q, n);
+ p += n;
}
Py_DECREF(seq);
@@ -1677,7 +1540,7 @@ _PyString_Join(PyObject *sep, PyObject *x)
{
assert(sep != NULL && PyString_Check(sep));
assert(x != NULL);
- return string_join((PyStringObject *)sep, x);
+ return string_join(sep, x);
}
Py_LOCAL_INLINE(void)
@@ -1730,7 +1593,7 @@ string_find_internal(PyStringObject *self, PyObject *args, int dir)
PyDoc_STRVAR(find__doc__,
-"S.find(sub [,start [,end]]) -> int\n\
+"B.find(sub [,start [,end]]) -> int\n\
\n\
Return the lowest index in S where substring sub is found,\n\
such that sub is contained within s[start:end]. Optional\n\
@@ -1749,9 +1612,9 @@ string_find(PyStringObject *self, PyObject *args)
PyDoc_STRVAR(index__doc__,
-"S.index(sub [,start [,end]]) -> int\n\
+"B.index(sub [,start [,end]]) -> int\n\
\n\
-Like S.find() but raise ValueError when the substring is not found.");
+Like B.find() but raise ValueError when the substring is not found.");
static PyObject *
string_index(PyStringObject *self, PyObject *args)
@@ -1769,9 +1632,9 @@ string_index(PyStringObject *self, PyObject *args)
PyDoc_STRVAR(rfind__doc__,
-"S.rfind(sub [,start [,end]]) -> int\n\
+"B.rfind(sub [,start [,end]]) -> int\n\
\n\
-Return the highest index in S where substring sub is found,\n\
+Return the highest index in B where substring sub is found,\n\
such that sub is contained within s[start:end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
@@ -1788,9 +1651,9 @@ string_rfind(PyStringObject *self, PyObject *args)
PyDoc_STRVAR(rindex__doc__,
-"S.rindex(sub [,start [,end]]) -> int\n\
+"B.rindex(sub [,start [,end]]) -> int\n\
\n\
-Like S.rfind() but raise ValueError when the substring is not found.");
+Like B.rfind() but raise ValueError when the substring is not found.");
static PyObject *
string_rindex(PyStringObject *self, PyObject *args)
@@ -1810,12 +1673,18 @@ string_rindex(PyStringObject *self, PyObject *args)
Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
{
+ Py_buffer vsep;
char *s = PyString_AS_STRING(self);
Py_ssize_t len = PyString_GET_SIZE(self);
- char *sep = PyString_AS_STRING(sepobj);
- Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
+ char *sep;
+ Py_ssize_t seplen;
Py_ssize_t i, j;
+ if (_getbuffer(sepobj, &vsep) < 0)
+ return NULL;
+ sep = vsep.buf;
+ seplen = vsep.len;
+
i = 0;
if (striptype != RIGHTSTRIP) {
while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
@@ -1831,6 +1700,8 @@ do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
j++;
}
+ PyObject_ReleaseBuffer(sepobj, &vsep);
+
if (i == 0 && j == len && PyString_CheckExact(self)) {
Py_INCREF(self);
return (PyObject*)self;
@@ -1879,36 +1750,17 @@ do_argstrip(PyStringObject *self, int striptype, PyObject *args)
return NULL;
if (sep != NULL && sep != Py_None) {
- if (PyString_Check(sep))
- return do_xstrip(self, striptype, sep);
- else if (PyUnicode_Check(sep)) {
- PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
- PyObject *res;
- if (uniself==NULL)
- return NULL;
- res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
- striptype, sep);
- Py_DECREF(uniself);
- return res;
- }
- PyErr_Format(PyExc_TypeError,
- "%s arg must be None or string",
- STRIPNAME(striptype));
- return NULL;
+ return do_xstrip(self, striptype, sep);
}
-
return do_strip(self, striptype);
}
PyDoc_STRVAR(strip__doc__,
-"S.strip([chars]) -> string\n\
+"B.strip([bytes]) -> bytes\n\
\n\
-Return a copy of the string S with leading and trailing\n\
-whitespace removed.\n\
-If chars is given and not None, remove characters in chars instead.\n\
-If chars is unicode, S will be converted to unicode before stripping");
-
+Strip leading and trailing bytes contained in the argument.\n\
+If the argument is omitted, strip trailing ASCII whitespace.");
static PyObject *
string_strip(PyStringObject *self, PyObject *args)
{
@@ -1920,12 +1772,10 @@ string_strip(PyStringObject *self, PyObject *args)
PyDoc_STRVAR(lstrip__doc__,
-"S.lstrip([chars]) -> string\n\
+"B.lstrip([bytes]) -> bytes\n\
\n\
-Return a copy of the string S with leading whitespace removed.\n\
-If chars is given and not None, remove characters in chars instead.\n\
-If chars is unicode, S will be converted to unicode before stripping");
-
+Strip leading bytes contained in the argument.\n\
+If the argument is omitted, strip leading ASCII whitespace.");
static PyObject *
string_lstrip(PyStringObject *self, PyObject *args)
{
@@ -1937,12 +1787,10 @@ string_lstrip(PyStringObject *self, PyObject *args)
PyDoc_STRVAR(rstrip__doc__,
-"S.rstrip([chars]) -> string\n\
+"B.rstrip([bytes]) -> bytes\n\
\n\
-Return a copy of the string S with trailing whitespace removed.\n\
-If chars is given and not None, remove characters in chars instead.\n\
-If chars is unicode, S will be converted to unicode before stripping");
-
+Strip trailing bytes contained in the argument.\n\
+If the argument is omitted, strip trailing ASCII whitespace.");
static PyObject *
string_rstrip(PyStringObject *self, PyObject *args)
{
@@ -1954,7 +1802,7 @@ string_rstrip(PyStringObject *self, PyObject *args)
PyDoc_STRVAR(count__doc__,
-"S.count(sub[, start[, end]]) -> int\n\
+"B.count(sub [,start [,end]]) -> int\n\
\n\
Return the number of non-overlapping occurrences of substring sub in\n\
string S[start:end]. Optional arguments start and end are interpreted\n\
@@ -1996,12 +1844,12 @@ string_count(PyStringObject *self, PyObject *args)
PyDoc_STRVAR(translate__doc__,
-"S.translate(table [,deletechars]) -> string\n\
+"B.translate(table[, deletechars]) -> bytes\n\
\n\
-Return a copy of the string S, where all characters occurring\n\
-in the optional argument deletechars are removed, and the\n\
-remaining characters have been mapped through the given\n\
-translation table, which must be a string of length 256.");
+Return a copy of B, where all characters occurring in the\n\
+optional argument deletechars are removed, and the remaining\n\
+characters have been mapped through the given translation\n\
+table, which must be a bytes object of length 256.");
static PyObject *
string_translate(PyStringObject *self, PyObject *args)
@@ -2187,7 +2035,7 @@ findstring(const char *target, Py_ssize_t target_len,
return end;
} else {
for (; start <= end; start++)
- if (Py_STRING_MATCH(target, start, pattern, pattern_len))
+ if (Py_STRING_MATCH(target, start,pattern,pattern_len))
return start;
}
return -1;
@@ -2225,14 +2073,15 @@ countstring(const char *target, Py_ssize_t target_len,
end -= pattern_len;
if (direction < 0) {
for (; (end >= start); end--)
- if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
+ if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
count++;
if (--maxcount <= 0) break;
end -= pattern_len-1;
}
} else {
for (; (start <= end); start++)
- if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
+ if (Py_STRING_MATCH(target, start,
+ pattern, pattern_len)) {
count++;
if (--maxcount <= 0)
break;
@@ -2522,12 +2371,14 @@ replace_single_character(PyStringObject *self,
/* result_len = self_len + count * (to_len-1) */
product = count * (to_len-1);
if (product / (to_len-1) != count) {
- PyErr_SetString(PyExc_OverflowError, "replace string is too long");
+ PyErr_SetString(PyExc_OverflowError,
+ "replace string is too long");
return NULL;
}
result_len = self_len + product;
if (result_len < 0) {
- PyErr_SetString(PyExc_OverflowError, "replace string is too long");
+ PyErr_SetString(PyExc_OverflowError,
+ "replace string is too long");
return NULL;
}
@@ -2590,12 +2441,14 @@ replace_substring(PyStringObject *self,
/* result_len = self_len + count * (to_len-from_len) */
product = count * (to_len-from_len);
if (product / (to_len-from_len) != count) {
- PyErr_SetString(PyExc_OverflowError, "replace string is too long");
+ PyErr_SetString(PyExc_OverflowError,
+ "replace string is too long");
return NULL;
}
result_len = self_len + product;
if (result_len < 0) {
- PyErr_SetString(PyExc_OverflowError, "replace string is too long");
+ PyErr_SetString(PyExc_OverflowError,
+ "replace string is too long");
return NULL;
}
@@ -2675,7 +2528,8 @@ replace(PyStringObject *self,
return replace_delete_single_character(
self, from_s[0], maxcount);
} else {
- return replace_delete_substring(self, from_s, from_len, maxcount);
+ return replace_delete_substring(self, from_s,
+ from_len, maxcount);
}
}
@@ -2690,7 +2544,8 @@ replace(PyStringObject *self,
maxcount);
} else {
return replace_substring_in_place(
- self, from_s, from_len, to_s, to_len, maxcount);
+ self, from_s, from_len, to_s, to_len,
+ maxcount);
}
}
@@ -2700,14 +2555,15 @@ replace(PyStringObject *self,
to_s, to_len, maxcount);
} else {
/* len('from')>=2, len('to')>=1 */
- return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
+ return replace_substring(self, from_s, from_len, to_s, to_len,
+ maxcount);
}
}
PyDoc_STRVAR(replace__doc__,
-"S.replace (old, new[, count]) -> string\n\
+"B.replace(old, new[, count]) -> bytes\n\
\n\
-Return a copy of string S with all occurrences of substring\n\
+Return a copy of B with all occurrences of subsection\n\
old replaced by new. If the optional argument count is\n\
given, only the first count occurrences are replaced.");
@@ -2794,11 +2650,11 @@ _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
PyDoc_STRVAR(startswith__doc__,
-"S.startswith(prefix[, start[, end]]) -> bool\n\
+"B.startswith(prefix [,start [,end]]) -> bool\n\
\n\
-Return True if S starts with the specified prefix, False otherwise.\n\
-With optional start, test S beginning at that position.\n\
-With optional end, stop comparing S at that position.\n\
+Return True if B starts with the specified prefix, False otherwise.\n\
+With optional start, test B beginning at that position.\n\
+With optional end, stop comparing B at that position.\n\
prefix can also be a tuple of strings to try.");
static PyObject *
@@ -2835,11 +2691,11 @@ string_startswith(PyStringObject *self, PyObject *args)
PyDoc_STRVAR(endswith__doc__,
-"S.endswith(suffix[, start[, end]]) -> bool\n\
+"B.endswith(suffix [,start [,end]]) -> bool\n\
\n\
-Return True if S ends with the specified suffix, False otherwise.\n\
-With optional start, test S beginning at that position.\n\
-With optional end, stop comparing S at that position.\n\
+Return True if B ends with the specified suffix, False otherwise.\n\
+With optional start, test B beginning at that position.\n\
+With optional end, stop comparing B at that position.\n\
suffix can also be a tuple of strings to try.");
static PyObject *
@@ -2876,63 +2732,50 @@ string_endswith(PyStringObject *self, PyObject *args)
PyDoc_STRVAR(decode__doc__,
-"S.decode([encoding[,errors]]) -> object\n\
+"B.decode([encoding[, errors]]) -> object\n\
\n\
Decodes S using the codec registered for encoding. encoding defaults\n\
to the default encoding. errors may be given to set a different error\n\
-handling scheme. Default is 'strict' meaning that encoding errors raise\n\
-a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
+handling scheme. Default is 'strict' meaning that encoding errors raise\n\
+a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
as well as any other name registerd with codecs.register_error that is\n\
able to handle UnicodeDecodeErrors.");
static PyObject *
-string_decode(PyStringObject *self, PyObject *args)
+string_decode(PyObject *self, PyObject *args)
{
- char *encoding = NULL;
- char *errors = NULL;
- PyObject *v;
-
- if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
- return NULL;
- v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
- if (v == NULL)
- goto onError;
- if (!PyString_Check(v) && !PyUnicode_Check(v)) {
- PyErr_Format(PyExc_TypeError,
- "decoder did not return a string/unicode object "
- "(type=%.400s)",
- Py_Type(v)->tp_name);
- Py_DECREF(v);
- return NULL;
- }
- return v;
+ const char *encoding = NULL;
+ const char *errors = NULL;
- onError:
- return NULL;
+ if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
+ return NULL;
+ if (encoding == NULL)
+ encoding = PyUnicode_GetDefaultEncoding();
+ return PyCodec_Decode(self, encoding, errors);
}
PyDoc_STRVAR(fromhex_doc,
-"str8.fromhex(string) -> str8\n\
+"bytes.fromhex(string) -> bytes\n\
\n\
-Create a str8 object from a string of hexadecimal numbers.\n\
-Spaces between two numbers are accepted. Example:\n\
-str8.fromhex('10 1112') -> s'\\x10\\x11\\x12'.");
+Create a bytes object from a string of hexadecimal numbers.\n\
+Spaces between two numbers are accepted.\n\
+Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
static int
hex_digit_to_int(Py_UNICODE c)
{
- if (c >= 128)
- return -1;
- if (ISDIGIT(c))
- return c - '0';
- else {
- if (ISUPPER(c))
- c = TOLOWER(c);
- if (c >= 'a' && c <= 'f')
- return c - 'a' + 10;
- }
- return -1;
+ if (c >= 128)
+ return -1;
+ if (ISDIGIT(c))
+ return c - '0';
+ else {
+ if (ISUPPER(c))
+ c = TOLOWER(c);
+ if (c >= 'a' && c <= 'f')
+ return c - 'a' + 10;
+ }
+ return -1;
}
static PyObject *
@@ -2975,7 +2818,7 @@ string_fromhex(PyObject *cls, PyObject *args)
return newstring;
error:
- Py_DECREF(newstring);
+ Py_XDECREF(newstring);
return NULL;
}
@@ -3058,11 +2901,11 @@ string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
const char *errors = NULL;
PyObject *new = NULL;
Py_ssize_t i, size;
- static char *kwlist[] = {"object", "encoding", "errors", 0};
+ static char *kwlist[] = {"source", "encoding", "errors", 0};
if (type != &PyString_Type)
return str_subtype_new(type, args, kwds);
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str8", kwlist, &x,
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
&encoding, &errors))
return NULL;
if (x == NULL) {
@@ -3085,34 +2928,37 @@ string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
new = PyCodec_Encode(x, encoding, errors);
if (new == NULL)
return NULL;
- /* XXX(gb): must accept bytes here since codecs output bytes
- at the moment */
- if (PyBytes_Check(new)) {
- PyObject *str;
- str = PyString_FromString(PyBytes_AsString(new));
- Py_DECREF(new);
- if (!str)
- return NULL;
- return str;
- }
- if (!PyString_Check(new)) {
- PyErr_Format(PyExc_TypeError,
- "encoder did not return a str8 "
- "object (type=%.400s)",
- Py_Type(new)->tp_name);
- Py_DECREF(new);
- return NULL;
- }
+ assert(PyString_Check(new));
return new;
}
/* If it's not unicode, there can't be encoding or errors */
if (encoding != NULL || errors != NULL) {
PyErr_SetString(PyExc_TypeError,
- "encoding or errors without a string argument");
+ "encoding or errors without a string argument");
return NULL;
}
+ /* Is it an int? */
+ size = PyNumber_AsSsize_t(x, PyExc_ValueError);
+ if (size == -1 && PyErr_Occurred()) {
+ PyErr_Clear();
+ }
+ else {
+ if (size < 0) {
+ PyErr_SetString(PyExc_ValueError, "negative count");
+ return NULL;
+ }
+ new = PyString_FromStringAndSize(NULL, size);
+ if (new == NULL) {
+ return NULL;
+ }
+ if (size > 0) {
+ memset(((PyStringObject*)new)->ob_sval, 0, size);
+ }
+ return new;
+ }
+
/* Use the modern buffer interface */
if (PyObject_CheckBuffer(x)) {
Py_buffer view;
@@ -3133,8 +2979,10 @@ string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return NULL;
}
- /* For the iterator version, create a string object and resize as needed. */
- /* XXX(gb): is 64 a good value? also, optimize this if length is known */
+ /* For iterator version, create a string object and resize as needed */
+ /* XXX(gb): is 64 a good value? also, optimize if length is known */
+ /* XXX(guido): perhaps use PySequence_Fast() -- I can't imagine the
+ input being a truly long iterator. */
size = 64;
new = PyString_FromStringAndSize(NULL, size);
if (new == NULL)
@@ -3158,9 +3006,9 @@ string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
item = iternext(it);
if (item == NULL) {
if (PyErr_Occurred()) {
- if (!PyErr_ExceptionMatches(PyExc_StopIteration))
- goto error;
- PyErr_Clear();
+ if (!PyErr_ExceptionMatches(PyExc_StopIteration))
+ goto error;
+ PyErr_Clear();
}
break;
}
@@ -3193,7 +3041,7 @@ string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return new;
error:
- /* Error handling when it != NULL */
+ /* Error handling when new != NULL */
Py_XDECREF(it);
Py_DECREF(new);
return NULL;
@@ -3213,43 +3061,32 @@ str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
n = PyString_GET_SIZE(tmp);
pnew = type->tp_alloc(type, n);
if (pnew != NULL) {
- Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
+ Py_MEMCPY(PyString_AS_STRING(pnew),
+ PyString_AS_STRING(tmp), n+1);
((PyStringObject *)pnew)->ob_shash =
((PyStringObject *)tmp)->ob_shash;
- ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
}
Py_DECREF(tmp);
return pnew;
}
-static PyObject *
-string_mod(PyObject *v, PyObject *w)
-{
- if (!PyString_Check(v)) {
- Py_INCREF(Py_NotImplemented);
- return Py_NotImplemented;
- }
- return PyString_Format(v, w);
-}
-
-static PyNumberMethods string_as_number = {
- 0, /*nb_add*/
- 0, /*nb_subtract*/
- 0, /*nb_multiply*/
- string_mod, /*nb_remainder*/
-};
-
PyDoc_STRVAR(string_doc,
-"str(object) -> string\n\
+"bytes(iterable_of_ints) -> bytes.\n\
+bytes(string, encoding[, errors]) -> bytes\n\
+bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer.\n\
+bytes(memory_view) -> bytes.\n\
\n\
-Return a nice string representation of the object.\n\
-If the argument is a string, the return value is the same object.");
+Construct an immutable array of bytes from:\n\
+ - an iterable yielding integers in range(256)\n\
+ - a text string encoded using the specified encoding\n\
+ - a bytes or a buffer object\n\
+ - any object implementing the buffer API.");
static PyObject *str_iter(PyObject *seq);
PyTypeObject PyString_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
- "str8",
+ "bytes",
sizeof(PyStringObject),
sizeof(char),
string_dealloc, /* tp_dealloc */
@@ -3257,8 +3094,8 @@ PyTypeObject PyString_Type = {
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
- string_repr, /* tp_repr */
- &string_as_number, /* tp_as_number */
+ (reprfunc)string_repr, /* tp_repr */
+ 0, /* tp_as_number */
&string_as_sequence, /* tp_as_sequence */
&string_as_mapping, /* tp_as_mapping */
(hashfunc)string_hash, /* tp_hash */
@@ -3294,14 +3131,15 @@ void
PyString_Concat(register PyObject **pv, register PyObject *w)
{
register PyObject *v;
+ assert(pv != NULL);
if (*pv == NULL)
return;
- if (w == NULL || !PyString_Check(*pv)) {
+ if (w == NULL) {
Py_DECREF(*pv);
*pv = NULL;
return;
}
- v = string_concat((PyStringObject *) *pv, w);
+ v = string_concat(*pv, w);
Py_DECREF(*pv);
*pv = v;
}
@@ -3334,8 +3172,7 @@ _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
register PyObject *v;
register PyStringObject *sv;
v = *pv;
- if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0 ||
- PyString_CHECK_INTERNED(v)) {
+ if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0) {
*pv = 0;
Py_DECREF(v);
PyErr_BadInternalCall();
@@ -3359,85 +3196,6 @@ _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
return 0;
}
-/* Helpers for formatstring */
-
-Py_LOCAL_INLINE(PyObject *)
-getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
-{
- Py_ssize_t argidx = *p_argidx;
- if (argidx < arglen) {
- (*p_argidx)++;
- if (arglen < 0)
- return args;
- else
- return PyTuple_GetItem(args, argidx);
- }
- PyErr_SetString(PyExc_TypeError,
- "not enough arguments for format string");
- return NULL;
-}
-
-/* Format codes
- * F_LJUST '-'
- * F_SIGN '+'
- * F_BLANK ' '
- * F_ALT '#'
- * F_ZERO '0'
- */
-#define F_LJUST (1<<0)
-#define F_SIGN (1<<1)
-#define F_BLANK (1<<2)
-#define F_ALT (1<<3)
-#define F_ZERO (1<<4)
-
-Py_LOCAL_INLINE(int)
-formatfloat(char *buf, size_t buflen, int flags,
- int prec, int type, PyObject *v)
-{
- /* fmt = '%#.' + `prec` + `type`
- worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
- char fmt[20];
- double x;
- x = PyFloat_AsDouble(v);
- if (x == -1.0 && PyErr_Occurred()) {
- PyErr_Format(PyExc_TypeError, "float argument required, "
- "not %.200s", Py_Type(v)->tp_name);
- return -1;
- }
- if (prec < 0)
- prec = 6;
- if (type == 'f' && fabs(x)/1e25 >= 1e25)
- type = 'g';
- /* Worst case length calc to ensure no buffer overrun:
-
- 'g' formats:
- fmt = %#.<prec>g
- buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
- for any double rep.)
- len = 1 + prec + 1 + 2 + 5 = 9 + prec
-
- 'f' formats:
- buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
- len = 1 + 50 + 1 + prec = 52 + prec
-
- If prec=0 the effective precision is 1 (the leading digit is
- always given), therefore increase the length by one.
-
- */
- if (((type == 'g' || type == 'G') &&
- buflen <= (size_t)10 + (size_t)prec) ||
- (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
- PyErr_SetString(PyExc_OverflowError,
- "formatted float is too long (precision too large?)");
- return -1;
- }
- PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
- (flags&F_ALT) ? "#" : "",
- prec, type);
- PyOS_ascii_formatd(buf, buflen, fmt, x);
- return (int)strlen(buf);
-}
-
/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
* the F_ALT flag, for Python's long (unbounded) ints. It's not used for
* Python's regular ints.
@@ -3516,7 +3274,8 @@ _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
}
llen = PyString_Size(result);
if (llen > INT_MAX) {
- PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
+ PyErr_SetString(PyExc_ValueError,
+ "string too large in _PyString_FormatLong");
return NULL;
}
len = (int)llen;
@@ -3534,7 +3293,7 @@ _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
(type == 'o' || type == 'x' || type == 'X'))) {
assert(buf[sign] == '0');
assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
- buf[sign+1] == 'o');
+ buf[sign+1] == 'o');
numnondigits -= 2;
buf += 2;
len -= 2;
@@ -3580,623 +3339,6 @@ _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
return result;
}
-Py_LOCAL_INLINE(int)
-formatint(char *buf, size_t buflen, int flags,
- int prec, int type, PyObject *v)
-{
- /* fmt = '%#.' + `prec` + 'l' + `type`
- worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
- + 1 + 1 = 24 */
- char fmt[64]; /* plenty big enough! */
- char *sign;
- long x;
-
- x = PyInt_AsLong(v);
- if (x == -1 && PyErr_Occurred()) {
- PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
- Py_Type(v)->tp_name);
- return -1;
- }
- if (x < 0 && type == 'u') {
- type = 'd';
- }
- if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
- sign = "-";
- else
- sign = "";
- if (prec < 0)
- prec = 1;
-
- if ((flags & F_ALT) &&
- (type == 'x' || type == 'X' || type == 'o')) {
- /* When converting under %#o, %#x or %#X, there are a number
- * of issues that cause pain:
- * - for %#o, we want a different base marker than C
- * - when 0 is being converted, the C standard leaves off
- * the '0x' or '0X', which is inconsistent with other
- * %#x/%#X conversions and inconsistent with Python's
- * hex() function
- * - there are platforms that violate the standard and
- * convert 0 with the '0x' or '0X'
- * (Metrowerks, Compaq Tru64)
- * - there are platforms that give '0x' when converting
- * under %#X, but convert 0 in accordance with the
- * standard (OS/2 EMX)
- *
- * We can achieve the desired consistency by inserting our
- * own '0x' or '0X' prefix, and substituting %x/%X in place
- * of %#x/%#X.
- *
- * Note that this is the same approach as used in
- * formatint() in unicodeobject.c
- */
- PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
- sign, type, prec, type);
- }
- else {
- PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
- sign, (flags&F_ALT) ? "#" : "",
- prec, type);
- }
-
- /* buf = '+'/'-'/'' + '0o'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
- * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
- */
- if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
- PyErr_SetString(PyExc_OverflowError,
- "formatted integer is too long (precision too large?)");
- return -1;
- }
- if (sign[0])
- PyOS_snprintf(buf, buflen, fmt, -x);
- else
- PyOS_snprintf(buf, buflen, fmt, x);
- return (int)strlen(buf);
-}
-
-Py_LOCAL_INLINE(int)
-formatchar(char *buf, size_t buflen, PyObject *v)
-{
- /* presume that the buffer is at least 2 characters long */
- if (PyString_Check(v)) {
- if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
- return -1;
- }
- else {
- if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
- return -1;
- }
- buf[1] = '\0';
- return 1;
-}
-
-/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
-
- FORMATBUFLEN is the length of the buffer in which the floats, ints, &
- chars are formatted. XXX This is a magic number. Each formatting
- routine does bounds checking to ensure no overflow, but a better
- solution may be to malloc a buffer of appropriate size for each
- format. For now, the current solution is sufficient.
-*/
-#define FORMATBUFLEN (size_t)120
-
-PyObject *
-PyString_Format(PyObject *format, PyObject *args)
-{
- char *fmt, *res;
- Py_ssize_t arglen, argidx;
- Py_ssize_t reslen, rescnt, fmtcnt;
- int args_owned = 0;
- PyObject *result, *orig_args;
- PyObject *v, *w;
- PyObject *dict = NULL;
- if (format == NULL || !PyString_Check(format) || args == NULL) {
- PyErr_BadInternalCall();
- return NULL;
- }
- orig_args = args;
- fmt = PyString_AS_STRING(format);
- fmtcnt = PyString_GET_SIZE(format);
- reslen = rescnt = fmtcnt + 100;
- result = PyString_FromStringAndSize((char *)NULL, reslen);
- if (result == NULL)
- return NULL;
- res = PyString_AsString(result);
- if (PyTuple_Check(args)) {
- arglen = PyTuple_GET_SIZE(args);
- argidx = 0;
- }
- else {
- arglen = -1;
- argidx = -2;
- }
- if (Py_Type(args)->tp_as_mapping && !PyTuple_Check(args) &&
- !PyString_Check(args) && !PyUnicode_Check(args))
- dict = args;
- while (--fmtcnt >= 0) {
- if (*fmt != '%') {
- if (--rescnt < 0) {
- rescnt = fmtcnt + 100;
- reslen += rescnt;
- if (_PyString_Resize(&result, reslen) < 0)
- return NULL;
- res = PyString_AS_STRING(result)
- + reslen - rescnt;
- --rescnt;
- }
- *res++ = *fmt++;
- }
- else {
- /* Got a format specifier */
- int flags = 0;
- Py_ssize_t width = -1;
- int prec = -1;
- int c = '\0';
- int fill;
- PyObject *v = NULL;
- PyObject *temp = NULL;
- char *pbuf;
- int sign;
- Py_ssize_t len;
- char formatbuf[FORMATBUFLEN];
- /* For format{float,int,char}() */
- char *fmt_start = fmt;
- Py_ssize_t argidx_start = argidx;
-
- fmt++;
- if (*fmt == '(') {
- char *keystart;
- Py_ssize_t keylen;
- PyObject *key;
- int pcount = 1;
-
- if (dict == NULL) {
- PyErr_SetString(PyExc_TypeError,
- "format requires a mapping");
- goto error;
- }
- ++fmt;
- --fmtcnt;
- keystart = fmt;
- /* Skip over balanced parentheses */
- while (pcount > 0 && --fmtcnt >= 0) {
- if (*fmt == ')')
- --pcount;
- else if (*fmt == '(')
- ++pcount;
- fmt++;
- }
- keylen = fmt - keystart - 1;
- if (fmtcnt < 0 || pcount > 0) {
- PyErr_SetString(PyExc_ValueError,
- "incomplete format key");
- goto error;
- }
- key = PyString_FromStringAndSize(keystart,
- keylen);
- if (key == NULL)
- goto error;
- if (args_owned) {
- Py_DECREF(args);
- args_owned = 0;
- }
- args = PyObject_GetItem(dict, key);
- Py_DECREF(key);
- if (args == NULL) {
- goto error;
- }
- args_owned = 1;
- arglen = -1;
- argidx = -2;
- }
- while (--fmtcnt >= 0) {
- switch (c = *fmt++) {
- case '-': flags |= F_LJUST; continue;
- case '+': flags |= F_SIGN; continue;
- case ' ': flags |= F_BLANK; continue;
- case '#': flags |= F_ALT; continue;
- case '0': flags |= F_ZERO; continue;
- }
- break;
- }
- if (c == '*') {
- v = getnextarg(args, arglen, &argidx);
- if (v == NULL)
- goto error;
- if (!PyInt_Check(v)) {
- PyErr_SetString(PyExc_TypeError,
- "* wants int");
- goto error;
- }
- width = PyInt_AsLong(v);
- if (width == -1 && PyErr_Occurred())
- goto error;
- if (width < 0) {
- flags |= F_LJUST;
- width = -width;
- }
- if (--fmtcnt >= 0)
- c = *fmt++;
- }
- else if (c >= 0 && ISDIGIT(c)) {
- width = c - '0';
- while (--fmtcnt >= 0) {
- c = Py_CHARMASK(*fmt++);
- if (!ISDIGIT(c))
- break;
- if ((width*10) / 10 != width) {
- PyErr_SetString(
- PyExc_ValueError,
- "width too big");
- goto error;
- }
- width = width*10 + (c - '0');
- }
- }
- if (c == '.') {
- prec = 0;
- if (--fmtcnt >= 0)
- c = *fmt++;
- if (c == '*') {
- v = getnextarg(args, arglen, &argidx);
- if (v == NULL)
- goto error;
- if (!PyInt_Check(v)) {
- PyErr_SetString(
- PyExc_TypeError,
- "* wants int");
- goto error;
- }
- prec = PyInt_AsLong(v);
- if (prec == -1 && PyErr_Occurred())
- goto error;
- if (prec < 0)
- prec = 0;
- if (--fmtcnt >= 0)
- c = *fmt++;
- }
- else if (c >= 0 && ISDIGIT(c)) {
- prec = c - '0';
- while (--fmtcnt >= 0) {
- c = Py_CHARMASK(*fmt++);
- if (!ISDIGIT(c))
- break;
- if ((prec*10) / 10 != prec) {
- PyErr_SetString(
- PyExc_ValueError,
- "prec too big");
- goto error;
- }
- prec = prec*10 + (c - '0');
- }
- }
- } /* prec */
- if (fmtcnt >= 0) {
- if (c == 'h' || c == 'l' || c == 'L') {
- if (--fmtcnt >= 0)
- c = *fmt++;
- }
- }
- if (fmtcnt < 0) {
- PyErr_SetString(PyExc_ValueError,
- "incomplete format");
- goto error;
- }
- if (c != '%') {
- v = getnextarg(args, arglen, &argidx);
- if (v == NULL)
- goto error;
- }
- sign = 0;
- fill = ' ';
- switch (c) {
- case '%':
- pbuf = "%";
- len = 1;
- break;
- case 's':
- if (PyUnicode_Check(v)) {
- fmt = fmt_start;
- argidx = argidx_start;
- goto unicode;
- }
- temp = _PyObject_Str(v);
- if (temp != NULL && PyUnicode_Check(temp)) {
- Py_DECREF(temp);
- fmt = fmt_start;
- argidx = argidx_start;
- goto unicode;
- }
- /* Fall through */
- case 'r':
- if (c == 'r')
- temp = PyObject_ReprStr8(v);
- if (temp == NULL)
- goto error;
- if (!PyString_Check(temp)) {
- PyErr_SetString(PyExc_TypeError,
- "%s argument has non-string str()/repr()");
- Py_DECREF(temp);
- goto error;
- }
- pbuf = PyString_AS_STRING(temp);
- len = PyString_GET_SIZE(temp);
- if (prec >= 0 && len > prec)
- len = prec;
- break;
- case 'i':
- case 'd':
- case 'u':
- case 'o':
- case 'x':
- case 'X':
- if (c == 'i')
- c = 'd';
- if (PyLong_Check(v)) {
- int ilen;
- temp = _PyString_FormatLong(v, flags,
- prec, c, &pbuf, &ilen);
- len = ilen;
- if (!temp)
- goto error;
- sign = 1;
- }
- else {
- pbuf = formatbuf;
- len = formatint(pbuf,
- sizeof(formatbuf),
- flags, prec, c, v);
- if (len < 0)
- goto error;
- sign = 1;
- }
- if (flags & F_ZERO)
- fill = '0';
- break;
- case 'e':
- case 'E':
- case 'f':
- case 'F':
- case 'g':
- case 'G':
- if (c == 'F')
- c = 'f';
- pbuf = formatbuf;
- len = formatfloat(pbuf, sizeof(formatbuf),
- flags, prec, c, v);
- if (len < 0)
- goto error;
- sign = 1;
- if (flags & F_ZERO)
- fill = '0';
- break;
- case 'c':
- if (PyUnicode_Check(v)) {
- fmt = fmt_start;
- argidx = argidx_start;
- goto unicode;
- }
- pbuf = formatbuf;
- len = formatchar(pbuf, sizeof(formatbuf), v);
- if (len < 0)
- goto error;
- break;
- default:
- PyErr_Format(PyExc_ValueError,
- "unsupported format character '%c' (0x%x) "
- "at index %zd",
- c, c,
- (Py_ssize_t)(fmt - 1 -
- PyString_AsString(format)));
- goto error;
- }
- if (sign) {
- if (*pbuf == '-' || *pbuf == '+') {
- sign = *pbuf++;
- len--;
- }
- else if (flags & F_SIGN)
- sign = '+';
- else if (flags & F_BLANK)
- sign = ' ';
- else
- sign = 0;
- }
- if (width < len)
- width = len;
- if (rescnt - (sign != 0) < width) {
- reslen -= rescnt;
- rescnt = width + fmtcnt + 100;
- reslen += rescnt;
- if (reslen < 0) {
- Py_DECREF(result);
- Py_XDECREF(temp);
- return PyErr_NoMemory();
- }
- if (_PyString_Resize(&result, reslen) < 0) {
- Py_XDECREF(temp);
- return NULL;
- }
- res = PyString_AS_STRING(result)
- + reslen - rescnt;
- }
- if (sign) {
- if (fill != ' ')
- *res++ = sign;
- rescnt--;
- if (width > len)
- width--;
- }
- if ((flags & F_ALT) &&
- (c == 'x' || c == 'X' || c == 'o')) {
- assert(pbuf[0] == '0');
- assert(pbuf[1] == c);
- if (fill != ' ') {
- *res++ = *pbuf++;
- *res++ = *pbuf++;
- }
- rescnt -= 2;
- width -= 2;
- if (width < 0)
- width = 0;
- len -= 2;
- }
- if (width > len && !(flags & F_LJUST)) {
- do {
- --rescnt;
- *res++ = fill;
- } while (--width > len);
- }
- if (fill == ' ') {
- if (sign)
- *res++ = sign;
- if ((flags & F_ALT) &&
- (c == 'x' || c == 'X' || c == 'o')) {
- assert(pbuf[0] == '0');
- assert(pbuf[1] == c);
- *res++ = *pbuf++;
- *res++ = *pbuf++;
- }
- }
- Py_MEMCPY(res, pbuf, len);
- res += len;
- rescnt -= len;
- while (--width >= len) {
- --rescnt;
- *res++ = ' ';
- }
- if (dict && (argidx < arglen) && c != '%') {
- PyErr_SetString(PyExc_TypeError,
- "not all arguments converted during string formatting");
- Py_XDECREF(temp);
- goto error;
- }
- Py_XDECREF(temp);
- } /* '%' */
- } /* until end */
- if (argidx < arglen && !dict) {
- PyErr_SetString(PyExc_TypeError,
- "not all arguments converted during string formatting");
- goto error;
- }
- if (args_owned) {
- Py_DECREF(args);
- }
- _PyString_Resize(&result, reslen - rescnt);
- return result;
-
- unicode:
- if (args_owned) {
- Py_DECREF(args);
- args_owned = 0;
- }
- /* Fiddle args right (remove the first argidx arguments) */
- if (PyTuple_Check(orig_args) && argidx > 0) {
- PyObject *v;
- Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
- v = PyTuple_New(n);
- if (v == NULL)
- goto error;
- while (--n >= 0) {
- PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
- Py_INCREF(w);
- PyTuple_SET_ITEM(v, n, w);
- }
- args = v;
- } else {
- Py_INCREF(orig_args);
- args = orig_args;
- }
- args_owned = 1;
- /* Take what we have of the result and let the Unicode formatting
- function format the rest of the input. */
- rescnt = res - PyString_AS_STRING(result);
- if (_PyString_Resize(&result, rescnt))
- goto error;
- fmtcnt = PyString_GET_SIZE(format) - \
- (fmt - PyString_AS_STRING(format));
- format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
- if (format == NULL)
- goto error;
- v = PyUnicode_Format(format, args);
- Py_DECREF(format);
- if (v == NULL)
- goto error;
- /* Paste what we have (result) to what the Unicode formatting
- function returned (v) and return the result (or error) */
- w = PyUnicode_Concat(result, v);
- Py_DECREF(result);
- Py_DECREF(v);
- Py_DECREF(args);
- return w;
-
- error:
- Py_DECREF(result);
- if (args_owned) {
- Py_DECREF(args);
- }
- return NULL;
-}
-
-void
-PyString_InternInPlace(PyObject **p)
-{
- register PyStringObject *s = (PyStringObject *)(*p);
- PyObject *t;
- if (s == NULL || !PyString_Check(s))
- Py_FatalError("PyString_InternInPlace: strings only please!");
- /* If it's a string subclass, we don't really know what putting
- it in the interned dict might do. */
- if (!PyString_CheckExact(s))
- return;
- if (PyString_CHECK_INTERNED(s))
- return;
- if (interned == NULL) {
- interned = PyDict_New();
- if (interned == NULL) {
- PyErr_Clear(); /* Don't leave an exception */
- return;
- }
- }
- t = PyDict_GetItem(interned, (PyObject *)s);
- if (t) {
- Py_INCREF(t);
- Py_DECREF(*p);
- *p = t;
- return;
- }
-
- if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
- PyErr_Clear();
- return;
- }
- /* The two references in interned are not counted by refcnt.
- The string deallocator will take care of this */
- Py_Refcnt(s) -= 2;
- PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
-}
-
-void
-PyString_InternImmortal(PyObject **p)
-{
- PyString_InternInPlace(p);
- if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
- PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
- Py_INCREF(*p);
- }
-}
-
-
-PyObject *
-PyString_InternFromString(const char *cp)
-{
- PyObject *s = PyString_FromString(cp);
- if (s == NULL)
- return NULL;
- PyString_InternInPlace(&s);
- return s;
-}
-
void
PyString_Fini(void)
{
@@ -4209,58 +3351,6 @@ PyString_Fini(void)
nullstring = NULL;
}
-void _Py_ReleaseInternedStrings(void)
-{
- PyObject *keys;
- PyStringObject *s;
- Py_ssize_t i, n;
- Py_ssize_t immortal_size = 0, mortal_size = 0;
-
- if (interned == NULL || !PyDict_Check(interned))
- return;
- keys = PyDict_Keys(interned);
- if (keys == NULL || !PyList_Check(keys)) {
- PyErr_Clear();
- return;
- }
-
- /* Since _Py_ReleaseInternedStrings() is intended to help a leak
- detector, interned strings are not forcibly deallocated; rather, we
- give them their stolen references back, and then clear and DECREF
- the interned dict. */
-
- n = PyList_GET_SIZE(keys);
- fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
- n);
- for (i = 0; i < n; i++) {
- s = (PyStringObject *) PyList_GET_ITEM(keys, i);
- switch (s->ob_sstate) {
- case SSTATE_NOT_INTERNED:
- /* XXX Shouldn't happen */
- break;
- case SSTATE_INTERNED_IMMORTAL:
- Py_Refcnt(s) += 1;
- immortal_size += Py_Size(s);
- break;
- case SSTATE_INTERNED_MORTAL:
- Py_Refcnt(s) += 2;
- mortal_size += Py_Size(s);
- break;
- default:
- Py_FatalError("Inconsistent interned string state.");
- }
- s->ob_sstate = SSTATE_NOT_INTERNED;
- }
- fprintf(stderr, "total size of all interned strings: "
- "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
- "mortal/immortal\n", mortal_size, immortal_size);
- Py_DECREF(keys);
- PyDict_Clear(interned);
- Py_DECREF(interned);
- interned = NULL;
-}
-
-
/*********************** Str Iterator ****************************/
typedef struct {