summaryrefslogtreecommitdiffstats
path: root/Objects
diff options
context:
space:
mode:
authorTim Peters <tim.peters@gmail.com>2001-01-19 03:03:47 (GMT)
committerTim Peters <tim.peters@gmail.com>2001-01-19 03:03:47 (GMT)
commit19fe14e76ac3619e633b10c0e31effc2dad3c543 (patch)
tree9a6355f1a18463a771ea59d8882ac20cbf30d296 /Objects
parente3d6e41d81427a8e2bdda9bf3994c2c0c00c19a6 (diff)
downloadcpython-19fe14e76ac3619e633b10c0e31effc2dad3c543.zip
cpython-19fe14e76ac3619e633b10c0e31effc2dad3c543.tar.gz
cpython-19fe14e76ac3619e633b10c0e31effc2dad3c543.tar.bz2
Derivative of patch #102549, "simpler, faster(!) implementation of string.join".
Also fixes two long-standing bugs (present in 2.0): 1. .join() didn't check that the result size fit in an int. 2. string.join(s) when len(s)==1 returned s[0] regardless of s[0]'s type; e.g., "".join([3]) returned 3 (overly optimistic optimization). I resisted a keen temptation to make .join() apply str() automagically.
Diffstat (limited to 'Objects')
-rw-r--r--Objects/stringobject.c90
1 files changed, 52 insertions, 38 deletions
diff --git a/Objects/stringobject.c b/Objects/stringobject.c
index eed4687..df3ab49 100644
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -794,46 +794,55 @@ static PyObject *
string_join(PyStringObject *self, PyObject *args)
{
char *sep = PyString_AS_STRING(self);
- int seplen = PyString_GET_SIZE(self);
+ const int seplen = PyString_GET_SIZE(self);
PyObject *res = NULL;
- int reslen = 0;
char *p;
int seqlen = 0;
- int sz = 100;
- int i, slen, sz_incr;
+ size_t sz = 0;
+ int i;
PyObject *orig, *seq, *item;
if (!PyArg_ParseTuple(args, "O:join", &orig))
return NULL;
- if (!(seq = PySequence_Fast(orig, ""))) {
+ seq = PySequence_Fast(orig, "");
+ if (seq == NULL) {
if (PyErr_ExceptionMatches(PyExc_TypeError))
PyErr_Format(PyExc_TypeError,
"sequence expected, %.80s found",
orig->ob_type->tp_name);
return NULL;
}
- /* From here on out, errors go through finally: for proper
- * reference count manipulations.
- */
+
seqlen = PySequence_Size(seq);
+ if (seqlen == 0) {
+ Py_DECREF(seq);
+ return PyString_FromString("");
+ }
if (seqlen == 1) {
item = PySequence_Fast_GET_ITEM(seq, 0);
+ if (!PyString_Check(item) && !PyUnicode_Check(item)) {
+ PyErr_Format(PyExc_TypeError,
+ "sequence item 0: expected string,"
+ " %.80s found",
+ item->ob_type->tp_name);
+ Py_DECREF(seq);
+ return NULL;
+ }
Py_INCREF(item);
Py_DECREF(seq);
return item;
}
- if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
- goto finally;
-
- p = PyString_AS_STRING(res);
-
+ /* There are at least two things to join. Do a pre-pass to figure out
+ * the total amount of space we'll need (sz), see whether any argument
+ * is absurd, and defer to the Unicode join if appropriate.
+ */
for (i = 0; i < seqlen; i++) {
+ const size_t old_sz = sz;
item = PySequence_Fast_GET_ITEM(seq, i);
if (!PyString_Check(item)){
if (PyUnicode_Check(item)) {
- Py_DECREF(res);
Py_DECREF(seq);
return PyUnicode_Join((PyObject *)self, orig);
}
@@ -841,40 +850,45 @@ string_join(PyStringObject *self, PyObject *args)
"sequence item %i: expected string,"
" %.80s found",
i, item->ob_type->tp_name);
- goto finally;
+ Py_DECREF(seq);
+ return NULL;
}
- slen = PyString_GET_SIZE(item);
- while (reslen + slen + seplen >= sz) {
- /* at least double the size of the string */
- sz_incr = slen + seplen > sz ? slen + seplen : sz;
- if (_PyString_Resize(&res, sz + sz_incr)) {
- goto finally;
- }
- sz += sz_incr;
- p = PyString_AS_STRING(res) + reslen;
+ sz += PyString_GET_SIZE(item);
+ if (i != 0)
+ sz += seplen;
+ if (sz < old_sz || sz > INT_MAX) {
+ PyErr_SetString(PyExc_OverflowError,
+ "join() is too long for a Python string");
+ Py_DECREF(seq);
+ return NULL;
}
- if (i > 0) {
+ }
+
+ /* Allocate result space. */
+ res = PyString_FromStringAndSize((char*)NULL, (int)sz);
+ if (res == NULL) {
+ Py_DECREF(seq);
+ return NULL;
+ }
+
+ /* Catenate everything. */
+ p = PyString_AS_STRING(res);
+ for (i = 0; i < seqlen; ++i) {
+ size_t n;
+ item = PySequence_Fast_GET_ITEM(seq, i);
+ n = PyString_GET_SIZE(item);
+ memcpy(p, PyString_AS_STRING(item), n);
+ p += n;
+ if (i < seqlen - 1) {
memcpy(p, sep, seplen);
p += seplen;
- reslen += seplen;
}
- memcpy(p, PyString_AS_STRING(item), slen);
- p += slen;
- reslen += slen;
}
- if (_PyString_Resize(&res, reslen))
- goto finally;
- Py_DECREF(seq);
- return res;
- finally:
Py_DECREF(seq);
- Py_XDECREF(res);
- return NULL;
+ return res;
}
-
-
static long
string_find_internal(PyStringObject *self, PyObject *args, int dir)
{