diff options
author | Raymond Hettinger <python@rcn.com> | 2004-08-06 18:43:09 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2004-08-06 18:43:09 (GMT) |
commit | 52a21b8e65e2a231595cfec639701266202438a2 (patch) | |
tree | 9c8c9ba3ea81643f19e5cd30281264bd993b6ddb | |
parent | d09d9664e6a0cde5e0c7a0234ef837e11df757f1 (diff) | |
download | cpython-52a21b8e65e2a231595cfec639701266202438a2.zip cpython-52a21b8e65e2a231595cfec639701266202438a2.tar.gz cpython-52a21b8e65e2a231595cfec639701266202438a2.tar.bz2 |
SF patch #980695: efficient string concatenation
(Original patch by Armin Rigo).
-rw-r--r-- | Doc/lib/libstdtypes.tex | 12 | ||||
-rw-r--r-- | Misc/NEWS | 5 | ||||
-rw-r--r-- | Python/ceval.c | 93 |
3 files changed, 107 insertions, 3 deletions
diff --git a/Doc/lib/libstdtypes.tex b/Doc/lib/libstdtypes.tex index c33360d..938dc6e 100644 --- a/Doc/lib/libstdtypes.tex +++ b/Doc/lib/libstdtypes.tex @@ -455,7 +455,7 @@ and \var{j} are integers: \lineiii{\var{x} not in \var{s}}{\code{0} if an item of \var{s} is equal to \var{x}, else \code{1}}{(1)} \hline - \lineiii{\var{s} + \var{t}}{the concatenation of \var{s} and \var{t}}{} + \lineiii{\var{s} + \var{t}}{the concatenation of \var{s} and \var{t}}{(6)} \lineiii{\var{s} * \var{n}\textrm{,} \var{n} * \var{s}}{\var{n} shallow copies of \var{s} concatenated}{(2)} \hline \lineiii{\var{s}[\var{i}]}{\var{i}'th item of \var{s}, origin 0}{(3)} @@ -536,6 +536,16 @@ In Python 2.3 and beyond, \var{x} may be a string of any length. (which end depends on the sign of \var{k}). Note, \var{k} cannot be zero. +\item[(6)] If \var{s} and \var{t} are both strings, some Python +implementations such as CPython can usally perform an inplace optimization +for assignments of the form \code{\var{s}=\var{s}+\var{t}} or +\code{\var{s}+=\var{t}}. When applicable, this optimization makes +quadratic run-time much less likely. This optimization is both version +and implementation dependent. For performance sensitive code, it is +preferrable to use the \method{str.join()} method which assures consistent +linear concatenation performance across versions and implementations. +\versionchanged[Formerly, string concatenation never occurred inplace]{2.4} + \end{description} @@ -12,6 +12,11 @@ What's New in Python 2.4 alpha 2? Core and builtins ----------------- +- Patch #980695: Implements efficient string concatenation for statements + of the form s=s+t and s+=t. This will vary across implementations. + Accordingly, the str.join() method is strongly preferred for performance + sensitive code. + - PEP-0318, Function Decorators have been added to the language. These are implemented using the Java-style @decorator syntax, like so: @staticmethod diff --git a/Python/ceval.c b/Python/ceval.c index 6c457e1..4dd31ab 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -85,6 +85,8 @@ static int exec_statement(PyFrameObject *, static void set_exc_info(PyThreadState *, PyObject *, PyObject *, PyObject *); static void reset_exc_info(PyThreadState *); static void format_exc_check_arg(PyObject *, char *, PyObject *); +static PyObject *string_concatenate(PyObject *, PyObject *, + PyFrameObject *, unsigned char *); #define NAME_ERROR_MSG \ "name '%.200s' is not defined" @@ -550,6 +552,7 @@ PyEval_EvalFrame(PyFrameObject *f) #define INSTR_OFFSET() (next_instr - first_instr) #define NEXTOP() (*next_instr++) #define NEXTARG() (next_instr += 2, (next_instr[-1]<<8) + next_instr[-2]) +#define PEEKARG() ((next_instr[2]<<8) + next_instr[1]) #define JUMPTO(x) (next_instr = first_instr + (x)) #define JUMPBY(x) (next_instr += (x)) @@ -580,8 +583,7 @@ PyEval_EvalFrame(PyFrameObject *f) #endif #define PREDICTED(op) PRED_##op: next_instr++ -#define PREDICTED_WITH_ARG(op) PRED_##op: oparg = (next_instr[2]<<8) + \ - next_instr[1]; next_instr += 3 +#define PREDICTED_WITH_ARG(op) PRED_##op: oparg = PEEKARG(); next_instr += 3 /* Stack manipulation macros */ @@ -1066,11 +1068,18 @@ PyEval_EvalFrame(PyFrameObject *f) goto slow_add; x = PyInt_FromLong(i); } + else if (PyString_CheckExact(v) && + PyString_CheckExact(w)) { + x = string_concatenate(v, w, f, next_instr); + /* string_concatenate consumed the ref to v */ + goto skip_decref_vx; + } else { slow_add: x = PyNumber_Add(v, w); } Py_DECREF(v); + skip_decref_vx: Py_DECREF(w); SET_TOP(x); if (x != NULL) continue; @@ -1261,11 +1270,18 @@ PyEval_EvalFrame(PyFrameObject *f) goto slow_iadd; x = PyInt_FromLong(i); } + else if (PyString_CheckExact(v) && + PyString_CheckExact(w)) { + x = string_concatenate(v, w, f, next_instr); + /* string_concatenate consumed the ref to v */ + goto skip_decref_v; + } else { slow_iadd: x = PyNumber_InPlaceAdd(v, w); } Py_DECREF(v); + skip_decref_v: Py_DECREF(w); SET_TOP(x); if (x != NULL) continue; @@ -4191,6 +4207,79 @@ format_exc_check_arg(PyObject *exc, char *format_str, PyObject *obj) PyErr_Format(exc, format_str, obj_str); } +static PyObject * +string_concatenate(PyObject *v, PyObject *w, + PyFrameObject *f, unsigned char *next_instr) +{ + /* This function implements 'variable += expr' when both arguments + are strings. */ + + if (v->ob_refcnt == 2) { + /* In the common case, there are 2 references to the value + * stored in 'variable' when the += is performed: one on the + * value stack (in 'v') and one still stored in the 'variable'. + * We try to delete the variable now to reduce the refcnt to 1. + */ + switch (*next_instr) { + case STORE_FAST: + { + int oparg = PEEKARG(); + PyObject **fastlocals = f->f_localsplus; + if (GETLOCAL(oparg) == v) + SETLOCAL(oparg, NULL); + break; + } + case STORE_DEREF: + { + PyObject **freevars = f->f_localsplus + f->f_nlocals; + PyObject *c = freevars[PEEKARG()]; + if (PyCell_GET(c) == v) + PyCell_Set(c, NULL); + break; + } + case STORE_NAME: + { + PyObject *names = f->f_code->co_names; + PyObject *name = GETITEM(names, PEEKARG()); + PyObject *locals = f->f_locals; + if (PyDict_CheckExact(locals) && + PyDict_GetItem(locals, name) == v) { + if (PyDict_DelItem(locals, name) != 0) { + PyErr_Clear(); + } + } + break; + } + } + } + + if (v->ob_refcnt == 1) { + /* Now we own the last reference to 'v', so we can resize it + * in-place. + */ + int v_len = PyString_GET_SIZE(v); + int w_len = PyString_GET_SIZE(w); + if (_PyString_Resize(&v, v_len + w_len) != 0) { + /* XXX if _PyString_Resize() fails, 'v' has been + * deallocated so it cannot be put back into 'variable'. + * The MemoryError is raised when there is no value in + * 'variable', which might (very remotely) be a cause + * of incompatibilities. + */ + return NULL; + } + /* copy 'w' into the newly allocated area of 'v' */ + memcpy(PyString_AS_STRING(v) + v_len, + PyString_AS_STRING(w), w_len); + return v; + } + else { + /* When in-place resizing is not an option. */ + PyString_Concat(&v, w); + return v; + } +} + #ifdef DYNAMIC_EXECUTION_PROFILE static PyObject * |