diff options
author | Guido van Rossum <guido@python.org> | 2002-08-19 21:43:18 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2002-08-19 21:43:18 (GMT) |
commit | 45ec02aed14685c353e55841b5acbc0dadee76f8 (patch) | |
tree | 0c146fdf0d488f279f0baf64b0f1fa0484274a73 /Objects | |
parent | d8dbf847b6a819ef73d7bf0c05eafbdb9aee9956 (diff) | |
download | cpython-45ec02aed14685c353e55841b5acbc0dadee76f8.zip cpython-45ec02aed14685c353e55841b5acbc0dadee76f8.tar.gz cpython-45ec02aed14685c353e55841b5acbc0dadee76f8.tar.bz2 |
SF patch 576101, by Oren Tirosh: alternative implementation of
interning. I modified Oren's patch significantly, but the basic idea
and most of the implementation is unchanged. Interned strings created
with PyString_InternInPlace() are now mortal, and you must keep a
reference to the resulting string around; use the new function
PyString_InternImmortal() to create immortal interned strings.
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/classobject.c | 44 | ||||
-rw-r--r-- | Objects/dictobject.c | 12 | ||||
-rw-r--r-- | Objects/stringobject.c | 179 |
3 files changed, 135 insertions, 100 deletions
diff --git a/Objects/classobject.c b/Objects/classobject.c index 32e8d2a..cbbb3c4 100644 --- a/Objects/classobject.c +++ b/Objects/classobject.c @@ -2300,37 +2300,38 @@ instancemethod_traverse(PyMethodObject *im, visitproc visit, void *arg) return 0; } -static char * -getclassname(PyObject *class) +static void +getclassname(PyObject *class, char *buf, int bufsize) { PyObject *name; + assert(bufsize > 1); + strcpy(buf, "?"); /* Default outcome */ if (class == NULL) - name = NULL; - else - name = PyObject_GetAttrString(class, "__name__"); + return; + name = PyObject_GetAttrString(class, "__name__"); if (name == NULL) { /* This function cannot return an exception */ PyErr_Clear(); - return "?"; + return; } - if (!PyString_Check(name)) { - Py_DECREF(name); - return "?"; + if (PyString_Check(name)) { + strncpy(buf, PyString_AS_STRING(name), bufsize); + buf[bufsize-1] = '\0'; } - PyString_InternInPlace(&name); Py_DECREF(name); - return PyString_AS_STRING(name); } -static char * -getinstclassname(PyObject *inst) +static void +getinstclassname(PyObject *inst, char *buf, int bufsize) { PyObject *class; - char *name; - if (inst == NULL) - return "nothing"; + if (inst == NULL) { + assert(bufsize > strlen("nothing")); + strcpy(buf, "nothing"); + return; + } class = PyObject_GetAttrString(inst, "__class__"); if (class == NULL) { @@ -2339,9 +2340,8 @@ getinstclassname(PyObject *inst) class = (PyObject *)(inst->ob_type); Py_INCREF(class); } - name = getclassname(class); + getclassname(class, buf, bufsize); Py_XDECREF(class); - return name; } static PyObject * @@ -2366,14 +2366,18 @@ instancemethod_call(PyObject *func, PyObject *arg, PyObject *kw) return NULL; } if (!ok) { + char clsbuf[256]; + char instbuf[256]; + getclassname(class, clsbuf, sizeof(clsbuf)); + getinstclassname(self, instbuf, sizeof(instbuf)); PyErr_Format(PyExc_TypeError, "unbound method %s%s must be called with " "%s instance as first argument " "(got %s%s instead)", PyEval_GetFuncName(func), PyEval_GetFuncDesc(func), - getclassname(class), - getinstclassname(self), + clsbuf, + instbuf, self == NULL ? "" : " instance"); return NULL; } diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 86c74ba..b7b499e 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -511,15 +511,9 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value) } mp = (dictobject *)op; if (PyString_CheckExact(key)) { - if (((PyStringObject *)key)->ob_sinterned != NULL) { - key = ((PyStringObject *)key)->ob_sinterned; - hash = ((PyStringObject *)key)->ob_shash; - } - else { - hash = ((PyStringObject *)key)->ob_shash; - if (hash == -1) - hash = PyObject_Hash(key); - } + hash = ((PyStringObject *)key)->ob_shash; + if (hash == -1) + hash = PyObject_Hash(key); } else { hash = PyObject_Hash(key); diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 7d3881e..7a48627 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -15,6 +15,17 @@ int null_strings, one_strings; static PyStringObject *characters[UCHAR_MAX + 1]; static PyStringObject *nullstring; +/* This dictionary holds all interned strings. Note that references to + strings in this dictionary are *not* counted in the string's ob_refcnt. + When the interned string reaches a refcnt of 0 the string deallocation + function will delete the reference from this dictionary. + + Another way to look at this is that to say that the actual reference + count of a string is: s->ob_refcnt + (s->ob_sstate?2:0) +*/ +static PyObject *interned; + + /* For both PyString_FromString() and PyString_FromStringAndSize(), the parameter `size' denotes number of characters to allocate, not counting any @@ -69,7 +80,7 @@ PyString_FromStringAndSize(const char *str, int size) return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; - op->ob_sinterned = NULL; + op->ob_sstate = SSTATE_NOT_INTERNED; if (str != NULL) memcpy(op->ob_sval, str, size); op->ob_sval[size] = '\0'; @@ -125,7 +136,7 @@ PyString_FromString(const char *str) return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; - op->ob_sinterned = NULL; + op->ob_sstate = SSTATE_NOT_INTERNED; memcpy(op->ob_sval, str, size+1); /* share short strings */ if (size == 0) { @@ -486,6 +497,24 @@ PyObject *PyString_AsEncodedString(PyObject *str, static void string_dealloc(PyObject *op) { + switch (PyString_CHECK_INTERNED(op)) { + case SSTATE_NOT_INTERNED: + break; + + case SSTATE_INTERNED_MORTAL: + /* revive dead object temporarily for DelItem */ + op->ob_refcnt = 3; + if (PyDict_DelItem(interned, op) != 0) + Py_FatalError( + "deletion of interned string failed"); + break; + + case SSTATE_INTERNED_IMMORTAL: + Py_FatalError("Immortal interned string died."); + + default: + Py_FatalError("Inconsistent interned string state."); + } op->ob_type->tp_free(op); } @@ -885,7 +914,7 @@ string_concat(register PyStringObject *a, register PyObject *bb) return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; - op->ob_sinterned = NULL; + op->ob_sstate = SSTATE_NOT_INTERNED; memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size); memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size); op->ob_sval[size] = '\0'; @@ -928,7 +957,7 @@ string_repeat(register PyStringObject *a, register int n) return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; - op->ob_sinterned = NULL; + op->ob_sstate = SSTATE_NOT_INTERNED; for (i = 0; i < size; i += a->ob_size) memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size); op->ob_sval[size] = '\0'; @@ -1093,9 +1122,6 @@ string_hash(PyStringObject *a) if (a->ob_shash != -1) return a->ob_shash; - if (a->ob_sinterned != NULL) - return (a->ob_shash = - ((PyStringObject *)(a->ob_sinterned))->ob_shash); len = a->ob_size; p = (unsigned char *) a->ob_sval; x = *p << 7; @@ -3067,8 +3093,7 @@ str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1); ((PyStringObject *)pnew)->ob_shash = ((PyStringObject *)tmp)->ob_shash; - ((PyStringObject *)pnew)->ob_sinterned = - ((PyStringObject *)tmp)->ob_sinterned; + ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED; } Py_DECREF(tmp); return pnew; @@ -3983,22 +4008,6 @@ PyString_Format(PyObject *format, PyObject *args) return NULL; } - - -/* This dictionary will leak at PyString_Fini() time. That's acceptable - * because PyString_Fini() specifically frees interned strings that are - * only referenced by this dictionary. The CVS log entry for revision 2.45 - * says: - * - * Change the Fini function to only remove otherwise unreferenced - * strings from the interned table. There are references in - * hard-to-find static variables all over the interpreter, and it's not - * worth trying to get rid of all those; but "uninterning" isn't fair - * either and may cause subtle failures later -- so we have to keep them - * in the interned table. - */ -static PyObject *interned; - void PyString_InternInPlace(PyObject **p) { @@ -4006,49 +4015,57 @@ PyString_InternInPlace(PyObject **p) PyObject *t; if (s == NULL || !PyString_Check(s)) Py_FatalError("PyString_InternInPlace: strings only please!"); - if ((t = s->ob_sinterned) != NULL) { - if (t == (PyObject *)s) - return; - Py_INCREF(t); - *p = t; - Py_DECREF(s); + if (PyString_CHECK_INTERNED(s)) return; - } if (interned == NULL) { interned = PyDict_New(); - if (interned == NULL) + if (interned == NULL) { + PyErr_Clear(); /* Don't leave an exception */ return; + } } if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) { Py_INCREF(t); - *p = s->ob_sinterned = t; - Py_DECREF(s); + Py_DECREF(*p); + *p = t; return; } - /* Ensure that only true string objects appear in the intern dict, - and as the value of ob_sinterned. */ - if (PyString_CheckExact(s)) { - t = (PyObject *)s; - if (PyDict_SetItem(interned, t, t) == 0) { - s->ob_sinterned = t; - return; - } - } - else { + /* Ensure that only true string objects appear in the intern dict */ + if (!PyString_CheckExact(s)) { t = PyString_FromStringAndSize(PyString_AS_STRING(s), PyString_GET_SIZE(s)); - if (t != NULL) { - if (PyDict_SetItem(interned, t, t) == 0) { - *p = s->ob_sinterned = t; - Py_DECREF(s); - return; - } - Py_DECREF(t); + if (t == NULL) { + PyErr_Clear(); + return; } + } else { + t = (PyObject*) s; + Py_INCREF(t); + } + + if (PyDict_SetItem(interned, t, t) == 0) { + /* The two references in interned are not counted by + refcnt. The string deallocator will take care of this */ + ((PyObject *)t)->ob_refcnt-=2; + PyString_CHECK_INTERNED(t) = SSTATE_INTERNED_MORTAL; + Py_DECREF(*p); + *p = t; + return; } + Py_DECREF(t); PyErr_Clear(); } +void +PyString_InternImmortal(PyObject **p) +{ + PyString_InternInPlace(p); + if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) { + PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL; + Py_INCREF(*p); + } +} + PyObject * PyString_InternFromString(const char *cp) @@ -4070,28 +4087,48 @@ PyString_Fini(void) } Py_XDECREF(nullstring); nullstring = NULL; - if (interned) { - int pos, changed; - PyObject *key, *value; - do { - changed = 0; - pos = 0; - while (PyDict_Next(interned, &pos, &key, &value)) { - if (key->ob_refcnt == 2 && key == value) { - PyDict_DelItem(interned, key); - changed = 1; - } - } - } while (changed); - } } void _Py_ReleaseInternedStrings(void) { - if (interned) { - fprintf(stderr, "releasing interned strings\n"); - PyDict_Clear(interned); - Py_DECREF(interned); - interned = NULL; + PyObject *keys; + PyStringObject *s; + int i, n; + + if (interned == NULL || !PyDict_Check(interned)) + return; + keys = PyDict_Keys(interned); + if (keys == NULL || !PyList_Check(keys)) { + PyErr_Clear(); + return; + } + + /* Since _Py_ReleaseInternedStrings() is intended to help a leak + detector, interned strings are not forcibly deallocated; rather, we + give them their stolen references back, and then clear and DECREF + the interned dict. */ + + fprintf(stderr, "releasing interned strings\n"); + n = PyList_GET_SIZE(keys); + for (i = 0; i < n; i++) { + s = (PyStringObject *) PyList_GET_ITEM(keys, i); + switch (s->ob_sstate) { + case SSTATE_NOT_INTERNED: + /* XXX Shouldn't happen */ + break; + case SSTATE_INTERNED_IMMORTAL: + s->ob_refcnt += 1; + break; + case SSTATE_INTERNED_MORTAL: + s->ob_refcnt += 2; + break; + default: + Py_FatalError("Inconsistent interned string state."); + } + s->ob_sstate = SSTATE_NOT_INTERNED; } + Py_DECREF(keys); + PyDict_Clear(interned); + Py_DECREF(interned); + interned = NULL; } |