diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2007-06-10 09:51:05 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2007-06-10 09:51:05 (GMT) |
commit | 5b222135f8d2492713994f2cb003980e87ce6a72 (patch) | |
tree | 3ac3a6a1d7805360ed779e884ca6c4b3f000321f /Objects | |
parent | 38e43c25eede3fa77d90ac8183cc0335f4861f4a (diff) | |
download | cpython-5b222135f8d2492713994f2cb003980e87ce6a72.zip cpython-5b222135f8d2492713994f2cb003980e87ce6a72.tar.gz cpython-5b222135f8d2492713994f2cb003980e87ce6a72.tar.bz2 |
Make identifiers str (not str8) objects throughout.
This affects the parser, various object implementations,
and all places that put identifiers into C string literals.
During testing, a number of crashes occurred because code
failed once the recursion limit was reached (for example,
leaving the Unicode interning dictionary with key/value pairs
where the key is not the value). To solve these, I added an overflowed
flag, which allows for 50 more recursions after the
limit was reached and the exception was raised, and
a recursion_critical flag, which indicates that recursion
absolutely must be allowed, i.e. that a certain call
must not cause a stack overflow exception.
There are still some places where both str and str8 are
accepted as identifiers; the str8 acceptance in those
places should eventually be removed.
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/abstract.c | 12 | ||||
-rw-r--r-- | Objects/bytesobject.c | 2 | ||||
-rw-r--r-- | Objects/classobject.c | 10 | ||||
-rw-r--r-- | Objects/codeobject.c | 16 | ||||
-rw-r--r-- | Objects/complexobject.c | 4 | ||||
-rw-r--r-- | Objects/descrobject.c | 7 | ||||
-rw-r--r-- | Objects/dictobject.c | 10 | ||||
-rw-r--r-- | Objects/frameobject.c | 6 | ||||
-rw-r--r-- | Objects/funcobject.c | 4 | ||||
-rw-r--r-- | Objects/methodobject.c | 4 | ||||
-rw-r--r-- | Objects/moduleobject.c | 2 | ||||
-rw-r--r-- | Objects/object.c | 108 | ||||
-rw-r--r-- | Objects/stringobject.c | 12 | ||||
-rw-r--r-- | Objects/typeobject.c | 89 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 97 |
15 files changed, 222 insertions, 161 deletions
diff --git a/Objects/abstract.c b/Objects/abstract.c index 84b3384..6e63852 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -207,7 +207,7 @@ PyObject_DelItemString(PyObject *o, char *key) null_error(); return -1; } - okey = PyString_FromString(key); + okey = PyUnicode_FromString(key); if (okey == NULL) return -1; ret = PyObject_DelItem(o, okey); @@ -1598,7 +1598,7 @@ PyMapping_GetItemString(PyObject *o, char *key) if (key == NULL) return null_error(); - okey = PyString_FromString(key); + okey = PyUnicode_FromString(key); if (okey == NULL) return NULL; r = PyObject_GetItem(o, okey); @@ -1617,7 +1617,7 @@ PyMapping_SetItemString(PyObject *o, char *key, PyObject *value) return -1; } - okey = PyString_FromString(key); + okey = PyUnicode_FromString(key); if (okey == NULL) return -1; r = PyObject_SetItem(o, okey, value); @@ -1989,11 +1989,13 @@ abstract_get_bases(PyObject *cls) PyObject *bases; if (__bases__ == NULL) { - __bases__ = PyString_FromString("__bases__"); + __bases__ = PyUnicode_FromString("__bases__"); if (__bases__ == NULL) return NULL; } + Py_ALLOW_RECURSION bases = PyObject_GetAttr(cls, __bases__); + Py_END_ALLOW_RECURSION if (bases == NULL) { if (PyErr_ExceptionMatches(PyExc_AttributeError)) PyErr_Clear(); @@ -2067,7 +2069,7 @@ recursive_isinstance(PyObject *inst, PyObject *cls, int recursion_depth) int retval = 0; if (__class__ == NULL) { - __class__ = PyString_FromString("__class__"); + __class__ = PyUnicode_FromString("__class__"); if (__class__ == NULL) return -1; } diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 2a1dbcb..532e637 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1078,7 +1078,7 @@ bytes_count(PyBytesObject *self, PyObject *args) else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len)) return NULL; - _adjust_indices(&start, &end, PyString_GET_SIZE(self)); + _adjust_indices(&start, &end, PyBytes_GET_SIZE(self)); return PyInt_FromSsize_t( stringlib_count(str + start, end - start, sub, sub_len) 
diff --git a/Objects/classobject.c b/Objects/classobject.c index b7711d5..3cf64de 100644 --- a/Objects/classobject.c +++ b/Objects/classobject.c @@ -100,7 +100,7 @@ method_get_doc(PyMethodObject *im, void *context) { static PyObject *docstr; if (docstr == NULL) { - docstr= PyString_InternFromString("__doc__"); + docstr= PyUnicode_InternFromString("__doc__"); if (docstr == NULL) return NULL; } @@ -235,12 +235,12 @@ method_repr(PyMethodObject *a) return NULL; PyErr_Clear(); } - else if (!PyString_Check(funcname)) { + else if (!PyUnicode_Check(funcname)) { Py_DECREF(funcname); funcname = NULL; } else - sfuncname = PyString_AS_STRING(funcname); + sfuncname = PyUnicode_AsString(funcname); if (klass == NULL) klassname = NULL; else { @@ -250,12 +250,12 @@ method_repr(PyMethodObject *a) return NULL; PyErr_Clear(); } - else if (!PyString_Check(klassname)) { + else if (!PyUnicode_Check(klassname)) { Py_DECREF(klassname); klassname = NULL; } else - sklassname = PyString_AS_STRING(klassname); + sklassname = PyUnicode_AsString(klassname); } if (self == NULL) result = PyUnicode_FromFormat("<unbound method %s.%s>", diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 6763950..c735193 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -32,10 +32,10 @@ intern_strings(PyObject *tuple) for (i = PyTuple_GET_SIZE(tuple); --i >= 0; ) { PyObject *v = PyTuple_GET_ITEM(tuple, i); - if (v == NULL || !PyString_CheckExact(v)) { + if (v == NULL || !PyUnicode_CheckExact(v)) { Py_FatalError("non-string found in code slot"); } - PyString_InternInPlace(&PyTuple_GET_ITEM(tuple, i)); + PyUnicode_InternInPlace(&PyTuple_GET_ITEM(tuple, i)); } } @@ -58,7 +58,7 @@ PyCode_New(int argcount, int kwonlyargcount, varnames == NULL || !PyTuple_Check(varnames) || freevars == NULL || !PyTuple_Check(freevars) || cellvars == NULL || !PyTuple_Check(cellvars) || - name == NULL || !PyString_Check(name) || + name == NULL || (!PyString_Check(name) && !PyUnicode_Check(name)) || filename == NULL || 
!PyString_Check(filename) || lnotab == NULL || !PyString_Check(lnotab) || !PyObject_CheckReadBuffer(code)) { @@ -148,10 +148,10 @@ validate_and_copy_tuple(PyObject *tup) for (i = 0; i < len; i++) { item = PyTuple_GET_ITEM(tup, i); - if (PyString_CheckExact(item)) { + if (PyUnicode_CheckExact(item)) { Py_INCREF(item); } - else if (!PyString_Check(item)) { + else if (!PyUnicode_Check(item)) { PyErr_Format( PyExc_TypeError, "name tuples must contain only " @@ -161,9 +161,9 @@ validate_and_copy_tuple(PyObject *tup) return NULL; } else { - item = PyString_FromStringAndSize( - PyString_AS_STRING(item), - PyString_GET_SIZE(item)); + item = PyUnicode_FromUnicode( + PyUnicode_AS_UNICODE(item), + PyUnicode_GET_SIZE(item)); if (item == NULL) { Py_DECREF(newtuple); return NULL; diff --git a/Objects/complexobject.c b/Objects/complexobject.c index ed2e475..4580ef2 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -269,7 +269,7 @@ PyComplex_AsCComplex(PyObject *op) { PyObject *complexfunc; if (!complex_str) { - if (!(complex_str = PyString_FromString("__complex__"))) + if (!(complex_str = PyUnicode_FromString("__complex__"))) return cv; } complexfunc = _PyType_Lookup(op->ob_type, complex_str); @@ -900,7 +900,7 @@ complex_new(PyTypeObject *type, PyObject *args, PyObject *kwds) /* XXX Hack to support classes with __complex__ method */ if (complexstr == NULL) { - complexstr = PyString_InternFromString("__complex__"); + complexstr = PyUnicode_InternFromString("__complex__"); if (complexstr == NULL) return NULL; } diff --git a/Objects/descrobject.c b/Objects/descrobject.c index e9ccefa..acd2400 100644 --- a/Objects/descrobject.c +++ b/Objects/descrobject.c @@ -15,7 +15,10 @@ descr_dealloc(PyDescrObject *descr) static char * descr_name(PyDescrObject *descr) { - if (descr->d_name != NULL && PyString_Check(descr->d_name)) + if (descr->d_name != NULL && PyUnicode_Check(descr->d_name)) + return PyUnicode_AsString(descr->d_name); + else if (descr->d_name != NULL && 
PyString_Check(descr->d_name)) + /* XXX this should not happen */ return PyString_AS_STRING(descr->d_name); else return "?"; @@ -581,7 +584,7 @@ descr_new(PyTypeObject *descrtype, PyTypeObject *type, const char *name) if (descr != NULL) { Py_XINCREF(type); descr->d_type = type; - descr->d_name = PyString_InternFromString(name); + descr->d_name = PyUnicode_InternFromString(name); if (descr->d_name == NULL) { Py_DECREF(descr); descr = NULL; diff --git a/Objects/dictobject.c b/Objects/dictobject.c index b45a664..639c3c5 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1040,7 +1040,7 @@ dict_subscript(dictobject *mp, register PyObject *key) static PyObject *missing_str = NULL; if (missing_str == NULL) missing_str = - PyString_InternFromString("__missing__"); + PyUnicode_InternFromString("__missing__"); missing = _PyType_Lookup(mp->ob_type, missing_str); if (missing != NULL) return PyObject_CallFunctionObjArgs(missing, @@ -2073,7 +2073,7 @@ PyObject * PyDict_GetItemString(PyObject *v, const char *key) { PyObject *kv, *rv; - kv = PyString_FromString(key); + kv = PyUnicode_FromString(key); if (kv == NULL) return NULL; rv = PyDict_GetItem(v, kv); @@ -2086,10 +2086,10 @@ PyDict_SetItemString(PyObject *v, const char *key, PyObject *item) { PyObject *kv; int err; - kv = PyString_FromString(key); + kv = PyUnicode_FromString(key); if (kv == NULL) return -1; - PyString_InternInPlace(&kv); /* XXX Should we really? */ + PyUnicode_InternInPlace(&kv); /* XXX Should we really? 
*/ err = PyDict_SetItem(v, kv, item); Py_DECREF(kv); return err; @@ -2100,7 +2100,7 @@ PyDict_DelItemString(PyObject *v, const char *key) { PyObject *kv; int err; - kv = PyString_FromString(key); + kv = PyUnicode_FromString(key); if (kv == NULL) return -1; err = PyDict_DelItem(v, kv); diff --git a/Objects/frameobject.c b/Objects/frameobject.c index f780b3a..bb27f1c 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -542,7 +542,7 @@ static PyObject *builtin_object; int _PyFrame_Init() { - builtin_object = PyString_InternFromString("__builtins__"); + builtin_object = PyUnicode_InternFromString("__builtins__"); return (builtin_object != NULL); } @@ -722,7 +722,7 @@ map_to_dict(PyObject *map, Py_ssize_t nmap, PyObject *dict, PyObject **values, for (j = nmap; --j >= 0; ) { PyObject *key = PyTuple_GET_ITEM(map, j); PyObject *value = values[j]; - assert(PyString_Check(key)); + assert(PyString_Check(key)/*XXX this should go*/ || PyUnicode_Check(key)); if (deref) { assert(PyCell_Check(value)); value = PyCell_GET(value); @@ -770,7 +770,7 @@ dict_to_map(PyObject *map, Py_ssize_t nmap, PyObject *dict, PyObject **values, for (j = nmap; --j >= 0; ) { PyObject *key = PyTuple_GET_ITEM(map, j); PyObject *value = PyObject_GetItem(dict, key); - assert(PyString_Check(key)); + assert(PyUnicode_Check(key)); /* We only care about NULLs if clear is true. */ if (value == NULL) { PyErr_Clear(); diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 6f17e7a..ff1b4c8 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -322,7 +322,7 @@ func_set_name(PyFunctionObject *op, PyObject *value) /* Not legal to del f.func_name or to set it to anything * other than a string object. 
*/ - if (value == NULL || !PyString_Check(value)) { + if (value == NULL || (!PyString_Check(value) && !PyUnicode_Check(value))) { PyErr_SetString(PyExc_TypeError, "__name__ must be set to a string object"); return -1; @@ -516,7 +516,7 @@ func_new(PyTypeObject* type, PyObject* args, PyObject* kw) if (nfree != nclosure) return PyErr_Format(PyExc_ValueError, "%s requires closure of length %zd, not %zd", - PyString_AS_STRING(code->co_name), + PyUnicode_AsString(code->co_name), nfree, nclosure); if (nclosure) { Py_ssize_t i; diff --git a/Objects/methodobject.c b/Objects/methodobject.c index 6199805..2d1c688 100644 --- a/Objects/methodobject.c +++ b/Objects/methodobject.c @@ -143,7 +143,7 @@ meth_get__doc__(PyCFunctionObject *m, void *closure) static PyObject * meth_get__name__(PyCFunctionObject *m, void *closure) { - return PyString_FromString(m->m_ml->ml_name); + return PyUnicode_FromString(m->m_ml->ml_name); } static int @@ -297,7 +297,7 @@ listmethodchain(PyMethodChain *chain) i = 0; for (c = chain; c != NULL; c = c->link) { for (ml = c->methods; ml->ml_name != NULL; ml++) { - PyList_SetItem(v, i, PyString_FromString(ml->ml_name)); + PyList_SetItem(v, i, PyUnicode_FromString(ml->ml_name)); i++; } } diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index daf24eb..7c5e47f 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -22,7 +22,7 @@ PyModule_New(const char *name) m = PyObject_GC_New(PyModuleObject, &PyModule_Type); if (m == NULL) return NULL; - nameobj = PyString_FromString(name); + nameobj = PyUnicode_FromString(name); m->md_dict = PyDict_New(); if (m->md_dict == NULL || nameobj == NULL) goto fail; diff --git a/Objects/object.c b/Objects/object.c index be7d501..c701af0 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -465,7 +465,7 @@ PyObject_Unicode(PyObject *v) check this before trying the __unicode__ method. 
*/ if (unicodestr == NULL) { - unicodestr= PyString_InternFromString("__unicode__"); + unicodestr= PyUnicode_InternFromString("__unicode__"); if (unicodestr == NULL) return NULL; } @@ -852,7 +852,7 @@ PyObject_GetAttrString(PyObject *v, const char *name) if (v->ob_type->tp_getattr != NULL) return (*v->ob_type->tp_getattr)(v, (char*)name); - w = PyString_InternFromString(name); + w = PyUnicode_InternFromString(name); if (w == NULL) return NULL; res = PyObject_GetAttr(v, w); @@ -880,7 +880,7 @@ PyObject_SetAttrString(PyObject *v, const char *name, PyObject *w) if (v->ob_type->tp_setattr != NULL) return (*v->ob_type->tp_setattr)(v, (char*)name, w); - s = PyString_InternFromString(name); + s = PyUnicode_InternFromString(name); if (s == NULL) return -1; res = PyObject_SetAttr(v, s, w); @@ -893,30 +893,19 @@ PyObject_GetAttr(PyObject *v, PyObject *name) { PyTypeObject *tp = v->ob_type; - if (!PyString_Check(name)) { - /* The Unicode to string conversion is done here because the - existing tp_getattro slots expect a string object as name - and we wouldn't want to break those. 
*/ - if (PyUnicode_Check(name)) { - name = _PyUnicode_AsDefaultEncodedString(name, NULL); - if (name == NULL) - return NULL; - } - else - { - PyErr_Format(PyExc_TypeError, - "attribute name must be string, not '%.200s'", - name->ob_type->tp_name); - return NULL; - } + if (!PyUnicode_Check(name)) { + PyErr_Format(PyExc_TypeError, + "attribute name must be string, not '%.200s'", + name->ob_type->tp_name); + return NULL; } if (tp->tp_getattro != NULL) return (*tp->tp_getattro)(v, name); if (tp->tp_getattr != NULL) - return (*tp->tp_getattr)(v, PyString_AS_STRING(name)); + return (*tp->tp_getattr)(v, PyUnicode_AsString(name)); PyErr_Format(PyExc_AttributeError, "'%.50s' object has no attribute '%.400s'", - tp->tp_name, PyString_AS_STRING(name)); + tp->tp_name, PyUnicode_AsString(name)); return NULL; } @@ -938,33 +927,22 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value) PyTypeObject *tp = v->ob_type; int err; - if (!PyString_Check(name)) { - /* The Unicode to string conversion is done here because the - existing tp_setattro slots expect a string object as name - and we wouldn't want to break those. 
*/ - if (PyUnicode_Check(name)) { - name = _PyUnicode_AsDefaultEncodedString(name, NULL); - if (name == NULL) - return -1; - } - else - { - PyErr_Format(PyExc_TypeError, - "attribute name must be string, not '%.200s'", - name->ob_type->tp_name); - return -1; - } + if (!PyUnicode_Check(name)) { + PyErr_Format(PyExc_TypeError, + "attribute name must be string, not '%.200s'", + name->ob_type->tp_name); + return -1; } Py_INCREF(name); - PyString_InternInPlace(&name); + PyUnicode_InternInPlace(&name); if (tp->tp_setattro != NULL) { err = (*tp->tp_setattro)(v, name, value); Py_DECREF(name); return err; } if (tp->tp_setattr != NULL) { - err = (*tp->tp_setattr)(v, PyString_AS_STRING(name), value); + err = (*tp->tp_setattr)(v, PyUnicode_AsString(name), value); Py_DECREF(name); return err; } @@ -976,14 +954,14 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value) "(%s .%.100s)", tp->tp_name, value==NULL ? "del" : "assign to", - PyString_AS_STRING(name)); + PyUnicode_AsString(name)); else PyErr_Format(PyExc_TypeError, "'%.100s' object has only read-only attributes " "(%s .%.100s)", tp->tp_name, value==NULL ? "del" : "assign to", - PyString_AS_STRING(name)); + PyUnicode_AsString(name)); return -1; } @@ -1033,22 +1011,11 @@ PyObject_GenericGetAttr(PyObject *obj, PyObject *name) Py_ssize_t dictoffset; PyObject **dictptr; - if (!PyString_Check(name)){ - /* The Unicode to string conversion is done here because the - existing tp_setattro slots expect a string object as name - and we wouldn't want to break those. 
*/ - if (PyUnicode_Check(name)) { - name = PyUnicode_AsEncodedString(name, NULL, NULL); - if (name == NULL) - return NULL; - } - else - { - PyErr_Format(PyExc_TypeError, - "attribute name must be string, not '%.200s'", - name->ob_type->tp_name); - return NULL; - } + if (!PyUnicode_Check(name)){ + PyErr_Format(PyExc_TypeError, + "attribute name must be string, not '%.200s'", + name->ob_type->tp_name); + return NULL; } else Py_INCREF(name); @@ -1134,7 +1101,7 @@ PyObject_GenericGetAttr(PyObject *obj, PyObject *name) PyErr_Format(PyExc_AttributeError, "'%.50s' object has no attribute '%.400s'", - tp->tp_name, PyString_AS_STRING(name)); + tp->tp_name, PyUnicode_AsString(name)); done: Py_DECREF(name); return res; @@ -1149,22 +1116,11 @@ PyObject_GenericSetAttr(PyObject *obj, PyObject *name, PyObject *value) PyObject **dictptr; int res = -1; - if (!PyString_Check(name)){ - /* The Unicode to string conversion is done here because the - existing tp_setattro slots expect a string object as name - and we wouldn't want to break those. 
*/ - if (PyUnicode_Check(name)) { - name = PyUnicode_AsEncodedString(name, NULL, NULL); - if (name == NULL) - return -1; - } - else - { - PyErr_Format(PyExc_TypeError, - "attribute name must be string, not '%.200s'", - name->ob_type->tp_name); - return -1; - } + if (!PyUnicode_Check(name)){ + PyErr_Format(PyExc_TypeError, + "attribute name must be string, not '%.200s'", + name->ob_type->tp_name); + return -1; } else Py_INCREF(name); @@ -1212,13 +1168,13 @@ PyObject_GenericSetAttr(PyObject *obj, PyObject *name, PyObject *value) if (descr == NULL) { PyErr_Format(PyExc_AttributeError, "'%.100s' object has no attribute '%.200s'", - tp->tp_name, PyString_AS_STRING(name)); + tp->tp_name, PyUnicode_AsString(name)); goto done; } PyErr_Format(PyExc_AttributeError, "'%.50s' object attribute '%.400s' is read-only", - tp->tp_name, PyString_AS_STRING(name)); + tp->tp_name, PyUnicode_AsString(name)); done: Py_DECREF(name); return res; diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 60e6129..92bc95b 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -686,6 +686,11 @@ string_getbuffer(register PyObject *op) Py_ssize_t PyString_Size(register PyObject *op) { + if (PyUnicode_Check(op)) { + op = _PyUnicode_AsDefaultEncodedString(op, NULL); + if (!op) + return -1; + } if (!PyString_Check(op)) return string_getsize(op); return ((PyStringObject *)op) -> ob_size; @@ -694,6 +699,11 @@ PyString_Size(register PyObject *op) /*const*/ char * PyString_AsString(register PyObject *op) { + if (PyUnicode_Check(op)) { + op = _PyUnicode_AsDefaultEncodedString(op, NULL); + if (!op) + return NULL; + } if (!PyString_Check(op)) return string_getbuffer(op); return ((PyStringObject *)op) -> ob_sval; @@ -824,7 +834,7 @@ PyString_Repr(PyObject *obj, int smartquotes) { static const char *hexdigits = "0123456789abcdef"; register PyStringObject* op = (PyStringObject*) obj; - Py_ssize_t length = PyUnicode_GET_SIZE(op); + Py_ssize_t length = PyString_GET_SIZE(op); size_t 
newsize = 2 + 4 * op->ob_size; PyObject *v; if (newsize > PY_SSIZE_T_MAX || newsize / 4 != op->ob_size) { diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 5983011..ab86f54 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -35,7 +35,7 @@ type_name(PyTypeObject *type, void *context) s = type->tp_name; else s++; - return PyString_FromString(s); + return PyUnicode_FromString(s); } } @@ -97,9 +97,9 @@ type_module(PyTypeObject *type, void *context) else { s = strrchr(type->tp_name, '.'); if (s != NULL) - return PyString_FromStringAndSize( + return PyUnicode_FromStringAndSize( type->tp_name, (Py_ssize_t)(s - type->tp_name)); - return PyString_FromString("__builtin__"); + return PyUnicode_FromString("__builtin__"); } } @@ -371,7 +371,7 @@ type_repr(PyTypeObject *type) mod = type_module(type, NULL); if (mod == NULL) PyErr_Clear(); - else if (!PyString_Check(mod)) { + else if (!PyUnicode_Check(mod)) { Py_DECREF(mod); mod = NULL; } @@ -384,11 +384,11 @@ type_repr(PyTypeObject *type) else kind = "type"; - if (mod != NULL && strcmp(PyString_AS_STRING(mod), "__builtin__")) { + if (mod != NULL && strcmp(PyUnicode_AsString(mod), "__builtin__")) { rtn = PyUnicode_FromFormat("<%s '%s.%s'>", kind, - PyString_AS_STRING(mod), - PyString_AS_STRING(name)); + PyUnicode_AsString(mod), + PyUnicode_AsString(name)); } else rtn = PyUnicode_FromFormat("<%s '%s'>", kind, type->tp_name); @@ -859,7 +859,7 @@ lookup_maybe(PyObject *self, char *attrstr, PyObject **attrobj) PyObject *res; if (*attrobj == NULL) { - *attrobj = PyString_InternFromString(attrstr); + *attrobj = PyUnicode_InternFromString(attrstr); if (*attrobj == NULL) return NULL; } @@ -1415,7 +1415,7 @@ get_dict_descriptor(PyTypeObject *type) PyObject *descr; if (dict_str == NULL) { - dict_str = PyString_InternFromString("__dict__"); + dict_str = PyUnicode_InternFromString("__dict__"); if (dict_str == NULL) return NULL; } @@ -1564,14 +1564,14 @@ valid_identifier(PyObject *s) unsigned char *p; Py_ssize_t i, n; 
- if (!PyString_Check(s)) { + if (!PyUnicode_Check(s)) { PyErr_Format(PyExc_TypeError, "__slots__ items must be strings, not '%.200s'", s->ob_type->tp_name); return 0; } - p = (unsigned char *) PyString_AS_STRING(s); - n = PyString_GET_SIZE(s); + p = (unsigned char *) PyUnicode_AsString(s); + n = strlen((char*)p)/*XXX PyString_GET_SIZE(s)*/; /* We must reject an empty name. As a hack, we bump the length to 1 so that the loop will balk on the trailing \0. */ if (n == 0) @@ -1792,22 +1792,13 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds) return NULL; } - tmp = _unicode_to_string(slots, nslots); - if (tmp == NULL) - goto bad_slots; - if (tmp != slots) { - Py_DECREF(slots); - slots = tmp; - } /* Check for valid slot names and two special cases */ for (i = 0; i < nslots; i++) { PyObject *tmp = PyTuple_GET_ITEM(slots, i); - char *s; if (!valid_identifier(tmp)) goto bad_slots; - assert(PyString_Check(tmp)); - s = PyString_AS_STRING(tmp); - if (strcmp(s, "__dict__") == 0) { + assert(PyUnicode_Check(tmp)); + if (PyUnicode_CompareWithASCIIString(tmp, "__dict__") == 0) { if (!may_add_dict || add_dict) { PyErr_SetString(PyExc_TypeError, "__dict__ slot disallowed: " @@ -1816,7 +1807,7 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds) } add_dict++; } - if (strcmp(s, "__weakref__") == 0) { + if (PyUnicode_CompareWithASCIIString(tmp, "__weakref__") == 0) { if (!may_add_weak || add_weak) { PyErr_SetString(PyExc_TypeError, "__weakref__ slot disallowed: " @@ -1836,11 +1827,11 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds) if (newslots == NULL) goto bad_slots; for (i = j = 0; i < nslots; i++) { - char *s; tmp = PyTuple_GET_ITEM(slots, i); - s = PyString_AS_STRING(tmp); - if ((add_dict && strcmp(s, "__dict__") == 0) || - (add_weak && strcmp(s, "__weakref__") == 0)) + if ((add_dict && + PyUnicode_CompareWithASCIIString(tmp, "__dict__") == 0) || + (add_weak && + PyUnicode_CompareWithASCIIString(tmp, "__weakref__") == 0)) 
continue; tmp =_Py_Mangle(name, tmp); if (!tmp) @@ -1917,7 +1908,15 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds) type->tp_as_sequence = &et->as_sequence; type->tp_as_mapping = &et->as_mapping; type->tp_as_buffer = &et->as_buffer; - type->tp_name = PyString_AS_STRING(name); + if (PyString_Check(name)) + type->tp_name = PyString_AsString(name); + else { + type->tp_name = PyUnicode_AsString(name); + if (!type->tp_name) { + Py_DECREF(type); + return NULL; + } + } /* Set tp_base and tp_bases */ type->tp_bases = bases; @@ -1980,7 +1979,7 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds) slotoffset = base->tp_basicsize; if (slots != NULL) { for (i = 0; i < nslots; i++, mp++) { - mp->name = PyString_AS_STRING( + mp->name = PyUnicode_AsString( PyTuple_GET_ITEM(slots, i)); mp->type = T_OBJECT_EX; mp->offset = slotoffset; @@ -2157,7 +2156,7 @@ type_getattro(PyTypeObject *type, PyObject *name) /* Give up */ PyErr_Format(PyExc_AttributeError, "type object '%.50s' has no attribute '%.400s'", - type->tp_name, PyString_AS_STRING(name)); + type->tp_name, PyUnicode_AsString(name)); return NULL; } @@ -2473,7 +2472,7 @@ object_repr(PyObject *self) mod = type_module(type, NULL); if (mod == NULL) PyErr_Clear(); - else if (!PyString_Check(mod)) { + else if (!PyUnicode_Check(mod)) { Py_DECREF(mod); mod = NULL; } @@ -2482,8 +2481,8 @@ object_repr(PyObject *self) return NULL; if (mod != NULL && strcmp(PyString_AS_STRING(mod), "__builtin__")) rtn = PyUnicode_FromFormat("<%s.%s object at %p>", - PyString_AS_STRING(mod), - PyString_AS_STRING(name), + PyUnicode_AsString(mod), + PyUnicode_AsString(name), self); else rtn = PyUnicode_FromFormat("<%s object at %p>", @@ -2686,7 +2685,7 @@ import_copy_reg(void) static PyObject *copy_reg_str; if (!copy_reg_str) { - copy_reg_str = PyString_InternFromString("copy_reg"); + copy_reg_str = PyUnicode_InternFromString("copy_reg"); if (copy_reg_str == NULL) return NULL; } @@ -4330,7 +4329,7 @@ slot_sq_item(PyObject 
*self, Py_ssize_t i) descrgetfunc f; if (getitem_str == NULL) { - getitem_str = PyString_InternFromString("__getitem__"); + getitem_str = PyUnicode_InternFromString("__getitem__"); if (getitem_str == NULL) return NULL; } @@ -4760,13 +4759,13 @@ slot_tp_getattr_hook(PyObject *self, PyObject *name) static PyObject *getattr_str = NULL; if (getattr_str == NULL) { - getattr_str = PyString_InternFromString("__getattr__"); + getattr_str = PyUnicode_InternFromString("__getattr__"); if (getattr_str == NULL) return NULL; } if (getattribute_str == NULL) { getattribute_str = - PyString_InternFromString("__getattribute__"); + PyUnicode_InternFromString("__getattribute__"); if (getattribute_str == NULL) return NULL; } @@ -4898,7 +4897,7 @@ slot_tp_descr_get(PyObject *self, PyObject *obj, PyObject *type) static PyObject *get_str = NULL; if (get_str == NULL) { - get_str = PyString_InternFromString("__get__"); + get_str = PyUnicode_InternFromString("__get__"); if (get_str == NULL) return NULL; } @@ -4968,7 +4967,7 @@ slot_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) Py_ssize_t i, n; if (new_str == NULL) { - new_str = PyString_InternFromString("__new__"); + new_str = PyUnicode_InternFromString("__new__"); if (new_str == NULL) return NULL; } @@ -5490,7 +5489,7 @@ init_slotdefs(void) if (initialized) return; for (p = slotdefs; p->name; p++) { - p->name_strobj = PyString_InternFromString(p->name); + p->name_strobj = PyUnicode_InternFromString(p->name); if (!p->name_strobj) Py_FatalError("Out of memory interning slotdef names"); } @@ -5717,9 +5716,9 @@ super_getattro(PyObject *self, PyObject *name) if (!skip) { /* We want __class__ to return the class of the super object (i.e. super, or a subclass), not the class of su->obj. 
*/ - skip = (PyString_Check(name) && - PyString_GET_SIZE(name) == 9 && - strcmp(PyString_AS_STRING(name), "__class__") == 0); + skip = (PyUnicode_Check(name) && + PyUnicode_GET_SIZE(name) == 9 && + PyUnicode_CompareWithASCIIString(name, "__class__") == 0); } if (!skip) { @@ -5809,7 +5808,7 @@ supercheck(PyTypeObject *type, PyObject *obj) PyObject *class_attr; if (class_str == NULL) { - class_str = PyString_FromString("__class__"); + class_str = PyUnicode_FromString("__class__"); if (class_str == NULL) return NULL; } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e793418..87c5c99 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -458,8 +458,10 @@ PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size) /* Copy the Unicode data into the new object */ if (u != NULL) { Py_UNICODE *p = unicode->str; - while ((*p++ = *u++)) - ; + while (size--) + *p++ = *u++; + /* Don't need to write trailing 0 because + that's already done by _PyUnicode_New */ } return (PyObject *)unicode; @@ -1184,6 +1186,16 @@ PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode, return v; } +char* +PyUnicode_AsString(PyObject *unicode) +{ + assert(PyUnicode_Check(unicode)); + unicode = _PyUnicode_AsDefaultEncodedString(unicode, NULL); + if (!unicode) + return NULL; + return PyString_AsString(unicode); +} + Py_UNICODE *PyUnicode_AsUnicode(PyObject *unicode) { if (!PyUnicode_Check(unicode)) { @@ -3247,7 +3259,7 @@ PyObject *PyUnicode_DecodeASCII(const char *s, goto onError; } } - if (p - PyUnicode_AS_UNICODE(v) < PyString_GET_SIZE(v)) + if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v)) if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0) goto onError; Py_XDECREF(errorHandler); @@ -5861,6 +5873,24 @@ int PyUnicode_Compare(PyObject *left, return -1; } +int +PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) +{ + int i; + Py_UNICODE *id; + assert(PyUnicode_Check(uni)); + id = PyUnicode_AS_UNICODE(uni); + /* Compare 
Unicode string and source character set string */ + for (i = 0; id[i] && str[i]; i++) + if (id[i] != str[i]) + return ((int)id[i] < (int)str[i]) ? -1 : 1; + if (id[i]) + return 1; /* uni is longer */ + if (str[i]) + return -1; /* str is longer */ + return 0; +} + PyObject *PyUnicode_RichCompare(PyObject *left, PyObject *right, int op) @@ -8671,7 +8701,13 @@ PyUnicode_InternInPlace(PyObject **p) return; } } + /* It might be that the GetItem call fails even + though the key is present in the dictionary, + namely when this happens during a stack overflow. */ + Py_ALLOW_RECURSION t = PyDict_GetItem(interned, (PyObject *)s); + Py_END_ALLOW_RECURSION + if (t) { Py_INCREF(t); Py_DECREF(*p); @@ -8679,10 +8715,13 @@ PyUnicode_InternInPlace(PyObject **p) return; } + PyThreadState_GET()->recursion_critical = 1; if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) { PyErr_Clear(); + PyThreadState_GET()->recursion_critical = 0; return; } + PyThreadState_GET()->recursion_critical = 0; /* The two references in interned are not counted by refcnt. The deallocator will take care of this */ s->ob_refcnt -= 2; @@ -8879,6 +8918,58 @@ unicode_iter(PyObject *seq) return (PyObject *)it; } +size_t +Py_UNICODE_strlen(const Py_UNICODE *u) +{ + int res = 0; + while(*u++) + res++; + return res; +} + +Py_UNICODE* +Py_UNICODE_strcpy(Py_UNICODE *s1, const Py_UNICODE *s2) +{ + Py_UNICODE *u = s1; + while ((*u++ = *s2++)); + return s1; +} + +Py_UNICODE* +Py_UNICODE_strncpy(Py_UNICODE *s1, const Py_UNICODE *s2, size_t n) +{ + Py_UNICODE *u = s1; + while ((*u++ = *s2++)) + if (n-- == 0) + break; + return s1; +} + +int +Py_UNICODE_strcmp(const Py_UNICODE *s1, const Py_UNICODE *s2) +{ + while (*s1 && *s2 && *s1 == *s2) + s1++, s2++; + if (*s1 && *s2) + return (*s1 < *s2) ? 
-1 : +1; + if (*s1) + return 1; + if (*s2) + return -1; + return 0; +} + +Py_UNICODE* +Py_UNICODE_strchr(const Py_UNICODE *s, Py_UNICODE c) +{ + const Py_UNICODE *p; + for (p = s; *p; p++) + if (*p == c) + return (Py_UNICODE*)p; + return NULL; +} + + #ifdef __cplusplus } #endif |