From 5b222135f8d2492713994f2cb003980e87ce6a72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20v=2E=20L=C3=B6wis?= Date: Sun, 10 Jun 2007 09:51:05 +0000 Subject: Make identifiers str (not str8) objects throughout. This affects the parser, various object implementations, and all places that put identifiers into C string literals. In testing, a number of crashes occurred as code would fail when the recursion limit was reached (such as the Unicode interning dictionary having key/value pairs where key is not value). To solve these, I added an overflowed flag, which allows for 50 more recursions after the limit was reached and the exception was raised, and a recursion_critical flag, which indicates that recursion absolutely must be allowed, i.e. that a certain call must not cause a stack overflow exception. There are still some places where both str and str8 are accepted as identifiers; these should eventually be removed. --- Include/ceval.h | 13 +++++- Include/pystate.h | 4 ++ Include/stringobject.h | 4 +- Include/unicodeobject.h | 36 ++++++++++++++-- Lib/test/test_frozen.py | 2 + Lib/test/test_new.py | 2 +- Lib/test/test_sys.py | 12 +++--- Modules/_codecsmodule.c | 2 +- Modules/_hotshot.c | 2 +- Modules/cPickle.c | 12 +++--- Modules/gcmodule.c | 2 +- Modules/unicodedata.c | 2 +- Objects/abstract.c | 12 +++--- Objects/bytesobject.c | 2 +- Objects/classobject.c | 10 ++--- Objects/codeobject.c | 16 +++---- Objects/complexobject.c | 4 +- Objects/descrobject.c | 7 +++- Objects/dictobject.c | 10 ++--- Objects/frameobject.c | 6 +-- Objects/funcobject.c | 4 +- Objects/methodobject.c | 4 +- Objects/moduleobject.c | 2 +- Objects/object.c | 108 ++++++++++++++---------------------------------- Objects/stringobject.c | 12 +++++- Objects/typeobject.c | 89 ++++++++++++++++++++------------------- Objects/unicodeobject.c | 97 +++++++++++++++++++++++++++++++++++++++++-- Parser/tokenizer.c | 15 ++++++- Python/Python-ast.c | 2 + Python/ast.c | 23 +++++++---- Python/bltinmodule.c | 24 +++++------ Python/ceval.c | 25 +++++++---- Python/compile.c | 86 +++++++++++++++++++------------------- Python/future.c | 2 +- Python/import.c | 56 ++++++++++++++++++++----- Python/modsupport.c | 4 +- Python/pystate.c | 2 + Python/pythonrun.c | 2 + Python/symtable.c | 30 +++++++------- Python/sysmodule.c | 4 +- 40 files changed, 462 insertions(+), 289 deletions(-) diff --git a/Include/ceval.h b/Include/ceval.h index 15b29c6..c9c59eb 100644 --- a/Include/ceval.h +++ b/Include/ceval.h @@ -50,7 +50,10 @@ PyAPI_FUNC(int) Py_GetRecursionLimit(void); (_Py_MakeRecCheck(PyThreadState_GET()->recursion_depth) && \ _Py_CheckRecursiveCall(where)) #define Py_LeaveRecursiveCall() \ - (--PyThreadState_GET()->recursion_depth) + do{ if((--PyThreadState_GET()->recursion_depth) < \ + _Py_CheckRecursionLimit - 50); \ + PyThreadState_GET()->overflowed = 0; \ + } while(0) PyAPI_FUNC(int) _Py_CheckRecursiveCall(char *where); PyAPI_DATA(int) _Py_CheckRecursionLimit; #ifdef USE_STACKCHECK @@ -59,6 +62,14 @@ PyAPI_DATA(int) _Py_CheckRecursionLimit; # define _Py_MakeRecCheck(x) (++(x) > _Py_CheckRecursionLimit) #endif +#define Py_ALLOW_RECURSION \ + do { unsigned char _old = PyThreadState_GET()->recursion_critical;\ + PyThreadState_GET()->recursion_critical = 1; + +#define Py_END_ALLOW_RECURSION \ + PyThreadState_GET()->recursion_critical = _old; \ + } while(0); + PyAPI_FUNC(const char *) PyEval_GetFuncName(PyObject *); PyAPI_FUNC(const char *) PyEval_GetFuncDesc(PyObject *); diff --git a/Include/pystate.h b/Include/pystate.h index 4919d99..0681e65 100644 --- a/Include/pystate.h +++ b/Include/pystate.h @@ -61,6 +61,10 @@ typedef struct _ts { struct _frame *frame; int recursion_depth; + char overflowed; /* The stack has overflowed. Allow 50 more calls + to handle the runtime error. */ + char recursion_critical; /* The current calls must not cause + a stack overflow. */ /* 'tracing' keeps track of the execution depth when tracing/profiling. This is to prevent the actual trace/profile code from being recorded in the trace/profile. */ diff --git a/Include/stringobject.h b/Include/stringobject.h index 2b8cc2f..0a932f0 100644 --- a/Include/stringobject.h +++ b/Include/stringobject.h @@ -84,8 +84,8 @@ PyAPI_FUNC(void) _Py_ReleaseInternedStrings(void); #define PyString_CHECK_INTERNED(op) (((PyStringObject *)(op))->ob_sstate) /* Macro, trading safety for speed */ -#define PyString_AS_STRING(op) (((PyStringObject *)(op))->ob_sval) -#define PyString_GET_SIZE(op) (((PyStringObject *)(op))->ob_size) +#define PyString_AS_STRING(op) (assert(PyString_Check(op)),(((PyStringObject *)(op))->ob_sval)) +#define PyString_GET_SIZE(op) (assert(PyString_Check(op)),(((PyStringObject *)(op))->ob_size)) /* _PyString_Join(sep, x) is like sep.join(x). sep must be PyStringObject*, x must be an iterable object. */ diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 2a27dbc..1f6b729 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -410,13 +410,13 @@ PyAPI_DATA(PyTypeObject) PyUnicode_Type; /* Fast access macros */ #define PyUnicode_GET_SIZE(op) \ - (((PyUnicodeObject *)(op))->length) + (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length)) #define PyUnicode_GET_DATA_SIZE(op) \ - (((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE)) + (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE))) #define PyUnicode_AS_UNICODE(op) \ - (((PyUnicodeObject *)(op))->str) + (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->str)) #define PyUnicode_AS_DATA(op) \ - ((const char *)((PyUnicodeObject *)(op))->str) + (assert(PyUnicode_Check(op)),((const char *)((PyUnicodeObject *)(op))->str)) /* --- Constants ---------------------------------------------------------- */ @@ -627,6 +627,13 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal); PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString( PyObject *, const char *); +/* Return a char* holding the default encoded value of the + Unicode object. +*/ + +PyAPI_FUNC(char *) PyUnicode_AsString(PyObject*); + + /* Returns the currently active default encoding. The default encoding is currently implemented as run-time settable @@ -1193,6 +1200,11 @@ PyAPI_FUNC(int) PyUnicode_Compare( PyObject *right /* Right string */ ); +PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString( + PyObject *left, + const char *right + ); + /* Rich compare two strings and return one of the following: - NULL in case an exception was raised @@ -1310,6 +1322,22 @@ PyAPI_FUNC(int) _PyUnicode_IsAlpha( Py_UNICODE ch /* Unicode character */ ); +PyAPI_FUNC(size_t) Py_UNICODE_strlen(const Py_UNICODE *u); + +PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy( + Py_UNICODE *s1, const Py_UNICODE *s2); + +PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy( + Py_UNICODE *s1, const Py_UNICODE *s2, size_t n); + +PyAPI_FUNC(int) Py_UNICODE_strcmp( + const Py_UNICODE *s1, const Py_UNICODE *s2); + +PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr( + const Py_UNICODE *s, Py_UNICODE c + ); + + #ifdef __cplusplus } #endif diff --git a/Lib/test/test_frozen.py b/Lib/test/test_frozen.py index 678b9a8..f1299f0 100644 --- a/Lib/test/test_frozen.py +++ b/Lib/test/test_frozen.py @@ -10,6 +10,8 @@ from test.test_support import TestFailed import sys, os +raise TestFailed, "test currently causes assertion in debug mode" + try: import __hello__ except ImportError as x: diff --git a/Lib/test/test_new.py b/Lib/test/test_new.py index c919621..797a8c3 100644 --- a/Lib/test/test_new.py +++ b/Lib/test/test_new.py @@ -143,7 +143,7 @@ class NewTest(unittest.TestCase): firstlineno, lnotab) # new.code used to be a way to mutate a tuple... - class S(str8): + class S(str): pass t = (S("ab"),) d = new.code(argcount, kwonlyargcount, nlocals, stacksize, diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index b038ff4..e72b7f8 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -321,12 +321,6 @@ class SysModuleTest(unittest.TestCase): self.assertRaises(TypeError, sys.intern, S("abc")) - # It's still safe to pass these strings to routines that - # call intern internally, e.g. PyObject_SetAttr(). - s = S("abc") - setattr(s, s, s) - self.assertEqual(getattr(s, s), s) - s = "never interned as unicode before" self.assert_(sys.intern(s) is s) s2 = s.swapcase().swapcase() @@ -338,6 +332,12 @@ class SysModuleTest(unittest.TestCase): self.assertRaises(TypeError, sys.intern, U("abc")) + # It's still safe to pass these strings to routines that + # call intern internally, e.g. PyObject_SetAttr(). + s = U("abc") + setattr(s, s, s) + self.assertEqual(getattr(s, s), s) + def test_main(): test.test_support.run_unittest(SysModuleTest) diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index cd766c3..de5270d 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -172,7 +172,7 @@ escape_encode(PyObject *self, &PyString_Type, &str, &errors)) return NULL; - size = PyUnicode_GET_SIZE(str); + size = PyString_GET_SIZE(str); newsize = 4*size; if (newsize > PY_SSIZE_T_MAX || newsize / 4 != size) { PyErr_SetString(PyExc_OverflowError, diff --git a/Modules/_hotshot.c b/Modules/_hotshot.c index 21bd383..fc4a1de 100644 --- a/Modules/_hotshot.c +++ b/Modules/_hotshot.c @@ -810,7 +810,7 @@ get_fileno(ProfilerObject *self, PyCodeObject *fcode) PyObject *name = PyDict_GetItem(dict, obj); if (name == NULL) { if (pack_define_func(self, fileno, fcode->co_firstlineno, - PyString_AS_STRING(fcode->co_name)) < 0) { + PyUnicode_AsString(fcode->co_name)) < 0) { Py_DECREF(obj); return -1; } diff --git a/Modules/cPickle.c b/Modules/cPickle.c index 68990c9..a4dff7b 100644 --- a/Modules/cPickle.c +++ b/Modules/cPickle.c @@ -1829,8 +1829,8 @@ save_global(Picklerobject *self, PyObject *args, PyObject *name) (name_size = PyString_Size(global_name)) < 0) goto finally; - module_str = PyString_AS_STRING((PyStringObject *)module); - name_str = PyString_AS_STRING((PyStringObject *)global_name); + module_str = PyUnicode_AsString(module); + name_str = PyUnicode_AsString(global_name); /* XXX This can be doing a relative import. Clearly it shouldn't, but I don't know how to stop it. :-( */ @@ -1842,7 +1842,7 @@ save_global(Picklerobject *self, PyObject *args, PyObject *name) "OS", args, module); goto finally; } - klass = PyObject_GetAttrString(mod, name_str); + klass = PyObject_GetAttr(mod, global_name); if (klass == NULL) { cPickle_ErrFormat(PicklingError, "Can't pickle %s: attribute lookup %s.%s " @@ -2223,7 +2223,7 @@ save(Picklerobject *self, PyObject *args, int pers_save) res = save_string(self, args, 0); goto finally; } - if ((type == &PyUnicode_Type) && (PyString_GET_SIZE(args) < 2)) { + if ((type == &PyUnicode_Type) && (PyUnicode_GET_SIZE(args) < 2)) { res = save_unicode(self, args, 0); goto finally; } @@ -3584,7 +3584,7 @@ load_global(Unpicklerobject *self) Py_DECREF(module_name); return bad_readline(); } - if ((class_name = PyString_FromStringAndSize(s, len - 1))) { + if ((class_name = PyUnicode_FromStringAndSize(s, len - 1))) { class = find_class(module_name, class_name, self->find_class); Py_DECREF(class_name); @@ -5379,7 +5379,7 @@ init_stuff(PyObject *module_dict) { PyObject *copy_reg, *t, *r; -#define INIT_STR(S) if (!( S ## _str=PyString_InternFromString(#S))) return -1; +#define INIT_STR(S) if (!( S ## _str=PyUnicode_InternFromString(#S))) return -1; if (PyType_Ready(&Unpicklertype) < 0) return -1; diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 2dd058e..adcdb5f 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -713,7 +713,7 @@ collect(int generation) double t1 = 0.0; if (delstr == NULL) { - delstr = PyString_InternFromString("__del__"); + delstr = PyUnicode_InternFromString("__del__"); if (delstr == NULL) Py_FatalError("gc couldn't allocate \"__del__\""); } diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c index da8af34..f660046 100644 --- a/Modules/unicodedata.c +++ b/Modules/unicodedata.c @@ -515,7 +515,7 @@ nfd_nfkd(PyObject *self, PyObject *input, int k) /* Hangul Decomposition adds three characters in a single step, so we need atleast that much room. */ if (space < 3) { - Py_ssize_t newsize = PyString_GET_SIZE(result) + 10; + Py_ssize_t newsize = PyUnicode_GET_SIZE(result) + 10; space += 10; if (PyUnicode_Resize(&result, newsize) == -1) return NULL; diff --git a/Objects/abstract.c b/Objects/abstract.c index 84b3384..6e63852 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -207,7 +207,7 @@ PyObject_DelItemString(PyObject *o, char *key) null_error(); return -1; } - okey = PyString_FromString(key); + okey = PyUnicode_FromString(key); if (okey == NULL) return -1; ret = PyObject_DelItem(o, okey); @@ -1598,7 +1598,7 @@ PyMapping_GetItemString(PyObject *o, char *key) if (key == NULL) return null_error(); - okey = PyString_FromString(key); + okey = PyUnicode_FromString(key); if (okey == NULL) return NULL; r = PyObject_GetItem(o, okey); @@ -1617,7 +1617,7 @@ PyMapping_SetItemString(PyObject *o, char *key, PyObject *value) return -1; } - okey = PyString_FromString(key); + okey = PyUnicode_FromString(key); if (okey == NULL) return -1; r = PyObject_SetItem(o, okey, value); @@ -1989,11 +1989,13 @@ abstract_get_bases(PyObject *cls) PyObject *bases; if (__bases__ == NULL) { - __bases__ = PyString_FromString("__bases__"); + __bases__ = PyUnicode_FromString("__bases__"); if (__bases__ == NULL) return NULL; } + Py_ALLOW_RECURSION bases = PyObject_GetAttr(cls, __bases__); + Py_END_ALLOW_RECURSION if (bases == NULL) { if (PyErr_ExceptionMatches(PyExc_AttributeError)) PyErr_Clear(); @@ -2067,7 +2069,7 @@ recursive_isinstance(PyObject *inst, PyObject *cls, int recursion_depth) int retval = 0; if (__class__ == NULL) { - __class__ = PyString_FromString("__class__"); + __class__ = PyUnicode_FromString("__class__"); if (__class__ == NULL) return -1; } diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 2a1dbcb..532e637 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1078,7 +1078,7 @@ bytes_count(PyBytesObject *self, PyObject *args) else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len)) return NULL; - _adjust_indices(&start, &end, PyString_GET_SIZE(self)); + _adjust_indices(&start, &end, PyBytes_GET_SIZE(self)); return PyInt_FromSsize_t( stringlib_count(str + start, end - start, sub, sub_len) diff --git a/Objects/classobject.c b/Objects/classobject.c index b7711d5..3cf64de 100644 --- a/Objects/classobject.c +++ b/Objects/classobject.c @@ -100,7 +100,7 @@ method_get_doc(PyMethodObject *im, void *context) { static PyObject *docstr; if (docstr == NULL) { - docstr= PyString_InternFromString("__doc__"); + docstr= PyUnicode_InternFromString("__doc__"); if (docstr == NULL) return NULL; } @@ -235,12 +235,12 @@ method_repr(PyMethodObject *a) return NULL; PyErr_Clear(); } - else if (!PyString_Check(funcname)) { + else if (!PyUnicode_Check(funcname)) { Py_DECREF(funcname); funcname = NULL; } else - sfuncname = PyString_AS_STRING(funcname); + sfuncname = PyUnicode_AsString(funcname); if (klass == NULL) klassname = NULL; else { @@ -250,12 +250,12 @@ method_repr(PyMethodObject *a) return NULL; PyErr_Clear(); } - else if (!PyString_Check(klassname)) { + else if (!PyUnicode_Check(klassname)) { Py_DECREF(klassname); klassname = NULL; } else - sklassname = PyString_AS_STRING(klassname); + sklassname = PyUnicode_AsString(klassname); } if (self == NULL) result = PyUnicode_FromFormat("", diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 6763950..c735193 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -32,10 +32,10 @@ intern_strings(PyObject *tuple) for (i = PyTuple_GET_SIZE(tuple); --i >= 0; ) { PyObject *v = PyTuple_GET_ITEM(tuple, i); - if (v == NULL || !PyString_CheckExact(v)) { + if (v == NULL || !PyUnicode_CheckExact(v)) { Py_FatalError("non-string found in code slot"); } - PyString_InternInPlace(&PyTuple_GET_ITEM(tuple, i)); + PyUnicode_InternInPlace(&PyTuple_GET_ITEM(tuple, i)); } } @@ -58,7 +58,7 @@ PyCode_New(int argcount, int kwonlyargcount, varnames == NULL || !PyTuple_Check(varnames) || freevars == NULL || !PyTuple_Check(freevars) || cellvars == NULL || !PyTuple_Check(cellvars) || - name == NULL || !PyString_Check(name) || + name == NULL || (!PyString_Check(name) && !PyUnicode_Check(name)) || filename == NULL || !PyString_Check(filename) || lnotab == NULL || !PyString_Check(lnotab) || !PyObject_CheckReadBuffer(code)) { @@ -148,10 +148,10 @@ validate_and_copy_tuple(PyObject *tup) for (i = 0; i < len; i++) { item = PyTuple_GET_ITEM(tup, i); - if (PyString_CheckExact(item)) { + if (PyUnicode_CheckExact(item)) { Py_INCREF(item); } - else if (!PyString_Check(item)) { + else if (!PyUnicode_Check(item)) { PyErr_Format( PyExc_TypeError, "name tuples must contain only " @@ -161,9 +161,9 @@ validate_and_copy_tuple(PyObject *tup) return NULL; } else { - item = PyString_FromStringAndSize( - PyString_AS_STRING(item), - PyString_GET_SIZE(item)); + item = PyUnicode_FromUnicode( + PyUnicode_AS_UNICODE(item), + PyUnicode_GET_SIZE(item)); if (item == NULL) { Py_DECREF(newtuple); return NULL; diff --git a/Objects/complexobject.c b/Objects/complexobject.c index ed2e475..4580ef2 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -269,7 +269,7 @@ PyComplex_AsCComplex(PyObject *op) { PyObject *complexfunc; if (!complex_str) { - if (!(complex_str = PyString_FromString("__complex__"))) + if (!(complex_str = PyUnicode_FromString("__complex__"))) return cv; } complexfunc = _PyType_Lookup(op->ob_type, complex_str); @@ -900,7 +900,7 @@ complex_new(PyTypeObject *type, PyObject *args, PyObject *kwds) /* XXX Hack to support classes with __complex__ method */ if (complexstr == NULL) { - complexstr = PyString_InternFromString("__complex__"); + complexstr = PyUnicode_InternFromString("__complex__"); if (complexstr == NULL) return NULL; } diff --git a/Objects/descrobject.c b/Objects/descrobject.c index e9ccefa..acd2400 100644 --- a/Objects/descrobject.c +++ b/Objects/descrobject.c @@ -15,7 +15,10 @@ descr_dealloc(PyDescrObject *descr) static char * descr_name(PyDescrObject *descr) { - if (descr->d_name != NULL && PyString_Check(descr->d_name)) + if (descr->d_name != NULL && PyUnicode_Check(descr->d_name)) + return PyUnicode_AsString(descr->d_name); + else if (descr->d_name != NULL && PyString_Check(descr->d_name)) + /* XXX this should not happen */ return PyString_AS_STRING(descr->d_name); else return "?"; @@ -581,7 +584,7 @@ descr_new(PyTypeObject *descrtype, PyTypeObject *type, const char *name) if (descr != NULL) { Py_XINCREF(type); descr->d_type = type; - descr->d_name = PyString_InternFromString(name); + descr->d_name = PyUnicode_InternFromString(name); if (descr->d_name == NULL) { Py_DECREF(descr); descr = NULL; diff --git a/Objects/dictobject.c b/Objects/dictobject.c index b45a664..639c3c5 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1040,7 +1040,7 @@ dict_subscript(dictobject *mp, register PyObject *key) static PyObject *missing_str = NULL; if (missing_str == NULL) missing_str = - PyString_InternFromString("__missing__"); + PyUnicode_InternFromString("__missing__"); missing = _PyType_Lookup(mp->ob_type, missing_str); if (missing != NULL) return PyObject_CallFunctionObjArgs(missing, @@ -2073,7 +2073,7 @@ PyObject * PyDict_GetItemString(PyObject *v, const char *key) { PyObject *kv, *rv; - kv = PyString_FromString(key); + kv = PyUnicode_FromString(key); if (kv == NULL) return NULL; rv = PyDict_GetItem(v, kv); @@ -2086,10 +2086,10 @@ PyDict_SetItemString(PyObject *v, const char *key, PyObject *item) { PyObject *kv; int err; - kv = PyString_FromString(key); + kv = PyUnicode_FromString(key); if (kv == NULL) return -1; - PyString_InternInPlace(&kv); /* XXX Should we really? */ + PyUnicode_InternInPlace(&kv); /* XXX Should we really? */ err = PyDict_SetItem(v, kv, item); Py_DECREF(kv); return err; @@ -2100,7 +2100,7 @@ PyDict_DelItemString(PyObject *v, const char *key) { PyObject *kv; int err; - kv = PyString_FromString(key); + kv = PyUnicode_FromString(key); if (kv == NULL) return -1; err = PyDict_DelItem(v, kv); diff --git a/Objects/frameobject.c b/Objects/frameobject.c index f780b3a..bb27f1c 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -542,7 +542,7 @@ static PyObject *builtin_object; int _PyFrame_Init() { - builtin_object = PyString_InternFromString("__builtins__"); + builtin_object = PyUnicode_InternFromString("__builtins__"); return (builtin_object != NULL); } @@ -722,7 +722,7 @@ map_to_dict(PyObject *map, Py_ssize_t nmap, PyObject *dict, PyObject **values, for (j = nmap; --j >= 0; ) { PyObject *key = PyTuple_GET_ITEM(map, j); PyObject *value = values[j]; - assert(PyString_Check(key)); + assert(PyString_Check(key)/*XXX this should go*/ || PyUnicode_Check(key)); if (deref) { assert(PyCell_Check(value)); value = PyCell_GET(value); @@ -770,7 +770,7 @@ dict_to_map(PyObject *map, Py_ssize_t nmap, PyObject *dict, PyObject **values, for (j = nmap; --j >= 0; ) { PyObject *key = PyTuple_GET_ITEM(map, j); PyObject *value = PyObject_GetItem(dict, key); - assert(PyString_Check(key)); + assert(PyUnicode_Check(key)); /* We only care about NULLs if clear is true. */ if (value == NULL) { PyErr_Clear(); diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 6f17e7a..ff1b4c8 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -322,7 +322,7 @@ func_set_name(PyFunctionObject *op, PyObject *value) /* Not legal to del f.func_name or to set it to anything * other than a string object. */ - if (value == NULL || !PyString_Check(value)) { + if (value == NULL || (!PyString_Check(value) && !PyUnicode_Check(value))) { PyErr_SetString(PyExc_TypeError, "__name__ must be set to a string object"); return -1; @@ -516,7 +516,7 @@ func_new(PyTypeObject* type, PyObject* args, PyObject* kw) if (nfree != nclosure) return PyErr_Format(PyExc_ValueError, "%s requires closure of length %zd, not %zd", - PyString_AS_STRING(code->co_name), + PyUnicode_AsString(code->co_name), nfree, nclosure); if (nclosure) { Py_ssize_t i; diff --git a/Objects/methodobject.c b/Objects/methodobject.c index 6199805..2d1c688 100644 --- a/Objects/methodobject.c +++ b/Objects/methodobject.c @@ -143,7 +143,7 @@ meth_get__doc__(PyCFunctionObject *m, void *closure) static PyObject * meth_get__name__(PyCFunctionObject *m, void *closure) { - return PyString_FromString(m->m_ml->ml_name); + return PyUnicode_FromString(m->m_ml->ml_name); } static int @@ -297,7 +297,7 @@ listmethodchain(PyMethodChain *chain) i = 0; for (c = chain; c != NULL; c = c->link) { for (ml = c->methods; ml->ml_name != NULL; ml++) { - PyList_SetItem(v, i, PyString_FromString(ml->ml_name)); + PyList_SetItem(v, i, PyUnicode_FromString(ml->ml_name)); i++; } } diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index daf24eb..7c5e47f 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -22,7 +22,7 @@ PyModule_New(const char *name) m = PyObject_GC_New(PyModuleObject, &PyModule_Type); if (m == NULL) return NULL; - nameobj = PyString_FromString(name); + nameobj = PyUnicode_FromString(name); m->md_dict = PyDict_New(); if (m->md_dict == NULL || nameobj == NULL) goto fail; diff --git a/Objects/object.c b/Objects/object.c index be7d501..c701af0 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -465,7 +465,7 @@ PyObject_Unicode(PyObject *v) check this before trying the __unicode__ method. */ if (unicodestr == NULL) { - unicodestr= PyString_InternFromString("__unicode__"); + unicodestr= PyUnicode_InternFromString("__unicode__"); if (unicodestr == NULL) return NULL; } @@ -852,7 +852,7 @@ PyObject_GetAttrString(PyObject *v, const char *name) if (v->ob_type->tp_getattr != NULL) return (*v->ob_type->tp_getattr)(v, (char*)name); - w = PyString_InternFromString(name); + w = PyUnicode_InternFromString(name); if (w == NULL) return NULL; res = PyObject_GetAttr(v, w); @@ -880,7 +880,7 @@ PyObject_SetAttrString(PyObject *v, const char *name, PyObject *w) if (v->ob_type->tp_setattr != NULL) return (*v->ob_type->tp_setattr)(v, (char*)name, w); - s = PyString_InternFromString(name); + s = PyUnicode_InternFromString(name); if (s == NULL) return -1; res = PyObject_SetAttr(v, s, w); @@ -893,30 +893,19 @@ PyObject_GetAttr(PyObject *v, PyObject *name) { PyTypeObject *tp = v->ob_type; - if (!PyString_Check(name)) { - /* The Unicode to string conversion is done here because the - existing tp_getattro slots expect a string object as name - and we wouldn't want to break those. */ - if (PyUnicode_Check(name)) { - name = _PyUnicode_AsDefaultEncodedString(name, NULL); - if (name == NULL) - return NULL; - } - else - { - PyErr_Format(PyExc_TypeError, - "attribute name must be string, not '%.200s'", - name->ob_type->tp_name); - return NULL; - } + if (!PyUnicode_Check(name)) { + PyErr_Format(PyExc_TypeError, + "attribute name must be string, not '%.200s'", + name->ob_type->tp_name); + return NULL; } if (tp->tp_getattro != NULL) return (*tp->tp_getattro)(v, name); if (tp->tp_getattr != NULL) - return (*tp->tp_getattr)(v, PyString_AS_STRING(name)); + return (*tp->tp_getattr)(v, PyUnicode_AsString(name)); PyErr_Format(PyExc_AttributeError, "'%.50s' object has no attribute '%.400s'", - tp->tp_name, PyString_AS_STRING(name)); + tp->tp_name, PyUnicode_AsString(name)); return NULL; } @@ -938,33 +927,22 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value) PyTypeObject *tp = v->ob_type; int err; - if (!PyString_Check(name)) { - /* The Unicode to string conversion is done here because the - existing tp_setattro slots expect a string object as name - and we wouldn't want to break those. */ - if (PyUnicode_Check(name)) { - name = _PyUnicode_AsDefaultEncodedString(name, NULL); - if (name == NULL) - return -1; - } - else - { - PyErr_Format(PyExc_TypeError, - "attribute name must be string, not '%.200s'", - name->ob_type->tp_name); - return -1; - } + if (!PyUnicode_Check(name)) { + PyErr_Format(PyExc_TypeError, + "attribute name must be string, not '%.200s'", + name->ob_type->tp_name); + return -1; } Py_INCREF(name); - PyString_InternInPlace(&name); + PyUnicode_InternInPlace(&name); if (tp->tp_setattro != NULL) { err = (*tp->tp_setattro)(v, name, value); Py_DECREF(name); return err; } if (tp->tp_setattr != NULL) { - err = (*tp->tp_setattr)(v, PyString_AS_STRING(name), value); + err = (*tp->tp_setattr)(v, PyUnicode_AsString(name), value); Py_DECREF(name); return err; } @@ -976,14 +954,14 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value) "(%s .%.100s)", tp->tp_name, value==NULL ? "del" : "assign to", - PyString_AS_STRING(name)); + PyUnicode_AsString(name)); else PyErr_Format(PyExc_TypeError, "'%.100s' object has only read-only attributes " "(%s .%.100s)", tp->tp_name, value==NULL ? "del" : "assign to", - PyString_AS_STRING(name)); + PyUnicode_AsString(name)); return -1; } @@ -1033,22 +1011,11 @@ PyObject_GenericGetAttr(PyObject *obj, PyObject *name) Py_ssize_t dictoffset; PyObject **dictptr; - if (!PyString_Check(name)){ - /* The Unicode to string conversion is done here because the - existing tp_setattro slots expect a string object as name - and we wouldn't want to break those. */ - if (PyUnicode_Check(name)) { - name = PyUnicode_AsEncodedString(name, NULL, NULL); - if (name == NULL) - return NULL; - } - else - { - PyErr_Format(PyExc_TypeError, - "attribute name must be string, not '%.200s'", - name->ob_type->tp_name); - return NULL; - } + if (!PyUnicode_Check(name)){ + PyErr_Format(PyExc_TypeError, + "attribute name must be string, not '%.200s'", + name->ob_type->tp_name); + return NULL; } else Py_INCREF(name); @@ -1134,7 +1101,7 @@ PyObject_GenericGetAttr(PyObject *obj, PyObject *name) PyErr_Format(PyExc_AttributeError, "'%.50s' object has no attribute '%.400s'", - tp->tp_name, PyString_AS_STRING(name)); + tp->tp_name, PyUnicode_AsString(name)); done: Py_DECREF(name); return res; @@ -1149,22 +1116,11 @@ PyObject_GenericSetAttr(PyObject *obj, PyObject *name, PyObject *value) PyObject **dictptr; int res = -1; - if (!PyString_Check(name)){ - /* The Unicode to string conversion is done here because the - existing tp_setattro slots expect a string object as name - and we wouldn't want to break those. */ - if (PyUnicode_Check(name)) { - name = PyUnicode_AsEncodedString(name, NULL, NULL); - if (name == NULL) - return -1; - } - else - { - PyErr_Format(PyExc_TypeError, - "attribute name must be string, not '%.200s'", - name->ob_type->tp_name); - return -1; - } + if (!PyUnicode_Check(name)){ + PyErr_Format(PyExc_TypeError, + "attribute name must be string, not '%.200s'", + name->ob_type->tp_name); + return -1; } else Py_INCREF(name); @@ -1212,13 +1168,13 @@ PyObject_GenericSetAttr(PyObject *obj, PyObject *name, PyObject *value) if (descr == NULL) { PyErr_Format(PyExc_AttributeError, "'%.100s' object has no attribute '%.200s'", - tp->tp_name, PyString_AS_STRING(name)); + tp->tp_name, PyUnicode_AsString(name)); goto done; } PyErr_Format(PyExc_AttributeError, "'%.50s' object attribute '%.400s' is read-only", - tp->tp_name, PyString_AS_STRING(name)); + tp->tp_name, PyUnicode_AsString(name)); done: Py_DECREF(name); return res; diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 60e6129..92bc95b 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -686,6 +686,11 @@ string_getbuffer(register PyObject *op) Py_ssize_t PyString_Size(register PyObject *op) { + if (PyUnicode_Check(op)) { + op = _PyUnicode_AsDefaultEncodedString(op, NULL); + if (!op) + return -1; + } if (!PyString_Check(op)) return string_getsize(op); return ((PyStringObject *)op) -> ob_size; @@ -694,6 +699,11 @@ PyString_Size(register PyObject *op) /*const*/ char * PyString_AsString(register PyObject *op) { + if (PyUnicode_Check(op)) { + op = _PyUnicode_AsDefaultEncodedString(op, NULL); + if (!op) + return NULL; + } if (!PyString_Check(op)) return string_getbuffer(op); return ((PyStringObject *)op) -> ob_sval; @@ -824,7 +834,7 @@ PyString_Repr(PyObject *obj, int smartquotes) { static const char *hexdigits = "0123456789abcdef"; register PyStringObject* op = (PyStringObject*) obj; - Py_ssize_t length = PyUnicode_GET_SIZE(op); + Py_ssize_t length = PyString_GET_SIZE(op); size_t newsize = 2 + 4 * op->ob_size; PyObject *v; if (newsize > PY_SSIZE_T_MAX || newsize / 4 != op->ob_size) { diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 5983011..ab86f54 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -35,7 +35,7 @@ type_name(PyTypeObject *type, void *context) s = type->tp_name; else s++; - return PyString_FromString(s); + return PyUnicode_FromString(s); } } @@ -97,9 +97,9 @@ type_module(PyTypeObject *type, void *context) else { s = strrchr(type->tp_name, '.'); if (s != NULL) - return PyString_FromStringAndSize( + return PyUnicode_FromStringAndSize( type->tp_name, (Py_ssize_t)(s - type->tp_name)); - return PyString_FromString("__builtin__"); + return PyUnicode_FromString("__builtin__"); } } @@ -371,7 +371,7 @@ type_repr(PyTypeObject *type) mod = type_module(type, NULL); if (mod == NULL) PyErr_Clear(); - else if (!PyString_Check(mod)) { + else if (!PyUnicode_Check(mod)) { Py_DECREF(mod); mod = NULL; } @@ -384,11 +384,11 @@ type_repr(PyTypeObject *type) else kind = "type"; - if (mod != NULL && strcmp(PyString_AS_STRING(mod), "__builtin__")) { + if (mod != NULL && strcmp(PyUnicode_AsString(mod), "__builtin__")) { rtn = PyUnicode_FromFormat("<%s '%s.%s'>", kind, - PyString_AS_STRING(mod), - PyString_AS_STRING(name)); + PyUnicode_AsString(mod), + PyUnicode_AsString(name)); } else rtn = PyUnicode_FromFormat("<%s '%s'>", kind, type->tp_name); @@ -859,7 +859,7 @@ lookup_maybe(PyObject *self, char *attrstr, PyObject **attrobj) PyObject *res; if (*attrobj == NULL) { - *attrobj = PyString_InternFromString(attrstr); + *attrobj = PyUnicode_InternFromString(attrstr); if (*attrobj == NULL) return NULL; } @@ -1415,7 +1415,7 @@ get_dict_descriptor(PyTypeObject *type) PyObject *descr; if (dict_str == NULL) { - dict_str = PyString_InternFromString("__dict__"); + dict_str = PyUnicode_InternFromString("__dict__"); if (dict_str == NULL) return NULL; } @@ -1564,14 +1564,14 @@ valid_identifier(PyObject *s) unsigned char *p; Py_ssize_t i, n; - if (!PyString_Check(s)) { + if (!PyUnicode_Check(s)) { PyErr_Format(PyExc_TypeError, "__slots__ items must be strings, not '%.200s'", s->ob_type->tp_name); return 0; } - p = (unsigned char *) PyString_AS_STRING(s); - n = PyString_GET_SIZE(s); + p = (unsigned char *) PyUnicode_AsString(s); + n = strlen((char*)p)/*XXX PyString_GET_SIZE(s)*/; /* We must reject an empty name. As a hack, we bump the length to 1 so that the loop will balk on the trailing \0. */ if (n == 0) @@ -1792,22 +1792,13 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds) return NULL; } - tmp = _unicode_to_string(slots, nslots); - if (tmp == NULL) - goto bad_slots; - if (tmp != slots) { - Py_DECREF(slots); - slots = tmp; - } /* Check for valid slot names and two special cases */ for (i = 0; i < nslots; i++) { PyObject *tmp = PyTuple_GET_ITEM(slots, i); - char *s; if (!valid_identifier(tmp)) goto bad_slots; - assert(PyString_Check(tmp)); - s = PyString_AS_STRING(tmp); - if (strcmp(s, "__dict__") == 0) { + assert(PyUnicode_Check(tmp)); + if (PyUnicode_CompareWithASCIIString(tmp, "__dict__") == 0) { if (!may_add_dict || add_dict) { PyErr_SetString(PyExc_TypeError, "__dict__ slot disallowed: " @@ -1816,7 +1807,7 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds) } add_dict++; } - if (strcmp(s, "__weakref__") == 0) { + if (PyUnicode_CompareWithASCIIString(tmp, "__weakref__") == 0) { if (!may_add_weak || add_weak) { PyErr_SetString(PyExc_TypeError, "__weakref__ slot disallowed: " @@ -1836,11 +1827,11 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds) if (newslots == NULL) goto bad_slots; for (i = j = 0; i < nslots; i++) { - char *s; tmp = PyTuple_GET_ITEM(slots, i); - s = PyString_AS_STRING(tmp); - if ((add_dict && strcmp(s, "__dict__") == 0) || - (add_weak && strcmp(s, "__weakref__") == 0)) + if ((add_dict && + PyUnicode_CompareWithASCIIString(tmp, "__dict__") == 0) || + (add_weak && + PyUnicode_CompareWithASCIIString(tmp, "__weakref__") == 0)) continue; tmp =_Py_Mangle(name, tmp); if (!tmp) @@ -1917,7 +1908,15 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds) type->tp_as_sequence = &et->as_sequence; type->tp_as_mapping = &et->as_mapping; type->tp_as_buffer = &et->as_buffer; - type->tp_name = PyString_AS_STRING(name); + if (PyString_Check(name)) + type->tp_name = PyString_AsString(name); + else { + type->tp_name = PyUnicode_AsString(name); + if (!type->tp_name) { + Py_DECREF(type); + return NULL; + } + } /* Set tp_base and tp_bases */ type->tp_bases = bases; @@ -1980,7 +1979,7 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds) slotoffset = base->tp_basicsize; if (slots != NULL) { for (i = 0; i < nslots; i++, mp++) { - mp->name = PyString_AS_STRING( + mp->name = PyUnicode_AsString( PyTuple_GET_ITEM(slots, i)); mp->type = T_OBJECT_EX; mp->offset = slotoffset; @@ -2157,7 +2156,7 @@ type_getattro(PyTypeObject *type, PyObject *name) /* Give up */ PyErr_Format(PyExc_AttributeError, "type object '%.50s' has no attribute '%.400s'", - type->tp_name, PyString_AS_STRING(name)); + type->tp_name, PyUnicode_AsString(name)); return NULL; } @@ -2473,7 +2472,7 @@ object_repr(PyObject *self) mod = type_module(type, NULL); if (mod == NULL) PyErr_Clear(); - else if (!PyString_Check(mod)) { + else if (!PyUnicode_Check(mod)) { Py_DECREF(mod); mod = NULL; } @@ -2482,8 +2481,8 @@ object_repr(PyObject *self) return NULL; if (mod != NULL && strcmp(PyString_AS_STRING(mod), "__builtin__")) rtn = PyUnicode_FromFormat("<%s.%s object at %p>", - PyString_AS_STRING(mod), - PyString_AS_STRING(name), + PyUnicode_AsString(mod), + PyUnicode_AsString(name), self); else rtn = PyUnicode_FromFormat("<%s object at %p>", @@ -2686,7 +2685,7 @@ import_copy_reg(void) static PyObject *copy_reg_str; if (!copy_reg_str) { - copy_reg_str = PyString_InternFromString("copy_reg"); + copy_reg_str = PyUnicode_InternFromString("copy_reg"); if (copy_reg_str == NULL) return NULL; } @@ -4330,7 +4329,7 @@ slot_sq_item(PyObject *self, Py_ssize_t i) descrgetfunc f; if (getitem_str == NULL) { - getitem_str = PyString_InternFromString("__getitem__"); + getitem_str = PyUnicode_InternFromString("__getitem__"); if (getitem_str == NULL) return NULL; } @@ -4760,13 +4759,13 @@ slot_tp_getattr_hook(PyObject *self, PyObject *name) static PyObject *getattr_str = NULL; if (getattr_str == NULL) { - getattr_str = PyString_InternFromString("__getattr__"); + getattr_str = PyUnicode_InternFromString("__getattr__"); if (getattr_str == NULL) return NULL; } if (getattribute_str == NULL) { getattribute_str = - PyString_InternFromString("__getattribute__"); + PyUnicode_InternFromString("__getattribute__"); if (getattribute_str == NULL) return NULL; } @@ -4898,7 +4897,7 @@ slot_tp_descr_get(PyObject *self, PyObject *obj, PyObject *type) static PyObject *get_str = NULL; if (get_str == NULL) { - get_str = PyString_InternFromString("__get__"); + get_str = PyUnicode_InternFromString("__get__"); if (get_str == NULL) return NULL; } @@ -4968,7 +4967,7 @@ slot_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) Py_ssize_t i, n; if (new_str == NULL) { - new_str = PyString_InternFromString("__new__"); + new_str = PyUnicode_InternFromString("__new__"); if (new_str == NULL) return NULL; } @@ -5490,7 +5489,7 @@ init_slotdefs(void) if (initialized) return; for (p = slotdefs; p->name; p++) { - p->name_strobj = PyString_InternFromString(p->name); + p->name_strobj = PyUnicode_InternFromString(p->name); if (!p->name_strobj) Py_FatalError("Out of memory interning slotdef names"); } @@ -5717,9 +5716,9 @@ super_getattro(PyObject *self, PyObject *name) if (!skip) { /* We want __class__ to return the class of the super object (i.e. super, or a subclass), not the class of su->obj. */ - skip = (PyString_Check(name) && - PyString_GET_SIZE(name) == 9 && - strcmp(PyString_AS_STRING(name), "__class__") == 0); + skip = (PyUnicode_Check(name) && + PyUnicode_GET_SIZE(name) == 9 && + PyUnicode_CompareWithASCIIString(name, "__class__") == 0); } if (!skip) { @@ -5809,7 +5808,7 @@ supercheck(PyTypeObject *type, PyObject *obj) PyObject *class_attr; if (class_str == NULL) { - class_str = PyString_FromString("__class__"); + class_str = PyUnicode_FromString("__class__"); if (class_str == NULL) return NULL; } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e793418..87c5c99 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -458,8 +458,10 @@ PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size) /* Copy the Unicode data into the new object */ if (u != NULL) { Py_UNICODE *p = unicode->str; - while ((*p++ = *u++)) - ; + while (size--) + *p++ = *u++; + /* Don't need to write trailing 0 because + that's already done by _PyUnicode_New */ } return (PyObject *)unicode; @@ -1184,6 +1186,16 @@ PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode, return v; } +char* +PyUnicode_AsString(PyObject *unicode) +{ + assert(PyUnicode_Check(unicode)); + unicode = _PyUnicode_AsDefaultEncodedString(unicode, NULL); + if (!unicode) + return NULL; + return PyString_AsString(unicode); +} + Py_UNICODE *PyUnicode_AsUnicode(PyObject *unicode) { if (!PyUnicode_Check(unicode)) { @@ -3247,7 +3259,7 @@ PyObject *PyUnicode_DecodeASCII(const char *s, goto onError; } } - if (p - PyUnicode_AS_UNICODE(v) < PyString_GET_SIZE(v)) + if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v)) if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0) goto onError; Py_XDECREF(errorHandler); @@ -5861,6 +5873,24 @@ int PyUnicode_Compare(PyObject *left, return -1; } +int +PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) +{ + int i; + Py_UNICODE *id; + assert(PyUnicode_Check(uni)); + id = PyUnicode_AS_UNICODE(uni); + /* Compare Unicode string and source character set string */ + for (i = 0; id[i] && str[i]; i++) + if (id[i] != str[i]) + return ((int)id[i] < (int)str[i]) ? -1 : 1; + if (id[i]) + return 1; /* uni is longer */ + if (str[i]) + return -1; /* str is longer */ + return 0; +} + PyObject *PyUnicode_RichCompare(PyObject *left, PyObject *right, int op) @@ -8671,7 +8701,13 @@ PyUnicode_InternInPlace(PyObject **p) return; } } + /* It might be that the GetItem call fails even + though the key is present in the dictionary, + namely when this happens during a stack overflow. */ + Py_ALLOW_RECURSION t = PyDict_GetItem(interned, (PyObject *)s); + Py_END_ALLOW_RECURSION + if (t) { Py_INCREF(t); Py_DECREF(*p); @@ -8679,10 +8715,13 @@ PyUnicode_InternInPlace(PyObject **p) return; } + PyThreadState_GET()->recursion_critical = 1; if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) { PyErr_Clear(); + PyThreadState_GET()->recursion_critical = 0; return; } + PyThreadState_GET()->recursion_critical = 0; /* The two references in interned are not counted by refcnt. The deallocator will take care of this */ s->ob_refcnt -= 2; @@ -8879,6 +8918,58 @@ unicode_iter(PyObject *seq) return (PyObject *)it; } +size_t +Py_UNICODE_strlen(const Py_UNICODE *u) +{ + int res = 0; + while(*u++) + res++; + return res; +} + +Py_UNICODE* +Py_UNICODE_strcpy(Py_UNICODE *s1, const Py_UNICODE *s2) +{ + Py_UNICODE *u = s1; + while ((*u++ = *s2++)); + return s1; +} + +Py_UNICODE* +Py_UNICODE_strncpy(Py_UNICODE *s1, const Py_UNICODE *s2, size_t n) +{ + Py_UNICODE *u = s1; + while ((*u++ = *s2++)) + if (n-- == 0) + break; + return s1; +} + +int +Py_UNICODE_strcmp(const Py_UNICODE *s1, const Py_UNICODE *s2) +{ + while (*s1 && *s2 && *s1 == *s2) + s1++, s2++; + if (*s1 && *s2) + return (*s1 < *s2) ? -1 : +1; + if (*s1) + return 1; + if (*s2) + return -1; + return 0; +} + +Py_UNICODE* +Py_UNICODE_strchr(const Py_UNICODE *s, Py_UNICODE c) +{ + const Py_UNICODE *p; + for (p = s; *p; p++) + if (*p == c) + return (Py_UNICODE*)p; + return NULL; +} + + #ifdef __cplusplus } #endif diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index f3eeb2c..e7dada6 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -18,6 +18,17 @@ #include "abstract.h" #endif /* PGEN */ +#define is_potential_identifier_start(c) (\ + (c >= 'a' && c <= 'z')\ + || (c >= 'A' && c <= 'Z')\ + || c == '_') + +#define is_potential_identifier_char(c) (\ + (c >= 'a' && c <= 'z')\ + || (c >= 'A' && c <= 'Z')\ + || (c >= '0' && c <= '9')\ + || c == '_') + extern char *PyOS_Readline(FILE *, FILE *, char *); /* Return malloc'ed string including trailing \n; empty malloc'ed string for EOF; @@ -1209,7 +1220,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end) } /* Identifier (most frequent token!) */ - if (isalpha(c) || c == '_') { + if (is_potential_identifier_start(c)) { /* Process r"", u"" and ur"" */ switch (c) { case 'r': @@ -1227,7 +1238,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end) goto letter_quote; break; } - while (isalnum(c) || c == '_') { + while (is_potential_identifier_char(c)) { c = tok_nextc(tok); } tok_backup(tok, c); diff --git a/Python/Python-ast.c b/Python/Python-ast.c index 791b32d..18c2eb5 100644 --- a/Python/Python-ast.c +++ b/Python/Python-ast.c @@ -3280,3 +3280,5 @@ PyObject* PyAST_mod2obj(mod_ty t) init_types(); return ast2obj_mod(t); } + + diff --git a/Python/ast.c b/Python/ast.c index e0bd18e..b34411b 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -48,7 +48,8 @@ static PyObject *parsestrplus(struct compiling *, const node *n, static identifier new_identifier(const char* n, PyArena *arena) { - PyObject* id = PyString_InternFromString(n); + PyObject* id = PyUnicode_DecodeUTF8(n, strlen(n), NULL); + PyUnicode_InternInPlace(&id); PyArena_AddPyObject(arena, id); return id; } @@ -334,12 +335,10 @@ static const char* FORBIDDEN[] = { static int forbidden_name(expr_ty e, const node *n) { - const char *id; const char **p; - assert(PyString_Check(e->v.Name.id)); - id = PyString_AS_STRING(e->v.Name.id); + assert(PyUnicode_Check(e->v.Name.id)); for (p = FORBIDDEN; *p; p++) { - if (strcmp(*p, id) == 0) { + if (PyUnicode_CompareWithASCIIString(e->v.Name.id, *p) == 0) { ast_error(n, "assignment to keyword"); return 1; } @@ -375,7 +374,7 @@ set_context(expr_ty e, expr_context_ty ctx, const node *n) switch (e->kind) { case Attribute_kind: if (ctx == Store && - !strcmp(PyString_AS_STRING(e->v.Attribute.attr), "None")) { + !PyUnicode_CompareWithASCIIString(e->v.Attribute.attr, "None")) { return ast_error(n, "assignment to None"); } e->v.Attribute.ctx = ctx; @@ -2235,6 +2234,7 @@ alias_for_import_name(struct compiling *c, const node *n) int i; size_t len; char *s; + PyObject *uni; len = 0; for (i = 0; i < NCH(n); i += 2) @@ -2255,13 +2255,20 @@ alias_for_import_name(struct compiling *c, const node *n) } --s; *s = '\0'; - PyString_InternInPlace(&str); + uni = PyUnicode_DecodeUTF8(PyString_AS_STRING(str), + PyString_GET_SIZE(str), + NULL); + Py_DECREF(str); + if (!uni) + return NULL; + str = uni; + PyUnicode_InternInPlace(&str); PyArena_AddPyObject(c->c_arena, str); return alias(str, NULL, c->c_arena); } break; case STAR: - str = PyString_InternFromString("*"); + str = PyUnicode_InternFromString("*"); PyArena_AddPyObject(c->c_arena, str); return alias(str, NULL, c->c_arena); default: diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 97b2c5e..d4c8a74 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -48,7 +48,7 @@ builtin___build_class__(PyObject *self, PyObject *args, PyObject *kwds) } func = PyTuple_GET_ITEM(args, 0); /* Better be callable */ name = PyTuple_GET_ITEM(args, 1); - if (!PyString_Check(name)) { + if ((!PyString_Check(name) && !PyUnicode_Check(name))) { PyErr_SetString(PyExc_TypeError, "__build_class__: name is not a string"); return NULL; @@ -835,20 +835,23 @@ globals and locals. If only globals is given, locals defaults to it."); static PyObject * builtin_getattr(PyObject *self, PyObject *args) { - PyObject *v, *result, *dflt = NULL; + PyObject *v, *result, *dflt = NULL, *release = NULL; PyObject *name; if (!PyArg_UnpackTuple(args, "getattr", 2, 3, &v, &name, &dflt)) return NULL; - if (PyUnicode_Check(name)) { - name = _PyUnicode_AsDefaultEncodedString(name, NULL); - if (name == NULL) + + if (PyString_Check(name)) { + release = PyString_AsDecodedObject(name, NULL, NULL); + if (!release) return NULL; + name = release; } - if (!PyString_Check(name)) { + if (!PyUnicode_Check(name)) { PyErr_SetString(PyExc_TypeError, "getattr(): attribute name must be string"); + Py_XDECREF(release); return NULL; } result = PyObject_GetAttr(v, name); @@ -859,6 +862,7 @@ builtin_getattr(PyObject *self, PyObject *args) Py_INCREF(dflt); result = dflt; } + Py_XDECREF(release); return result; } @@ -894,13 +898,7 @@ builtin_hasattr(PyObject *self, PyObject *args) if (!PyArg_UnpackTuple(args, "hasattr", 2, 2, &v, &name)) return NULL; - if (PyUnicode_Check(name)) { - name = _PyUnicode_AsDefaultEncodedString(name, NULL); - if (name == NULL) - return NULL; - } - - if (!PyString_Check(name)) { + if (!PyUnicode_Check(name)) { PyErr_SetString(PyExc_TypeError, "hasattr(): attribute name must be string"); return NULL; diff --git a/Python/ceval.c b/Python/ceval.c index 710a0d1..bb05a16 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -454,8 +454,19 @@ _Py_CheckRecursiveCall(char *where) return -1; } #endif + if (tstate->recursion_critical) + /* Somebody asked that we don't check for recursion. */ + return 0; + if (tstate->overflowed) { + if (tstate->recursion_depth > recursion_limit + 50) { + /* Overflowing while handling an overflow. Give up. */ + Py_FatalError("Cannot recover from stack overflow."); + } + return 0; + } if (tstate->recursion_depth > recursion_limit) { --tstate->recursion_depth; + tstate->overflowed = 1; PyErr_Format(PyExc_RuntimeError, "maximum recursion depth exceeded%s", where); @@ -2759,7 +2770,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals, vars into frame. This isn't too efficient right now. */ if (PyTuple_GET_SIZE(co->co_cellvars)) { int i, j, nargs, found; - char *cellname, *argname; + Py_UNICODE *cellname, *argname; PyObject *c; nargs = co->co_argcount; @@ -2776,13 +2787,13 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals, list so that we can march over it more efficiently? */ for (i = 0; i < PyTuple_GET_SIZE(co->co_cellvars); ++i) { - cellname = PyString_AS_STRING( + cellname = PyUnicode_AS_UNICODE( PyTuple_GET_ITEM(co->co_cellvars, i)); found = 0; for (j = 0; j < nargs; j++) { - argname = PyString_AS_STRING( + argname = PyUnicode_AS_UNICODE( PyTuple_GET_ITEM(co->co_varnames, j)); - if (strcmp(cellname, argname) == 0) { + if (Py_UNICODE_strcmp(cellname, argname) == 0) { c = PyCell_New(GETLOCAL(j)); if (c == NULL) goto fail; @@ -3428,7 +3439,7 @@ PyEval_GetFuncName(PyObject *func) if (PyMethod_Check(func)) return PyEval_GetFuncName(PyMethod_GET_FUNCTION(func)); else if (PyFunction_Check(func)) - return PyString_AsString(((PyFunctionObject*)func)->func_name); + return PyUnicode_AsString(((PyFunctionObject*)func)->func_name); else if (PyCFunction_Check(func)) return ((PyCFunctionObject*)func)->m_ml->ml_name; else @@ -4052,8 +4063,8 @@ import_all_from(PyObject *locals, PyObject *v) break; } if (skip_leading_underscores && - PyString_Check(name) && - PyString_AS_STRING(name)[0] == '_') + PyUnicode_Check(name) && + PyUnicode_AS_UNICODE(name)[0] == '_') { Py_DECREF(name); continue; diff --git a/Python/compile.c b/Python/compile.c index 359de58..fde4591 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -194,16 +194,16 @@ _Py_Mangle(PyObject *privateobj, PyObject *ident) { /* Name mangling: __private becomes _classname__private. This is independent from how the name is used. */ - const char *p, *name = PyString_AsString(ident); - char *buffer; + const Py_UNICODE *p, *name = PyUnicode_AS_UNICODE(ident); + Py_UNICODE *buffer; size_t nlen, plen; - if (privateobj == NULL || !PyString_Check(privateobj) || + if (privateobj == NULL || !PyUnicode_Check(privateobj) || name == NULL || name[0] != '_' || name[1] != '_') { Py_INCREF(ident); return ident; } - p = PyString_AsString(privateobj); - nlen = strlen(name); + p = PyUnicode_AS_UNICODE(privateobj); + nlen = Py_UNICODE_strlen(name); /* Don't mangle __id__ or names with dots. The only time a name with a dot can occur is when @@ -214,26 +214,26 @@ _Py_Mangle(PyObject *privateobj, PyObject *ident) mangling of the module name, e.g. __M.X. */ if ((name[nlen-1] == '_' && name[nlen-2] == '_') - || strchr(name, '.')) { + || Py_UNICODE_strchr(name, '.')) { Py_INCREF(ident); return ident; /* Don't mangle __whatever__ */ } /* Strip leading underscores from class name */ while (*p == '_') p++; - if (*p == '\0') { + if (*p == 0) { Py_INCREF(ident); return ident; /* Don't mangle if class is just underscores */ } - plen = strlen(p); - ident = PyString_FromStringAndSize(NULL, 1 + nlen + plen); + plen = Py_UNICODE_strlen(p); + ident = PyUnicode_FromStringAndSize(NULL, 1 + nlen + plen); if (!ident) return 0; /* ident = "_" + p[:plen] + name # i.e. 1+plen+nlen bytes */ - buffer = PyString_AS_STRING(ident); + buffer = PyUnicode_AS_UNICODE(ident); buffer[0] = '_'; - strncpy(buffer+1, p, plen); - strcpy(buffer+1+plen, name); + Py_UNICODE_strncpy(buffer+1, p, plen); + Py_UNICODE_strcpy(buffer+1+plen, name); return ident; } @@ -259,7 +259,7 @@ PyAST_Compile(mod_ty mod, const char *filename, PyCompilerFlags *flags, int merged; if (!__doc__) { - __doc__ = PyString_InternFromString("__doc__"); + __doc__ = PyUnicode_InternFromString("__doc__"); if (!__doc__) return NULL; } @@ -551,7 +551,7 @@ compiler_new_tmpname(struct compiler *c) { char tmpname[256]; PyOS_snprintf(tmpname, sizeof(tmpname), "_[%d]", ++c->u->u_tmpname); - return PyString_FromString(tmpname); + return PyUnicode_FromString(tmpname); } /* Allocate a new block and return a pointer to it. @@ -1143,7 +1143,7 @@ compiler_mod(struct compiler *c, mod_ty mod) int addNone = 1; static PyObject *module; if (!module) { - module = PyString_FromString(""); + module = PyUnicode_FromString(""); if (!module) return NULL; } @@ -1362,7 +1362,7 @@ compiler_visit_annotations(struct compiler *c, arguments_ty args, goto error; if (!return_str) { - return_str = PyString_InternFromString("return"); + return_str = PyUnicode_InternFromString("return"); if (!return_str) goto error; } @@ -1488,12 +1488,12 @@ compiler_class(struct compiler *c, stmt_ty s) /* initialize statics */ if (build_class == NULL) { - build_class = PyString_FromString("__build_class__"); + build_class = PyUnicode_FromString("__build_class__"); if (build_class == NULL) return 0; } if (locals == NULL) { - locals = PyString_FromString("__locals__"); + locals = PyUnicode_FromString("__locals__"); if (locals == NULL) return 0; } @@ -1533,7 +1533,7 @@ compiler_class(struct compiler *c, stmt_ty s) /* ... and store it into f_locals */ ADDOP_IN_SCOPE(c, STORE_LOCALS); /* load __name__ ... */ - str = PyString_InternFromString("__name__"); + str = PyUnicode_InternFromString("__name__"); if (!str || !compiler_nameop(c, str, Load)) { Py_XDECREF(str); compiler_exit_scope(c); @@ -1541,7 +1541,7 @@ compiler_class(struct compiler *c, stmt_ty s) } Py_DECREF(str); /* ... and store it as __module__ */ - str = PyString_InternFromString("__module__"); + str = PyUnicode_InternFromString("__module__"); if (!str || !compiler_nameop(c, str, Store)) { Py_XDECREF(str); compiler_exit_scope(c); @@ -1627,7 +1627,7 @@ compiler_lambda(struct compiler *c, expr_ty e) assert(e->kind == Lambda_kind); if (!name) { - name = PyString_InternFromString(""); + name = PyUnicode_InternFromString(""); if (!name) return 0; } @@ -2027,17 +2027,17 @@ compiler_import_as(struct compiler *c, identifier name, identifier asname) If there is a dot in name, we need to split it and emit a LOAD_ATTR for each name. */ - const char *src = PyString_AS_STRING(name); - const char *dot = strchr(src, '.'); + const Py_UNICODE *src = PyUnicode_AS_UNICODE(name); + const Py_UNICODE *dot = Py_UNICODE_strchr(src, '.'); if (dot) { /* Consume the base module name to get the first attribute */ src = dot + 1; while (dot) { /* NB src is only defined when dot != NULL */ PyObject *attr; - dot = strchr(src, '.'); - attr = PyString_FromStringAndSize(src, - dot ? dot - src : strlen(src)); + dot = Py_UNICODE_strchr(src, '.'); + attr = PyUnicode_FromUnicode(src, + dot ? dot - src : Py_UNICODE_strlen(src)); if (!attr) return -1; ADDOP_O(c, LOAD_ATTR, attr, names); @@ -2081,11 +2081,11 @@ compiler_import(struct compiler *c, stmt_ty s) } else { identifier tmp = alias->name; - const char *base = PyString_AS_STRING(alias->name); - char *dot = strchr(base, '.'); + const Py_UNICODE *base = PyUnicode_AS_UNICODE(alias->name); + Py_UNICODE *dot = Py_UNICODE_strchr(base, '.'); if (dot) - tmp = PyString_FromStringAndSize(base, - dot - base); + tmp = PyUnicode_FromUnicode(base, + dot - base); r = compiler_nameop(c, tmp, Store); if (dot) { Py_DECREF(tmp); @@ -2122,8 +2122,8 @@ compiler_from_import(struct compiler *c, stmt_ty s) } if (s->lineno > c->c_future->ff_lineno) { - if (!strcmp(PyString_AS_STRING(s->v.ImportFrom.module), - "__future__")) { + if (!PyUnicode_CompareWithASCIIString(s->v.ImportFrom.module, + "__future__")) { Py_DECREF(level); Py_DECREF(names); return compiler_error(c, @@ -2142,7 +2142,7 @@ compiler_from_import(struct compiler *c, stmt_ty s) alias_ty alias = (alias_ty)asdl_seq_GET(s->v.ImportFrom.names, i); identifier store_name; - if (i == 0 && *PyString_AS_STRING(alias->name) == '*') { + if (i == 0 && *PyUnicode_AS_UNICODE(alias->name) == '*') { assert(n == 1); ADDOP(c, IMPORT_STAR); return 1; @@ -2172,7 +2172,7 @@ compiler_assert(struct compiler *c, stmt_ty s) if (Py_OptimizeFlag) return 1; if (assertion_error == NULL) { - assertion_error = PyString_FromString("AssertionError"); + assertion_error = PyUnicode_FromString("AssertionError"); if (assertion_error == NULL) return 0; } @@ -2417,7 +2417,7 @@ compiler_nameop(struct compiler *c, identifier name, expr_context_ty ctx) /* First check for assignment to __debug__. Param? */ if ((ctx == Store || ctx == AugStore || ctx == Del) - && !strcmp(PyString_AS_STRING(name), "__debug__")) { + && !PyUnicode_CompareWithASCIIString(name, "__debug__")) { return compiler_error(c, "can not assign to __debug__"); } @@ -2455,7 +2455,7 @@ mangled = _Py_Mangle(c->u->u_private, name); } /* XXX Leave assert here, but handle __doc__ and the like better */ - assert(scope || PyString_AS_STRING(name)[0] == '_'); + assert(scope || PyUnicode_AS_UNICODE(name)[0] == '_'); switch (optype) { case OP_DEREF: @@ -2889,7 +2889,7 @@ compiler_genexp(struct compiler *c, expr_ty e) { static identifier name; if (!name) { - name = PyString_FromString(""); + name = PyUnicode_FromString(""); if (!name) return 0; } @@ -2904,7 +2904,7 @@ compiler_listcomp(struct compiler *c, expr_ty e) { static identifier name; if (!name) { - name = PyString_FromString(""); + name = PyUnicode_FromString(""); if (!name) return 0; } @@ -2919,7 +2919,7 @@ compiler_setcomp(struct compiler *c, expr_ty e) { static identifier name; if (!name) { - name = PyString_FromString(""); + name = PyUnicode_FromString(""); if (!name) return 0; } @@ -2957,8 +2957,8 @@ expr_constant(expr_ty e) case Name_kind: /* __debug__ is not assignable, so we can optimize * it away in if and while statements */ - if (strcmp(PyString_AS_STRING(e->v.Name.id), - "__debug__") == 0) + if (PyUnicode_CompareWithASCIIString(e->v.Name.id, + "__debug__") == 0) return ! Py_OptimizeFlag; /* fall through */ default: @@ -2999,12 +2999,12 @@ compiler_with(struct compiler *c, stmt_ty s) assert(s->kind == With_kind); if (!enter_attr) { - enter_attr = PyString_InternFromString("__enter__"); + enter_attr = PyUnicode_InternFromString("__enter__"); if (!enter_attr) return 0; } if (!exit_attr) { - exit_attr = PyString_InternFromString("__exit__"); + exit_attr = PyUnicode_InternFromString("__exit__"); if (!exit_attr) return 0; } diff --git a/Python/future.c b/Python/future.c index d6f11a4..2092f58 100644 --- a/Python/future.c +++ b/Python/future.c @@ -55,7 +55,7 @@ future_parse(PyFutureFeatures *ff, mod_ty mod, const char *filename) static PyObject *future; if (!future) { - future = PyString_InternFromString("__future__"); + future = PyUnicode_InternFromString("__future__"); if (!future) return 0; } diff --git a/Python/import.c b/Python/import.c index 75f1e01..0e4e50c 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1920,7 +1920,7 @@ PyImport_ImportFrozenModule(char *name) if (m == NULL) goto err_return; d = PyModule_GetDict(m); - s = PyString_InternFromString(name); + s = PyUnicode_InternFromString(name); if (s == NULL) goto err_return; err = PyDict_SetItemString(d, "__path__", s); @@ -1949,7 +1949,7 @@ PyImport_ImportModule(const char *name) PyObject *pname; PyObject *result; - pname = PyString_FromString(name); + pname = PyUnicode_FromString(name); if (pname == NULL) return NULL; result = PyImport_Import(pname); @@ -2084,12 +2084,12 @@ get_parent(PyObject *globals, char *buf, Py_ssize_t *p_buflen, int level) return Py_None; if (namestr == NULL) { - namestr = PyString_InternFromString("__name__"); + namestr = PyUnicode_InternFromString("__name__"); if (namestr == NULL) return NULL; } if (pathstr == NULL) { - pathstr = PyString_InternFromString("__path__"); + pathstr = PyUnicode_InternFromString("__path__"); if (pathstr == NULL) return NULL; } @@ -2097,9 +2097,18 @@ get_parent(PyObject *globals, char *buf, Py_ssize_t *p_buflen, int level) *buf = '\0'; *p_buflen = 0; modname = PyDict_GetItem(globals, namestr); - if (modname == NULL || !PyString_Check(modname)) + if (modname == NULL || (!PyString_Check(modname) && !PyUnicode_Check(modname))) return Py_None; + if (PyUnicode_Check(modname)) { + /* XXX need to support Unicode better */ + modname = _PyUnicode_AsDefaultEncodedString(modname, NULL); + if (!modname) { + PyErr_Clear(); + return NULL; + } + } + modpath = PyDict_GetItem(globals, pathstr); if (modpath != NULL) { Py_ssize_t len = PyString_GET_SIZE(modname); @@ -2254,13 +2263,23 @@ ensure_fromlist(PyObject *mod, PyObject *fromlist, char *buf, Py_ssize_t buflen, } return 0; } - if (!PyString_Check(item)) { + if (PyString_Check(item)) { + /* XXX there shouldn't be any str8 objects here */ + PyObject *uni = PyUnicode_DecodeASCII(PyString_AsString(item), + PyString_Size(item), + "strict"); + Py_DECREF(item); + if (!uni) + return 0; + item = uni; + } + if (!PyUnicode_Check(item)) { PyErr_SetString(PyExc_TypeError, - "Item in ``from list'' not a string"); + "Item in ``from list'' not a unicode string"); Py_DECREF(item); return 0; } - if (PyString_AS_STRING(item)[0] == '*') { + if (PyUnicode_AS_UNICODE(item)[0] == '*') { PyObject *all; Py_DECREF(item); /* See if the package defines __all__ */ @@ -2279,9 +2298,23 @@ ensure_fromlist(PyObject *mod, PyObject *fromlist, char *buf, Py_ssize_t buflen, } hasit = PyObject_HasAttr(mod, item); if (!hasit) { - char *subname = PyString_AS_STRING(item); + PyObject *item8; + char *subname; PyObject *submod; char *p; + if (!Py_FileSystemDefaultEncoding) { + item8 = PyUnicode_EncodeASCII(PyUnicode_AsUnicode(item), + PyUnicode_GetSize(item), + "strict"); + } else { + item8 = PyUnicode_AsEncodedObject(item, + Py_FileSystemDefaultEncoding, "strict"); + } + if (!item8) { + PyErr_SetString(PyExc_ValueError, "Cannot encode path item"); + return 0; + } + subname = PyBytes_AsString(item8); if (buflen + strlen(subname) >= MAXPATHLEN) { PyErr_SetString(PyExc_ValueError, "Module name too long"); @@ -2292,6 +2325,7 @@ ensure_fromlist(PyObject *mod, PyObject *fromlist, char *buf, Py_ssize_t buflen, *p++ = '.'; strcpy(p, subname); submod = import_submodule(mod, subname, buf); + Py_DECREF(item8); Py_XDECREF(submod); if (submod == NULL) { Py_DECREF(item); @@ -2515,10 +2549,10 @@ PyImport_Import(PyObject *module_name) /* Initialize constant string objects */ if (silly_list == NULL) { - import_str = PyString_InternFromString("__import__"); + import_str = PyUnicode_InternFromString("__import__"); if (import_str == NULL) return NULL; - builtins_str = PyString_InternFromString("__builtins__"); + builtins_str = PyUnicode_InternFromString("__builtins__"); if (builtins_str == NULL) return NULL; silly_list = Py_BuildValue("[s]", "__doc__"); diff --git a/Python/modsupport.c b/Python/modsupport.c index a272ce3..1ea08c3 100644 --- a/Python/modsupport.c +++ b/Python/modsupport.c @@ -65,7 +65,7 @@ Py_InitModule4(const char *name, PyMethodDef *methods, const char *doc, return NULL; d = PyModule_GetDict(m); if (methods != NULL) { - n = PyString_FromString(name); + n = PyUnicode_FromString(name); if (n == NULL) return NULL; for (ml = methods; ml->ml_name != NULL; ml++) { @@ -689,5 +689,5 @@ PyModule_AddIntConstant(PyObject *m, const char *name, long value) int PyModule_AddStringConstant(PyObject *m, const char *name, const char *value) { - return PyModule_AddObject(m, name, PyString_FromString(value)); + return PyModule_AddObject(m, name, PyUnicode_FromString(value)); } diff --git a/Python/pystate.c b/Python/pystate.c index 086789d..1914ba8 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -167,6 +167,8 @@ PyThreadState_New(PyInterpreterState *interp) tstate->frame = NULL; tstate->recursion_depth = 0; + tstate->overflowed = 0; + tstate->recursion_critical = 0; tstate->tracing = 0; tstate->use_tracing = 0; tstate->tick_counter = 0; diff --git a/Python/pythonrun.c b/Python/pythonrun.c index c2005f1..5daf7dd 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -1133,6 +1133,8 @@ PyErr_Display(PyObject *exception, PyObject *value, PyObject *tb) PyObject *f = PySys_GetObject("stderr"); Py_INCREF(value); if (f == NULL) + _PyObject_Dump(value); + if (f == NULL) fprintf(stderr, "lost sys.stderr\n"); else { fflush(stdout); diff --git a/Python/symtable.c b/Python/symtable.c index f3a2c78..5df7318 100644 --- a/Python/symtable.c +++ b/Python/symtable.c @@ -92,7 +92,7 @@ ste_repr(PySTEntryObject *ste) PyOS_snprintf(buf, sizeof(buf), "", - PyString_AS_STRING(ste->ste_name), + PyUnicode_AsString(ste->ste_name), PyInt_AS_LONG(ste->ste_id), ste->ste_lineno); return PyUnicode_FromString(buf); } @@ -190,7 +190,7 @@ static identifier top = NULL, lambda = NULL, genexpr = NULL, listcomp = NULL, setcomp = NULL; #define GET_IDENTIFIER(VAR) \ - ((VAR) ? (VAR) : ((VAR) = PyString_InternFromString(# VAR))) + ((VAR) ? (VAR) : ((VAR) = PyUnicode_InternFromString(# VAR))) #define DUPLICATE_ARGUMENT \ "duplicate argument '%s' in function definition" @@ -390,13 +390,13 @@ analyze_name(PySTEntryObject *ste, PyObject *scopes, PyObject *name, long flags, if (flags & DEF_PARAM) { PyErr_Format(PyExc_SyntaxError, "name '%s' is parameter and global", - PyString_AS_STRING(name)); + PyUnicode_AsString(name)); return 0; } if (flags & DEF_NONLOCAL) { PyErr_Format(PyExc_SyntaxError, "name '%s' is nonlocal and global", - PyString_AS_STRING(name)); + PyUnicode_AsString(name)); return 0; } SET_SCOPE(scopes, name, GLOBAL_EXPLICIT); @@ -410,7 +410,7 @@ analyze_name(PySTEntryObject *ste, PyObject *scopes, PyObject *name, long flags, if (flags & DEF_PARAM) { PyErr_Format(PyExc_SyntaxError, "name '%s' is parameter and nonlocal", - PyString_AS_STRING(name)); + PyUnicode_AsString(name)); return 0; } if (!bound) { @@ -421,7 +421,7 @@ analyze_name(PySTEntryObject *ste, PyObject *scopes, PyObject *name, long flags, if (!PySet_Contains(bound, name)) { PyErr_Format(PyExc_SyntaxError, "no binding for nonlocal '%s' found", - PyString_AS_STRING(name)); + PyUnicode_AsString(name)); return 0; } @@ -524,7 +524,7 @@ check_unoptimized(const PySTEntryObject* ste) { PyOS_snprintf(buf, sizeof(buf), "import * is not allowed in function '%.100s' " "because it is %s", - PyString_AS_STRING(ste->ste_name), trailer); + PyUnicode_AsString(ste->ste_name), trailer); break; } @@ -984,7 +984,7 @@ symtable_new_tmpname(struct symtable *st) PyOS_snprintf(tmpname, sizeof(tmpname), "_[%d]", ++st->st_cur->ste_tmpname); - tmp = PyString_InternFromString(tmpname); + tmp = PyUnicode_InternFromString(tmpname); if (!tmp) return 0; if (!symtable_add_def(st, tmp, DEF_LOCAL)) @@ -1129,7 +1129,7 @@ symtable_visit_stmt(struct symtable *st, stmt_ty s) asdl_seq *seq = s->v.Global.names; for (i = 0; i < asdl_seq_LEN(seq); i++) { identifier name = (identifier)asdl_seq_GET(seq, i); - char *c_name = PyString_AS_STRING(name); + char *c_name = PyUnicode_AsString(name); long cur = symtable_lookup(st, name); if (cur < 0) return 0; @@ -1156,7 +1156,7 @@ symtable_visit_stmt(struct symtable *st, stmt_ty s) asdl_seq *seq = s->v.Nonlocal.names; for (i = 0; i < asdl_seq_LEN(seq); i++) { identifier name = (identifier)asdl_seq_GET(seq, i); - char *c_name = PyString_AS_STRING(name); + char *c_name = PyUnicode_AsString(name); long cur = symtable_lookup(st, name); if (cur < 0) return 0; @@ -1316,7 +1316,7 @@ symtable_visit_expr(struct symtable *st, expr_ty e) static int symtable_implicit_arg(struct symtable *st, int pos) { - PyObject *id = PyString_FromFormat(".%d", pos); + PyObject *id = PyUnicode_FromFormat(".%d", pos); if (id == NULL) return 0; if (!symtable_add_def(st, id, DEF_PARAM)) { @@ -1425,10 +1425,10 @@ symtable_visit_alias(struct symtable *st, alias_ty a) */ PyObject *store_name; PyObject *name = (a->asname == NULL) ? a->name : a->asname; - const char *base = PyString_AS_STRING(name); - char *dot = strchr(base, '.'); + const Py_UNICODE *base = PyUnicode_AS_UNICODE(name); + Py_UNICODE *dot = Py_UNICODE_strchr(base, '.'); if (dot) { - store_name = PyString_FromStringAndSize(base, dot - base); + store_name = PyUnicode_FromUnicode(base, dot - base); if (!store_name) return 0; } @@ -1436,7 +1436,7 @@ symtable_visit_alias(struct symtable *st, alias_ty a) store_name = name; Py_INCREF(store_name); } - if (strcmp(PyString_AS_STRING(name), "*")) { + if (PyUnicode_CompareWithASCIIString(name, "*")) { int r = symtable_add_def(st, store_name, DEF_IMPORT); Py_DECREF(store_name); return r; diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 30e0180..1b7674b 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -280,7 +280,7 @@ trace_init(void) int i; for (i = 0; i < 7; ++i) { if (whatstrings[i] == NULL) { - name = PyString_InternFromString(whatnames[i]); + name = PyUnicode_InternFromString(whatnames[i]); if (name == NULL) return -1; whatstrings[i] = name; @@ -801,7 +801,7 @@ list_builtin_module_names(void) if (list == NULL) return NULL; for (i = 0; PyImport_Inittab[i].name != NULL; i++) { - PyObject *name = PyString_FromString( + PyObject *name = PyUnicode_FromString( PyImport_Inittab[i].name); if (name == NULL) break; -- cgit v0.12