diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2007-06-10 09:51:05 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2007-06-10 09:51:05 (GMT) |
commit | 5b222135f8d2492713994f2cb003980e87ce6a72 (patch) | |
tree | 3ac3a6a1d7805360ed779e884ca6c4b3f000321f /Objects/unicodeobject.c | |
parent | 38e43c25eede3fa77d90ac8183cc0335f4861f4a (diff) | |
download | cpython-5b222135f8d2492713994f2cb003980e87ce6a72.zip cpython-5b222135f8d2492713994f2cb003980e87ce6a72.tar.gz cpython-5b222135f8d2492713994f2cb003980e87ce6a72.tar.bz2 |
Make identifiers str (not str8) objects throughout.
This affects the parser, various object implementations,
and all places that put identifiers into C string literals.
In testing, a number of crashes occurred because code would
fail once the recursion limit was reached (for example, the
Unicode interning dictionary ended up with key/value pairs where
the key is not the value). To solve these, I added an overflowed
flag, which allows for 50 more recursions after the
limit was reached and the exception was raised, and
a recursion_critical flag, which indicates that recursion
absolutely must be allowed, i.e. that a certain call
must not cause a stack overflow exception.
There are still some places where both str and str8 are
accepted as identifiers; these should eventually be
removed.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 97 |
1 file changed, 94 insertions, 3 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e793418..87c5c99 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -458,8 +458,10 @@ PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size) /* Copy the Unicode data into the new object */ if (u != NULL) { Py_UNICODE *p = unicode->str; - while ((*p++ = *u++)) - ; + while (size--) + *p++ = *u++; + /* Don't need to write trailing 0 because + that's already done by _PyUnicode_New */ } return (PyObject *)unicode; @@ -1184,6 +1186,16 @@ PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode, return v; } +char* +PyUnicode_AsString(PyObject *unicode) +{ + assert(PyUnicode_Check(unicode)); + unicode = _PyUnicode_AsDefaultEncodedString(unicode, NULL); + if (!unicode) + return NULL; + return PyString_AsString(unicode); +} + Py_UNICODE *PyUnicode_AsUnicode(PyObject *unicode) { if (!PyUnicode_Check(unicode)) { @@ -3247,7 +3259,7 @@ PyObject *PyUnicode_DecodeASCII(const char *s, goto onError; } } - if (p - PyUnicode_AS_UNICODE(v) < PyString_GET_SIZE(v)) + if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v)) if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0) goto onError; Py_XDECREF(errorHandler); @@ -5861,6 +5873,24 @@ int PyUnicode_Compare(PyObject *left, return -1; } +int +PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) +{ + int i; + Py_UNICODE *id; + assert(PyUnicode_Check(uni)); + id = PyUnicode_AS_UNICODE(uni); + /* Compare Unicode string and source character set string */ + for (i = 0; id[i] && str[i]; i++) + if (id[i] != str[i]) + return ((int)id[i] < (int)str[i]) ? 
-1 : 1; + if (id[i]) + return 1; /* uni is longer */ + if (str[i]) + return -1; /* str is longer */ + return 0; +} + PyObject *PyUnicode_RichCompare(PyObject *left, PyObject *right, int op) @@ -8671,7 +8701,13 @@ PyUnicode_InternInPlace(PyObject **p) return; } } + /* It might be that the GetItem call fails even + though the key is present in the dictionary, + namely when this happens during a stack overflow. */ + Py_ALLOW_RECURSION t = PyDict_GetItem(interned, (PyObject *)s); + Py_END_ALLOW_RECURSION + if (t) { Py_INCREF(t); Py_DECREF(*p); @@ -8679,10 +8715,13 @@ PyUnicode_InternInPlace(PyObject **p) return; } + PyThreadState_GET()->recursion_critical = 1; if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) { PyErr_Clear(); + PyThreadState_GET()->recursion_critical = 0; return; } + PyThreadState_GET()->recursion_critical = 0; /* The two references in interned are not counted by refcnt. The deallocator will take care of this */ s->ob_refcnt -= 2; @@ -8879,6 +8918,58 @@ unicode_iter(PyObject *seq) return (PyObject *)it; } +size_t +Py_UNICODE_strlen(const Py_UNICODE *u) +{ + int res = 0; + while(*u++) + res++; + return res; +} + +Py_UNICODE* +Py_UNICODE_strcpy(Py_UNICODE *s1, const Py_UNICODE *s2) +{ + Py_UNICODE *u = s1; + while ((*u++ = *s2++)); + return s1; +} + +Py_UNICODE* +Py_UNICODE_strncpy(Py_UNICODE *s1, const Py_UNICODE *s2, size_t n) +{ + Py_UNICODE *u = s1; + while ((*u++ = *s2++)) + if (n-- == 0) + break; + return s1; +} + +int +Py_UNICODE_strcmp(const Py_UNICODE *s1, const Py_UNICODE *s2) +{ + while (*s1 && *s2 && *s1 == *s2) + s1++, s2++; + if (*s1 && *s2) + return (*s1 < *s2) ? -1 : +1; + if (*s1) + return 1; + if (*s2) + return -1; + return 0; +} + +Py_UNICODE* +Py_UNICODE_strchr(const Py_UNICODE *s, Py_UNICODE c) +{ + const Py_UNICODE *p; + for (p = s; *p; p++) + if (*p == c) + return (Py_UNICODE*)p; + return NULL; +} + + #ifdef __cplusplus } #endif |