summaryrefslogtreecommitdiffstats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
authorMartin v. Löwis <martin@v.loewis.de>2007-06-10 09:51:05 (GMT)
committerMartin v. Löwis <martin@v.loewis.de>2007-06-10 09:51:05 (GMT)
commit5b222135f8d2492713994f2cb003980e87ce6a72 (patch)
tree3ac3a6a1d7805360ed779e884ca6c4b3f000321f /Objects/unicodeobject.c
parent38e43c25eede3fa77d90ac8183cc0335f4861f4a (diff)
downloadcpython-5b222135f8d2492713994f2cb003980e87ce6a72.zip
cpython-5b222135f8d2492713994f2cb003980e87ce6a72.tar.gz
cpython-5b222135f8d2492713994f2cb003980e87ce6a72.tar.bz2
Make identifiers str (not str8) objects throughout.
This affects the parser, various object implementations, and all places that put identifiers into C string literals. In testing, a number of crashes occurred as code would fail when the recursion limit was reached (such as the Unicode interning dictionary having key/value pairs where key is not value). To solve these, I added an overflowed flag, which allows for 50 more recursions after the limit was reached and the exception was raised, and a recursion_critical flag, which indicates that recursion absolutely must be allowed, i.e. that a certain call must not cause a stack overflow exception. There are still some places where both str and str8 are accepted as identifiers; these should eventually be removed.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r--Objects/unicodeobject.c97
1 files changed, 94 insertions, 3 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e793418..87c5c99 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -458,8 +458,10 @@ PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
/* Copy the Unicode data into the new object */
if (u != NULL) {
Py_UNICODE *p = unicode->str;
- while ((*p++ = *u++))
- ;
+ while (size--)
+ *p++ = *u++;
+ /* Don't need to write trailing 0 because
+ that's already done by _PyUnicode_New */
}
return (PyObject *)unicode;
@@ -1184,6 +1186,16 @@ PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
return v;
}
+char*
+PyUnicode_AsString(PyObject *unicode)
+{
+ assert(PyUnicode_Check(unicode));
+ unicode = _PyUnicode_AsDefaultEncodedString(unicode, NULL);
+ if (!unicode)
+ return NULL;
+ return PyString_AsString(unicode);
+}
+
Py_UNICODE *PyUnicode_AsUnicode(PyObject *unicode)
{
if (!PyUnicode_Check(unicode)) {
@@ -3247,7 +3259,7 @@ PyObject *PyUnicode_DecodeASCII(const char *s,
goto onError;
}
}
- if (p - PyUnicode_AS_UNICODE(v) < PyString_GET_SIZE(v))
+ if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
goto onError;
Py_XDECREF(errorHandler);
@@ -5861,6 +5873,24 @@ int PyUnicode_Compare(PyObject *left,
return -1;
}
+int
+PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
+{
+ int i;
+ Py_UNICODE *id;
+ assert(PyUnicode_Check(uni));
+ id = PyUnicode_AS_UNICODE(uni);
+ /* Compare Unicode string and source character set string */
+ for (i = 0; id[i] && str[i]; i++)
+ if (id[i] != str[i])
+ return ((int)id[i] < (int)str[i]) ? -1 : 1;
+ if (id[i])
+ return 1; /* uni is longer */
+ if (str[i])
+ return -1; /* str is longer */
+ return 0;
+}
+
PyObject *PyUnicode_RichCompare(PyObject *left,
PyObject *right,
int op)
@@ -8671,7 +8701,13 @@ PyUnicode_InternInPlace(PyObject **p)
return;
}
}
+ /* It might be that the GetItem call fails even
+ though the key is present in the dictionary,
+ namely when this happens during a stack overflow. */
+ Py_ALLOW_RECURSION
t = PyDict_GetItem(interned, (PyObject *)s);
+ Py_END_ALLOW_RECURSION
+
if (t) {
Py_INCREF(t);
Py_DECREF(*p);
@@ -8679,10 +8715,13 @@ PyUnicode_InternInPlace(PyObject **p)
return;
}
+ PyThreadState_GET()->recursion_critical = 1;
if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
PyErr_Clear();
+ PyThreadState_GET()->recursion_critical = 0;
return;
}
+ PyThreadState_GET()->recursion_critical = 0;
/* The two references in interned are not counted by refcnt.
The deallocator will take care of this */
s->ob_refcnt -= 2;
@@ -8879,6 +8918,58 @@ unicode_iter(PyObject *seq)
return (PyObject *)it;
}
+size_t
+Py_UNICODE_strlen(const Py_UNICODE *u)
+{
+ int res = 0;
+ while(*u++)
+ res++;
+ return res;
+}
+
+Py_UNICODE*
+Py_UNICODE_strcpy(Py_UNICODE *s1, const Py_UNICODE *s2)
+{
+ Py_UNICODE *u = s1;
+ while ((*u++ = *s2++));
+ return s1;
+}
+
+Py_UNICODE*
+Py_UNICODE_strncpy(Py_UNICODE *s1, const Py_UNICODE *s2, size_t n)
+{
+ Py_UNICODE *u = s1;
+ while ((*u++ = *s2++))
+ if (n-- == 0)
+ break;
+ return s1;
+}
+
+int
+Py_UNICODE_strcmp(const Py_UNICODE *s1, const Py_UNICODE *s2)
+{
+ while (*s1 && *s2 && *s1 == *s2)
+ s1++, s2++;
+ if (*s1 && *s2)
+ return (*s1 < *s2) ? -1 : +1;
+ if (*s1)
+ return 1;
+ if (*s2)
+ return -1;
+ return 0;
+}
+
+Py_UNICODE*
+Py_UNICODE_strchr(const Py_UNICODE *s, Py_UNICODE c)
+{
+ const Py_UNICODE *p;
+ for (p = s; *p; p++)
+ if (*p == c)
+ return (Py_UNICODE*)p;
+ return NULL;
+}
+
+
#ifdef __cplusplus
}
#endif