From 3a645e4dd4eebbfbbfad8443558bb3b879e23896 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lemburg?= Date: Tue, 16 Jan 2001 11:54:12 +0000 Subject: Added checks to prevent PyUnicode_Count() from dumping core in case the parameters are out of bounds and fixed error handling for .count(), .startswith() and .endswith() for the case of mixed string/Unicode objects. This patch adds Python-style index semantics to PyUnicode_Count() indices (including the special handling of negative indices). The patch is an extended version of patch #103249 submitted by Michael Hudson (mwh) on SF. It also includes new test cases. --- Lib/test/string_tests.py | 4 ++++ Lib/test/test_unicode.py | 7 +++++++ Objects/stringobject.c | 37 ++++++++++++++++++++++++++----------- Objects/unicodeobject.c | 27 +++++++++++++++++++-------- 4 files changed, 56 insertions(+), 19 deletions(-) diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index d4041be..067ccca 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -53,6 +53,10 @@ def run_method_tests(test): test('capitalize', ' hello ', ' hello ') test('capitalize', 'hello ', 'Hello ') + + test('count', 'aaa', 3, 'a') + test('count', 'aaa', 0, 'b') + test('find', 'abcdefghiabc', 0, 'abc') test('find', 'abcdefghiabc', 9, 'abc', 1) test('find', 'abcdefghiabc', -1, 'def', 4) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 579bab1..c71f927 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -32,6 +32,13 @@ def test(method, input, output, *args): test('capitalize', u' hello ', u' hello ') test('capitalize', u'hello ', u'Hello ') +test('count', u'aaa', 3, u'a') +test('count', u'aaa', 0, u'b') +test('count', 'aaa', 3, u'a') +test('count', 'aaa', 0, u'b') +test('count', u'aaa', 3, 'a') +test('count', u'aaa', 0, 'b') + test('title', u' hello ', u' Hello ') test('title', u'hello ', u'Hello ') test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String') diff 
--git a/Objects/stringobject.c b/Objects/stringobject.c index 091ede7..eed4687 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -1236,9 +1236,14 @@ string_count(PyStringObject *self, PyObject *args) sub = PyString_AS_STRING(subobj); n = PyString_GET_SIZE(subobj); } - else if (PyUnicode_Check(subobj)) - return PyInt_FromLong( - PyUnicode_Count((PyObject *)self, subobj, i, last)); + else if (PyUnicode_Check(subobj)) { + int count; + count = PyUnicode_Count((PyObject *)self, subobj, i, last); + if (count == -1) + return NULL; + else + return PyInt_FromLong((long) count); + } else if (PyObject_AsCharBuffer(subobj, &sub, &n)) return NULL; @@ -1637,10 +1642,15 @@ string_startswith(PyStringObject *self, PyObject *args) prefix = PyString_AS_STRING(subobj); plen = PyString_GET_SIZE(subobj); } - else if (PyUnicode_Check(subobj)) - return PyInt_FromLong( - PyUnicode_Tailmatch((PyObject *)self, - subobj, start, end, -1)); + else if (PyUnicode_Check(subobj)) { + int rc; + rc = PyUnicode_Tailmatch((PyObject *)self, + subobj, start, end, -1); + if (rc == -1) + return NULL; + else + return PyInt_FromLong((long) rc); + } else if (PyObject_AsCharBuffer(subobj, &prefix, &plen)) return NULL; @@ -1690,10 +1700,15 @@ string_endswith(PyStringObject *self, PyObject *args) suffix = PyString_AS_STRING(subobj); slen = PyString_GET_SIZE(subobj); } - else if (PyUnicode_Check(subobj)) - return PyInt_FromLong( - PyUnicode_Tailmatch((PyObject *)self, - subobj, start, end, +1)); + else if (PyUnicode_Check(subobj)) { + int rc; + rc = PyUnicode_Tailmatch((PyObject *)self, + subobj, start, end, +1); + if (rc == -1) + return NULL; + else + return PyInt_FromLong((long) rc); + } else if (PyObject_AsCharBuffer(subobj, &suffix, &slen)) return NULL; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 8f7b354..a3678d5 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1976,7 +1976,7 @@ PyObject *PyUnicode_DecodeCharmap(const char *s, x = Py_None; 
Py_INCREF(x); } else - goto onError; + goto onError; } /* Apply mapping */ @@ -2014,9 +2014,9 @@ PyObject *PyUnicode_DecodeCharmap(const char *s, (targetsize << 2); extrachars += needed; if (_PyUnicode_Resize(v, PyUnicode_GET_SIZE(v) + needed)) { - Py_DECREF(x); - goto onError; - } + Py_DECREF(x); + goto onError; + } p = PyUnicode_AS_UNICODE(v) + oldpos; } Py_UNICODE_COPY(p, @@ -2112,7 +2112,7 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p, x = Py_None; Py_INCREF(x); } else - goto onError; + goto onError; } /* Apply mapping */ @@ -2150,9 +2150,9 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p, (targetsize << 2); extrachars += needed; if (_PyString_Resize(&v, PyString_GET_SIZE(v) + needed)) { - Py_DECREF(x); - goto onError; - } + Py_DECREF(x); + goto onError; + } s = PyString_AS_STRING(v) + oldpos; } memcpy(s, @@ -2392,6 +2392,17 @@ int count(PyUnicodeObject *self, { int count = 0; + if (start < 0) + start += self->length; + if (start < 0) + start = 0; + if (end > self->length) + end = self->length; + if (end < 0) + end += self->length; + if (end < 0) + end = 0; + if (substring->length == 0) return (end - start + 1); -- cgit v0.12