diff options
author | Xiang Zhang <angwerzx@126.com> | 2016-12-20 14:52:33 (GMT) |
---|---|---|
committer | Xiang Zhang <angwerzx@126.com> | 2016-12-20 14:52:33 (GMT) |
commit | b211068f5c8e2535ab2dd7f4c43325bbf5b30fad (patch) | |
tree | d18683bf67a43286f7184b39a8e3b49f036cc466 | |
parent | 38f225dd486ab69779eab3cae4fa2375f6c2d8d6 (diff) | |
download | cpython-b211068f5c8e2535ab2dd7f4c43325bbf5b30fad.zip cpython-b211068f5c8e2535ab2dd7f4c43325bbf5b30fad.tar.gz cpython-b211068f5c8e2535ab2dd7f4c43325bbf5b30fad.tar.bz2 |
Issue #28822: Adjust indices handling of PyUnicode_FindChar().
-rw-r--r-- | Doc/c-api/unicode.rst | 3 |
-rw-r--r-- | Lib/test/test_unicode.py | 23 |
-rw-r--r-- | Misc/NEWS | 3 |
-rw-r--r-- | Modules/_testcapimodule.c | 22 |
-rw-r--r-- | Objects/unicodeobject.c | 12 |
5 files changed, 55 insertions, 8 deletions
diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index b31d689..b57d70a 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -1625,6 +1625,9 @@ They all return *NULL* or ``-1`` if an exception occurs. .. versionadded:: 3.3 + .. versionchanged:: 3.7 + *start* and *end* are now adjusted to behave like ``str[start:end]``. + .. c:function:: Py_ssize_t PyUnicode_Count(PyObject *str, PyObject *substr, \ Py_ssize_t start, Py_ssize_t end) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 883c362..fb77ffb 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2728,6 +2728,29 @@ class CAPITest(unittest.TestCase): self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0') self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff') + # Test PyUnicode_FindChar() + @support.cpython_only + def test_findchar(self): + from _testcapi import unicode_findchar + + for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1": + for i, ch in enumerate(str): + self.assertEqual(unicode_findchar(str, ord(ch), 0, len(str), 1), i) + self.assertEqual(unicode_findchar(str, ord(ch), 0, len(str), -1), i) + + str = "!>_<!" 
+ self.assertEqual(unicode_findchar(str, 0x110000, 0, len(str), 1), -1) + self.assertEqual(unicode_findchar(str, 0x110000, 0, len(str), -1), -1) + # start < end + self.assertEqual(unicode_findchar(str, ord('!'), 1, len(str)+1, 1), 4) + self.assertEqual(unicode_findchar(str, ord('!'), 1, len(str)+1, -1), 4) + # start >= end + self.assertEqual(unicode_findchar(str, ord('!'), 0, 0, 1), -1) + self.assertEqual(unicode_findchar(str, ord('!'), len(str), 0, 1), -1) + # negative + self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, 1), 0) + self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, -1), 0) + # Test PyUnicode_CopyCharacters() @support.cpython_only def test_copycharacters(self): @@ -545,6 +545,9 @@ Windows C API ----- +- Issue #28822: The indices parameters *start* and *end* of PyUnicode_FindChar() + are now adjusted to behave like ``str[start:end]``. + - Issue #28808: PyUnicode_CompareWithASCIIString() now never raises exceptions. - Issue #28761: The fields name and doc of structures PyMemberDef, PyGetSetDef, diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 8d4346c..ef5f9d4 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1888,6 +1888,27 @@ unicode_asucs4(PyObject *self, PyObject *args) } static PyObject * +unicode_findchar(PyObject *self, PyObject *args) +{ + PyObject *str; + int direction; + unsigned int ch; + Py_ssize_t result; + Py_ssize_t start, end; + + if (!PyArg_ParseTuple(args, "UInni:unicode_findchar", &str, &ch, + &start, &end, &direction)) { + return NULL; + } + + result = PyUnicode_FindChar(str, (Py_UCS4)ch, start, end, direction); + if (result == -2) + return NULL; + else + return PyLong_FromSsize_t(result); +} + +static PyObject * unicode_copycharacters(PyObject *self, PyObject *args) { PyObject *from, *to, *to_copy; @@ -4121,6 +4142,7 @@ static PyMethodDef TestMethods[] = { {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS}, 
{"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS}, {"unicode_asucs4", unicode_asucs4, METH_VARARGS}, + {"unicode_findchar", unicode_findchar, METH_VARARGS}, {"unicode_copycharacters", unicode_copycharacters, METH_VARARGS}, {"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS}, {"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS}, diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3fdce82..bbda4d8 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9461,16 +9461,12 @@ PyUnicode_FindChar(PyObject *str, Py_UCS4 ch, int direction) { int kind; - Py_ssize_t result; + Py_ssize_t len, result; if (PyUnicode_READY(str) == -1) return -2; - if (start < 0 || end < 0) { - PyErr_SetString(PyExc_IndexError, "string index out of range"); - return -2; - } - if (end > PyUnicode_GET_LENGTH(str)) - end = PyUnicode_GET_LENGTH(str); - if (start >= end) + len = PyUnicode_GET_LENGTH(str); + ADJUST_INDICES(start, end, len); + if (end - start < 1) return -1; kind = PyUnicode_KIND(str); result = findchar(PyUnicode_1BYTE_DATA(str) + kind*start, |