From e63d7dae90d15957303688285daeebc2e931e04b Mon Sep 17 00:00:00 2001
From: Nikita Sobolev
Date: Thu, 6 Oct 2022 18:20:22 +0300
Subject: gh-94808: Cover `PyUnicode_Count` in CAPI (#96929)

---
 Lib/test/test_unicode.py    | 38 ++++++++++++++++++++++++++++++++++++++
 Modules/_testcapi/unicode.c | 21 +++++++++++++++++++++
 2 files changed, 59 insertions(+)

diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 63bccb7..30faaaf 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -2945,6 +2945,44 @@ class CAPITest(unittest.TestCase):
         self.assertEqual(unicode_asutf8andsize(nonbmp), (b'\xf4\x8f\xbf\xbf', 4))
         self.assertRaises(UnicodeEncodeError, unicode_asutf8andsize, 'a\ud800b\udfffc')
 
+    # Test PyUnicode_Count()
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_count(self):
+        from _testcapi import unicode_count
+
+        st = 'abcabd'
+        self.assertEqual(unicode_count(st, 'a', 0, len(st)), 2)
+        self.assertEqual(unicode_count(st, 'ab', 0, len(st)), 2)
+        self.assertEqual(unicode_count(st, 'abc', 0, len(st)), 1)
+        self.assertEqual(unicode_count(st, 'а', 0, len(st)), 0)  # cyrillic "a"
+        # start < end
+        self.assertEqual(unicode_count(st, 'a', 3, len(st)), 1)
+        self.assertEqual(unicode_count(st, 'a', 4, len(st)), 0)
+        self.assertEqual(unicode_count(st, 'a', 0, sys.maxsize), 2)
+        # start >= end
+        self.assertEqual(unicode_count(st, 'abc', 0, 0), 0)
+        self.assertEqual(unicode_count(st, 'a', 3, 2), 0)
+        self.assertEqual(unicode_count(st, 'a', sys.maxsize, 5), 0)
+        # negative
+        self.assertEqual(unicode_count(st, 'ab', -len(st), -1), 2)
+        self.assertEqual(unicode_count(st, 'a', -len(st), -3), 1)
+        # wrong args
+        self.assertRaises(TypeError, unicode_count, 'a', 'a')
+        self.assertRaises(TypeError, unicode_count, 'a', 'a', 1)
+        self.assertRaises(TypeError, unicode_count, 1, 'a', 0, 1)
+        self.assertRaises(TypeError, unicode_count, 'a', 1, 0, 1)
+        # empty string
+        self.assertEqual(unicode_count('abc', '', 0, 3), 4)
+        self.assertEqual(unicode_count('abc', '', 1, 3), 3)
+        self.assertEqual(unicode_count('', '', 0, 1), 1)
+        self.assertEqual(unicode_count('', 'a', 0, 1), 0)
+        # different unicode kinds
+        for uni in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
+            for ch in uni:
+                self.assertEqual(unicode_count(uni, ch, 0, len(uni)), 1)
+                self.assertEqual(unicode_count(st, ch, 0, len(st)), 0)
+
     # Test PyUnicode_FindChar()
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c
index d0f1e2a..d5c4a9e 100644
--- a/Modules/_testcapi/unicode.c
+++ b/Modules/_testcapi/unicode.c
@@ -224,6 +224,26 @@ unicode_asutf8andsize(PyObject *self, PyObject *args)
 }
 
 static PyObject *
+unicode_count(PyObject *self, PyObject *args)
+{
+    PyObject *str;
+    PyObject *substr;
+    Py_ssize_t result;
+    Py_ssize_t start, end;
+
+    if (!PyArg_ParseTuple(args, "UUnn:unicode_count", &str, &substr,
+                          &start, &end)) {
+        return NULL;
+    }
+
+    result = PyUnicode_Count(str, substr, start, end);
+    if (result == -1)
+        return NULL;
+    else
+        return PyLong_FromSsize_t(result);
+}
+
+static PyObject *
 unicode_findchar(PyObject *self, PyObject *args)
 {
     PyObject *str;
@@ -696,6 +716,7 @@ static PyMethodDef TestMethods[] = {
     {"unicode_asucs4", unicode_asucs4, METH_VARARGS},
     {"unicode_asutf8", unicode_asutf8, METH_VARARGS},
     {"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS},
+    {"unicode_count", unicode_count, METH_VARARGS},
     {"unicode_findchar", unicode_findchar, METH_VARARGS},
     {"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
     {NULL},
-- 
cgit v0.12