diff options
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 353 |
1 files changed, 144 insertions, 209 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index eb83312..e135638 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9194,75 +9194,6 @@ _PyUnicode_InsertThousandsGrouping( return count; } -static Py_ssize_t -unicode_count_impl(PyObject *str, - PyObject *substr, - Py_ssize_t start, - Py_ssize_t end) -{ - assert(PyUnicode_Check(str)); - assert(PyUnicode_Check(substr)); - - Py_ssize_t result; - int kind1, kind2; - const void *buf1 = NULL, *buf2 = NULL; - Py_ssize_t len1, len2; - - kind1 = PyUnicode_KIND(str); - kind2 = PyUnicode_KIND(substr); - if (kind1 < kind2) - return 0; - - len1 = PyUnicode_GET_LENGTH(str); - len2 = PyUnicode_GET_LENGTH(substr); - ADJUST_INDICES(start, end, len1); - if (end - start < len2) - return 0; - - buf1 = PyUnicode_DATA(str); - buf2 = PyUnicode_DATA(substr); - if (kind2 != kind1) { - buf2 = unicode_askind(kind2, buf2, len2, kind1); - if (!buf2) - goto onError; - } - - // We don't reuse `anylib_count` here because of the explicit casts. - switch (kind1) { - case PyUnicode_1BYTE_KIND: - result = ucs1lib_count( - ((const Py_UCS1*)buf1) + start, end - start, - buf2, len2, PY_SSIZE_T_MAX - ); - break; - case PyUnicode_2BYTE_KIND: - result = ucs2lib_count( - ((const Py_UCS2*)buf1) + start, end - start, - buf2, len2, PY_SSIZE_T_MAX - ); - break; - case PyUnicode_4BYTE_KIND: - result = ucs4lib_count( - ((const Py_UCS4*)buf1) + start, end - start, - buf2, len2, PY_SSIZE_T_MAX - ); - break; - default: - Py_UNREACHABLE(); - } - - assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr))); - if (kind2 != kind1) - PyMem_Free((void *)buf2); - - return result; - onError: - assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr))); - if (kind2 != kind1) - PyMem_Free((void *)buf2); - return -1; -} - Py_ssize_t PyUnicode_Count(PyObject *str, PyObject *substr, @@ -11131,47 +11062,87 @@ PyUnicode_AppendAndDel(PyObject **pleft, PyObject *right) Py_XDECREF(right); } -/* -Wraps asciilib_parse_args_finds() and additionally ensures that the -first argument is a unicode object. -*/ +/*[clinic input] +@text_signature "($self, sub[, start[, end]], /)" +str.count as unicode_count -> Py_ssize_t -static inline int -parse_args_finds_unicode(const char * function_name, PyObject *args, - PyObject **substring, - Py_ssize_t *start, Py_ssize_t *end) -{ - if (asciilib_parse_args_finds(function_name, args, substring, start, end)) { - if (ensure_unicode(*substring) < 0) - return 0; - return 1; - } - return 0; -} + self as str: self + sub as substr: unicode + start: slice_index(accept={int, NoneType}, c_default='0') = None + end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None + / -PyDoc_STRVAR(count__doc__, - "S.count(sub[, start[, end]]) -> int\n\ -\n\ -Return the number of non-overlapping occurrences of substring sub in\n\ -string S[start:end]. Optional arguments start and end are\n\ -interpreted as in slice notation."); +Return the number of non-overlapping occurrences of substring sub in string S[start:end]. -static PyObject * -unicode_count(PyObject *self, PyObject *args) +Optional arguments start and end are interpreted as in slice notation. +[clinic start generated code]*/ + +static Py_ssize_t +unicode_count_impl(PyObject *str, PyObject *substr, Py_ssize_t start, + Py_ssize_t end) +/*[clinic end generated code: output=8fcc3aef0b18edbf input=6f168ffd94be8785]*/ { - PyObject *substring = NULL; /* initialize to fix a compiler warning */ - Py_ssize_t start = 0; - Py_ssize_t end = PY_SSIZE_T_MAX; + assert(PyUnicode_Check(str)); + assert(PyUnicode_Check(substr)); + Py_ssize_t result; + int kind1, kind2; + const void *buf1 = NULL, *buf2 = NULL; + Py_ssize_t len1, len2; - if (!parse_args_finds_unicode("count", args, &substring, &start, &end)) - return NULL; + kind1 = PyUnicode_KIND(str); + kind2 = PyUnicode_KIND(substr); + if (kind1 < kind2) + return 0; - result = unicode_count_impl(self, substring, start, end); - if (result == -1) - return NULL; + len1 = PyUnicode_GET_LENGTH(str); + len2 = PyUnicode_GET_LENGTH(substr); + ADJUST_INDICES(start, end, len1); + if (end - start < len2) + return 0; - return PyLong_FromSsize_t(result); + buf1 = PyUnicode_DATA(str); + buf2 = PyUnicode_DATA(substr); + if (kind2 != kind1) { + buf2 = unicode_askind(kind2, buf2, len2, kind1); + if (!buf2) + goto onError; + } + + // We don't reuse `anylib_count` here because of the explicit casts. + switch (kind1) { + case PyUnicode_1BYTE_KIND: + result = ucs1lib_count( + ((const Py_UCS1*)buf1) + start, end - start, + buf2, len2, PY_SSIZE_T_MAX + ); + break; + case PyUnicode_2BYTE_KIND: + result = ucs2lib_count( + ((const Py_UCS2*)buf1) + start, end - start, + buf2, len2, PY_SSIZE_T_MAX + ); + break; + case PyUnicode_4BYTE_KIND: + result = ucs4lib_count( + ((const Py_UCS4*)buf1) + start, end - start, + buf2, len2, PY_SSIZE_T_MAX + ); + break; + default: + Py_UNREACHABLE(); + } + + assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr))); + if (kind2 != kind1) + PyMem_Free((void *)buf2); + + return result; + onError: + assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr))); + if (kind2 != kind1) + PyMem_Free((void *)buf2); + return -1; } /*[clinic input] @@ -11282,33 +11253,25 @@ unicode_expandtabs_impl(PyObject *self, int tabsize) return NULL; } -PyDoc_STRVAR(find__doc__, - "S.find(sub[, start[, end]]) -> int\n\ -\n\ -Return the lowest index in S where substring sub is found,\n\ -such that sub is contained within S[start:end]. Optional\n\ -arguments start and end are interpreted as in slice notation.\n\ -\n\ -Return -1 on failure."); - -static PyObject * -unicode_find(PyObject *self, PyObject *args) -{ - /* initialize variables to prevent gcc warning */ - PyObject *substring = NULL; - Py_ssize_t start = 0; - Py_ssize_t end = 0; - Py_ssize_t result; - - if (!parse_args_finds_unicode("find", args, &substring, &start, &end)) - return NULL; +/*[clinic input] +str.find as unicode_find = str.count - result = any_find_slice(self, substring, start, end, 1); +Return the lowest index in S where substring sub is found, such that sub is contained within S[start:end]. - if (result == -2) - return NULL; +Optional arguments start and end are interpreted as in slice notation. +Return -1 on failure. +[clinic start generated code]*/ - return PyLong_FromSsize_t(result); +static Py_ssize_t +unicode_find_impl(PyObject *str, PyObject *substr, Py_ssize_t start, + Py_ssize_t end) +/*[clinic end generated code: output=51dbe6255712e278 input=4a89d2d68ef57256]*/ +{ + Py_ssize_t result = any_find_slice(str, substr, start, end, 1); + if (result < 0) { + return -1; + } + return result; } static PyObject * @@ -11351,38 +11314,28 @@ unicode_hash(PyObject *self) return x; } -PyDoc_STRVAR(index__doc__, - "S.index(sub[, start[, end]]) -> int\n\ -\n\ -Return the lowest index in S where substring sub is found,\n\ -such that sub is contained within S[start:end]. Optional\n\ -arguments start and end are interpreted as in slice notation.\n\ -\n\ -Raises ValueError when the substring is not found."); - -static PyObject * -unicode_index(PyObject *self, PyObject *args) -{ - /* initialize variables to prevent gcc warning */ - Py_ssize_t result; - PyObject *substring = NULL; - Py_ssize_t start = 0; - Py_ssize_t end = 0; - - if (!parse_args_finds_unicode("index", args, &substring, &start, &end)) - return NULL; +/*[clinic input] +str.index as unicode_index = str.count - result = any_find_slice(self, substring, start, end, 1); +Return the lowest index in S where substring sub is found, such that sub is contained within S[start:end]. - if (result == -2) - return NULL; +Optional arguments start and end are interpreted as in slice notation. +Raises ValueError when the substring is not found. +[clinic start generated code]*/ - if (result < 0) { +static Py_ssize_t +unicode_index_impl(PyObject *str, PyObject *substr, Py_ssize_t start, + Py_ssize_t end) +/*[clinic end generated code: output=77558288837cdf40 input=d986aeac0be14a1c]*/ +{ + Py_ssize_t result = any_find_slice(str, substr, start, end, 1); + if (result == -1) { PyErr_SetString(PyExc_ValueError, "substring not found"); - return NULL; } - - return PyLong_FromSsize_t(result); + else if (result < 0) { + return -1; + } + return result; } /*[clinic input] @@ -12462,67 +12415,49 @@ unicode_repr(PyObject *unicode) return repr; } -PyDoc_STRVAR(rfind__doc__, - "S.rfind(sub[, start[, end]]) -> int\n\ -\n\ -Return the highest index in S where substring sub is found,\n\ -such that sub is contained within S[start:end]. Optional\n\ -arguments start and end are interpreted as in slice notation.\n\ -\n\ -Return -1 on failure."); - -static PyObject * -unicode_rfind(PyObject *self, PyObject *args) -{ - /* initialize variables to prevent gcc warning */ - PyObject *substring = NULL; - Py_ssize_t start = 0; - Py_ssize_t end = 0; - Py_ssize_t result; - - if (!parse_args_finds_unicode("rfind", args, &substring, &start, &end)) - return NULL; - - result = any_find_slice(self, substring, start, end, -1); - - if (result == -2) - return NULL; +/*[clinic input] +str.rfind as unicode_rfind = str.count - return PyLong_FromSsize_t(result); -} +Return the highest index in S where substring sub is found, such that sub is contained within S[start:end]. -PyDoc_STRVAR(rindex__doc__, - "S.rindex(sub[, start[, end]]) -> int\n\ -\n\ -Return the highest index in S where substring sub is found,\n\ -such that sub is contained within S[start:end]. Optional\n\ -arguments start and end are interpreted as in slice notation.\n\ -\n\ -Raises ValueError when the substring is not found."); +Optional arguments start and end are interpreted as in slice notation. +Return -1 on failure. +[clinic start generated code]*/ -static PyObject * -unicode_rindex(PyObject *self, PyObject *args) +static Py_ssize_t +unicode_rfind_impl(PyObject *str, PyObject *substr, Py_ssize_t start, + Py_ssize_t end) +/*[clinic end generated code: output=880b29f01dd014c8 input=898361fb71f59294]*/ { - /* initialize variables to prevent gcc warning */ - PyObject *substring = NULL; - Py_ssize_t start = 0; - Py_ssize_t end = 0; - Py_ssize_t result; + Py_ssize_t result = any_find_slice(str, substr, start, end, -1); + if (result < 0) { + return -1; + } + return result; +} - if (!parse_args_finds_unicode("rindex", args, &substring, &start, &end)) - return NULL; +/*[clinic input] +str.rindex as unicode_rindex = str.count - result = any_find_slice(self, substring, start, end, -1); +Return the highest index in S where substring sub is found, such that sub is contained within S[start:end]. - if (result == -2) - return NULL; +Optional arguments start and end are interpreted as in slice notation. +Raises ValueError when the substring is not found. +[clinic start generated code]*/ - if (result < 0) { +static Py_ssize_t +unicode_rindex_impl(PyObject *str, PyObject *substr, Py_ssize_t start, + Py_ssize_t end) +/*[clinic end generated code: output=5f3aef124c867fe1 input=35943dead6c1ea9d]*/ +{ + Py_ssize_t result = any_find_slice(str, substr, start, end, -1); + if (result == -1) { PyErr_SetString(PyExc_ValueError, "substring not found"); - return NULL; } - - return PyLong_FromSsize_t(result); + else if (result < 0) { + return -1; + } + return result; } /*[clinic input] @@ -13562,16 +13497,16 @@ static PyMethodDef unicode_methods[] = { UNICODE_CASEFOLD_METHODDEF UNICODE_TITLE_METHODDEF UNICODE_CENTER_METHODDEF - {"count", (PyCFunction) unicode_count, METH_VARARGS, count__doc__}, + UNICODE_COUNT_METHODDEF UNICODE_EXPANDTABS_METHODDEF - {"find", (PyCFunction) unicode_find, METH_VARARGS, find__doc__}, + UNICODE_FIND_METHODDEF UNICODE_PARTITION_METHODDEF - {"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__}, + UNICODE_INDEX_METHODDEF UNICODE_LJUST_METHODDEF UNICODE_LOWER_METHODDEF UNICODE_LSTRIP_METHODDEF - {"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__}, - {"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__}, + UNICODE_RFIND_METHODDEF + UNICODE_RINDEX_METHODDEF UNICODE_RJUST_METHODDEF UNICODE_RSTRIP_METHODDEF UNICODE_RPARTITION_METHODDEF |