summaryrefslogtreecommitdiffstats
path: root/Objects/stringlib/find.h
blob: 518e012b759f8485fec4193c2537f28ebcf72c77 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
/* stringlib: find/index implementation */

#ifndef STRINGLIB_FASTSEARCH_H
#error must include "stringlib/fastsearch.h" before including this module
#endif

Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(find)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
               const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
               Py_ssize_t offset)
{
    Py_ssize_t pos;

    if (str_len < 0)
        return -1;
    if (sub_len == 0)
        return offset;

    pos = FASTSEARCH(str, str_len, sub, sub_len, -1, FAST_SEARCH);

    if (pos >= 0)
        pos += offset;

    return pos;
}

Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
                Py_ssize_t offset)
{
    Py_ssize_t pos;

    if (str_len < 0)
        return -1;
    if (sub_len == 0)
        return str_len + offset;

    pos = FASTSEARCH(str, str_len, sub, sub_len, -1, FAST_RSEARCH);

    if (pos >= 0)
        pos += offset;

    return pos;
}

/* helper macro to fixup start/end slice values */
#define ADJUST_INDICES(start, end, len)         \
    if (end > len)                              \
        end = len;                              \
    else if (end < 0) {                         \
        end += len;                             \
        if (end < 0)                            \
            end = 0;                            \
    }                                           \
    if (start < 0) {                            \
        start += len;                           \
        if (start < 0)                          \
            start = 0;                          \
    }

Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(find_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                     const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
                     Py_ssize_t start, Py_ssize_t end)
{
    ADJUST_INDICES(start, end, str_len);
    return STRINGLIB(find)(str + start, end - start, sub, sub_len, start);
}

Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(rfind_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                      const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
                      Py_ssize_t start, Py_ssize_t end)
{
    ADJUST_INDICES(start, end, str_len);
    return STRINGLIB(rfind)(str + start, end - start, sub, sub_len, start);
}

#ifdef STRINGLIB_WANT_CONTAINS_OBJ

Py_LOCAL_INLINE(int)
STRINGLIB(contains_obj)(PyObject* str, PyObject* sub)
{
    return STRINGLIB(find)(
        STRINGLIB_STR(str), STRINGLIB_LEN(str),
        STRINGLIB_STR(sub), STRINGLIB_LEN(sub), 0
        ) != -1;
}

#endif /* STRINGLIB_WANT_CONTAINS_OBJ */

/*
This function is a helper for the "find" family (find, rfind, index,
rindex) and for count, startswith and endswith, because they all have
the same behaviour for the arguments.

It does not touch the variables received until it knows everything
is ok.
*/

#define FORMAT_BUFFER_SIZE 50

Py_LOCAL_INLINE(int)
STRINGLIB(parse_args_finds)(const char * function_name, PyObject *args,
                           PyObject **subobj,
                           Py_ssize_t *start, Py_ssize_t *end)
{
    PyObject *tmp_subobj;
    Py_ssize_t tmp_start = 0;
    Py_ssize_t tmp_end = PY_SSIZE_T_MAX;
    PyObject *obj_start=Py_None, *obj_end=Py_None;
    char format[FORMAT_BUFFER_SIZE] = "O|OO:";
    size_t len = strlen(format);

    strncpy(format + len, function_name, FORMAT_BUFFER_SIZE - len - 1);
    format[FORMAT_BUFFER_SIZE - 1] = '\0';

    if (!PyArg_ParseTuple(args, format, &tmp_subobj, &obj_start, &obj_end))
        return 0;

    /* To support None in "start" and "end" arguments, meaning
       the same as if they were not passed.
    */
    if (obj_start != Py_None)
        if (!_PyEval_SliceIndex(obj_start, &tmp_start))
            return 0;
    if (obj_end != Py_None)
        if (!_PyEval_SliceIndex(obj_end, &tmp_end))
            return 0;

    *start = tmp_start;
    *end = tmp_end;
    *subobj = tmp_subobj;
    return 1;
}

#undef FORMAT_BUFFER_SIZE

#if STRINGLIB_IS_UNICODE

/*
Wraps stringlib_parse_args_finds() and additionally ensures that the
first argument is a unicode object.

Note that we receive a pointer to the pointer of the substring object,
so when we create that object in this function we don't DECREF it,
because it continues living in the caller functions (those functions,
after finishing using the substring, must DECREF it).
*/

Py_LOCAL_INLINE(int)
STRINGLIB(parse_args_finds_unicode)(const char * function_name, PyObject *args,
                                   PyObject **substring,
                                   Py_ssize_t *start, Py_ssize_t *end)
{
    PyObject *tmp_substring;

    if(STRINGLIB(parse_args_finds)(function_name, args, &tmp_substring,
                                  start, end)) {
        tmp_substring = PyUnicode_FromObject(tmp_substring);
        if (!tmp_substring)
            return 0;
        *substring = tmp_substring;
        return 1;
    }
    return 0;
}

#else /* !STRINGLIB_IS_UNICODE */

/*
Wraps stringlib_parse_args_finds() and additionally checks whether the
first argument is an integer in range(0, 256).

If this is the case, writes the integer value to the byte parameter
and sets subobj to NULL. Otherwise, sets the first argument to subobj
and doesn't touch byte. The other parameters are similar to those of
stringlib_parse_args_finds().
*/

Py_LOCAL_INLINE(int)
STRINGLIB(parse_args_finds_byte)(const char *function_name, PyObject *args,
                                 PyObject **subobj, char *byte,
                                 Py_ssize_t *start, Py_ssize_t *end)
{
    PyObject *tmp_subobj;
    Py_ssize_t ival;
    PyObject *err;

    if(!STRINGLIB(parse_args_finds)(function_name, args, &tmp_subobj,
                                    start, end))
        return 0;

    if (!PyNumber_Check(tmp_subobj)) {
        *subobj = tmp_subobj;
        return 1;
    }

    ival = PyNumber_AsSsize_t(tmp_subobj, PyExc_OverflowError);
    if (ival == -1) {
        err = PyErr_Occurred();
        if (err && !PyErr_GivenExceptionMatches(err, PyExc_OverflowError)) {
            PyErr_Clear();
            *subobj = tmp_subobj;
            return 1;
        }
    }

    if (ival < 0 || ival > 255) {
        PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
        return 0;
    }

    *subobj = NULL;
    *byte = (char)ival;
    return 1;
}

#endif /* STRINGLIB_IS_UNICODE */