diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2013-10-16 09:46:28 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2013-10-16 09:46:28 (GMT) |
commit | 25324971fb3922a9e7f01380b23227416d753757 (patch) | |
tree | ec6a42304dd8243d3d8834908753f559c53709f6 /Modules/_sre.c | |
parent | 355dda8d17c01d39fac47966aa6b6b44bf6e88b0 (diff) | |
download | cpython-25324971fb3922a9e7f01380b23227416d753757.zip cpython-25324971fb3922a9e7f01380b23227416d753757.tar.gz cpython-25324971fb3922a9e7f01380b23227416d753757.tar.bz2 |
Issue #18468: The re.split, re.findall, and re.sub functions and the group()
and groups() methods of match object now always return a string or a bytes
object.
Diffstat (limited to 'Modules/_sre.c')
-rw-r--r-- | Modules/_sre.c | 110 |
1 files changed, 59 insertions, 51 deletions
diff --git a/Modules/_sre.c b/Modules/_sre.c index 99c3cd5..2b00121 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -1812,6 +1812,24 @@ state_fini(SRE_STATE* state) (((char*)(member) - (char*)(state)->beginning) / (state)->charsize) LOCAL(PyObject*) +getslice(int logical_charsize, const void *ptr, + PyObject* string, Py_ssize_t start, Py_ssize_t end) +{ + if (logical_charsize == 1) { + if (PyBytes_CheckExact(string) && + start == 0 && end == PyBytes_GET_SIZE(string)) { + Py_INCREF(string); + return string; + } + return PyBytes_FromStringAndSize( + (const char *)ptr + start, end - start); + } + else { + return PyUnicode_Substring(string, start, end); + } +} + +LOCAL(PyObject*) state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty) { Py_ssize_t i, j; @@ -1831,7 +1849,7 @@ state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty) j = STATE_OFFSET(state, state->mark[index+1]); } - return PySequence_GetSlice(string, i, j); + return getslice(state->logical_charsize, state->beginning, string, i, j); } static void @@ -1993,45 +2011,6 @@ deepcopy(PyObject** object, PyObject* memo) #endif static PyObject* -join_list(PyObject* list, PyObject* string) -{ - /* join list elements */ - - PyObject* joiner; - PyObject* function; - PyObject* args; - PyObject* result; - - joiner = PySequence_GetSlice(string, 0, 0); - if (!joiner) - return NULL; - - if (PyList_GET_SIZE(list) == 0) { - Py_DECREF(list); - return joiner; - } - - function = PyObject_GetAttrString(joiner, "join"); - if (!function) { - Py_DECREF(joiner); - return NULL; - } - args = PyTuple_New(1); - if (!args) { - Py_DECREF(function); - Py_DECREF(joiner); - return NULL; - } - PyTuple_SET_ITEM(args, 0, list); - result = PyObject_CallObject(function, args); - Py_DECREF(args); /* also removes list */ - Py_DECREF(function); - Py_DECREF(joiner); - - return result; -} - -static PyObject* pattern_findall(PatternObject* self, PyObject* args, PyObject* kw) { SRE_STATE state; @@ -2086,7 +2065,8 @@ pattern_findall(PatternObject* self, PyObject* args, PyObject* kw) case 0: b = STATE_OFFSET(&state, state.start); e = STATE_OFFSET(&state, state.ptr); - item = PySequence_GetSlice(string, b, e); + item = getslice(state.logical_charsize, state.beginning, + string, b, e); if (!item) goto error; break; @@ -2216,7 +2196,7 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw) } /* get segment before this match */ - item = PySequence_GetSlice( + item = getslice(state.logical_charsize, state.beginning, string, STATE_OFFSET(&state, last), STATE_OFFSET(&state, state.start) ); @@ -2245,7 +2225,7 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw) } /* get segment following last match (even if empty) */ - item = PySequence_GetSlice( + item = getslice(state.logical_charsize, state.beginning, string, STATE_OFFSET(&state, last), state.endpos ); if (!item) @@ -2271,6 +2251,7 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string, { SRE_STATE state; PyObject* list; + PyObject* joiner; PyObject* item; PyObject* filter; PyObject* args; @@ -2360,7 +2341,8 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string, if (i < b) { /* get segment before this match */ - item = PySequence_GetSlice(string, i, b); + item = getslice(state.logical_charsize, state.beginning, + string, i, b); if (!item) goto error; status = PyList_Append(list, item); @@ -2415,7 +2397,8 @@ next: /* get segment following last match */ if (i < state.endpos) { - item = PySequence_GetSlice(string, i, state.endpos); + item = getslice(state.logical_charsize, state.beginning, + string, i, state.endpos); if (!item) goto error; status = PyList_Append(list, item); @@ -2429,10 +2412,24 @@ next: Py_DECREF(filter); /* convert list to single string (also removes list) */ - item = join_list(list, string); - - if (!item) + joiner = getslice(state.logical_charsize, state.beginning, string, 0, 0); + if (!joiner) { + Py_DECREF(list); return NULL; + } + if (PyList_GET_SIZE(list) == 0) { + Py_DECREF(list); + item = joiner; + } + else { + if (state.logical_charsize == 1) + item = _PyBytes_Join(joiner, list); + else + item = PyUnicode_Join(joiner, list); + Py_DECREF(joiner); + if (!item) + return NULL; + } if (subn) return Py_BuildValue("Nn", item, n); @@ -3189,6 +3186,12 @@ match_dealloc(MatchObject* self) static PyObject* match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def) { + Py_ssize_t length; + int logical_charsize, charsize; + Py_buffer view; + PyObject *result; + void* ptr; + if (index < 0 || index >= self->groups) { /* raise IndexError if we were given a bad group number */ PyErr_SetString( @@ -3206,9 +3209,14 @@ match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def) return def; } - return PySequence_GetSlice( - self->string, self->mark[index], self->mark[index+1] - ); + ptr = getstring(self->string, &length, &logical_charsize, &charsize, &view); + if (ptr == NULL) + return NULL; + result = getslice(logical_charsize, ptr, + self->string, self->mark[index], self->mark[index+1]); + if (logical_charsize == 1 && view.buf != NULL) + PyBuffer_Release(&view); + return result; } static Py_ssize_t |