diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2016-05-05 06:26:07 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2016-05-05 06:26:07 (GMT) |
commit | fb81d3cbe72e406a47962cee0a832d7b67ad11a3 (patch) | |
tree | 890ec9fb389953b406251d7702a1c2e2ce909595 /Objects/bytearrayobject.c | |
parent | 9fc385748caa4802b0a10c5978873da850dd319d (diff) | |
download | cpython-fb81d3cbe72e406a47962cee0a832d7b67ad11a3.zip cpython-fb81d3cbe72e406a47962cee0a832d7b67ad11a3.tar.gz cpython-fb81d3cbe72e406a47962cee0a832d7b67ad11a3.tar.bz2 |
Issue #26765: Moved common code for the replace() method of bytes and bytearray
to a template file.
Diffstat (limited to 'Objects/bytearrayobject.c')
-rw-r--r-- | Objects/bytearrayobject.c | 503 |
1 files changed, 3 insertions, 500 deletions
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 093cdbf..9457768 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -1307,503 +1307,6 @@ bytearray_maketrans_impl(Py_buffer *frm, Py_buffer *to) } -/* find and count characters and substrings */ - -#define findchar(target, target_len, c) \ - ((char *)memchr((const void *)(target), c, target_len)) - - -/* Bytes ops must return a string, create a copy */ -Py_LOCAL(PyByteArrayObject *) -return_self(PyByteArrayObject *self) -{ - /* always return a new bytearray */ - return (PyByteArrayObject *)PyByteArray_FromStringAndSize( - PyByteArray_AS_STRING(self), - PyByteArray_GET_SIZE(self)); -} - -Py_LOCAL_INLINE(Py_ssize_t) -countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount) -{ - Py_ssize_t count=0; - const char *start=target; - const char *end=target+target_len; - - while ( (start=findchar(start, end-start, c)) != NULL ) { - count++; - if (count >= maxcount) - break; - start += 1; - } - return count; -} - - -/* Algorithms for different cases of string replacement */ - -/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */ -Py_LOCAL(PyByteArrayObject *) -replace_interleave(PyByteArrayObject *self, - const char *to_s, Py_ssize_t to_len, - Py_ssize_t maxcount) -{ - char *self_s, *result_s; - Py_ssize_t self_len, result_len; - Py_ssize_t count, i; - PyByteArrayObject *result; - - self_len = PyByteArray_GET_SIZE(self); - - /* 1 at the end plus 1 after every character; - count = min(maxcount, self_len + 1) */ - if (maxcount <= self_len) - count = maxcount; - else - /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */ - count = self_len + 1; - - /* Check for overflow */ - /* result_len = count * to_len + self_len; */ - assert(count > 0); - if (to_len > (PY_SSIZE_T_MAX - self_len) / count) { - PyErr_SetString(PyExc_OverflowError, - "replace string is too long"); - return NULL; - } - result_len = count * to_len + self_len; - - if (! (result = (PyByteArrayObject *) - PyByteArray_FromStringAndSize(NULL, result_len)) ) - return NULL; - - self_s = PyByteArray_AS_STRING(self); - result_s = PyByteArray_AS_STRING(result); - - if (to_len > 1) { - /* Lay the first one down (guaranteed this will occur) */ - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - count -= 1; - - for (i = 0; i < count; i++) { - *result_s++ = *self_s++; - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - } - } - else { - result_s[0] = to_s[0]; - result_s += to_len; - count -= 1; - for (i = 0; i < count; i++) { - *result_s++ = *self_s++; - result_s[0] = to_s[0]; - result_s += to_len; - } - } - - /* Copy the rest of the original string */ - Py_MEMCPY(result_s, self_s, self_len-i); - - return result; -} - -/* Special case for deleting a single character */ -/* len(self)>=1, len(from)==1, to="", maxcount>=1 */ -Py_LOCAL(PyByteArrayObject *) -replace_delete_single_character(PyByteArrayObject *self, - char from_c, Py_ssize_t maxcount) -{ - char *self_s, *result_s; - char *start, *next, *end; - Py_ssize_t self_len, result_len; - Py_ssize_t count; - PyByteArrayObject *result; - - self_len = PyByteArray_GET_SIZE(self); - self_s = PyByteArray_AS_STRING(self); - - count = countchar(self_s, self_len, from_c, maxcount); - if (count == 0) { - return return_self(self); - } - - result_len = self_len - count; /* from_len == 1 */ - assert(result_len>=0); - - if ( (result = (PyByteArrayObject *) - PyByteArray_FromStringAndSize(NULL, result_len)) == NULL) - return NULL; - result_s = PyByteArray_AS_STRING(result); - - start = self_s; - end = self_s + self_len; - while (count-- > 0) { - next = findchar(start, end-start, from_c); - if (next == NULL) - break; - Py_MEMCPY(result_s, start, next-start); - result_s += (next-start); - start = next+1; - } - Py_MEMCPY(result_s, start, end-start); - - return result; -} - -/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */ - -Py_LOCAL(PyByteArrayObject *) -replace_delete_substring(PyByteArrayObject *self, - const char *from_s, Py_ssize_t from_len, - Py_ssize_t maxcount) -{ - char *self_s, *result_s; - char *start, *next, *end; - Py_ssize_t self_len, result_len; - Py_ssize_t count, offset; - PyByteArrayObject *result; - - self_len = PyByteArray_GET_SIZE(self); - self_s = PyByteArray_AS_STRING(self); - - count = stringlib_count(self_s, self_len, - from_s, from_len, - maxcount); - - if (count == 0) { - /* no matches */ - return return_self(self); - } - - result_len = self_len - (count * from_len); - assert (result_len>=0); - - if ( (result = (PyByteArrayObject *) - PyByteArray_FromStringAndSize(NULL, result_len)) == NULL ) - return NULL; - - result_s = PyByteArray_AS_STRING(result); - - start = self_s; - end = self_s + self_len; - while (count-- > 0) { - offset = stringlib_find(start, end-start, - from_s, from_len, - 0); - if (offset == -1) - break; - next = start + offset; - - Py_MEMCPY(result_s, start, next-start); - - result_s += (next-start); - start = next+from_len; - } - Py_MEMCPY(result_s, start, end-start); - return result; -} - -/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */ -Py_LOCAL(PyByteArrayObject *) -replace_single_character_in_place(PyByteArrayObject *self, - char from_c, char to_c, - Py_ssize_t maxcount) -{ - char *self_s, *result_s, *start, *end, *next; - Py_ssize_t self_len; - PyByteArrayObject *result; - - /* The result string will be the same size */ - self_s = PyByteArray_AS_STRING(self); - self_len = PyByteArray_GET_SIZE(self); - - next = findchar(self_s, self_len, from_c); - - if (next == NULL) { - /* No matches; return the original bytes */ - return return_self(self); - } - - /* Need to make a new bytes */ - result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len); - if (result == NULL) - return NULL; - result_s = PyByteArray_AS_STRING(result); - Py_MEMCPY(result_s, self_s, self_len); - - /* change everything in-place, starting with this one */ - start = result_s + (next-self_s); - *start = to_c; - start++; - end = result_s + self_len; - - while (--maxcount > 0) { - next = findchar(start, end-start, from_c); - if (next == NULL) - break; - *next = to_c; - start = next+1; - } - - return result; -} - -/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */ -Py_LOCAL(PyByteArrayObject *) -replace_substring_in_place(PyByteArrayObject *self, - const char *from_s, Py_ssize_t from_len, - const char *to_s, Py_ssize_t to_len, - Py_ssize_t maxcount) -{ - char *result_s, *start, *end; - char *self_s; - Py_ssize_t self_len, offset; - PyByteArrayObject *result; - - /* The result bytes will be the same size */ - - self_s = PyByteArray_AS_STRING(self); - self_len = PyByteArray_GET_SIZE(self); - - offset = stringlib_find(self_s, self_len, - from_s, from_len, - 0); - if (offset == -1) { - /* No matches; return the original bytes */ - return return_self(self); - } - - /* Need to make a new bytes */ - result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len); - if (result == NULL) - return NULL; - result_s = PyByteArray_AS_STRING(result); - Py_MEMCPY(result_s, self_s, self_len); - - /* change everything in-place, starting with this one */ - start = result_s + offset; - Py_MEMCPY(start, to_s, from_len); - start += from_len; - end = result_s + self_len; - - while ( --maxcount > 0) { - offset = stringlib_find(start, end-start, - from_s, from_len, - 0); - if (offset==-1) - break; - Py_MEMCPY(start+offset, to_s, from_len); - start += offset+from_len; - } - - return result; -} - -/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */ -Py_LOCAL(PyByteArrayObject *) -replace_single_character(PyByteArrayObject *self, - char from_c, - const char *to_s, Py_ssize_t to_len, - Py_ssize_t maxcount) -{ - char *self_s, *result_s; - char *start, *next, *end; - Py_ssize_t self_len, result_len; - Py_ssize_t count; - PyByteArrayObject *result; - - self_s = PyByteArray_AS_STRING(self); - self_len = PyByteArray_GET_SIZE(self); - - count = countchar(self_s, self_len, from_c, maxcount); - if (count == 0) { - /* no matches, return unchanged */ - return return_self(self); - } - - /* use the difference between current and new, hence the "-1" */ - /* result_len = self_len + count * (to_len-1) */ - assert(count > 0); - if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) { - PyErr_SetString(PyExc_OverflowError, "replace bytes is too long"); - return NULL; - } - result_len = self_len + count * (to_len - 1); - - if ( (result = (PyByteArrayObject *) - PyByteArray_FromStringAndSize(NULL, result_len)) == NULL) - return NULL; - result_s = PyByteArray_AS_STRING(result); - - start = self_s; - end = self_s + self_len; - while (count-- > 0) { - next = findchar(start, end-start, from_c); - if (next == NULL) - break; - - if (next == start) { - /* replace with the 'to' */ - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - start += 1; - } else { - /* copy the unchanged old then the 'to' */ - Py_MEMCPY(result_s, start, next-start); - result_s += (next-start); - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - start = next+1; - } - } - /* Copy the remainder of the remaining bytes */ - Py_MEMCPY(result_s, start, end-start); - - return result; -} - -/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */ -Py_LOCAL(PyByteArrayObject *) -replace_substring(PyByteArrayObject *self, - const char *from_s, Py_ssize_t from_len, - const char *to_s, Py_ssize_t to_len, - Py_ssize_t maxcount) -{ - char *self_s, *result_s; - char *start, *next, *end; - Py_ssize_t self_len, result_len; - Py_ssize_t count, offset; - PyByteArrayObject *result; - - self_s = PyByteArray_AS_STRING(self); - self_len = PyByteArray_GET_SIZE(self); - - count = stringlib_count(self_s, self_len, - from_s, from_len, - maxcount); - - if (count == 0) { - /* no matches, return unchanged */ - return return_self(self); - } - - /* Check for overflow */ - /* result_len = self_len + count * (to_len-from_len) */ - assert(count > 0); - if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) { - PyErr_SetString(PyExc_OverflowError, "replace bytes is too long"); - return NULL; - } - result_len = self_len + count * (to_len - from_len); - - if ( (result = (PyByteArrayObject *) - PyByteArray_FromStringAndSize(NULL, result_len)) == NULL) - return NULL; - result_s = PyByteArray_AS_STRING(result); - - start = self_s; - end = self_s + self_len; - while (count-- > 0) { - offset = stringlib_find(start, end-start, - from_s, from_len, - 0); - if (offset == -1) - break; - next = start+offset; - if (next == start) { - /* replace with the 'to' */ - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - start += from_len; - } else { - /* copy the unchanged old then the 'to' */ - Py_MEMCPY(result_s, start, next-start); - result_s += (next-start); - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - start = next+from_len; - } - } - /* Copy the remainder of the remaining bytes */ - Py_MEMCPY(result_s, start, end-start); - - return result; -} - - -Py_LOCAL(PyByteArrayObject *) -replace(PyByteArrayObject *self, - const char *from_s, Py_ssize_t from_len, - const char *to_s, Py_ssize_t to_len, - Py_ssize_t maxcount) -{ - if (maxcount < 0) { - maxcount = PY_SSIZE_T_MAX; - } else if (maxcount == 0 || PyByteArray_GET_SIZE(self) == 0) { - /* nothing to do; return the original bytes */ - return return_self(self); - } - - if (maxcount == 0 || - (from_len == 0 && to_len == 0)) { - /* nothing to do; return the original bytes */ - return return_self(self); - } - - /* Handle zero-length special cases */ - - if (from_len == 0) { - /* insert the 'to' bytes everywhere. */ - /* >>> "Python".replace("", ".") */ - /* '.P.y.t.h.o.n.' */ - return replace_interleave(self, to_s, to_len, maxcount); - } - - /* Except for "".replace("", "A") == "A" there is no way beyond this */ - /* point for an empty self bytes to generate a non-empty bytes */ - /* Special case so the remaining code always gets a non-empty bytes */ - if (PyByteArray_GET_SIZE(self) == 0) { - return return_self(self); - } - - if (to_len == 0) { - /* delete all occurrences of 'from' bytes */ - if (from_len == 1) { - return replace_delete_single_character( - self, from_s[0], maxcount); - } else { - return replace_delete_substring(self, from_s, from_len, maxcount); - } - } - - /* Handle special case where both bytes have the same length */ - - if (from_len == to_len) { - if (from_len == 1) { - return replace_single_character_in_place( - self, - from_s[0], - to_s[0], - maxcount); - } else { - return replace_substring_in_place( - self, from_s, from_len, to_s, to_len, maxcount); - } - } - - /* Otherwise use the more generic algorithms */ - if (from_len == 1) { - return replace_single_character(self, from_s[0], - to_s, to_len, maxcount); - } else { - /* len('from')>=2, len('to')>=1 */ - return replace_substring(self, from_s, from_len, to_s, to_len, maxcount); - } -} - - /*[clinic input] bytearray.replace @@ -1825,9 +1328,9 @@ bytearray_replace_impl(PyByteArrayObject *self, Py_buffer *old, Py_buffer *new, Py_ssize_t count) /*[clinic end generated code: output=d39884c4dc59412a input=aa379d988637c7fb]*/ { - return (PyObject *)replace((PyByteArrayObject *) self, - old->buf, old->len, - new->buf, new->len, count); + return stringlib_replace((PyObject *)self, + (const char *)old->buf, old->len, + (const char *)new->buf, new->len, count); } /*[clinic input] |