diff options
Diffstat (limited to 'Objects/bytearrayobject.c')
-rw-r--r-- | Objects/bytearrayobject.c | 623 |
1 files changed, 120 insertions, 503 deletions
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 6fc229d..827fded 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -5,24 +5,16 @@ #include "structmember.h" #include "bytes_methods.h" -static PyByteArrayObject *nullbytes = NULL; char _PyByteArray_empty_string[] = ""; void PyByteArray_Fini(void) { - Py_CLEAR(nullbytes); } int PyByteArray_Init(void) { - nullbytes = PyObject_New(PyByteArrayObject, &PyByteArray_Type); - if (nullbytes == NULL) - return 0; - nullbytes->ob_bytes = NULL; - Py_SIZE(nullbytes) = nullbytes->ob_alloc = 0; - nullbytes->ob_exports = 0; return 1; } @@ -41,6 +33,7 @@ _getbytevalue(PyObject* arg, int *value) PyObject *index = PyNumber_Index(arg); if (index == NULL) { PyErr_Format(PyExc_TypeError, "an integer is required"); + *value = -1; return 0; } face_value = PyLong_AsLong(index); @@ -50,6 +43,7 @@ _getbytevalue(PyObject* arg, int *value) if (face_value < 0 || face_value >= 256) { /* this includes the OverflowError in case the long is too large */ PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)"); + *value = -1; return 0; } @@ -316,9 +310,9 @@ bytearray_repeat(PyByteArrayObject *self, Py_ssize_t count) if (count < 0) count = 0; mysize = Py_SIZE(self); - size = mysize * count; - if (count != 0 && size / count != mysize) + if (count > 0 && mysize > PY_SSIZE_T_MAX / count) return PyErr_NoMemory(); + size = mysize * count; result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size); if (result != NULL && size != 0) { if (mysize == 1) @@ -341,9 +335,9 @@ bytearray_irepeat(PyByteArrayObject *self, Py_ssize_t count) if (count < 0) count = 0; mysize = Py_SIZE(self); - size = mysize * count; - if (count != 0 && size / count != mysize) + if (count > 0 && mysize > PY_SSIZE_T_MAX / count) return PyErr_NoMemory(); + size = mysize * count; if (size < self->ob_alloc) { Py_SIZE(self) = size; self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */ @@ -395,7 +389,7 @@ bytearray_subscript(PyByteArrayObject *self, PyObject *index) } else if (PySlice_Check(index)) { Py_ssize_t start, stop, step, slicelength, cur, i; - if (PySlice_GetIndicesEx((PySliceObject *)index, + if (PySlice_GetIndicesEx(index, PyByteArray_GET_SIZE(self), &start, &stop, &step, &slicelength) < 0) { return NULL; @@ -579,7 +573,7 @@ bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *valu } } else if (PySlice_Check(index)) { - if (PySlice_GetIndicesEx((PySliceObject *)index, + if (PySlice_GetIndicesEx(index, PyByteArray_GET_SIZE(self), &start, &stop, &step, &slicelen) < 0) { return -1; @@ -655,6 +649,11 @@ bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *valu if (!_canresize(self)) return -1; + + if (slicelen == 0) + /* Nothing to do here. */ + return 0; + if (step < 0) { stop = start + 1; start = stop + step * (slicelen - 1) - 1; @@ -671,7 +670,7 @@ bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *valu self->ob_bytes + cur + 1, lim); } /* Move the tail of the bytes, in one chunk */ - cur = start + slicelen*step; + cur = start + (size_t)slicelen*step; if (cur < (size_t)PyByteArray_GET_SIZE(self)) { memmove(self->ob_bytes + cur - slicelen, self->ob_bytes + cur, @@ -685,7 +684,8 @@ bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *valu } else { /* Assign slice */ - Py_ssize_t cur, i; + Py_ssize_t i; + size_t cur; if (needed != slicelen) { PyErr_Format(PyExc_ValueError, @@ -1038,19 +1038,19 @@ bytearray_dealloc(PyByteArrayObject *self) /* Methods */ #define STRINGLIB_CHAR char -#define STRINGLIB_CMP memcmp #define STRINGLIB_LEN PyByteArray_GET_SIZE #define STRINGLIB_STR PyByteArray_AS_STRING #define STRINGLIB_NEW PyByteArray_FromStringAndSize -#define STRINGLIB_EMPTY nullbytes +#define STRINGLIB_ISSPACE Py_ISSPACE +#define STRINGLIB_ISLINEBREAK(x) ((x == '\n') || (x == '\r')) #define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact #define STRINGLIB_MUTABLE 1 -#define FROM_BYTEARRAY 1 #include "stringlib/fastsearch.h" #include "stringlib/count.h" #include "stringlib/find.h" #include "stringlib/partition.h" +#include "stringlib/split.h" #include "stringlib/ctype.h" #include "stringlib/transmogrify.h" @@ -1058,21 +1058,20 @@ bytearray_dealloc(PyByteArrayObject *self) /* The following Py_LOCAL_INLINE and Py_LOCAL functions were copied from the old char* style string object. */ -Py_LOCAL_INLINE(void) -_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len) -{ - if (*end > len) - *end = len; - else if (*end < 0) - *end += len; - if (*end < 0) - *end = 0; - if (*start < 0) - *start += len; - if (*start < 0) - *start = 0; -} - +/* helper macro to fixup start/end slice values */ +#define ADJUST_INDICES(start, end, len) \ + if (end > len) \ + end = len; \ + else if (end < 0) { \ + end += len; \ + if (end < 0) \ + end = 0; \ + } \ + if (start < 0) { \ + start += len; \ + if (start < 0) \ + start = 0; \ + } Py_LOCAL_INLINE(Py_ssize_t) bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir) @@ -1139,10 +1138,10 @@ bytearray_count(PyByteArrayObject *self, PyObject *args) if (_getbuffer(sub_obj, &vsub) < 0) return NULL; - _adjust_indices(&start, &end, PyByteArray_GET_SIZE(self)); + ADJUST_INDICES(start, end, PyByteArray_GET_SIZE(self)); count_obj = PyLong_FromSsize_t( - stringlib_count(str + start, end - start, vsub.buf, vsub.len) + stringlib_count(str + start, end - start, vsub.buf, vsub.len, PY_SSIZE_T_MAX) ); PyBuffer_Release(&vsub); return count_obj; @@ -1250,7 +1249,7 @@ _bytearray_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start if (_getbuffer(substr, &vsubstr) < 0) return -1; - _adjust_indices(&start, &end, len); + ADJUST_INDICES(start, end, len); if (direction < 0) { /* startswith */ @@ -1460,20 +1459,11 @@ bytearray_maketrans(PyObject *null, PyObject *args) } -#define FORWARD 1 -#define REVERSE -1 - /* find and count characters and substrings */ #define findchar(target, target_len, c) \ ((char *)memchr((const void *)(target), c, target_len)) -/* Don't call if length < 2 */ -#define Py_STRING_MATCH(target, offset, pattern, length) \ - (target[offset] == pattern[0] && \ - target[offset+length-1] == pattern[length-1] && \ - !memcmp(target+offset+1, pattern+1, length-2) ) - /* Bytes ops must return a string, create a copy */ Py_LOCAL(PyByteArrayObject *) @@ -1501,93 +1491,6 @@ countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount return count; } -Py_LOCAL(Py_ssize_t) -findstring(const char *target, Py_ssize_t target_len, - const char *pattern, Py_ssize_t pattern_len, - Py_ssize_t start, - Py_ssize_t end, - int direction) -{ - if (start < 0) { - start += target_len; - if (start < 0) - start = 0; - } - if (end > target_len) { - end = target_len; - } else if (end < 0) { - end += target_len; - if (end < 0) - end = 0; - } - - /* zero-length substrings always match at the first attempt */ - if (pattern_len == 0) - return (direction > 0) ? start : end; - - end -= pattern_len; - - if (direction < 0) { - for (; end >= start; end--) - if (Py_STRING_MATCH(target, end, pattern, pattern_len)) - return end; - } else { - for (; start <= end; start++) - if (Py_STRING_MATCH(target, start, pattern, pattern_len)) - return start; - } - return -1; -} - -Py_LOCAL_INLINE(Py_ssize_t) -countstring(const char *target, Py_ssize_t target_len, - const char *pattern, Py_ssize_t pattern_len, - Py_ssize_t start, - Py_ssize_t end, - int direction, Py_ssize_t maxcount) -{ - Py_ssize_t count=0; - - if (start < 0) { - start += target_len; - if (start < 0) - start = 0; - } - if (end > target_len) { - end = target_len; - } else if (end < 0) { - end += target_len; - if (end < 0) - end = 0; - } - - /* zero-length substrings match everywhere */ - if (pattern_len == 0 || maxcount == 0) { - if (target_len+1 < maxcount) - return target_len+1; - return maxcount; - } - - end -= pattern_len; - if (direction < 0) { - for (; (end >= start); end--) - if (Py_STRING_MATCH(target, end, pattern, pattern_len)) { - count++; - if (--maxcount <= 0) break; - end -= pattern_len-1; - } - } else { - for (; (start <= end); start++) - if (Py_STRING_MATCH(target, start, pattern, pattern_len)) { - count++; - if (--maxcount <= 0) - break; - start += pattern_len-1; - } - } - return count; -} - /* Algorithms for different cases of string replacement */ @@ -1599,30 +1502,28 @@ replace_interleave(PyByteArrayObject *self, { char *self_s, *result_s; Py_ssize_t self_len, result_len; - Py_ssize_t count, i, product; + Py_ssize_t count, i; PyByteArrayObject *result; self_len = PyByteArray_GET_SIZE(self); - /* 1 at the end plus 1 after every character */ - count = self_len+1; - if (maxcount < count) + /* 1 at the end plus 1 after every character; + count = min(maxcount, self_len + 1) */ + if (maxcount <= self_len) count = maxcount; + else + /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */ + count = self_len + 1; /* Check for overflow */ /* result_len = count * to_len + self_len; */ - product = count * to_len; - if (product / to_len != count) { - PyErr_SetString(PyExc_OverflowError, - "replace string is too long"); - return NULL; - } - result_len = product + self_len; - if (result_len < 0) { + assert(count > 0); + if (to_len > (PY_SSIZE_T_MAX - self_len) / count) { PyErr_SetString(PyExc_OverflowError, "replace string is too long"); return NULL; } + result_len = count * to_len + self_len; if (! (result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, result_len)) ) @@ -1709,10 +1610,9 @@ replace_delete_substring(PyByteArrayObject *self, self_len = PyByteArray_GET_SIZE(self); self_s = PyByteArray_AS_STRING(self); - count = countstring(self_s, self_len, - from_s, from_len, - 0, self_len, 1, - maxcount); + count = stringlib_count(self_s, self_len, + from_s, from_len, + maxcount); if (count == 0) { /* no matches */ @@ -1731,9 +1631,9 @@ replace_delete_substring(PyByteArrayObject *self, start = self_s; end = self_s + self_len; while (count-- > 0) { - offset = findstring(start, end-start, - from_s, from_len, - 0, end-start, FORWARD); + offset = stringlib_find(start, end-start, + from_s, from_len, + 0); if (offset == -1) break; next = start + offset; @@ -1809,9 +1709,9 @@ replace_substring_in_place(PyByteArrayObject *self, self_s = PyByteArray_AS_STRING(self); self_len = PyByteArray_GET_SIZE(self); - offset = findstring(self_s, self_len, - from_s, from_len, - 0, self_len, FORWARD); + offset = stringlib_find(self_s, self_len, + from_s, from_len, + 0); if (offset == -1) { /* No matches; return the original bytes */ return return_self(self); @@ -1831,9 +1731,9 @@ replace_substring_in_place(PyByteArrayObject *self, end = result_s + self_len; while ( --maxcount > 0) { - offset = findstring(start, end-start, - from_s, from_len, - 0, end-start, FORWARD); + offset = stringlib_find(start, end-start, + from_s, from_len, + 0); if (offset==-1) break; Py_MEMCPY(start+offset, to_s, from_len); @@ -1853,7 +1753,7 @@ replace_single_character(PyByteArrayObject *self, char *self_s, *result_s; char *start, *next, *end; Py_ssize_t self_len, result_len; - Py_ssize_t count, product; + Py_ssize_t count; PyByteArrayObject *result; self_s = PyByteArray_AS_STRING(self); @@ -1867,16 +1767,12 @@ replace_single_character(PyByteArrayObject *self, /* use the difference between current and new, hence the "-1" */ /* result_len = self_len + count * (to_len-1) */ - product = count * (to_len-1); - if (product / (to_len-1) != count) { + assert(count > 0); + if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) { PyErr_SetString(PyExc_OverflowError, "replace bytes is too long"); return NULL; } - result_len = self_len + product; - if (result_len < 0) { - PyErr_SetString(PyExc_OverflowError, "replace bytes is too long"); - return NULL; - } + result_len = self_len + count * (to_len - 1); if ( (result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, result_len)) == NULL) @@ -1920,15 +1816,16 @@ replace_substring(PyByteArrayObject *self, char *self_s, *result_s; char *start, *next, *end; Py_ssize_t self_len, result_len; - Py_ssize_t count, offset, product; + Py_ssize_t count, offset; PyByteArrayObject *result; self_s = PyByteArray_AS_STRING(self); self_len = PyByteArray_GET_SIZE(self); - count = countstring(self_s, self_len, - from_s, from_len, - 0, self_len, FORWARD, maxcount); + count = stringlib_count(self_s, self_len, + from_s, from_len, + maxcount); + if (count == 0) { /* no matches, return unchanged */ return return_self(self); @@ -1936,16 +1833,12 @@ replace_substring(PyByteArrayObject *self, /* Check for overflow */ /* result_len = self_len + count * (to_len-from_len) */ - product = count * (to_len-from_len); - if (product / (to_len-from_len) != count) { - PyErr_SetString(PyExc_OverflowError, "replace bytes is too long"); - return NULL; - } - result_len = self_len + product; - if (result_len < 0) { + assert(count > 0); + if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) { PyErr_SetString(PyExc_OverflowError, "replace bytes is too long"); return NULL; } + result_len = self_len + count * (to_len - from_len); if ( (result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, result_len)) == NULL) @@ -1955,9 +1848,9 @@ replace_substring(PyByteArrayObject *self, start = self_s; end = self_s + self_len; while (count-- > 0) { - offset = findstring(start, end-start, - from_s, from_len, - 0, end-start, FORWARD); + offset = stringlib_find(start, end-start, + from_s, from_len, + 0); if (offset == -1) break; next = start+offset; @@ -2086,123 +1979,6 @@ bytearray_replace(PyByteArrayObject *self, PyObject *args) return res; } - -/* Overallocate the initial list to reduce the number of reallocs for small - split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three - resizes, to sizes 4, 8, then 16. Most observed string splits are for human - text (roughly 11 words per line) and field delimited data (usually 1-10 - fields). For large strings the split algorithms are bandwidth limited - so increasing the preallocation likely will not improve things.*/ - -#define MAX_PREALLOC 12 - -/* 5 splits gives 6 elements */ -#define PREALLOC_SIZE(maxsplit) \ - (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1) - -#define SPLIT_APPEND(data, left, right) \ - str = PyByteArray_FromStringAndSize((data) + (left), \ - (right) - (left)); \ - if (str == NULL) \ - goto onError; \ - if (PyList_Append(list, str)) { \ - Py_DECREF(str); \ - goto onError; \ - } \ - else \ - Py_DECREF(str); - -#define SPLIT_ADD(data, left, right) { \ - str = PyByteArray_FromStringAndSize((data) + (left), \ - (right) - (left)); \ - if (str == NULL) \ - goto onError; \ - if (count < MAX_PREALLOC) { \ - PyList_SET_ITEM(list, count, str); \ - } else { \ - if (PyList_Append(list, str)) { \ - Py_DECREF(str); \ - goto onError; \ - } \ - else \ - Py_DECREF(str); \ - } \ - count++; } - -/* Always force the list to the expected size. */ -#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count - - -Py_LOCAL_INLINE(PyObject *) -split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount) -{ - register Py_ssize_t i, j, count = 0; - PyObject *str; - PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); - - if (list == NULL) - return NULL; - - i = j = 0; - while ((j < len) && (maxcount-- > 0)) { - for(; j < len; j++) { - /* I found that using memchr makes no difference */ - if (s[j] == ch) { - SPLIT_ADD(s, i, j); - i = j = j + 1; - break; - } - } - } - if (i <= len) { - SPLIT_ADD(s, i, len); - } - FIX_PREALLOC_SIZE(list); - return list; - - onError: - Py_DECREF(list); - return NULL; -} - - -Py_LOCAL_INLINE(PyObject *) -split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount) -{ - register Py_ssize_t i, j, count = 0; - PyObject *str; - PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); - - if (list == NULL) - return NULL; - - for (i = j = 0; i < len; ) { - /* find a token */ - while (i < len && Py_ISSPACE(s[i])) - i++; - j = i; - while (i < len && !Py_ISSPACE(s[i])) - i++; - if (j < i) { - if (maxcount-- <= 0) - break; - SPLIT_ADD(s, j, i); - while (i < len && Py_ISSPACE(s[i])) - i++; - j = i; - } - } - if (j < len) { - SPLIT_ADD(s, j, len); - } - FIX_PREALLOC_SIZE(list); - return list; - - onError: - Py_DECREF(list); - return NULL; -} - PyDoc_STRVAR(split__doc__, "B.split([sep[, maxsplit]]) -> list of bytearrays\n\ \n\ @@ -2214,14 +1990,11 @@ If maxsplit is given, at most maxsplit splits are done."); static PyObject * bytearray_split(PyByteArrayObject *self, PyObject *args) { - Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j; - Py_ssize_t maxsplit = -1, count = 0; + Py_ssize_t len = PyByteArray_GET_SIZE(self), n; + Py_ssize_t maxsplit = -1; const char *s = PyByteArray_AS_STRING(self), *sub; - PyObject *list, *str, *subobj = Py_None; + PyObject *list, *subobj = Py_None; Py_buffer vsub; -#ifdef USE_FAST - Py_ssize_t pos; -#endif if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit)) return NULL; @@ -2229,86 +2002,18 @@ bytearray_split(PyByteArrayObject *self, PyObject *args) maxsplit = PY_SSIZE_T_MAX; if (subobj == Py_None) - return split_whitespace(s, len, maxsplit); + return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit); if (_getbuffer(subobj, &vsub) < 0) return NULL; sub = vsub.buf; n = vsub.len; - if (n == 0) { - PyErr_SetString(PyExc_ValueError, "empty separator"); - PyBuffer_Release(&vsub); - return NULL; - } - if (n == 1) { - list = split_char(s, len, sub[0], maxsplit); - PyBuffer_Release(&vsub); - return list; - } - - list = PyList_New(PREALLOC_SIZE(maxsplit)); - if (list == NULL) { - PyBuffer_Release(&vsub); - return NULL; - } - -#ifdef USE_FAST - i = j = 0; - while (maxsplit-- > 0) { - pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH); - if (pos < 0) - break; - j = i+pos; - SPLIT_ADD(s, i, j); - i = j + n; - } -#else - i = j = 0; - while ((j+n <= len) && (maxsplit-- > 0)) { - for (; j+n <= len; j++) { - if (Py_STRING_MATCH(s, j, sub, n)) { - SPLIT_ADD(s, i, j); - i = j = j + n; - break; - } - } - } -#endif - SPLIT_ADD(s, i, len); - FIX_PREALLOC_SIZE(list); + list = stringlib_split( + (PyObject*) self, s, len, sub, n, maxsplit + ); PyBuffer_Release(&vsub); return list; - - onError: - Py_DECREF(list); - PyBuffer_Release(&vsub); - return NULL; -} - -/* stringlib's partition shares nullbytes in some cases. - undo this, we don't want the nullbytes to be shared. */ -static PyObject * -make_nullbytes_unique(PyObject *result) -{ - if (result != NULL) { - int i; - assert(PyTuple_Check(result)); - assert(PyTuple_GET_SIZE(result) == 3); - for (i = 0; i < 3; i++) { - if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) { - PyObject *new = PyByteArray_FromStringAndSize(NULL, 0); - if (new == NULL) { - Py_DECREF(result); - result = NULL; - break; - } - Py_DECREF(nullbytes); - PyTuple_SET_ITEM(result, i, new); - } - } - } - return result; } PyDoc_STRVAR(partition__doc__, @@ -2335,7 +2040,7 @@ bytearray_partition(PyByteArrayObject *self, PyObject *sep_obj) ); Py_DECREF(bytesep); - return make_nullbytes_unique(result); + return result; } PyDoc_STRVAR(rpartition__doc__, @@ -2363,81 +2068,7 @@ bytearray_rpartition(PyByteArrayObject *self, PyObject *sep_obj) ); Py_DECREF(bytesep); - return make_nullbytes_unique(result); -} - -Py_LOCAL_INLINE(PyObject *) -rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount) -{ - register Py_ssize_t i, j, count=0; - PyObject *str; - PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); - - if (list == NULL) - return NULL; - - i = j = len - 1; - while ((i >= 0) && (maxcount-- > 0)) { - for (; i >= 0; i--) { - if (s[i] == ch) { - SPLIT_ADD(s, i + 1, j + 1); - j = i = i - 1; - break; - } - } - } - if (j >= -1) { - SPLIT_ADD(s, 0, j + 1); - } - FIX_PREALLOC_SIZE(list); - if (PyList_Reverse(list) < 0) - goto onError; - - return list; - - onError: - Py_DECREF(list); - return NULL; -} - -Py_LOCAL_INLINE(PyObject *) -rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount) -{ - register Py_ssize_t i, j, count = 0; - PyObject *str; - PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); - - if (list == NULL) - return NULL; - - for (i = j = len - 1; i >= 0; ) { - /* find a token */ - while (i >= 0 && Py_ISSPACE(s[i])) - i--; - j = i; - while (i >= 0 && !Py_ISSPACE(s[i])) - i--; - if (j > i) { - if (maxcount-- <= 0) - break; - SPLIT_ADD(s, i + 1, j + 1); - while (i >= 0 && Py_ISSPACE(s[i])) - i--; - j = i; - } - } - if (j >= 0) { - SPLIT_ADD(s, 0, j + 1); - } - FIX_PREALLOC_SIZE(list); - if (PyList_Reverse(list) < 0) - goto onError; - - return list; - - onError: - Py_DECREF(list); - return NULL; + return result; } PyDoc_STRVAR(rsplit__doc__, @@ -2452,10 +2083,10 @@ If maxsplit is given, at most maxsplit splits are done."); static PyObject * bytearray_rsplit(PyByteArrayObject *self, PyObject *args) { - Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j; - Py_ssize_t maxsplit = -1, count = 0; + Py_ssize_t len = PyByteArray_GET_SIZE(self), n; + Py_ssize_t maxsplit = -1; const char *s = PyByteArray_AS_STRING(self), *sub; - PyObject *list, *str, *subobj = Py_None; + PyObject *list, *subobj = Py_None; Py_buffer vsub; if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit)) @@ -2464,54 +2095,18 @@ bytearray_rsplit(PyByteArrayObject *self, PyObject *args) maxsplit = PY_SSIZE_T_MAX; if (subobj == Py_None) - return rsplit_whitespace(s, len, maxsplit); + return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit); if (_getbuffer(subobj, &vsub) < 0) return NULL; sub = vsub.buf; n = vsub.len; - if (n == 0) { - PyErr_SetString(PyExc_ValueError, "empty separator"); - PyBuffer_Release(&vsub); - return NULL; - } - else if (n == 1) { - list = rsplit_char(s, len, sub[0], maxsplit); - PyBuffer_Release(&vsub); - return list; - } - - list = PyList_New(PREALLOC_SIZE(maxsplit)); - if (list == NULL) { - PyBuffer_Release(&vsub); - return NULL; - } - - j = len; - i = j - n; - - while ( (i >= 0) && (maxsplit-- > 0) ) { - for (; i>=0; i--) { - if (Py_STRING_MATCH(s, i, sub, n)) { - SPLIT_ADD(s, i + n, j); - j = i; - i -= n; - break; - } - } - } - SPLIT_ADD(s, 0, j); - FIX_PREALLOC_SIZE(list); - if (PyList_Reverse(list) < 0) - goto onError; + list = stringlib_rsplit( + (PyObject*) self, s, len, sub, n, maxsplit + ); PyBuffer_Release(&vsub); return list; - -onError: - Py_DECREF(list); - PyBuffer_Release(&vsub); - return NULL; } PyDoc_STRVAR(reverse__doc__, @@ -2687,8 +2282,8 @@ bytearray_pop(PyByteArrayObject *self, PyObject *args) return NULL; if (n == 0) { - PyErr_SetString(PyExc_OverflowError, - "cannot pop an empty bytearray"); + PyErr_SetString(PyExc_IndexError, + "pop from empty bytearray"); return NULL; } if (where < 0) @@ -2867,22 +2462,23 @@ bytearray_rstrip(PyByteArrayObject *self, PyObject *args) } PyDoc_STRVAR(decode_doc, -"B.decode([encoding[, errors]]) -> str\n\ +"B.decode(encoding='utf-8', errors='strict') -> str\n\ \n\ -Decode B using the codec registered for encoding. encoding defaults\n\ -to the default encoding. errors may be given to set a different error\n\ +Decode B using the codec registered for encoding. Default encoding\n\ +is 'utf-8'. errors may be given to set a different error\n\ handling scheme. Default is 'strict' meaning that encoding errors raise\n\ a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\ as well as any other name registered with codecs.register_error that is\n\ able to handle UnicodeDecodeErrors."); static PyObject * -bytearray_decode(PyObject *self, PyObject *args) +bytearray_decode(PyObject *self, PyObject *args, PyObject *kwargs) { const char *encoding = NULL; const char *errors = NULL; + static char *kwlist[] = {"encoding", "errors", 0}; - if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors)) return NULL; if (encoding == NULL) encoding = PyUnicode_GetDefaultEncoding(); @@ -2976,6 +2572,27 @@ bytearray_join(PyByteArrayObject *self, PyObject *it) return NULL; } +PyDoc_STRVAR(splitlines__doc__, +"B.splitlines([keepends]) -> list of lines\n\ +\n\ +Return a list of the lines in B, breaking at line boundaries.\n\ +Line breaks are not included in the resulting list unless keepends\n\ +is given and true."); + +static PyObject* +bytearray_splitlines(PyObject *self, PyObject *args) +{ + int keepends = 0; + + if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends)) + return NULL; + + return stringlib_splitlines( + (PyObject*) self, PyByteArray_AS_STRING(self), + PyByteArray_GET_SIZE(self), keepends + ); +} + PyDoc_STRVAR(fromhex_doc, "bytearray.fromhex(string) -> bytearray (static method)\n\ \n\ @@ -3112,7 +2729,7 @@ bytearray_methods[] = { _Py_capitalize__doc__}, {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__}, {"count", (PyCFunction)bytearray_count, METH_VARARGS, count__doc__}, - {"decode", (PyCFunction)bytearray_decode, METH_VARARGS, decode_doc}, + {"decode", (PyCFunction)bytearray_decode, METH_VARARGS | METH_KEYWORDS, decode_doc}, {"endswith", (PyCFunction)bytearray_endswith, METH_VARARGS, endswith__doc__}, {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS, expandtabs__doc__}, @@ -3154,7 +2771,7 @@ bytearray_methods[] = { {"rsplit", (PyCFunction)bytearray_rsplit, METH_VARARGS, rsplit__doc__}, {"rstrip", (PyCFunction)bytearray_rstrip, METH_VARARGS, rstrip__doc__}, {"split", (PyCFunction)bytearray_split, METH_VARARGS, split__doc__}, - {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS, + {"splitlines", (PyCFunction)bytearray_splitlines, METH_VARARGS, splitlines__doc__}, {"startswith", (PyCFunction)bytearray_startswith, METH_VARARGS , startswith__doc__}, |