summaryrefslogtreecommitdiffstats
path: root/Objects/bytearrayobject.c
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/bytearrayobject.c')
-rw-r--r--Objects/bytearrayobject.c623
1 files changed, 120 insertions, 503 deletions
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c
index 6fc229d..827fded 100644
--- a/Objects/bytearrayobject.c
+++ b/Objects/bytearrayobject.c
@@ -5,24 +5,16 @@
#include "structmember.h"
#include "bytes_methods.h"
-static PyByteArrayObject *nullbytes = NULL;
char _PyByteArray_empty_string[] = "";
void
PyByteArray_Fini(void)
{
- Py_CLEAR(nullbytes);
}
int
PyByteArray_Init(void)
{
- nullbytes = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
- if (nullbytes == NULL)
- return 0;
- nullbytes->ob_bytes = NULL;
- Py_SIZE(nullbytes) = nullbytes->ob_alloc = 0;
- nullbytes->ob_exports = 0;
return 1;
}
@@ -41,6 +33,7 @@ _getbytevalue(PyObject* arg, int *value)
PyObject *index = PyNumber_Index(arg);
if (index == NULL) {
PyErr_Format(PyExc_TypeError, "an integer is required");
+ *value = -1;
return 0;
}
face_value = PyLong_AsLong(index);
@@ -50,6 +43,7 @@ _getbytevalue(PyObject* arg, int *value)
if (face_value < 0 || face_value >= 256) {
/* this includes the OverflowError in case the long is too large */
PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
+ *value = -1;
return 0;
}
@@ -316,9 +310,9 @@ bytearray_repeat(PyByteArrayObject *self, Py_ssize_t count)
if (count < 0)
count = 0;
mysize = Py_SIZE(self);
- size = mysize * count;
- if (count != 0 && size / count != mysize)
+ if (count > 0 && mysize > PY_SSIZE_T_MAX / count)
return PyErr_NoMemory();
+ size = mysize * count;
result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size);
if (result != NULL && size != 0) {
if (mysize == 1)
@@ -341,9 +335,9 @@ bytearray_irepeat(PyByteArrayObject *self, Py_ssize_t count)
if (count < 0)
count = 0;
mysize = Py_SIZE(self);
- size = mysize * count;
- if (count != 0 && size / count != mysize)
+ if (count > 0 && mysize > PY_SSIZE_T_MAX / count)
return PyErr_NoMemory();
+ size = mysize * count;
if (size < self->ob_alloc) {
Py_SIZE(self) = size;
self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
@@ -395,7 +389,7 @@ bytearray_subscript(PyByteArrayObject *self, PyObject *index)
}
else if (PySlice_Check(index)) {
Py_ssize_t start, stop, step, slicelength, cur, i;
- if (PySlice_GetIndicesEx((PySliceObject *)index,
+ if (PySlice_GetIndicesEx(index,
PyByteArray_GET_SIZE(self),
&start, &stop, &step, &slicelength) < 0) {
return NULL;
@@ -579,7 +573,7 @@ bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *valu
}
}
else if (PySlice_Check(index)) {
- if (PySlice_GetIndicesEx((PySliceObject *)index,
+ if (PySlice_GetIndicesEx(index,
PyByteArray_GET_SIZE(self),
&start, &stop, &step, &slicelen) < 0) {
return -1;
@@ -655,6 +649,11 @@ bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *valu
if (!_canresize(self))
return -1;
+
+ if (slicelen == 0)
+ /* Nothing to do here. */
+ return 0;
+
if (step < 0) {
stop = start + 1;
start = stop + step * (slicelen - 1) - 1;
@@ -671,7 +670,7 @@ bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *valu
self->ob_bytes + cur + 1, lim);
}
/* Move the tail of the bytes, in one chunk */
- cur = start + slicelen*step;
+ cur = start + (size_t)slicelen*step;
if (cur < (size_t)PyByteArray_GET_SIZE(self)) {
memmove(self->ob_bytes + cur - slicelen,
self->ob_bytes + cur,
@@ -685,7 +684,8 @@ bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *valu
}
else {
/* Assign slice */
- Py_ssize_t cur, i;
+ Py_ssize_t i;
+ size_t cur;
if (needed != slicelen) {
PyErr_Format(PyExc_ValueError,
@@ -1038,19 +1038,19 @@ bytearray_dealloc(PyByteArrayObject *self)
/* Methods */
#define STRINGLIB_CHAR char
-#define STRINGLIB_CMP memcmp
#define STRINGLIB_LEN PyByteArray_GET_SIZE
#define STRINGLIB_STR PyByteArray_AS_STRING
#define STRINGLIB_NEW PyByteArray_FromStringAndSize
-#define STRINGLIB_EMPTY nullbytes
+#define STRINGLIB_ISSPACE Py_ISSPACE
+#define STRINGLIB_ISLINEBREAK(x) ((x == '\n') || (x == '\r'))
#define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
#define STRINGLIB_MUTABLE 1
-#define FROM_BYTEARRAY 1
#include "stringlib/fastsearch.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/partition.h"
+#include "stringlib/split.h"
#include "stringlib/ctype.h"
#include "stringlib/transmogrify.h"
@@ -1058,21 +1058,20 @@ bytearray_dealloc(PyByteArrayObject *self)
/* The following Py_LOCAL_INLINE and Py_LOCAL functions
were copied from the old char* style string object. */
-Py_LOCAL_INLINE(void)
-_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
-{
- if (*end > len)
- *end = len;
- else if (*end < 0)
- *end += len;
- if (*end < 0)
- *end = 0;
- if (*start < 0)
- *start += len;
- if (*start < 0)
- *start = 0;
-}
-
+/* helper macro to fixup start/end slice values */
+#define ADJUST_INDICES(start, end, len) \
+ if (end > len) \
+ end = len; \
+ else if (end < 0) { \
+ end += len; \
+ if (end < 0) \
+ end = 0; \
+ } \
+ if (start < 0) { \
+ start += len; \
+ if (start < 0) \
+ start = 0; \
+ }
Py_LOCAL_INLINE(Py_ssize_t)
bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
@@ -1139,10 +1138,10 @@ bytearray_count(PyByteArrayObject *self, PyObject *args)
if (_getbuffer(sub_obj, &vsub) < 0)
return NULL;
- _adjust_indices(&start, &end, PyByteArray_GET_SIZE(self));
+ ADJUST_INDICES(start, end, PyByteArray_GET_SIZE(self));
count_obj = PyLong_FromSsize_t(
- stringlib_count(str + start, end - start, vsub.buf, vsub.len)
+ stringlib_count(str + start, end - start, vsub.buf, vsub.len, PY_SSIZE_T_MAX)
);
PyBuffer_Release(&vsub);
return count_obj;
@@ -1250,7 +1249,7 @@ _bytearray_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start
if (_getbuffer(substr, &vsubstr) < 0)
return -1;
- _adjust_indices(&start, &end, len);
+ ADJUST_INDICES(start, end, len);
if (direction < 0) {
/* startswith */
@@ -1460,20 +1459,11 @@ bytearray_maketrans(PyObject *null, PyObject *args)
}
-#define FORWARD 1
-#define REVERSE -1
-
/* find and count characters and substrings */
#define findchar(target, target_len, c) \
((char *)memchr((const void *)(target), c, target_len))
-/* Don't call if length < 2 */
-#define Py_STRING_MATCH(target, offset, pattern, length) \
- (target[offset] == pattern[0] && \
- target[offset+length-1] == pattern[length-1] && \
- !memcmp(target+offset+1, pattern+1, length-2) )
-
/* Bytes ops must return a string, create a copy */
Py_LOCAL(PyByteArrayObject *)
@@ -1501,93 +1491,6 @@ countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount
return count;
}
-Py_LOCAL(Py_ssize_t)
-findstring(const char *target, Py_ssize_t target_len,
- const char *pattern, Py_ssize_t pattern_len,
- Py_ssize_t start,
- Py_ssize_t end,
- int direction)
-{
- if (start < 0) {
- start += target_len;
- if (start < 0)
- start = 0;
- }
- if (end > target_len) {
- end = target_len;
- } else if (end < 0) {
- end += target_len;
- if (end < 0)
- end = 0;
- }
-
- /* zero-length substrings always match at the first attempt */
- if (pattern_len == 0)
- return (direction > 0) ? start : end;
-
- end -= pattern_len;
-
- if (direction < 0) {
- for (; end >= start; end--)
- if (Py_STRING_MATCH(target, end, pattern, pattern_len))
- return end;
- } else {
- for (; start <= end; start++)
- if (Py_STRING_MATCH(target, start, pattern, pattern_len))
- return start;
- }
- return -1;
-}
-
-Py_LOCAL_INLINE(Py_ssize_t)
-countstring(const char *target, Py_ssize_t target_len,
- const char *pattern, Py_ssize_t pattern_len,
- Py_ssize_t start,
- Py_ssize_t end,
- int direction, Py_ssize_t maxcount)
-{
- Py_ssize_t count=0;
-
- if (start < 0) {
- start += target_len;
- if (start < 0)
- start = 0;
- }
- if (end > target_len) {
- end = target_len;
- } else if (end < 0) {
- end += target_len;
- if (end < 0)
- end = 0;
- }
-
- /* zero-length substrings match everywhere */
- if (pattern_len == 0 || maxcount == 0) {
- if (target_len+1 < maxcount)
- return target_len+1;
- return maxcount;
- }
-
- end -= pattern_len;
- if (direction < 0) {
- for (; (end >= start); end--)
- if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
- count++;
- if (--maxcount <= 0) break;
- end -= pattern_len-1;
- }
- } else {
- for (; (start <= end); start++)
- if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
- count++;
- if (--maxcount <= 0)
- break;
- start += pattern_len-1;
- }
- }
- return count;
-}
-
/* Algorithms for different cases of string replacement */
@@ -1599,30 +1502,28 @@ replace_interleave(PyByteArrayObject *self,
{
char *self_s, *result_s;
Py_ssize_t self_len, result_len;
- Py_ssize_t count, i, product;
+ Py_ssize_t count, i;
PyByteArrayObject *result;
self_len = PyByteArray_GET_SIZE(self);
- /* 1 at the end plus 1 after every character */
- count = self_len+1;
- if (maxcount < count)
+ /* 1 at the end plus 1 after every character;
+ count = min(maxcount, self_len + 1) */
+ if (maxcount <= self_len)
count = maxcount;
+ else
+ /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
+ count = self_len + 1;
/* Check for overflow */
/* result_len = count * to_len + self_len; */
- product = count * to_len;
- if (product / to_len != count) {
- PyErr_SetString(PyExc_OverflowError,
- "replace string is too long");
- return NULL;
- }
- result_len = product + self_len;
- if (result_len < 0) {
+ assert(count > 0);
+ if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
PyErr_SetString(PyExc_OverflowError,
"replace string is too long");
return NULL;
}
+ result_len = count * to_len + self_len;
if (! (result = (PyByteArrayObject *)
PyByteArray_FromStringAndSize(NULL, result_len)) )
@@ -1709,10 +1610,9 @@ replace_delete_substring(PyByteArrayObject *self,
self_len = PyByteArray_GET_SIZE(self);
self_s = PyByteArray_AS_STRING(self);
- count = countstring(self_s, self_len,
- from_s, from_len,
- 0, self_len, 1,
- maxcount);
+ count = stringlib_count(self_s, self_len,
+ from_s, from_len,
+ maxcount);
if (count == 0) {
/* no matches */
@@ -1731,9 +1631,9 @@ replace_delete_substring(PyByteArrayObject *self,
start = self_s;
end = self_s + self_len;
while (count-- > 0) {
- offset = findstring(start, end-start,
- from_s, from_len,
- 0, end-start, FORWARD);
+ offset = stringlib_find(start, end-start,
+ from_s, from_len,
+ 0);
if (offset == -1)
break;
next = start + offset;
@@ -1809,9 +1709,9 @@ replace_substring_in_place(PyByteArrayObject *self,
self_s = PyByteArray_AS_STRING(self);
self_len = PyByteArray_GET_SIZE(self);
- offset = findstring(self_s, self_len,
- from_s, from_len,
- 0, self_len, FORWARD);
+ offset = stringlib_find(self_s, self_len,
+ from_s, from_len,
+ 0);
if (offset == -1) {
/* No matches; return the original bytes */
return return_self(self);
@@ -1831,9 +1731,9 @@ replace_substring_in_place(PyByteArrayObject *self,
end = result_s + self_len;
while ( --maxcount > 0) {
- offset = findstring(start, end-start,
- from_s, from_len,
- 0, end-start, FORWARD);
+ offset = stringlib_find(start, end-start,
+ from_s, from_len,
+ 0);
if (offset==-1)
break;
Py_MEMCPY(start+offset, to_s, from_len);
@@ -1853,7 +1753,7 @@ replace_single_character(PyByteArrayObject *self,
char *self_s, *result_s;
char *start, *next, *end;
Py_ssize_t self_len, result_len;
- Py_ssize_t count, product;
+ Py_ssize_t count;
PyByteArrayObject *result;
self_s = PyByteArray_AS_STRING(self);
@@ -1867,16 +1767,12 @@ replace_single_character(PyByteArrayObject *self,
/* use the difference between current and new, hence the "-1" */
/* result_len = self_len + count * (to_len-1) */
- product = count * (to_len-1);
- if (product / (to_len-1) != count) {
+ assert(count > 0);
+ if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
return NULL;
}
- result_len = self_len + product;
- if (result_len < 0) {
- PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
- return NULL;
- }
+ result_len = self_len + count * (to_len - 1);
if ( (result = (PyByteArrayObject *)
PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
@@ -1920,15 +1816,16 @@ replace_substring(PyByteArrayObject *self,
char *self_s, *result_s;
char *start, *next, *end;
Py_ssize_t self_len, result_len;
- Py_ssize_t count, offset, product;
+ Py_ssize_t count, offset;
PyByteArrayObject *result;
self_s = PyByteArray_AS_STRING(self);
self_len = PyByteArray_GET_SIZE(self);
- count = countstring(self_s, self_len,
- from_s, from_len,
- 0, self_len, FORWARD, maxcount);
+ count = stringlib_count(self_s, self_len,
+ from_s, from_len,
+ maxcount);
+
if (count == 0) {
/* no matches, return unchanged */
return return_self(self);
@@ -1936,16 +1833,12 @@ replace_substring(PyByteArrayObject *self,
/* Check for overflow */
/* result_len = self_len + count * (to_len-from_len) */
- product = count * (to_len-from_len);
- if (product / (to_len-from_len) != count) {
- PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
- return NULL;
- }
- result_len = self_len + product;
- if (result_len < 0) {
+ assert(count > 0);
+ if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
return NULL;
}
+ result_len = self_len + count * (to_len - from_len);
if ( (result = (PyByteArrayObject *)
PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
@@ -1955,9 +1848,9 @@ replace_substring(PyByteArrayObject *self,
start = self_s;
end = self_s + self_len;
while (count-- > 0) {
- offset = findstring(start, end-start,
- from_s, from_len,
- 0, end-start, FORWARD);
+ offset = stringlib_find(start, end-start,
+ from_s, from_len,
+ 0);
if (offset == -1)
break;
next = start+offset;
@@ -2086,123 +1979,6 @@ bytearray_replace(PyByteArrayObject *self, PyObject *args)
return res;
}
-
-/* Overallocate the initial list to reduce the number of reallocs for small
- split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
- resizes, to sizes 4, 8, then 16. Most observed string splits are for human
- text (roughly 11 words per line) and field delimited data (usually 1-10
- fields). For large strings the split algorithms are bandwidth limited
- so increasing the preallocation likely will not improve things.*/
-
-#define MAX_PREALLOC 12
-
-/* 5 splits gives 6 elements */
-#define PREALLOC_SIZE(maxsplit) \
- (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)
-
-#define SPLIT_APPEND(data, left, right) \
- str = PyByteArray_FromStringAndSize((data) + (left), \
- (right) - (left)); \
- if (str == NULL) \
- goto onError; \
- if (PyList_Append(list, str)) { \
- Py_DECREF(str); \
- goto onError; \
- } \
- else \
- Py_DECREF(str);
-
-#define SPLIT_ADD(data, left, right) { \
- str = PyByteArray_FromStringAndSize((data) + (left), \
- (right) - (left)); \
- if (str == NULL) \
- goto onError; \
- if (count < MAX_PREALLOC) { \
- PyList_SET_ITEM(list, count, str); \
- } else { \
- if (PyList_Append(list, str)) { \
- Py_DECREF(str); \
- goto onError; \
- } \
- else \
- Py_DECREF(str); \
- } \
- count++; }
-
-/* Always force the list to the expected size. */
-#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
-
-
-Py_LOCAL_INLINE(PyObject *)
-split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
-{
- register Py_ssize_t i, j, count = 0;
- PyObject *str;
- PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
-
- if (list == NULL)
- return NULL;
-
- i = j = 0;
- while ((j < len) && (maxcount-- > 0)) {
- for(; j < len; j++) {
- /* I found that using memchr makes no difference */
- if (s[j] == ch) {
- SPLIT_ADD(s, i, j);
- i = j = j + 1;
- break;
- }
- }
- }
- if (i <= len) {
- SPLIT_ADD(s, i, len);
- }
- FIX_PREALLOC_SIZE(list);
- return list;
-
- onError:
- Py_DECREF(list);
- return NULL;
-}
-
-
-Py_LOCAL_INLINE(PyObject *)
-split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
-{
- register Py_ssize_t i, j, count = 0;
- PyObject *str;
- PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
-
- if (list == NULL)
- return NULL;
-
- for (i = j = 0; i < len; ) {
- /* find a token */
- while (i < len && Py_ISSPACE(s[i]))
- i++;
- j = i;
- while (i < len && !Py_ISSPACE(s[i]))
- i++;
- if (j < i) {
- if (maxcount-- <= 0)
- break;
- SPLIT_ADD(s, j, i);
- while (i < len && Py_ISSPACE(s[i]))
- i++;
- j = i;
- }
- }
- if (j < len) {
- SPLIT_ADD(s, j, len);
- }
- FIX_PREALLOC_SIZE(list);
- return list;
-
- onError:
- Py_DECREF(list);
- return NULL;
-}
-
PyDoc_STRVAR(split__doc__,
"B.split([sep[, maxsplit]]) -> list of bytearrays\n\
\n\
@@ -2214,14 +1990,11 @@ If maxsplit is given, at most maxsplit splits are done.");
static PyObject *
bytearray_split(PyByteArrayObject *self, PyObject *args)
{
- Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
- Py_ssize_t maxsplit = -1, count = 0;
+ Py_ssize_t len = PyByteArray_GET_SIZE(self), n;
+ Py_ssize_t maxsplit = -1;
const char *s = PyByteArray_AS_STRING(self), *sub;
- PyObject *list, *str, *subobj = Py_None;
+ PyObject *list, *subobj = Py_None;
Py_buffer vsub;
-#ifdef USE_FAST
- Py_ssize_t pos;
-#endif
if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
return NULL;
@@ -2229,86 +2002,18 @@ bytearray_split(PyByteArrayObject *self, PyObject *args)
maxsplit = PY_SSIZE_T_MAX;
if (subobj == Py_None)
- return split_whitespace(s, len, maxsplit);
+ return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
if (_getbuffer(subobj, &vsub) < 0)
return NULL;
sub = vsub.buf;
n = vsub.len;
- if (n == 0) {
- PyErr_SetString(PyExc_ValueError, "empty separator");
- PyBuffer_Release(&vsub);
- return NULL;
- }
- if (n == 1) {
- list = split_char(s, len, sub[0], maxsplit);
- PyBuffer_Release(&vsub);
- return list;
- }
-
- list = PyList_New(PREALLOC_SIZE(maxsplit));
- if (list == NULL) {
- PyBuffer_Release(&vsub);
- return NULL;
- }
-
-#ifdef USE_FAST
- i = j = 0;
- while (maxsplit-- > 0) {
- pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
- if (pos < 0)
- break;
- j = i+pos;
- SPLIT_ADD(s, i, j);
- i = j + n;
- }
-#else
- i = j = 0;
- while ((j+n <= len) && (maxsplit-- > 0)) {
- for (; j+n <= len; j++) {
- if (Py_STRING_MATCH(s, j, sub, n)) {
- SPLIT_ADD(s, i, j);
- i = j = j + n;
- break;
- }
- }
- }
-#endif
- SPLIT_ADD(s, i, len);
- FIX_PREALLOC_SIZE(list);
+ list = stringlib_split(
+ (PyObject*) self, s, len, sub, n, maxsplit
+ );
PyBuffer_Release(&vsub);
return list;
-
- onError:
- Py_DECREF(list);
- PyBuffer_Release(&vsub);
- return NULL;
-}
-
-/* stringlib's partition shares nullbytes in some cases.
- undo this, we don't want the nullbytes to be shared. */
-static PyObject *
-make_nullbytes_unique(PyObject *result)
-{
- if (result != NULL) {
- int i;
- assert(PyTuple_Check(result));
- assert(PyTuple_GET_SIZE(result) == 3);
- for (i = 0; i < 3; i++) {
- if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) {
- PyObject *new = PyByteArray_FromStringAndSize(NULL, 0);
- if (new == NULL) {
- Py_DECREF(result);
- result = NULL;
- break;
- }
- Py_DECREF(nullbytes);
- PyTuple_SET_ITEM(result, i, new);
- }
- }
- }
- return result;
}
PyDoc_STRVAR(partition__doc__,
@@ -2335,7 +2040,7 @@ bytearray_partition(PyByteArrayObject *self, PyObject *sep_obj)
);
Py_DECREF(bytesep);
- return make_nullbytes_unique(result);
+ return result;
}
PyDoc_STRVAR(rpartition__doc__,
@@ -2363,81 +2068,7 @@ bytearray_rpartition(PyByteArrayObject *self, PyObject *sep_obj)
);
Py_DECREF(bytesep);
- return make_nullbytes_unique(result);
-}
-
-Py_LOCAL_INLINE(PyObject *)
-rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
-{
- register Py_ssize_t i, j, count=0;
- PyObject *str;
- PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
-
- if (list == NULL)
- return NULL;
-
- i = j = len - 1;
- while ((i >= 0) && (maxcount-- > 0)) {
- for (; i >= 0; i--) {
- if (s[i] == ch) {
- SPLIT_ADD(s, i + 1, j + 1);
- j = i = i - 1;
- break;
- }
- }
- }
- if (j >= -1) {
- SPLIT_ADD(s, 0, j + 1);
- }
- FIX_PREALLOC_SIZE(list);
- if (PyList_Reverse(list) < 0)
- goto onError;
-
- return list;
-
- onError:
- Py_DECREF(list);
- return NULL;
-}
-
-Py_LOCAL_INLINE(PyObject *)
-rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
-{
- register Py_ssize_t i, j, count = 0;
- PyObject *str;
- PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
-
- if (list == NULL)
- return NULL;
-
- for (i = j = len - 1; i >= 0; ) {
- /* find a token */
- while (i >= 0 && Py_ISSPACE(s[i]))
- i--;
- j = i;
- while (i >= 0 && !Py_ISSPACE(s[i]))
- i--;
- if (j > i) {
- if (maxcount-- <= 0)
- break;
- SPLIT_ADD(s, i + 1, j + 1);
- while (i >= 0 && Py_ISSPACE(s[i]))
- i--;
- j = i;
- }
- }
- if (j >= 0) {
- SPLIT_ADD(s, 0, j + 1);
- }
- FIX_PREALLOC_SIZE(list);
- if (PyList_Reverse(list) < 0)
- goto onError;
-
- return list;
-
- onError:
- Py_DECREF(list);
- return NULL;
+ return result;
}
PyDoc_STRVAR(rsplit__doc__,
@@ -2452,10 +2083,10 @@ If maxsplit is given, at most maxsplit splits are done.");
static PyObject *
bytearray_rsplit(PyByteArrayObject *self, PyObject *args)
{
- Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
- Py_ssize_t maxsplit = -1, count = 0;
+ Py_ssize_t len = PyByteArray_GET_SIZE(self), n;
+ Py_ssize_t maxsplit = -1;
const char *s = PyByteArray_AS_STRING(self), *sub;
- PyObject *list, *str, *subobj = Py_None;
+ PyObject *list, *subobj = Py_None;
Py_buffer vsub;
if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
@@ -2464,54 +2095,18 @@ bytearray_rsplit(PyByteArrayObject *self, PyObject *args)
maxsplit = PY_SSIZE_T_MAX;
if (subobj == Py_None)
- return rsplit_whitespace(s, len, maxsplit);
+ return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
if (_getbuffer(subobj, &vsub) < 0)
return NULL;
sub = vsub.buf;
n = vsub.len;
- if (n == 0) {
- PyErr_SetString(PyExc_ValueError, "empty separator");
- PyBuffer_Release(&vsub);
- return NULL;
- }
- else if (n == 1) {
- list = rsplit_char(s, len, sub[0], maxsplit);
- PyBuffer_Release(&vsub);
- return list;
- }
-
- list = PyList_New(PREALLOC_SIZE(maxsplit));
- if (list == NULL) {
- PyBuffer_Release(&vsub);
- return NULL;
- }
-
- j = len;
- i = j - n;
-
- while ( (i >= 0) && (maxsplit-- > 0) ) {
- for (; i>=0; i--) {
- if (Py_STRING_MATCH(s, i, sub, n)) {
- SPLIT_ADD(s, i + n, j);
- j = i;
- i -= n;
- break;
- }
- }
- }
- SPLIT_ADD(s, 0, j);
- FIX_PREALLOC_SIZE(list);
- if (PyList_Reverse(list) < 0)
- goto onError;
+ list = stringlib_rsplit(
+ (PyObject*) self, s, len, sub, n, maxsplit
+ );
PyBuffer_Release(&vsub);
return list;
-
-onError:
- Py_DECREF(list);
- PyBuffer_Release(&vsub);
- return NULL;
}
PyDoc_STRVAR(reverse__doc__,
@@ -2687,8 +2282,8 @@ bytearray_pop(PyByteArrayObject *self, PyObject *args)
return NULL;
if (n == 0) {
- PyErr_SetString(PyExc_OverflowError,
- "cannot pop an empty bytearray");
+ PyErr_SetString(PyExc_IndexError,
+ "pop from empty bytearray");
return NULL;
}
if (where < 0)
@@ -2867,22 +2462,23 @@ bytearray_rstrip(PyByteArrayObject *self, PyObject *args)
}
PyDoc_STRVAR(decode_doc,
-"B.decode([encoding[, errors]]) -> str\n\
+"B.decode(encoding='utf-8', errors='strict') -> str\n\
\n\
-Decode B using the codec registered for encoding. encoding defaults\n\
-to the default encoding. errors may be given to set a different error\n\
+Decode B using the codec registered for encoding. Default encoding\n\
+is 'utf-8'. errors may be given to set a different error\n\
handling scheme. Default is 'strict' meaning that encoding errors raise\n\
a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
as well as any other name registered with codecs.register_error that is\n\
able to handle UnicodeDecodeErrors.");
static PyObject *
-bytearray_decode(PyObject *self, PyObject *args)
+bytearray_decode(PyObject *self, PyObject *args, PyObject *kwargs)
{
const char *encoding = NULL;
const char *errors = NULL;
+ static char *kwlist[] = {"encoding", "errors", 0};
- if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
return NULL;
if (encoding == NULL)
encoding = PyUnicode_GetDefaultEncoding();
@@ -2976,6 +2572,27 @@ bytearray_join(PyByteArrayObject *self, PyObject *it)
return NULL;
}
+PyDoc_STRVAR(splitlines__doc__,
+"B.splitlines([keepends]) -> list of lines\n\
+\n\
+Return a list of the lines in B, breaking at line boundaries.\n\
+Line breaks are not included in the resulting list unless keepends\n\
+is given and true.");
+
+static PyObject*
+bytearray_splitlines(PyObject *self, PyObject *args)
+{
+ int keepends = 0;
+
+ if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
+ return NULL;
+
+ return stringlib_splitlines(
+ (PyObject*) self, PyByteArray_AS_STRING(self),
+ PyByteArray_GET_SIZE(self), keepends
+ );
+}
+
PyDoc_STRVAR(fromhex_doc,
"bytearray.fromhex(string) -> bytearray (static method)\n\
\n\
@@ -3112,7 +2729,7 @@ bytearray_methods[] = {
_Py_capitalize__doc__},
{"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
{"count", (PyCFunction)bytearray_count, METH_VARARGS, count__doc__},
- {"decode", (PyCFunction)bytearray_decode, METH_VARARGS, decode_doc},
+ {"decode", (PyCFunction)bytearray_decode, METH_VARARGS | METH_KEYWORDS, decode_doc},
{"endswith", (PyCFunction)bytearray_endswith, METH_VARARGS, endswith__doc__},
{"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
expandtabs__doc__},
@@ -3154,7 +2771,7 @@ bytearray_methods[] = {
{"rsplit", (PyCFunction)bytearray_rsplit, METH_VARARGS, rsplit__doc__},
{"rstrip", (PyCFunction)bytearray_rstrip, METH_VARARGS, rstrip__doc__},
{"split", (PyCFunction)bytearray_split, METH_VARARGS, split__doc__},
- {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
+ {"splitlines", (PyCFunction)bytearray_splitlines, METH_VARARGS,
splitlines__doc__},
{"startswith", (PyCFunction)bytearray_startswith, METH_VARARGS ,
startswith__doc__},