diff options
Diffstat (limited to 'Objects/bytesobject.c')
-rw-r--r-- | Objects/bytesobject.c | 775 |
1 files changed, 211 insertions, 564 deletions
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index e6ab440..d63fabc 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -56,7 +56,7 @@ static PyBytesObject *nullstring; If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1' bytes (setting the last byte to the null terminating character) and you can fill in the data yourself. If `str' is non-NULL then the resulting - PyString object must be treated as immutable and you must not fill in nor + PyBytes object must be treated as immutable and you must not fill in nor alter the data yourself, since the strings may be shared. The PyObject member `op->ob_size', which denotes the number of "extra @@ -173,20 +173,12 @@ PyBytes_FromFormatV(const char *format, va_list vargs) char *s; PyObject* string; -#ifdef VA_LIST_IS_ARRAY - Py_MEMCPY(count, vargs, sizeof(va_list)); -#else -#ifdef __va_copy - __va_copy(count, vargs); -#else - count = vargs; -#endif -#endif + Py_VA_COPY(count, vargs); /* step 1: figure out how large a buffer we need */ for (f = format; *f; f++) { if (*f == '%') { const char* p = f; - while (*++f && *f != '%' && !ISALPHA(*f)) + while (*++f && *f != '%' && !Py_ISALPHA(*f)) ; /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since @@ -255,15 +247,15 @@ PyBytes_FromFormatV(const char *format, va_list vargs) /* parse the width.precision part (we're only interested in the precision value, if any) */ n = 0; - while (ISDIGIT(*f)) + while (Py_ISDIGIT(*f)) n = (n*10) + *f++ - '0'; if (*f == '.') { f++; n = 0; - while (ISDIGIT(*f)) + while (Py_ISDIGIT(*f)) n = (n*10) + *f++ - '0'; } - while (*f && *f != '%' && !ISALPHA(*f)) + while (*f && *f != '%' && !Py_ISALPHA(*f)) f++; /* handle the long flag, but only for %ld and %lu. others can be added when necessary. */ @@ -454,22 +446,22 @@ PyObject *PyBytes_DecodeEscape(const char *s, *p++ = c; break; case 'x': - if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) { + if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) { unsigned int x = 0; c = Py_CHARMASK(*s); s++; - if (ISDIGIT(c)) + if (Py_ISDIGIT(c)) x = c - '0'; - else if (ISLOWER(c)) + else if (Py_ISLOWER(c)) x = 10 + c - 'a'; else x = 10 + c - 'A'; x = x << 4; c = Py_CHARMASK(*s); s++; - if (ISDIGIT(c)) + if (Py_ISDIGIT(c)) x += c - '0'; - else if (ISLOWER(c)) + else if (Py_ISLOWER(c)) x += 10 + c - 'a'; else x += 10 + c - 'A'; @@ -563,29 +555,15 @@ PyBytes_AsStringAndSize(register PyObject *obj, /* Methods */ #include "stringlib/stringdefs.h" -#define STRINGLIB_CHAR char - -#define STRINGLIB_CMP memcmp -#define STRINGLIB_LEN PyBytes_GET_SIZE -#define STRINGLIB_NEW PyBytes_FromStringAndSize -#define STRINGLIB_STR PyBytes_AS_STRING -/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */ - -#define STRINGLIB_EMPTY nullstring -#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact -#define STRINGLIB_MUTABLE 0 #include "stringlib/fastsearch.h" - #include "stringlib/count.h" #include "stringlib/find.h" #include "stringlib/partition.h" +#include "stringlib/split.h" #include "stringlib/ctype.h" -#include "stringlib/transmogrify.h" -#define _Py_InsertThousandsGrouping _PyBytes_InsertThousandsGrouping -#define _Py_InsertThousandsGroupingLocale _PyBytes_InsertThousandsGroupingLocale -#include "stringlib/localeutil.h" +#include "stringlib/transmogrify.h" PyObject * PyBytes_Repr(PyObject *obj, int smartquotes) @@ -593,13 +571,14 @@ PyBytes_Repr(PyObject *obj, int smartquotes) static const char *hexdigits = "0123456789abcdef"; register PyBytesObject* op = (PyBytesObject*) obj; Py_ssize_t length = Py_SIZE(op); - size_t newsize = 3 + 4 * length; + size_t newsize; PyObject *v; - if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) { + if (length > (PY_SSIZE_T_MAX - 3) / 4) { PyErr_SetString(PyExc_OverflowError, "bytes object is too large to make repr"); return NULL; } + newsize = 3 + 4 * length; v = PyUnicode_FromUnicode(NULL, newsize); if (v == NULL) { return NULL; @@ -746,12 +725,12 @@ bytes_repeat(register PyBytesObject *a, register Py_ssize_t n) /* watch out for overflows: the size can overflow int, * and the # of bytes needed can overflow size_t */ - size = Py_SIZE(a) * n; - if (n && size / n != Py_SIZE(a)) { + if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) { PyErr_SetString(PyExc_OverflowError, "repeated bytes are too long"); return NULL; } + size = Py_SIZE(a) * n; if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) { Py_INCREF(a); return (PyObject *)a; @@ -889,12 +868,12 @@ bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op) return result; } -static long +static Py_hash_t bytes_hash(PyBytesObject *a) { register Py_ssize_t len; register unsigned char *p; - register long x; + register Py_hash_t x; if (a->ob_shash != -1) return a->ob_shash; @@ -911,7 +890,7 @@ bytes_hash(PyBytesObject *a) x = _Py_HashSecret.prefix; x ^= *p << 7; while (--len >= 0) - x = (1000003*x) ^ *p++; + x = (_PyHASH_MULTIPLIER*x) ^ *p++; x ^= Py_SIZE(a); x ^= _Py_HashSecret.suffix; if (x == -1) @@ -942,7 +921,7 @@ bytes_subscript(PyBytesObject* self, PyObject* item) char* result_buf; PyObject* result; - if (PySlice_GetIndicesEx((PySliceObject*)item, + if (PySlice_GetIndicesEx(item, PyBytes_GET_SIZE(self), &start, &stop, &step, &slicelength) < 0) { return NULL; @@ -1024,133 +1003,6 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"}; #define STRIPNAME(i) (stripformat[i]+3) - -/* Don't call if length < 2 */ -#define Py_STRING_MATCH(target, offset, pattern, length) \ - (target[offset] == pattern[0] && \ - target[offset+length-1] == pattern[length-1] && \ - !memcmp(target+offset+1, pattern+1, length-2) ) - - -/* Overallocate the initial list to reduce the number of reallocs for small - split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three - resizes, to sizes 4, 8, then 16. Most observed string splits are for human - text (roughly 11 words per line) and field delimited data (usually 1-10 - fields). For large strings the split algorithms are bandwidth limited - so increasing the preallocation likely will not improve things.*/ - -#define MAX_PREALLOC 12 - -/* 5 splits gives 6 elements */ -#define PREALLOC_SIZE(maxsplit) \ - (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1) - -#define SPLIT_ADD(data, left, right) { \ - str = PyBytes_FromStringAndSize((data) + (left), \ - (right) - (left)); \ - if (str == NULL) \ - goto onError; \ - if (count < MAX_PREALLOC) { \ - PyList_SET_ITEM(list, count, str); \ - } else { \ - if (PyList_Append(list, str)) { \ - Py_DECREF(str); \ - goto onError; \ - } \ - else \ - Py_DECREF(str); \ - } \ - count++; } - -/* Always force the list to the expected size. */ -#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count - -#define SKIP_SPACE(s, i, len) { while (i<len && ISSPACE(s[i])) i++; } -#define SKIP_NONSPACE(s, i, len) { while (i<len && !ISSPACE(s[i])) i++; } -#define RSKIP_SPACE(s, i) { while (i>=0 && ISSPACE(s[i])) i--; } -#define RSKIP_NONSPACE(s, i) { while (i>=0 && !ISSPACE(s[i])) i--; } - -Py_LOCAL_INLINE(PyObject *) -split_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit) -{ - const char *s = PyBytes_AS_STRING(self); - Py_ssize_t i, j, count=0; - PyObject *str; - PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit)); - - if (list == NULL) - return NULL; - - i = j = 0; - - while (maxsplit-- > 0) { - SKIP_SPACE(s, i, len); - if (i==len) break; - j = i; i++; - SKIP_NONSPACE(s, i, len); - if (j == 0 && i == len && PyBytes_CheckExact(self)) { - /* No whitespace in self, so just use it as list[0] */ - Py_INCREF(self); - PyList_SET_ITEM(list, 0, (PyObject *)self); - count++; - break; - } - SPLIT_ADD(s, j, i); - } - - if (i < len) { - /* Only occurs when maxsplit was reached */ - /* Skip any remaining whitespace and copy to end of string */ - SKIP_SPACE(s, i, len); - if (i != len) - SPLIT_ADD(s, i, len); - } - FIX_PREALLOC_SIZE(list); - return list; - onError: - Py_DECREF(list); - return NULL; -} - -Py_LOCAL_INLINE(PyObject *) -split_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount) -{ - const char *s = PyBytes_AS_STRING(self); - register Py_ssize_t i, j, count=0; - PyObject *str; - PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); - - if (list == NULL) - return NULL; - - i = j = 0; - while ((j < len) && (maxcount-- > 0)) { - for(; j<len; j++) { - /* I found that using memchr makes no difference */ - if (s[j] == ch) { - SPLIT_ADD(s, i, j); - i = j = j + 1; - break; - } - } - } - if (i == 0 && count == 0 && PyBytes_CheckExact(self)) { - /* ch not in self, so just use self as list[0] */ - Py_INCREF(self); - PyList_SET_ITEM(list, 0, (PyObject *)self); - count++; - } - else if (i <= len) { - SPLIT_ADD(s, i, len); - } - FIX_PREALLOC_SIZE(list); - return list; - - onError: - Py_DECREF(list); - return NULL; -} - PyDoc_STRVAR(split__doc__, "B.split([sep[, maxsplit]]) -> list of bytes\n\ \n\ @@ -1162,74 +1014,26 @@ If maxsplit is given, at most maxsplit splits are done."); static PyObject * bytes_split(PyBytesObject *self, PyObject *args) { - Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j; - Py_ssize_t maxsplit = -1, count=0; + Py_ssize_t len = PyBytes_GET_SIZE(self), n; + Py_ssize_t maxsplit = -1; const char *s = PyBytes_AS_STRING(self), *sub; Py_buffer vsub; - PyObject *list, *str, *subobj = Py_None; -#ifdef USE_FAST - Py_ssize_t pos; -#endif + PyObject *list, *subobj = Py_None; if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit)) return NULL; if (maxsplit < 0) maxsplit = PY_SSIZE_T_MAX; if (subobj == Py_None) - return split_whitespace(self, len, maxsplit); + return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit); if (_getbuffer(subobj, &vsub) < 0) return NULL; sub = vsub.buf; n = vsub.len; - if (n == 0) { - PyErr_SetString(PyExc_ValueError, "empty separator"); - PyBuffer_Release(&vsub); - return NULL; - } - else if (n == 1) { - list = split_char(self, len, sub[0], maxsplit); - PyBuffer_Release(&vsub); - return list; - } - - list = PyList_New(PREALLOC_SIZE(maxsplit)); - if (list == NULL) { - PyBuffer_Release(&vsub); - return NULL; - } - -#ifdef USE_FAST - i = j = 0; - while (maxsplit-- > 0) { - pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH); - if (pos < 0) - break; - j = i+pos; - SPLIT_ADD(s, i, j); - i = j + n; - } -#else - i = j = 0; - while ((j+n <= len) && (maxsplit-- > 0)) { - for (; j+n <= len; j++) { - if (Py_STRING_MATCH(s, j, sub, n)) { - SPLIT_ADD(s, i, j); - i = j = j + n; - break; - } - } - } -#endif - SPLIT_ADD(s, i, len); - FIX_PREALLOC_SIZE(list); + list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit); PyBuffer_Release(&vsub); return list; - - onError: - Py_DECREF(list); - PyBuffer_Release(&vsub); - return NULL; } PyDoc_STRVAR(partition__doc__, @@ -1287,90 +1091,6 @@ bytes_rpartition(PyBytesObject *self, PyObject *sep_obj) ); } -Py_LOCAL_INLINE(PyObject *) -rsplit_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit) -{ - const char *s = PyBytes_AS_STRING(self); - Py_ssize_t i, j, count=0; - PyObject *str; - PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit)); - - if (list == NULL) - return NULL; - - i = j = len-1; - - while (maxsplit-- > 0) { - RSKIP_SPACE(s, i); - if (i<0) break; - j = i; i--; - RSKIP_NONSPACE(s, i); - if (j == len-1 && i < 0 && PyBytes_CheckExact(self)) { - /* No whitespace in self, so just use it as list[0] */ - Py_INCREF(self); - PyList_SET_ITEM(list, 0, (PyObject *)self); - count++; - break; - } - SPLIT_ADD(s, i + 1, j + 1); - } - if (i >= 0) { - /* Only occurs when maxsplit was reached. Skip any remaining - whitespace and copy to beginning of string. */ - RSKIP_SPACE(s, i); - if (i >= 0) - SPLIT_ADD(s, 0, i + 1); - - } - FIX_PREALLOC_SIZE(list); - if (PyList_Reverse(list) < 0) - goto onError; - return list; - onError: - Py_DECREF(list); - return NULL; -} - -Py_LOCAL_INLINE(PyObject *) -rsplit_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount) -{ - const char *s = PyBytes_AS_STRING(self); - register Py_ssize_t i, j, count=0; - PyObject *str; - PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); - - if (list == NULL) - return NULL; - - i = j = len - 1; - while ((i >= 0) && (maxcount-- > 0)) { - for (; i >= 0; i--) { - if (s[i] == ch) { - SPLIT_ADD(s, i + 1, j + 1); - j = i = i - 1; - break; - } - } - } - if (i < 0 && count == 0 && PyBytes_CheckExact(self)) { - /* ch not in self, so just use self as list[0] */ - Py_INCREF(self); - PyList_SET_ITEM(list, 0, (PyObject *)self); - count++; - } - else if (j >= -1) { - SPLIT_ADD(s, 0, j + 1); - } - FIX_PREALLOC_SIZE(list); - if (PyList_Reverse(list) < 0) - goto onError; - return list; - - onError: - Py_DECREF(list); - return NULL; -} - PyDoc_STRVAR(rsplit__doc__, "B.rsplit([sep[, maxsplit]]) -> list of bytes\n\ \n\ @@ -1384,71 +1104,28 @@ If maxsplit is given, at most maxsplit splits are done."); static PyObject * bytes_rsplit(PyBytesObject *self, PyObject *args) { - Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j; - Py_ssize_t maxsplit = -1, count=0; - const char *s, *sub; + Py_ssize_t len = PyBytes_GET_SIZE(self), n; + Py_ssize_t maxsplit = -1; + const char *s = PyBytes_AS_STRING(self), *sub; Py_buffer vsub; - PyObject *list, *str, *subobj = Py_None; + PyObject *list, *subobj = Py_None; if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit)) return NULL; if (maxsplit < 0) maxsplit = PY_SSIZE_T_MAX; if (subobj == Py_None) - return rsplit_whitespace(self, len, maxsplit); + return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit); if (_getbuffer(subobj, &vsub) < 0) return NULL; sub = vsub.buf; n = vsub.len; - if (n == 0) { - PyErr_SetString(PyExc_ValueError, "empty separator"); - PyBuffer_Release(&vsub); - return NULL; - } - else if (n == 1) { - list = rsplit_char(self, len, sub[0], maxsplit); - PyBuffer_Release(&vsub); - return list; - } - - list = PyList_New(PREALLOC_SIZE(maxsplit)); - if (list == NULL) { - PyBuffer_Release(&vsub); - return NULL; - } - - j = len; - i = j - n; - - s = PyBytes_AS_STRING(self); - while ( (i >= 0) && (maxsplit-- > 0) ) { - for (; i>=0; i--) { - if (Py_STRING_MATCH(s, i, sub, n)) { - SPLIT_ADD(s, i + n, j); - j = i; - i -= n; - break; - } - } - } - SPLIT_ADD(s, 0, j); - FIX_PREALLOC_SIZE(list); - if (PyList_Reverse(list) < 0) - goto onError; + list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit); PyBuffer_Release(&vsub); return list; - -onError: - Py_DECREF(list); - PyBuffer_Release(&vsub); - return NULL; } -#undef SPLIT_ADD -#undef MAX_PREALLOC -#undef PREALLOC_SIZE - PyDoc_STRVAR(join__doc__, "B.join(iterable_of_bytes) -> bytes\n\ @@ -1555,20 +1232,20 @@ _PyBytes_Join(PyObject *sep, PyObject *x) return bytes_join(sep, x); } -Py_LOCAL_INLINE(void) -bytes_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len) -{ - if (*end > len) - *end = len; - else if (*end < 0) - *end += len; - if (*end < 0) - *end = 0; - if (*start < 0) - *start += len; - if (*start < 0) - *start = 0; -} +/* helper macro to fixup start/end slice values */ +#define ADJUST_INDICES(start, end, len) \ + if (end > len) \ + end = len; \ + else if (end < 0) { \ + end += len; \ + if (end < 0) \ + end = 0; \ + } \ + if (start < 0) { \ + start += len; \ + if (start < 0) \ + start = 0; \ + } Py_LOCAL_INLINE(Py_ssize_t) bytes_find_internal(PyBytesObject *self, PyObject *args, int dir) @@ -1605,8 +1282,8 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir) PyDoc_STRVAR(find__doc__, "B.find(sub[, start[, end]]) -> int\n\ \n\ -Return the lowest index in S where substring sub is found,\n\ -such that sub is contained within s[start:end]. Optional\n\ +Return the lowest index in B where substring sub is found,\n\ +such that sub is contained within B[start:end]. Optional\n\ arguments start and end are interpreted as in slice notation.\n\ \n\ Return -1 on failure."); @@ -1645,7 +1322,7 @@ PyDoc_STRVAR(rfind__doc__, "B.rfind(sub[, start[, end]]) -> int\n\ \n\ Return the highest index in B where substring sub is found,\n\ -such that sub is contained within s[start:end]. Optional\n\ +such that sub is contained within B[start:end]. Optional\n\ arguments start and end are interpreted as in slice notation.\n\ \n\ Return -1 on failure."); @@ -1729,7 +1406,7 @@ do_strip(PyBytesObject *self, int striptype) i = 0; if (striptype != RIGHTSTRIP) { - while (i < len && ISSPACE(s[i])) { + while (i < len && Py_ISSPACE(s[i])) { i++; } } @@ -1738,7 +1415,7 @@ do_strip(PyBytesObject *self, int striptype) if (striptype != LEFTSTRIP) { do { j--; - } while (j >= i && ISSPACE(s[j])); + } while (j >= i && Py_ISSPACE(s[j])); j++; } @@ -1770,7 +1447,7 @@ PyDoc_STRVAR(strip__doc__, "B.strip([bytes]) -> bytes\n\ \n\ Strip leading and trailing bytes contained in the argument.\n\ -If the argument is omitted, strip trailing ASCII whitespace."); +If the argument is omitted, strip leading and trailing ASCII whitespace."); static PyObject * bytes_strip(PyBytesObject *self, PyObject *args) { @@ -1815,7 +1492,7 @@ PyDoc_STRVAR(count__doc__, "B.count(sub[, start[, end]]) -> int\n\ \n\ Return the number of non-overlapping occurrences of substring sub in\n\ -string S[start:end]. Optional arguments start and end are interpreted\n\ +string B[start:end]. Optional arguments start and end are interpreted\n\ as in slice notation."); static PyObject * @@ -1836,10 +1513,10 @@ bytes_count(PyBytesObject *self, PyObject *args) else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len)) return NULL; - bytes_adjust_indices(&start, &end, PyBytes_GET_SIZE(self)); + ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self)); return PyLong_FromSsize_t( - stringlib_count(str + start, end - start, sub, sub_len) + stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX) ); } @@ -1956,9 +1633,6 @@ bytes_maketrans(PyObject *null, PyObject *args) return _Py_bytes_maketrans(args); } -#define FORWARD 1 -#define REVERSE -1 - /* find and count characters and substrings */ #define findchar(target, target_len, c) \ @@ -1994,94 +1668,6 @@ countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount return count; } -Py_LOCAL(Py_ssize_t) -findstring(const char *target, Py_ssize_t target_len, - const char *pattern, Py_ssize_t pattern_len, - Py_ssize_t start, - Py_ssize_t end, - int direction) -{ - if (start < 0) { - start += target_len; - if (start < 0) - start = 0; - } - if (end > target_len) { - end = target_len; - } else if (end < 0) { - end += target_len; - if (end < 0) - end = 0; - } - - /* zero-length substrings always match at the first attempt */ - if (pattern_len == 0) - return (direction > 0) ? start : end; - - end -= pattern_len; - - if (direction < 0) { - for (; end >= start; end--) - if (Py_STRING_MATCH(target, end, pattern, pattern_len)) - return end; - } else { - for (; start <= end; start++) - if (Py_STRING_MATCH(target, start,pattern,pattern_len)) - return start; - } - return -1; -} - -Py_LOCAL_INLINE(Py_ssize_t) -countstring(const char *target, Py_ssize_t target_len, - const char *pattern, Py_ssize_t pattern_len, - Py_ssize_t start, - Py_ssize_t end, - int direction, Py_ssize_t maxcount) -{ - Py_ssize_t count=0; - - if (start < 0) { - start += target_len; - if (start < 0) - start = 0; - } - if (end > target_len) { - end = target_len; - } else if (end < 0) { - end += target_len; - if (end < 0) - end = 0; - } - - /* zero-length substrings match everywhere */ - if (pattern_len == 0 || maxcount == 0) { - if (target_len+1 < maxcount) - return target_len+1; - return maxcount; - } - - end -= pattern_len; - if (direction < 0) { - for (; (end >= start); end--) - if (Py_STRING_MATCH(target, end,pattern,pattern_len)) { - count++; - if (--maxcount <= 0) break; - end -= pattern_len-1; - } - } else { - for (; (start <= end); start++) - if (Py_STRING_MATCH(target, start, - pattern, pattern_len)) { - count++; - if (--maxcount <= 0) - break; - start += pattern_len-1; - } - } - return count; -} - /* Algorithms for different cases of string replacement */ @@ -2093,30 +1679,28 @@ replace_interleave(PyBytesObject *self, { char *self_s, *result_s; Py_ssize_t self_len, result_len; - Py_ssize_t count, i, product; + Py_ssize_t count, i; PyBytesObject *result; self_len = PyBytes_GET_SIZE(self); - /* 1 at the end plus 1 after every character */ - count = self_len+1; - if (maxcount < count) + /* 1 at the end plus 1 after every character; + count = min(maxcount, self_len + 1) */ + if (maxcount <= self_len) count = maxcount; + else + /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */ + count = self_len + 1; /* Check for overflow */ /* result_len = count * to_len + self_len; */ - product = count * to_len; - if (product / to_len != count) { - PyErr_SetString(PyExc_OverflowError, - "replacement bytes are too long"); - return NULL; - } - result_len = product + self_len; - if (result_len < 0) { + assert(count > 0); + if (to_len > (PY_SSIZE_T_MAX - self_len) / count) { PyErr_SetString(PyExc_OverflowError, "replacement bytes are too long"); return NULL; } + result_len = count * to_len + self_len; if (! (result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, result_len)) ) @@ -2202,10 +1786,9 @@ replace_delete_substring(PyBytesObject *self, self_len = PyBytes_GET_SIZE(self); self_s = PyBytes_AS_STRING(self); - count = countstring(self_s, self_len, - from_s, from_len, - 0, self_len, 1, - maxcount); + count = stringlib_count(self_s, self_len, + from_s, from_len, + maxcount); if (count == 0) { /* no matches */ @@ -2224,9 +1807,9 @@ replace_delete_substring(PyBytesObject *self, start = self_s; end = self_s + self_len; while (count-- > 0) { - offset = findstring(start, end-start, - from_s, from_len, - 0, end-start, FORWARD); + offset = stringlib_find(start, end-start, + from_s, from_len, + 0); if (offset == -1) break; next = start + offset; @@ -2302,9 +1885,9 @@ replace_substring_in_place(PyBytesObject *self, self_s = PyBytes_AS_STRING(self); self_len = PyBytes_GET_SIZE(self); - offset = findstring(self_s, self_len, - from_s, from_len, - 0, self_len, FORWARD); + offset = stringlib_find(self_s, self_len, + from_s, from_len, + 0); if (offset == -1) { /* No matches; return the original string */ return return_self(self); @@ -2324,9 +1907,9 @@ replace_substring_in_place(PyBytesObject *self, end = result_s + self_len; while ( --maxcount > 0) { - offset = findstring(start, end-start, - from_s, from_len, - 0, end-start, FORWARD); + offset = stringlib_find(start, end-start, + from_s, from_len, + 0); if (offset==-1) break; Py_MEMCPY(start+offset, to_s, from_len); @@ -2346,7 +1929,7 @@ replace_single_character(PyBytesObject *self, char *self_s, *result_s; char *start, *next, *end; Py_ssize_t self_len, result_len; - Py_ssize_t count, product; + Py_ssize_t count; PyBytesObject *result; self_s = PyBytes_AS_STRING(self); @@ -2360,18 +1943,13 @@ replace_single_character(PyBytesObject *self, /* use the difference between current and new, hence the "-1" */ /* result_len = self_len + count * (to_len-1) */ - product = count * (to_len-1); - if (product / (to_len-1) != count) { + assert(count > 0); + if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) { PyErr_SetString(PyExc_OverflowError, "replacement bytes are too long"); return NULL; } - result_len = self_len + product; - if (result_len < 0) { - PyErr_SetString(PyExc_OverflowError, - "replacment bytes are too long"); - return NULL; - } + result_len = self_len + count * (to_len - 1); if ( (result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, result_len)) == NULL) @@ -2414,15 +1992,16 @@ replace_substring(PyBytesObject *self, char *self_s, *result_s; char *start, *next, *end; Py_ssize_t self_len, result_len; - Py_ssize_t count, offset, product; + Py_ssize_t count, offset; PyBytesObject *result; self_s = PyBytes_AS_STRING(self); self_len = PyBytes_GET_SIZE(self); - count = countstring(self_s, self_len, - from_s, from_len, - 0, self_len, FORWARD, maxcount); + count = stringlib_count(self_s, self_len, + from_s, from_len, + maxcount); + if (count == 0) { /* no matches, return unchanged */ return return_self(self); @@ -2430,18 +2009,13 @@ replace_substring(PyBytesObject *self, /* Check for overflow */ /* result_len = self_len + count * (to_len-from_len) */ - product = count * (to_len-from_len); - if (product / (to_len-from_len) != count) { - PyErr_SetString(PyExc_OverflowError, - "replacement bytes are too long"); - return NULL; - } - result_len = self_len + product; - if (result_len < 0) { + assert(count > 0); + if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) { PyErr_SetString(PyExc_OverflowError, "replacement bytes are too long"); return NULL; } + result_len = self_len + count * (to_len-from_len); if ( (result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, result_len)) == NULL) @@ -2451,9 +2025,9 @@ replace_substring(PyBytesObject *self, start = self_s; end = self_s + self_len; while (count-- > 0) { - offset = findstring(start, end-start, - from_s, from_len, - 0, end-start, FORWARD); + offset = stringlib_find(start, end-start, + from_s, from_len, + 0); if (offset == -1) break; next = start+offset; @@ -2611,7 +2185,7 @@ _bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start, return -1; str = PyBytes_AS_STRING(self); - bytes_adjust_indices(&start, &end, len); + ADJUST_INDICES(start, end, len); if (direction < 0) { /* startswith */ @@ -2720,22 +2294,23 @@ bytes_endswith(PyBytesObject *self, PyObject *args) PyDoc_STRVAR(decode__doc__, -"B.decode([encoding[, errors]]) -> str\n\ +"B.decode(encoding='utf-8', errors='strict') -> str\n\ \n\ -Decode S using the codec registered for encoding. encoding defaults\n\ -to the default encoding. errors may be given to set a different error\n\ +Decode B using the codec registered for encoding. Default encoding\n\ +is 'utf-8'. errors may be given to set a different error\n\ handling scheme. Default is 'strict' meaning that encoding errors raise\n\ a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\ as well as any other name registerd with codecs.register_error that is\n\ able to handle UnicodeDecodeErrors."); static PyObject * -bytes_decode(PyObject *self, PyObject *args) +bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs) { const char *encoding = NULL; const char *errors = NULL; + static char *kwlist[] = {"encoding", "errors", 0}; - if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors)) return NULL; if (encoding == NULL) encoding = PyUnicode_GetDefaultEncoding(); @@ -2743,6 +2318,28 @@ bytes_decode(PyObject *self, PyObject *args) } +PyDoc_STRVAR(splitlines__doc__, +"B.splitlines([keepends]) -> list of lines\n\ +\n\ +Return a list of the lines in B, breaking at line boundaries.\n\ +Line breaks are not included in the resulting list unless keepends\n\ +is given and true."); + +static PyObject* +bytes_splitlines(PyObject *self, PyObject *args) +{ + int keepends = 0; + + if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends)) + return NULL; + + return stringlib_splitlines( + (PyObject*) self, PyBytes_AS_STRING(self), + PyBytes_GET_SIZE(self), keepends + ); +} + + PyDoc_STRVAR(fromhex_doc, "bytes.fromhex(string) -> bytes\n\ \n\ @@ -2755,11 +2352,11 @@ hex_digit_to_int(Py_UNICODE c) { if (c >= 128) return -1; - if (ISDIGIT(c)) + if (Py_ISDIGIT(c)) return c - '0'; else { - if (ISUPPER(c)) - c = TOLOWER(c); + if (Py_ISUPPER(c)) + c = Py_TOLOWER(c); if (c >= 'a' && c <= 'f') return c - 'a' + 10; } @@ -2825,7 +2422,7 @@ bytes_sizeof(PyBytesObject *v) static PyObject * bytes_getnewargs(PyBytesObject *v) { - return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v)); + return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v)); } @@ -2836,7 +2433,7 @@ bytes_methods[] = { _Py_capitalize__doc__}, {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__}, {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__}, - {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode__doc__}, + {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__}, {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__}, {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS, @@ -2875,7 +2472,7 @@ bytes_methods[] = { {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__}, {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__}, {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__}, - {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS, + {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS, splitlines__doc__}, {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS, startswith__doc__}, @@ -2902,6 +2499,7 @@ bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds) const char *encoding = NULL; const char *errors = NULL; PyObject *new = NULL; + Py_ssize_t size; static char *kwlist[] = {"source", "encoding", "errors", 0}; if (type != &PyBytes_Type) @@ -2932,28 +2530,7 @@ bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds) assert(PyBytes_Check(new)); return new; } - - /* If it's not unicode, there can't be encoding or errors */ - if (encoding != NULL || errors != NULL) { - PyErr_SetString(PyExc_TypeError, - "encoding or errors without a string argument"); - return NULL; - } - return PyObject_Bytes(x); -} - -PyObject * -PyBytes_FromObject(PyObject *x) -{ - PyObject *new, *it; - Py_ssize_t i, size; - - if (x == NULL) { - PyErr_BadInternalCall(); - return NULL; - } - - /* Is it an int? */ + /* Is it an integer? */ size = PyNumber_AsSsize_t(x, PyExc_OverflowError); if (size == -1 && PyErr_Occurred()) { if (PyErr_ExceptionMatches(PyExc_OverflowError)) @@ -2975,6 +2552,25 @@ PyBytes_FromObject(PyObject *x) return new; } + /* If it's not unicode, there can't be encoding or errors */ + if (encoding != NULL || errors != NULL) { + PyErr_SetString(PyExc_TypeError, + "encoding or errors without a string argument"); + return NULL; + } + return PyObject_Bytes(x); +} + +PyObject * +PyBytes_FromObject(PyObject *x) +{ + PyObject *new, *it; + Py_ssize_t i, size; + + if (x == NULL) { + PyErr_BadInternalCall(); + return NULL; + } /* Use the modern buffer interface */ if (PyObject_CheckBuffer(x)) { Py_buffer view; @@ -2994,18 +2590,68 @@ PyBytes_FromObject(PyObject *x) PyBuffer_Release(&view); return NULL; } + if (PyUnicode_Check(x)) { + PyErr_SetString(PyExc_TypeError, + "cannot convert unicode object to bytes"); + return NULL; + } + + if (PyList_CheckExact(x)) { + new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x)); + if (new == NULL) + return NULL; + for (i = 0; i < Py_SIZE(x); i++) { + Py_ssize_t value = PyNumber_AsSsize_t( + PyList_GET_ITEM(x, i), PyExc_ValueError); + if (value == -1 && PyErr_Occurred()) { + Py_DECREF(new); + return NULL; + } + if (value < 0 || value >= 256) { + PyErr_SetString(PyExc_ValueError, + "bytes must be in range(0, 256)"); + Py_DECREF(new); + return NULL; + } + ((PyBytesObject *)new)->ob_sval[i] = (char) value; + } + return new; + } + if (PyTuple_CheckExact(x)) { + new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x)); + if (new == NULL) + return NULL; + for (i = 0; i < Py_SIZE(x); i++) { + Py_ssize_t value = PyNumber_AsSsize_t( + PyTuple_GET_ITEM(x, i), PyExc_ValueError); + if (value == -1 && PyErr_Occurred()) { + Py_DECREF(new); + return NULL; + } + if (value < 0 || value >= 256) { + PyErr_SetString(PyExc_ValueError, + "bytes must be in range(0, 256)"); + Py_DECREF(new); + return NULL; + } + ((PyBytesObject *)new)->ob_sval[i] = (char) value; + } + return new; + } /* For iterator version, create a string object and resize as needed */ - /* XXX(gb): is 64 a good value? also, optimize if length is known */ - /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the - input being a truly long iterator. */ - size = 64; + size = _PyObject_LengthHint(x, 64); + if (size == -1 && PyErr_Occurred()) + return NULL; + /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from + returning a shared empty bytes string. This required because we + want to call _PyBytes_Resize() the returned object, which we can + only do on bytes objects with refcount == 1. */ + size += 1; new = PyBytes_FromStringAndSize(NULL, size); if (new == NULL) return NULL; - /* XXX Optimize this if the arguments is a list, tuple */ - /* Get the iterator */ it = PyObject_GetIter(x); if (it == NULL) @@ -3039,7 +2685,7 @@ PyBytes_FromObject(PyObject *x) /* Append the byte */ if (i >= size) { - size *= 2; + size = 2 * size + 1; if (_PyBytes_Resize(&new, size) < 0) goto error; } @@ -3085,13 +2731,14 @@ PyDoc_STRVAR(bytes_doc, "bytes(iterable_of_ints) -> bytes\n\ bytes(string, encoding[, errors]) -> bytes\n\ bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\ -bytes(memory_view) -> bytes\n\ +bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\ +bytes() -> empty bytes object\n\ \n\ Construct an immutable array of bytes from:\n\ - an iterable yielding integers in range(256)\n\ - a text string encoded using the specified encoding\n\ - - a bytes or a buffer object\n\ - - any object implementing the buffer API."); + - any object implementing the buffer API.\n\ + - an integer"); static PyObject *bytes_iter(PyObject *seq); @@ -3210,7 +2857,7 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize) /* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and * the F_ALT flag, for Python's long (unbounded) ints. It's not used for * Python's regular ints. - * Return value: a new PyString*, or NULL if error. + * Return value: a new PyBytes*, or NULL if error. * . *pbuf is set to point into it, * *plen set to the # of chars following that. * Caller must decref it when done using pbuf. |