diff options
Diffstat (limited to 'Objects/bytesobject.c')
-rw-r--r-- | Objects/bytesobject.c | 1313 |
1 files changed, 1012 insertions, 301 deletions
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index b9b49ac..d981e0e 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -7,6 +7,13 @@ #include "bytes_methods.h" #include <stddef.h> +/*[clinic input] +class bytes "PyBytesObject*" "&PyBytes_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/ + +#include "clinic/bytesobject.c.h" + #ifdef COUNT_ALLOCS Py_ssize_t null_strings, one_strings; #endif @@ -44,15 +51,12 @@ static PyBytesObject *nullstring; PyBytes_FromStringAndSize()) or the length of the string in the `str' parameter (for PyBytes_FromString()). */ -PyObject * -PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) +static PyObject * +_PyBytes_FromSize(Py_ssize_t size, int use_calloc) { PyBytesObject *op; - if (size < 0) { - PyErr_SetString(PyExc_SystemError, - "Negative size passed to PyBytes_FromStringAndSize"); - return NULL; - } + assert(size >= 0); + if (size == 0 && (op = nullstring) != NULL) { #ifdef COUNT_ALLOCS null_strings++; @@ -60,36 +64,60 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) Py_INCREF(op); return (PyObject *)op; } - if (size == 1 && str != NULL && - (op = characters[*str & UCHAR_MAX]) != NULL) - { -#ifdef COUNT_ALLOCS - one_strings++; -#endif - Py_INCREF(op); - return (PyObject *)op; - } - if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) { + if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) { PyErr_SetString(PyExc_OverflowError, "byte string is too large"); return NULL; } /* Inline PyObject_NewVar */ - op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size); + if (use_calloc) + op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size); + else + op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size); if (op == NULL) return PyErr_NoMemory(); (void)PyObject_INIT_VAR(op, &PyBytes_Type, size); op->ob_shash = -1; - if (str != NULL) - Py_MEMCPY(op->ob_sval, str, size); - op->ob_sval[size] = '\0'; - /* share short strings */ + if (!use_calloc) + op->ob_sval[size] = '\0'; + /* empty byte string singleton */ if (size == 0) { nullstring = op; Py_INCREF(op); - } else if (size == 1 && str != NULL) { + } + return (PyObject *) op; +} + +PyObject * +PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) +{ + PyBytesObject *op; + if (size < 0) { + PyErr_SetString(PyExc_SystemError, + "Negative size passed to PyBytes_FromStringAndSize"); + return NULL; + } + if (size == 1 && str != NULL && + (op = characters[*str & UCHAR_MAX]) != NULL) + { +#ifdef COUNT_ALLOCS + one_strings++; +#endif + Py_INCREF(op); + return (PyObject *)op; + } + + op = (PyBytesObject *)_PyBytes_FromSize(size, 0); + if (op == NULL) + return NULL; + if (str == NULL) + return (PyObject *) op; + + Py_MEMCPY(op->ob_sval, str, size); + /* share short strings */ + if (size == 1) { characters[*str & UCHAR_MAX] = op; Py_INCREF(op); } @@ -347,6 +375,580 @@ PyBytes_FromFormat(const char *format, ...) return ret; } +/* Helpers for formatstring */ + +Py_LOCAL_INLINE(PyObject *) +getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) +{ + Py_ssize_t argidx = *p_argidx; + if (argidx < arglen) { + (*p_argidx)++; + if (arglen < 0) + return args; + else + return PyTuple_GetItem(args, argidx); + } + PyErr_SetString(PyExc_TypeError, + "not enough arguments for format string"); + return NULL; +} + +/* Format codes + * F_LJUST '-' + * F_SIGN '+' + * F_BLANK ' ' + * F_ALT '#' + * F_ZERO '0' + */ +#define F_LJUST (1<<0) +#define F_SIGN (1<<1) +#define F_BLANK (1<<2) +#define F_ALT (1<<3) +#define F_ZERO (1<<4) + +/* Returns a new reference to a PyBytes object, or NULL on failure. */ + +static PyObject * +formatfloat(PyObject *v, int flags, int prec, int type) +{ + char *p; + PyObject *result; + double x; + + x = PyFloat_AsDouble(v); + if (x == -1.0 && PyErr_Occurred()) { + PyErr_Format(PyExc_TypeError, "float argument required, " + "not %.200s", Py_TYPE(v)->tp_name); + return NULL; + } + + if (prec < 0) + prec = 6; + + p = PyOS_double_to_string(x, type, prec, + (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL); + + if (p == NULL) + return NULL; + result = PyBytes_FromStringAndSize(p, strlen(p)); + PyMem_Free(p); + return result; +} + +static PyObject * +formatlong(PyObject *v, int flags, int prec, int type) +{ + PyObject *result, *iobj; + if (type == 'i') + type = 'd'; + if (PyLong_Check(v)) + return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type); + if (PyNumber_Check(v)) { + /* make sure number is a type of integer for o, x, and X */ + if (type == 'o' || type == 'x' || type == 'X') + iobj = PyNumber_Index(v); + else + iobj = PyNumber_Long(v); + if (iobj == NULL) { + if (!PyErr_ExceptionMatches(PyExc_TypeError)) + return NULL; + } + else if (!PyLong_Check(iobj)) + Py_CLEAR(iobj); + if (iobj != NULL) { + result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type); + Py_DECREF(iobj); + return result; + } + } + PyErr_Format(PyExc_TypeError, + "%%%c format: %s is required, not %.200s", type, + (type == 'o' || type == 'x' || type == 'X') ? "an integer" + : "a number", + Py_TYPE(v)->tp_name); + return NULL; +} + +static int +byte_converter(PyObject *arg, char *p) +{ + if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) { + *p = PyBytes_AS_STRING(arg)[0]; + return 1; + } + else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) { + *p = PyByteArray_AS_STRING(arg)[0]; + return 1; + } + else { + PyObject *iobj; + long ival; + int overflow; + /* make sure number is a type of integer */ + if (PyLong_Check(arg)) { + ival = PyLong_AsLongAndOverflow(arg, &overflow); + } + else { + iobj = PyNumber_Index(arg); + if (iobj == NULL) { + if (!PyErr_ExceptionMatches(PyExc_TypeError)) + return 0; + goto onError; + } + ival = PyLong_AsLongAndOverflow(iobj, &overflow); + Py_DECREF(iobj); + } + if (!overflow && ival == -1 && PyErr_Occurred()) + goto onError; + if (overflow || !(0 <= ival && ival <= 255)) { + PyErr_SetString(PyExc_OverflowError, + "%c arg not in range(256)"); + return 0; + } + *p = (char)ival; + return 1; + } + onError: + PyErr_SetString(PyExc_TypeError, + "%c requires an integer in range(256) or a single byte"); + return 0; +} + +static PyObject * +format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen) +{ + PyObject *func, *result; + _Py_IDENTIFIER(__bytes__); + /* is it a bytes object? */ + if (PyBytes_Check(v)) { + *pbuf = PyBytes_AS_STRING(v); + *plen = PyBytes_GET_SIZE(v); + Py_INCREF(v); + return v; + } + if (PyByteArray_Check(v)) { + *pbuf = PyByteArray_AS_STRING(v); + *plen = PyByteArray_GET_SIZE(v); + Py_INCREF(v); + return v; + } + /* does it support __bytes__? */ + func = _PyObject_LookupSpecial(v, &PyId___bytes__); + if (func != NULL) { + result = PyObject_CallFunctionObjArgs(func, NULL); + Py_DECREF(func); + if (result == NULL) + return NULL; + if (!PyBytes_Check(result)) { + PyErr_Format(PyExc_TypeError, + "__bytes__ returned non-bytes (type %.200s)", + Py_TYPE(result)->tp_name); + Py_DECREF(result); + return NULL; + } + *pbuf = PyBytes_AS_STRING(result); + *plen = PyBytes_GET_SIZE(result); + return result; + } + PyErr_Format(PyExc_TypeError, + "%%b requires bytes, or an object that implements __bytes__, not '%.100s'", + Py_TYPE(v)->tp_name); + return NULL; +} + +/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) + + FORMATBUFLEN is the length of the buffer in which the ints & + chars are formatted. XXX This is a magic number. Each formatting + routine does bounds checking to ensure no overflow, but a better + solution may be to malloc a buffer of appropriate size for each + format. For now, the current solution is sufficient. +*/ +#define FORMATBUFLEN (size_t)120 + +PyObject * +_PyBytes_Format(PyObject *format, PyObject *args) +{ + char *fmt, *res; + Py_ssize_t arglen, argidx; + Py_ssize_t reslen, rescnt, fmtcnt; + int args_owned = 0; + PyObject *result; + PyObject *dict = NULL; + if (format == NULL || !PyBytes_Check(format) || args == NULL) { + PyErr_BadInternalCall(); + return NULL; + } + fmt = PyBytes_AS_STRING(format); + fmtcnt = PyBytes_GET_SIZE(format); + reslen = rescnt = fmtcnt + 100; + result = PyBytes_FromStringAndSize((char *)NULL, reslen); + if (result == NULL) + return NULL; + res = PyBytes_AsString(result); + if (PyTuple_Check(args)) { + arglen = PyTuple_GET_SIZE(args); + argidx = 0; + } + else { + arglen = -1; + argidx = -2; + } + if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript && + !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) && + !PyByteArray_Check(args)) { + dict = args; + } + while (--fmtcnt >= 0) { + if (*fmt != '%') { + if (--rescnt < 0) { + rescnt = fmtcnt + 100; + reslen += rescnt; + if (_PyBytes_Resize(&result, reslen)) + return NULL; + res = PyBytes_AS_STRING(result) + + reslen - rescnt; + --rescnt; + } + *res++ = *fmt++; + } + else { + /* Got a format specifier */ + int flags = 0; + Py_ssize_t width = -1; + int prec = -1; + int c = '\0'; + int fill; + PyObject *v = NULL; + PyObject *temp = NULL; + const char *pbuf = NULL; + int sign; + Py_ssize_t len = 0; + char onechar; /* For byte_converter() */ + + fmt++; + if (*fmt == '(') { + char *keystart; + Py_ssize_t keylen; + PyObject *key; + int pcount = 1; + + if (dict == NULL) { + PyErr_SetString(PyExc_TypeError, + "format requires a mapping"); + goto error; + } + ++fmt; + --fmtcnt; + keystart = fmt; + /* Skip over balanced parentheses */ + while (pcount > 0 && --fmtcnt >= 0) { + if (*fmt == ')') + --pcount; + else if (*fmt == '(') + ++pcount; + fmt++; + } + keylen = fmt - keystart - 1; + if (fmtcnt < 0 || pcount > 0) { + PyErr_SetString(PyExc_ValueError, + "incomplete format key"); + goto error; + } + key = PyBytes_FromStringAndSize(keystart, + keylen); + if (key == NULL) + goto error; + if (args_owned) { + Py_DECREF(args); + args_owned = 0; + } + args = PyObject_GetItem(dict, key); + Py_DECREF(key); + if (args == NULL) { + goto error; + } + args_owned = 1; + arglen = -1; + argidx = -2; + } + while (--fmtcnt >= 0) { + switch (c = *fmt++) { + case '-': flags |= F_LJUST; continue; + case '+': flags |= F_SIGN; continue; + case ' ': flags |= F_BLANK; continue; + case '#': flags |= F_ALT; continue; + case '0': flags |= F_ZERO; continue; + } + break; + } + if (c == '*') { + v = getnextarg(args, arglen, &argidx); + if (v == NULL) + goto error; + if (!PyLong_Check(v)) { + PyErr_SetString(PyExc_TypeError, + "* wants int"); + goto error; + } + width = PyLong_AsSsize_t(v); + if (width == -1 && PyErr_Occurred()) + goto error; + if (width < 0) { + flags |= F_LJUST; + width = -width; + } + if (--fmtcnt >= 0) + c = *fmt++; + } + else if (c >= 0 && isdigit(c)) { + width = c - '0'; + while (--fmtcnt >= 0) { + c = Py_CHARMASK(*fmt++); + if (!isdigit(c)) + break; + if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) { + PyErr_SetString( + PyExc_ValueError, + "width too big"); + goto error; + } + width = width*10 + (c - '0'); + } + } + if (c == '.') { + prec = 0; + if (--fmtcnt >= 0) + c = *fmt++; + if (c == '*') { + v = getnextarg(args, arglen, &argidx); + if (v == NULL) + goto error; + if (!PyLong_Check(v)) { + PyErr_SetString( + PyExc_TypeError, + "* wants int"); + goto error; + } + prec = _PyLong_AsInt(v); + if (prec == -1 && PyErr_Occurred()) + goto error; + if (prec < 0) + prec = 0; + if (--fmtcnt >= 0) + c = *fmt++; + } + else if (c >= 0 && isdigit(c)) { + prec = c - '0'; + while (--fmtcnt >= 0) { + c = Py_CHARMASK(*fmt++); + if (!isdigit(c)) + break; + if (prec > (INT_MAX - ((int)c - '0')) / 10) { + PyErr_SetString( + PyExc_ValueError, + "prec too big"); + goto error; + } + prec = prec*10 + (c - '0'); + } + } + } /* prec */ + if (fmtcnt >= 0) { + if (c == 'h' || c == 'l' || c == 'L') { + if (--fmtcnt >= 0) + c = *fmt++; + } + } + if (fmtcnt < 0) { + PyErr_SetString(PyExc_ValueError, + "incomplete format"); + goto error; + } + if (c != '%') { + v = getnextarg(args, arglen, &argidx); + if (v == NULL) + goto error; + } + sign = 0; + fill = ' '; + switch (c) { + case '%': + pbuf = "%"; + len = 1; + break; + case 'r': + // %r is only for 2/3 code; 3 only code should use %a + case 'a': + temp = PyObject_ASCII(v); + if (temp == NULL) + goto error; + assert(PyUnicode_IS_ASCII(temp)); + pbuf = (const char *)PyUnicode_1BYTE_DATA(temp); + len = PyUnicode_GET_LENGTH(temp); + if (prec >= 0 && len > prec) + len = prec; + break; + case 's': + // %s is only for 2/3 code; 3 only code should use %b + case 'b': + temp = format_obj(v, &pbuf, &len); + if (temp == NULL) + goto error; + if (prec >= 0 && len > prec) + len = prec; + break; + case 'i': + case 'd': + case 'u': + case 'o': + case 'x': + case 'X': + temp = formatlong(v, flags, prec, c); + if (!temp) + goto error; + assert(PyUnicode_IS_ASCII(temp)); + pbuf = (const char *)PyUnicode_1BYTE_DATA(temp); + len = PyUnicode_GET_LENGTH(temp); + sign = 1; + if (flags & F_ZERO) + fill = '0'; + break; + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + temp = formatfloat(v, flags, prec, c); + if (temp == NULL) + goto error; + pbuf = PyBytes_AS_STRING(temp); + len = PyBytes_GET_SIZE(temp); + sign = 1; + if (flags & F_ZERO) + fill = '0'; + break; + case 'c': + pbuf = &onechar; + len = byte_converter(v, &onechar); + if (!len) + goto error; + break; + default: + PyErr_Format(PyExc_ValueError, + "unsupported format character '%c' (0x%x) " + "at index %zd", + c, c, + (Py_ssize_t)(fmt - 1 - + PyBytes_AsString(format))); + goto error; + } + if (sign) { + if (*pbuf == '-' || *pbuf == '+') { + sign = *pbuf++; + len--; + } + else if (flags & F_SIGN) + sign = '+'; + else if (flags & F_BLANK) + sign = ' '; + else + sign = 0; + } + if (width < len) + width = len; + if (rescnt - (sign != 0) < width) { + reslen -= rescnt; + rescnt = width + fmtcnt + 100; + reslen += rescnt; + if (reslen < 0) { + Py_DECREF(result); + Py_XDECREF(temp); + return PyErr_NoMemory(); + } + if (_PyBytes_Resize(&result, reslen)) { + Py_XDECREF(temp); + return NULL; + } + res = PyBytes_AS_STRING(result) + + reslen - rescnt; + } + if (sign) { + if (fill != ' ') + *res++ = sign; + rescnt--; + if (width > len) + width--; + } + if ((flags & F_ALT) && (c == 'x' || c == 'X')) { + assert(pbuf[0] == '0'); + assert(pbuf[1] == c); + if (fill != ' ') { + *res++ = *pbuf++; + *res++ = *pbuf++; + } + rescnt -= 2; + width -= 2; + if (width < 0) + width = 0; + len -= 2; + } + if (width > len && !(flags & F_LJUST)) { + do { + --rescnt; + *res++ = fill; + } while (--width > len); + } + if (fill == ' ') { + if (sign) + *res++ = sign; + if ((flags & F_ALT) && + (c == 'x' || c == 'X')) { + assert(pbuf[0] == '0'); + assert(pbuf[1] == c); + *res++ = *pbuf++; + *res++ = *pbuf++; + } + } + Py_MEMCPY(res, pbuf, len); + res += len; + rescnt -= len; + while (--width >= len) { + --rescnt; + *res++ = ' '; + } + if (dict && (argidx < arglen) && c != '%') { + PyErr_SetString(PyExc_TypeError, + "not all arguments converted during bytes formatting"); + Py_XDECREF(temp); + goto error; + } + Py_XDECREF(temp); + } /* '%' */ + } /* until end */ + if (argidx < arglen && !dict) { + PyErr_SetString(PyExc_TypeError, + "not all arguments converted during bytes formatting"); + goto error; + } + if (args_owned) { + Py_DECREF(args); + } + if (_PyBytes_Resize(&result, reslen - rescnt)) + return NULL; + return result; + + error: + Py_DECREF(result); + if (args_owned) { + Py_DECREF(args); + } + return NULL; +} + +/* =-= */ + static void bytes_dealloc(PyObject *op) { @@ -540,8 +1142,8 @@ PyBytes_AsStringAndSize(PyObject *obj, if (len != NULL) *len = PyBytes_GET_SIZE(obj); else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) { - PyErr_SetString(PyExc_TypeError, - "expected bytes with no null"); + PyErr_SetString(PyExc_ValueError, + "embedded null byte"); return -1; } return 0; @@ -819,14 +1421,23 @@ bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op) /* Make sure both arguments are strings. */ if (!(PyBytes_Check(a) && PyBytes_Check(b))) { - if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) && - (PyObject_IsInstance((PyObject*)a, - (PyObject*)&PyUnicode_Type) || - PyObject_IsInstance((PyObject*)b, - (PyObject*)&PyUnicode_Type))) { - if (PyErr_WarnEx(PyExc_BytesWarning, - "Comparison between bytes and string", 1)) - return NULL; + if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) { + if (PyObject_IsInstance((PyObject*)a, + (PyObject*)&PyUnicode_Type) || + PyObject_IsInstance((PyObject*)b, + (PyObject*)&PyUnicode_Type)) { + if (PyErr_WarnEx(PyExc_BytesWarning, + "Comparison between bytes and string", 1)) + return NULL; + } + else if (PyObject_IsInstance((PyObject*)a, + (PyObject*)&PyLong_Type) || + PyObject_IsInstance((PyObject*)b, + (PyObject*)&PyLong_Type)) { + if (PyErr_WarnEx(PyExc_BytesWarning, + "Comparison between bytes and int", 1)) + return NULL; + } } result = Py_NotImplemented; } @@ -951,7 +1562,7 @@ bytes_subscript(PyBytesObject* self, PyObject* item) } else { PyErr_Format(PyExc_TypeError, - "byte indices must be integers, not %.200s", + "byte indices must be integers or slices, not %.200s", Py_TYPE(item)->tp_name); return NULL; } @@ -991,37 +1602,34 @@ static PyBufferProcs bytes_as_buffer = { #define RIGHTSTRIP 1 #define BOTHSTRIP 2 -/* Arrays indexed by above */ -static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"}; +/*[clinic input] +bytes.split -#define STRIPNAME(i) (stripformat[i]+3) + sep: object = None + The delimiter according which to split the bytes. + None (the default value) means split on ASCII whitespace characters + (space, tab, return, newline, formfeed, vertical tab). + maxsplit: Py_ssize_t = -1 + Maximum number of splits to do. + -1 (the default value) means no limit. -PyDoc_STRVAR(split__doc__, -"B.split(sep=None, maxsplit=-1) -> list of bytes\n\ -\n\ -Return a list of the sections in B, using sep as the delimiter.\n\ -If sep is not specified or is None, B is split on ASCII whitespace\n\ -characters (space, tab, return, newline, formfeed, vertical tab).\n\ -If maxsplit is given, at most maxsplit splits are done."); +Return a list of the sections in the bytes, using sep as the delimiter. +[clinic start generated code]*/ static PyObject * -bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds) +bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit) +/*[clinic end generated code: output=8bde44dacb36ef2e input=8b809b39074abbfa]*/ { - static char *kwlist[] = {"sep", "maxsplit", 0}; Py_ssize_t len = PyBytes_GET_SIZE(self), n; - Py_ssize_t maxsplit = -1; const char *s = PyBytes_AS_STRING(self), *sub; Py_buffer vsub; - PyObject *list, *subobj = Py_None; + PyObject *list; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split", - kwlist, &subobj, &maxsplit)) - return NULL; if (maxsplit < 0) maxsplit = PY_SSIZE_T_MAX; - if (subobj == Py_None) + if (sep == Py_None) return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit); - if (PyObject_GetBuffer(subobj, &vsub, PyBUF_SIMPLE) != 0) + if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0) return NULL; sub = vsub.buf; n = vsub.len; @@ -1031,85 +1639,84 @@ bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds) return list; } -PyDoc_STRVAR(partition__doc__, -"B.partition(sep) -> (head, sep, tail)\n\ -\n\ -Search for the separator sep in B, and return the part before it,\n\ -the separator itself, and the part after it. If the separator is not\n\ -found, returns B and two empty bytes objects."); +/*[clinic input] +bytes.partition -static PyObject * -bytes_partition(PyBytesObject *self, PyObject *sep_obj) -{ - Py_buffer sep = {NULL, NULL}; - PyObject *res; + self: self(type="PyBytesObject *") + sep: Py_buffer + / - if (PyObject_GetBuffer(sep_obj, &sep, PyBUF_SIMPLE) != 0) - return NULL; +Partition the bytes into three parts using the given separator. + +This will search for the separator sep in the bytes. If the separator is found, +returns a 3-tuple containing the part before the separator, the separator +itself, and the part after it. + +If the separator is not found, returns a 3-tuple containing the original bytes +object and two empty bytes objects. +[clinic start generated code]*/ - res = stringlib_partition( +static PyObject * +bytes_partition_impl(PyBytesObject *self, Py_buffer *sep) +/*[clinic end generated code: output=f532b392a17ff695 input=bc855dc63ca949de]*/ +{ + return stringlib_partition( (PyObject*) self, PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), - sep_obj, sep.buf, sep.len + sep->obj, (const char *)sep->buf, sep->len ); - PyBuffer_Release(&sep); - return res; } -PyDoc_STRVAR(rpartition__doc__, -"B.rpartition(sep) -> (head, sep, tail)\n\ -\n\ -Search for the separator sep in B, starting at the end of B,\n\ -and return the part before it, the separator itself, and the\n\ -part after it. If the separator is not found, returns two empty\n\ -bytes objects and B."); +/*[clinic input] +bytes.rpartition -static PyObject * -bytes_rpartition(PyBytesObject *self, PyObject *sep_obj) -{ - Py_buffer sep = {NULL, NULL}; - PyObject *res; + self: self(type="PyBytesObject *") + sep: Py_buffer + / - if (PyObject_GetBuffer(sep_obj, &sep, PyBUF_SIMPLE) != 0) - return NULL; +Partition the bytes into three parts using the given separator. + +This will search for the separator sep in the bytes, starting and the end. If +the separator is found, returns a 3-tuple containing the part before the +separator, the separator itself, and the part after it. - res = stringlib_rpartition( +If the separator is not found, returns a 3-tuple containing two empty bytes +objects and the original bytes object. +[clinic start generated code]*/ + +static PyObject * +bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep) +/*[clinic end generated code: output=191b114cbb028e50 input=6588fff262a9170e]*/ +{ + return stringlib_rpartition( (PyObject*) self, PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), - sep_obj, sep.buf, sep.len + sep->obj, (const char *)sep->buf, sep->len ); - PyBuffer_Release(&sep); - return res; } -PyDoc_STRVAR(rsplit__doc__, -"B.rsplit(sep=None, maxsplit=-1) -> list of bytes\n\ -\n\ -Return a list of the sections in B, using sep as the delimiter,\n\ -starting at the end of B and working to the front.\n\ -If sep is not given, B is split on ASCII whitespace characters\n\ -(space, tab, return, newline, formfeed, vertical tab).\n\ -If maxsplit is given, at most maxsplit splits are done."); +/*[clinic input] +bytes.rsplit = bytes.split +Return a list of the sections in the bytes, using sep as the delimiter. + +Splitting is done starting at the end of the bytes and working to the front. +[clinic start generated code]*/ static PyObject * -bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds) +bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit) +/*[clinic end generated code: output=0b6570b977911d88 input=0f86c9f28f7d7b7b]*/ { - static char *kwlist[] = {"sep", "maxsplit", 0}; Py_ssize_t len = PyBytes_GET_SIZE(self), n; - Py_ssize_t maxsplit = -1; const char *s = PyBytes_AS_STRING(self), *sub; Py_buffer vsub; - PyObject *list, *subobj = Py_None; + PyObject *list; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit", - kwlist, &subobj, &maxsplit)) - return NULL; if (maxsplit < 0) maxsplit = PY_SSIZE_T_MAX; - if (subobj == Py_None) + if (sep == Py_None) return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit); - if (PyObject_GetBuffer(subobj, &vsub, PyBUF_SIMPLE) != 0) + if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0) return NULL; sub = vsub.buf; n = vsub.len; @@ -1120,16 +1727,26 @@ bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds) } -PyDoc_STRVAR(join__doc__, -"B.join(iterable_of_bytes) -> bytes\n\ -\n\ -Concatenate any number of bytes objects, with B in between each pair.\n\ -Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'."); +/*[clinic input] +bytes.join + + iterable_of_bytes: object + / + +Concatenate any number of bytes objects. + +The bytes whose method is called is inserted in between each pair. + +The result is returned as a new bytes object. + +Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'. +[clinic start generated code]*/ static PyObject * -bytes_join(PyObject *self, PyObject *iterable) +bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes) +/*[clinic end generated code: output=634aff14764ff997 input=7fe377b95bd549d2]*/ { - return stringlib_bytes_join(self, iterable); + return stringlib_bytes_join((PyObject*)self, iterable_of_bytes); } PyObject * @@ -1137,7 +1754,7 @@ _PyBytes_Join(PyObject *sep, PyObject *x) { assert(sep != NULL && PyBytes_Check(sep)); assert(x != NULL); - return bytes_join(sep, x); + return bytes_join((PyBytesObject*)sep, x); } /* helper macro to fixup start/end slice values */ @@ -1162,7 +1779,7 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir) char byte; Py_buffer subbuf; const char *sub; - Py_ssize_t sub_len; + Py_ssize_t len, sub_len; Py_ssize_t start=0, end=PY_SSIZE_T_MAX; Py_ssize_t res; @@ -1181,15 +1798,30 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir) sub = &byte; sub_len = 1; } + len = PyBytes_GET_SIZE(self); - if (dir > 0) - res = stringlib_find_slice( - PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), - sub, sub_len, start, end); - else - res = stringlib_rfind_slice( - PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), - sub, sub_len, start, end); + ADJUST_INDICES(start, end, len); + if (end - start < sub_len) + res = -1; + /* Issue #23573: FIXME, windows has no memrchr() */ + else if (sub_len == 1 && dir > 0) { + unsigned char needle = *sub; + res = stringlib_fastsearch_memchr_1char( + PyBytes_AS_STRING(self) + start, end - start, + needle, needle, FAST_SEARCH); + if (res >= 0) + res += start; + } + else { + if (dir > 0) + res = stringlib_find_slice( + PyBytes_AS_STRING(self), len, + sub, sub_len, start, end); + else + res = stringlib_rfind_slice( + PyBytes_AS_STRING(self), len, + sub, sub_len, start, end); + } if (subobj) PyBuffer_Release(&subbuf); @@ -1348,62 +1980,69 @@ do_strip(PyBytesObject *self, int striptype) Py_LOCAL_INLINE(PyObject *) -do_argstrip(PyBytesObject *self, int striptype, PyObject *args) +do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes) { - PyObject *sep = NULL; - - if (!PyArg_ParseTuple(args, stripformat[striptype], &sep)) - return NULL; - - if (sep != NULL && sep != Py_None) { - return do_xstrip(self, striptype, sep); + if (bytes != NULL && bytes != Py_None) { + return do_xstrip(self, striptype, bytes); } return do_strip(self, striptype); } +/*[clinic input] +bytes.strip + + self: self(type="PyBytesObject *") + bytes: object = None + / + +Strip leading and trailing bytes contained in the argument. + +If the argument is omitted or None, strip leading and trailing ASCII whitespace. +[clinic start generated code]*/ -PyDoc_STRVAR(strip__doc__, -"B.strip([bytes]) -> bytes\n\ -\n\ -Strip leading and trailing bytes contained in the argument.\n\ -If the argument is omitted, strip leading and trailing ASCII whitespace."); static PyObject * -bytes_strip(PyBytesObject *self, PyObject *args) +bytes_strip_impl(PyBytesObject *self, PyObject *bytes) +/*[clinic end generated code: output=c7c228d3bd104a1b input=37daa5fad1395d95]*/ { - if (PyTuple_GET_SIZE(args) == 0) - return do_strip(self, BOTHSTRIP); /* Common case */ - else - return do_argstrip(self, BOTHSTRIP, args); + return do_argstrip(self, BOTHSTRIP, bytes); } +/*[clinic input] +bytes.lstrip + + self: self(type="PyBytesObject *") + bytes: object = None + / + +Strip leading bytes contained in the argument. + +If the argument is omitted or None, strip leading ASCII whitespace. +[clinic start generated code]*/ -PyDoc_STRVAR(lstrip__doc__, -"B.lstrip([bytes]) -> bytes\n\ -\n\ -Strip leading bytes contained in the argument.\n\ -If the argument is omitted, strip leading ASCII whitespace."); static PyObject * -bytes_lstrip(PyBytesObject *self, PyObject *args) +bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes) +/*[clinic end generated code: output=28602e586f524e82 input=88811b09dfbc2988]*/ { - if (PyTuple_GET_SIZE(args) == 0) - return do_strip(self, LEFTSTRIP); /* Common case */ - else - return do_argstrip(self, LEFTSTRIP, args); + return do_argstrip(self, LEFTSTRIP, bytes); } +/*[clinic input] +bytes.rstrip + + self: self(type="PyBytesObject *") + bytes: object = None + / + +Strip trailing bytes contained in the argument. + +If the argument is omitted or None, strip trailing ASCII whitespace. +[clinic start generated code]*/ -PyDoc_STRVAR(rstrip__doc__, -"B.rstrip([bytes]) -> bytes\n\ -\n\ -Strip trailing bytes contained in the argument.\n\ -If the argument is omitted, strip trailing ASCII whitespace."); static PyObject * -bytes_rstrip(PyBytesObject *self, PyObject *args) +bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes) +/*[clinic end generated code: output=547e3815c95447da input=8f93c9cd361f0140]*/ { - if (PyTuple_GET_SIZE(args) == 0) - return do_strip(self, RIGHTSTRIP); /* Common case */ - else - return do_argstrip(self, RIGHTSTRIP, args); + return do_argstrip(self, RIGHTSTRIP, bytes); } @@ -1455,45 +2094,51 @@ bytes_count(PyBytesObject *self, PyObject *args) } -PyDoc_STRVAR(translate__doc__, -"B.translate(table[, deletechars]) -> bytes\n\ -\n\ -Return a copy of B, where all characters occurring in the\n\ -optional argument deletechars are removed, and the remaining\n\ -characters have been mapped through the given translation\n\ -table, which must be a bytes object of length 256."); +/*[clinic input] +bytes.translate + + self: self(type="PyBytesObject *") + table: object + Translation table, which must be a bytes object of length 256. + [ + deletechars: object + ] + / + +Return a copy with each character mapped by the given translation table. + +All characters occurring in the optional argument deletechars are removed. +The remaining characters are mapped through the given translation table. +[clinic start generated code]*/ static PyObject * -bytes_translate(PyBytesObject *self, PyObject *args) +bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1, + PyObject *deletechars) +/*[clinic end generated code: output=233df850eb50bf8d input=d8fa5519d7cc4be7]*/ { char *input, *output; Py_buffer table_view = {NULL, NULL}; Py_buffer del_table_view = {NULL, NULL}; - const char *table; + const char *table_chars; Py_ssize_t i, c, changed = 0; PyObject *input_obj = (PyObject*)self; - const char *output_start, *del_table=NULL; + const char *output_start, *del_table_chars=NULL; Py_ssize_t inlen, tablen, dellen = 0; PyObject *result; int trans_table[256]; - PyObject *tableobj, *delobj = NULL; - if (!PyArg_UnpackTuple(args, "translate", 1, 2, - &tableobj, &delobj)) - return NULL; - - if (PyBytes_Check(tableobj)) { - table = PyBytes_AS_STRING(tableobj); - tablen = PyBytes_GET_SIZE(tableobj); + if (PyBytes_Check(table)) { + table_chars = PyBytes_AS_STRING(table); + tablen = PyBytes_GET_SIZE(table); } - else if (tableobj == Py_None) { - table = NULL; + else if (table == Py_None) { + table_chars = NULL; tablen = 256; } else { - if (PyObject_GetBuffer(tableobj, &table_view, PyBUF_SIMPLE) != 0) + if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0) return NULL; - table = table_view.buf; + table_chars = table_view.buf; tablen = table_view.len; } @@ -1504,22 +2149,22 @@ bytes_translate(PyBytesObject *self, PyObject *args) return NULL; } - if (delobj != NULL) { - if (PyBytes_Check(delobj)) { - del_table = PyBytes_AS_STRING(delobj); - dellen = PyBytes_GET_SIZE(delobj); + if (deletechars != NULL) { + if (PyBytes_Check(deletechars)) { + del_table_chars = PyBytes_AS_STRING(deletechars); + dellen = PyBytes_GET_SIZE(deletechars); } else { - if (PyObject_GetBuffer(delobj, &del_table_view, PyBUF_SIMPLE) != 0) { + if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) { PyBuffer_Release(&table_view); return NULL; } - del_table = del_table_view.buf; + del_table_chars = del_table_view.buf; dellen = del_table_view.len; } } else { - del_table = NULL; + del_table_chars = NULL; dellen = 0; } @@ -1533,11 +2178,11 @@ bytes_translate(PyBytesObject *self, PyObject *args) output_start = output = PyBytes_AsString(result); input = PyBytes_AS_STRING(input_obj); - if (dellen == 0 && table != NULL) { + if (dellen == 0 && table_chars != NULL) { /* If no deletions are required, use faster code */ for (i = inlen; --i >= 0; ) { c = Py_CHARMASK(*input++); - if (Py_CHARMASK((*output++ = table[c])) != c) + if (Py_CHARMASK((*output++ = table_chars[c])) != c) changed = 1; } if (!changed && PyBytes_CheckExact(input_obj)) { @@ -1550,17 +2195,17 @@ bytes_translate(PyBytesObject *self, PyObject *args) return result; } - if (table == NULL) { + if (table_chars == NULL) { for (i = 0; i < 256; i++) trans_table[i] = Py_CHARMASK(i); } else { for (i = 0; i < 256; i++) - trans_table[i] = Py_CHARMASK(table[i]); + trans_table[i] = Py_CHARMASK(table_chars[i]); } PyBuffer_Release(&table_view); for (i = 0; i < dellen; i++) - trans_table[(int) Py_CHARMASK(del_table[i])] = -1; + trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1; PyBuffer_Release(&del_table_view); for (i = inlen; --i >= 0; ) { @@ -1582,10 +2227,28 @@ bytes_translate(PyBytesObject *self, PyObject *args) } +/*[clinic input] + +@staticmethod +bytes.maketrans + + frm: Py_buffer + to: Py_buffer + / + +Return a translation table useable for the bytes or bytearray translate method. + +The returned table will be one where each byte in frm is mapped to the byte at +the same position in to. + +The bytes objects frm and to must be of the same length. +[clinic start generated code]*/ + static PyObject * -bytes_maketrans(PyObject *null, PyObject *args) +bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to) +/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/ { - return _Py_bytes_maketrans(args); + return _Py_bytes_maketrans(frm, to); } /* find and count characters and substrings */ @@ -2080,31 +2743,31 @@ replace(PyBytesObject *self, } } -PyDoc_STRVAR(replace__doc__, -"B.replace(old, new[, count]) -> bytes\n\ -\n\ -Return a copy of B with all occurrences of subsection\n\ -old replaced by new. If the optional argument count is\n\ -given, only first count occurances are replaced."); -static PyObject * -bytes_replace(PyBytesObject *self, PyObject *args) -{ - PyObject *res; - Py_buffer old = {NULL, NULL}; - Py_buffer new = {NULL, NULL}; - Py_ssize_t count = -1; +/*[clinic input] +bytes.replace - if (!PyArg_ParseTuple(args, "y*y*|n:replace", &old, &new, &count)) - return NULL; + old: Py_buffer + new: Py_buffer + count: Py_ssize_t = -1 + Maximum number of occurrences to replace. + -1 (the default value) means replace all occurrences. + / - res = (PyObject *)replace((PyBytesObject *) self, - (const char *)old.buf, old.len, - (const char *)new.buf, new.len, count); +Return a copy with all occurrences of substring old replaced by new. - PyBuffer_Release(&old); - PyBuffer_Release(&new); - return res; +If the optional argument count is given, only the first count occurrences are +replaced. +[clinic start generated code]*/ + +static PyObject * +bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new, + Py_ssize_t count) +/*[clinic end generated code: output=403dc9d7a83c5a1d input=b2fbbf0bf04de8e5]*/ +{ + return (PyObject *)replace((PyBytesObject *) self, + (const char *)old->buf, old->len, + (const char *)new->buf, new->len, count); } /** End DALKE **/ @@ -2251,60 +2914,51 @@ bytes_endswith(PyBytesObject *self, PyObject *args) } -PyDoc_STRVAR(decode__doc__, -"B.decode(encoding='utf-8', errors='strict') -> str\n\ -\n\ -Decode B using the codec registered for encoding. Default encoding\n\ -is 'utf-8'. errors may be given to set a different error\n\ -handling scheme. Default is 'strict' meaning that encoding errors raise\n\ -a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\ -as well as any other name registerd with codecs.register_error that is\n\ -able to handle UnicodeDecodeErrors."); +/*[clinic input] +bytes.decode + + encoding: str(c_default="NULL") = 'utf-8' + The encoding with which to decode the bytes. + errors: str(c_default="NULL") = 'strict' + The error handling scheme to use for the handling of decoding errors. + The default is 'strict' meaning that decoding errors raise a + UnicodeDecodeError. Other possible values are 'ignore' and 'replace' + as well as any other name registered with codecs.register_error that + can handle UnicodeDecodeErrors. + +Decode the bytes using the codec registered for encoding. +[clinic start generated code]*/ static PyObject * -bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs) +bytes_decode_impl(PyBytesObject*self, const char *encoding, + const char *errors) +/*[clinic end generated code: output=2d2016ff8e0bb176 input=958174769d2a40ca]*/ { - const char *encoding = NULL; - const char *errors = NULL; - static char *kwlist[] = {"encoding", "errors", 0}; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors)) - return NULL; - return PyUnicode_FromEncodedObject(self, encoding, errors); + return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors); } -PyDoc_STRVAR(splitlines__doc__, -"B.splitlines([keepends]) -> list of lines\n\ -\n\ -Return a list of the lines in B, breaking at line boundaries.\n\ -Line breaks are not included in the resulting list unless keepends\n\ -is given and true."); +/*[clinic input] +bytes.splitlines -static PyObject* -bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds) -{ - static char *kwlist[] = {"keepends", 0}; - int keepends = 0; + keepends: int(py_default="False") = 0 - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines", - kwlist, &keepends)) - return NULL; +Return a list of the lines in the bytes, breaking at line boundaries. +Line breaks are not included in the resulting list unless keepends is given and +true. +[clinic start generated code]*/ + +static PyObject * +bytes_splitlines_impl(PyBytesObject*self, int keepends) +/*[clinic end generated code: output=995c3598f7833cad input=ddb93e3351080c8c]*/ +{ return stringlib_splitlines( (PyObject*) self, PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), keepends ); } - -PyDoc_STRVAR(fromhex_doc, -"bytes.fromhex(string) -> bytes\n\ -\n\ -Create a bytes object from a string of hexadecimal numbers.\n\ -Spaces between two numbers are accepted.\n\ -Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'."); - static int hex_digit_to_int(Py_UCS4 c) { @@ -2321,24 +2975,36 @@ hex_digit_to_int(Py_UCS4 c) return -1; } +/*[clinic input] +@classmethod +bytes.fromhex + + string: unicode + / + +Create a bytes object from a string of hexadecimal numbers. + +Spaces between two numbers are accepted. +Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'. +[clinic start generated code]*/ + static PyObject * -bytes_fromhex(PyObject *cls, PyObject *args) +bytes_fromhex_impl(PyTypeObject *type, PyObject *string) +/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/ { - PyObject *newstring, *hexobj; + PyObject *newstring; char *buf; Py_ssize_t hexlen, byteslen, i, j; int top, bot; void *data; unsigned int kind; - if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj)) + assert(PyUnicode_Check(string)); + if (PyUnicode_READY(string)) return NULL; - assert(PyUnicode_Check(hexobj)); - if (PyUnicode_READY(hexobj)) - return NULL; - kind = PyUnicode_KIND(hexobj); - data = PyUnicode_DATA(hexobj); - hexlen = PyUnicode_GET_LENGTH(hexobj); + kind = PyUnicode_KIND(string); + data = PyUnicode_DATA(string); + hexlen = PyUnicode_GET_LENGTH(string); byteslen = hexlen/2; /* This overestimates if there are spaces */ newstring = PyBytes_FromStringAndSize(NULL, byteslen); @@ -2384,14 +3050,13 @@ bytes_methods[] = { _Py_capitalize__doc__}, {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__}, {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__}, - {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__}, + BYTES_DECODE_METHODDEF {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__}, {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS, expandtabs__doc__}, {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__}, - {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS, - fromhex_doc}, + BYTES_FROMHEX_METHODDEF {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__}, {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS, _Py_isalnum__doc__}, @@ -2407,38 +3072,49 @@ bytes_methods[] = { _Py_istitle__doc__}, {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS, _Py_isupper__doc__}, - {"join", (PyCFunction)bytes_join, METH_O, join__doc__}, + BYTES_JOIN_METHODDEF {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__}, {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__}, - {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__}, - {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC, - _Py_maketrans__doc__}, - {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__}, - {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__}, + BYTES_LSTRIP_METHODDEF + BYTES_MAKETRANS_METHODDEF + BYTES_PARTITION_METHODDEF + BYTES_REPLACE_METHODDEF {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__}, {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__}, {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__}, - {"rpartition", (PyCFunction)bytes_rpartition, METH_O, - rpartition__doc__}, - {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__}, - {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__}, - {"split", (PyCFunction)bytes_split, METH_VARARGS | METH_KEYWORDS, split__doc__}, - {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS, - splitlines__doc__}, + BYTES_RPARTITION_METHODDEF + BYTES_RSPLIT_METHODDEF + BYTES_RSTRIP_METHODDEF + BYTES_SPLIT_METHODDEF + BYTES_SPLITLINES_METHODDEF {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS, startswith__doc__}, - {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__}, + BYTES_STRIP_METHODDEF {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS, _Py_swapcase__doc__}, {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__}, - {"translate", (PyCFunction)bytes_translate, METH_VARARGS, - translate__doc__}, + BYTES_TRANSLATE_METHODDEF {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__}, {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__}, {NULL, NULL} /* sentinel */ }; static PyObject * +bytes_mod(PyObject *v, PyObject *w) +{ + if (!PyBytes_Check(v)) + Py_RETURN_NOTIMPLEMENTED; + return _PyBytes_Format(v, w); +} + +static PyNumberMethods bytes_as_number = { + 0, /*nb_add*/ + 0, /*nb_subtract*/ + 0, /*nb_multiply*/ + bytes_mod, /*nb_remainder*/ +}; + +static PyObject * str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds); static PyObject * @@ -2465,7 +3141,7 @@ bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds) "argument"); return NULL; } - return PyBytes_FromString(""); + return PyBytes_FromStringAndSize(NULL, 0); } if (PyUnicode_Check(x)) { @@ -2522,11 +3198,9 @@ bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; } else { - new = PyBytes_FromStringAndSize(NULL, size); + new = _PyBytes_FromSize(size, 1); if (new == NULL) return NULL; - if (size > 0) - memset(((PyBytesObject*)new)->ob_sval, 0, size); return new; } @@ -2624,10 +3298,12 @@ PyBytes_FromObject(PyObject *x) returning a shared empty bytes string. This required because we want to call _PyBytes_Resize() the returned object, which we can only do on bytes objects with refcount == 1. */ - size += 1; + if (size == 0) + size = 1; new = PyBytes_FromStringAndSize(NULL, size); if (new == NULL) return NULL; + assert(Py_REFCNT(new) == 1); /* Get the iterator */ it = PyObject_GetIter(x); @@ -2729,7 +3405,7 @@ PyTypeObject PyBytes_Type = { 0, /* tp_setattr */ 0, /* tp_reserved */ (reprfunc)bytes_repr, /* tp_repr */ - 0, /* tp_as_number */ + &bytes_as_number, /* tp_as_number */ &bytes_as_sequence, /* tp_as_sequence */ &bytes_as_mapping, /* tp_as_mapping */ (hashfunc)bytes_hash, /* tp_hash */ @@ -2764,7 +3440,6 @@ PyTypeObject PyBytes_Type = { void PyBytes_Concat(PyObject **pv, PyObject *w) { - PyObject *v; assert(pv != NULL); if (*pv == NULL) return; @@ -2772,9 +3447,45 @@ PyBytes_Concat(PyObject **pv, PyObject *w) Py_CLEAR(*pv); return; } - v = bytes_concat(*pv, w); - Py_DECREF(*pv); - *pv = v; + + if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) { + /* Only one reference, so we can resize in place */ + Py_ssize_t oldsize; + Py_buffer wb; + + wb.len = -1; + if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) { + PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s", + Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name); + Py_CLEAR(*pv); + return; + } + + oldsize = PyBytes_GET_SIZE(*pv); + if (oldsize > PY_SSIZE_T_MAX - wb.len) { + PyErr_NoMemory(); + goto error; + } + if (_PyBytes_Resize(pv, oldsize + wb.len) < 0) + goto error; + + memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len); + PyBuffer_Release(&wb); + return; + + error: + PyBuffer_Release(&wb); + Py_CLEAR(*pv); + return; + } + + else { + /* Multiple references, need to create new object */ + PyObject *v; + v = bytes_concat(*pv, w); + Py_DECREF(*pv); + *pv = v; + } } void @@ -2785,14 +3496,14 @@ PyBytes_ConcatAndDel(PyObject **pv, PyObject *w) } -/* The following function breaks the notion that strings are immutable: - it changes the size of a string. We get away with this only if there +/* The following function breaks the notion that bytes are immutable: + it changes the size of a bytes object. We get away with this only if there is only one module referencing the object. You can also think of it - as creating a new string object and destroying the old one, only - more efficiently. In any case, don't use this if the string may + as creating a new bytes object and destroying the old one, only + more efficiently. In any case, don't use this if the bytes object may already be known to some other part of the code... - Note that if there's not enough memory to resize the string, the original - string object at *pv is deallocated, *pv is set to NULL, an "out of + Note that if there's not enough memory to resize the bytes object, the + original bytes object at *pv is deallocated, *pv is set to NULL, an "out of memory" exception is set, and -1 is returned. Else (on success) 0 is returned, and the value in *pv may or may not be the same as on input. As always, an extra byte is allocated for a trailing \0 byte (newsize @@ -2815,7 +3526,7 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize) _Py_DEC_REFTOTAL; _Py_ForgetReference(v); *pv = (PyObject *) - PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize); + PyObject_REALLOC(v, PyBytesObject_SIZE + newsize); if (*pv == NULL) { PyObject_Del(v); PyErr_NoMemory(); |