summaryrefslogtreecommitdiffstats
path: root/Objects/bytesobject.c
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/bytesobject.c')
-rw-r--r--Objects/bytesobject.c1313
1 files changed, 1012 insertions, 301 deletions
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index b9b49ac..d981e0e 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -7,6 +7,13 @@
#include "bytes_methods.h"
#include <stddef.h>
+/*[clinic input]
+class bytes "PyBytesObject*" "&PyBytes_Type"
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
+
+#include "clinic/bytesobject.c.h"
+
#ifdef COUNT_ALLOCS
Py_ssize_t null_strings, one_strings;
#endif
@@ -44,15 +51,12 @@ static PyBytesObject *nullstring;
PyBytes_FromStringAndSize()) or the length of the string in the `str'
parameter (for PyBytes_FromString()).
*/
-PyObject *
-PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
+static PyObject *
+_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
{
PyBytesObject *op;
- if (size < 0) {
- PyErr_SetString(PyExc_SystemError,
- "Negative size passed to PyBytes_FromStringAndSize");
- return NULL;
- }
+ assert(size >= 0);
+
if (size == 0 && (op = nullstring) != NULL) {
#ifdef COUNT_ALLOCS
null_strings++;
@@ -60,36 +64,60 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Py_INCREF(op);
return (PyObject *)op;
}
- if (size == 1 && str != NULL &&
- (op = characters[*str & UCHAR_MAX]) != NULL)
- {
-#ifdef COUNT_ALLOCS
- one_strings++;
-#endif
- Py_INCREF(op);
- return (PyObject *)op;
- }
- if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
+ if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
PyErr_SetString(PyExc_OverflowError,
"byte string is too large");
return NULL;
}
/* Inline PyObject_NewVar */
- op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
+ if (use_calloc)
+ op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
+ else
+ op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
if (op == NULL)
return PyErr_NoMemory();
(void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
op->ob_shash = -1;
- if (str != NULL)
- Py_MEMCPY(op->ob_sval, str, size);
- op->ob_sval[size] = '\0';
- /* share short strings */
+ if (!use_calloc)
+ op->ob_sval[size] = '\0';
+ /* empty byte string singleton */
if (size == 0) {
nullstring = op;
Py_INCREF(op);
- } else if (size == 1 && str != NULL) {
+ }
+ return (PyObject *) op;
+}
+
+PyObject *
+PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
+{
+ PyBytesObject *op;
+ if (size < 0) {
+ PyErr_SetString(PyExc_SystemError,
+ "Negative size passed to PyBytes_FromStringAndSize");
+ return NULL;
+ }
+ if (size == 1 && str != NULL &&
+ (op = characters[*str & UCHAR_MAX]) != NULL)
+ {
+#ifdef COUNT_ALLOCS
+ one_strings++;
+#endif
+ Py_INCREF(op);
+ return (PyObject *)op;
+ }
+
+ op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
+ if (op == NULL)
+ return NULL;
+ if (str == NULL)
+ return (PyObject *) op;
+
+ Py_MEMCPY(op->ob_sval, str, size);
+ /* share short strings */
+ if (size == 1) {
characters[*str & UCHAR_MAX] = op;
Py_INCREF(op);
}
@@ -347,6 +375,580 @@ PyBytes_FromFormat(const char *format, ...)
return ret;
}
+/* Helpers for formatstring */
+
+Py_LOCAL_INLINE(PyObject *)
+getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
+{
+ Py_ssize_t argidx = *p_argidx;
+ if (argidx < arglen) {
+ (*p_argidx)++;
+ if (arglen < 0)
+ return args;
+ else
+ return PyTuple_GetItem(args, argidx);
+ }
+ PyErr_SetString(PyExc_TypeError,
+ "not enough arguments for format string");
+ return NULL;
+}
+
+/* Format codes
+ * F_LJUST '-'
+ * F_SIGN '+'
+ * F_BLANK ' '
+ * F_ALT '#'
+ * F_ZERO '0'
+ */
+#define F_LJUST (1<<0)
+#define F_SIGN (1<<1)
+#define F_BLANK (1<<2)
+#define F_ALT (1<<3)
+#define F_ZERO (1<<4)
+
+/* Returns a new reference to a PyBytes object, or NULL on failure. */
+
+static PyObject *
+formatfloat(PyObject *v, int flags, int prec, int type)
+{
+ char *p;
+ PyObject *result;
+ double x;
+
+ x = PyFloat_AsDouble(v);
+ if (x == -1.0 && PyErr_Occurred()) {
+ PyErr_Format(PyExc_TypeError, "float argument required, "
+ "not %.200s", Py_TYPE(v)->tp_name);
+ return NULL;
+ }
+
+ if (prec < 0)
+ prec = 6;
+
+ p = PyOS_double_to_string(x, type, prec,
+ (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
+
+ if (p == NULL)
+ return NULL;
+ result = PyBytes_FromStringAndSize(p, strlen(p));
+ PyMem_Free(p);
+ return result;
+}
+
+static PyObject *
+formatlong(PyObject *v, int flags, int prec, int type)
+{
+ PyObject *result, *iobj;
+ if (type == 'i')
+ type = 'd';
+ if (PyLong_Check(v))
+ return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
+ if (PyNumber_Check(v)) {
+ /* make sure number is a type of integer for o, x, and X */
+ if (type == 'o' || type == 'x' || type == 'X')
+ iobj = PyNumber_Index(v);
+ else
+ iobj = PyNumber_Long(v);
+ if (iobj == NULL) {
+ if (!PyErr_ExceptionMatches(PyExc_TypeError))
+ return NULL;
+ }
+ else if (!PyLong_Check(iobj))
+ Py_CLEAR(iobj);
+ if (iobj != NULL) {
+ result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
+ Py_DECREF(iobj);
+ return result;
+ }
+ }
+ PyErr_Format(PyExc_TypeError,
+ "%%%c format: %s is required, not %.200s", type,
+ (type == 'o' || type == 'x' || type == 'X') ? "an integer"
+ : "a number",
+ Py_TYPE(v)->tp_name);
+ return NULL;
+}
+
+static int
+byte_converter(PyObject *arg, char *p)
+{
+ if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) {
+ *p = PyBytes_AS_STRING(arg)[0];
+ return 1;
+ }
+ else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) {
+ *p = PyByteArray_AS_STRING(arg)[0];
+ return 1;
+ }
+ else {
+ PyObject *iobj;
+ long ival;
+ int overflow;
+ /* make sure number is a type of integer */
+ if (PyLong_Check(arg)) {
+ ival = PyLong_AsLongAndOverflow(arg, &overflow);
+ }
+ else {
+ iobj = PyNumber_Index(arg);
+ if (iobj == NULL) {
+ if (!PyErr_ExceptionMatches(PyExc_TypeError))
+ return 0;
+ goto onError;
+ }
+ ival = PyLong_AsLongAndOverflow(iobj, &overflow);
+ Py_DECREF(iobj);
+ }
+ if (!overflow && ival == -1 && PyErr_Occurred())
+ goto onError;
+ if (overflow || !(0 <= ival && ival <= 255)) {
+ PyErr_SetString(PyExc_OverflowError,
+ "%c arg not in range(256)");
+ return 0;
+ }
+ *p = (char)ival;
+ return 1;
+ }
+ onError:
+ PyErr_SetString(PyExc_TypeError,
+ "%c requires an integer in range(256) or a single byte");
+ return 0;
+}
+
+static PyObject *
+format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
+{
+ PyObject *func, *result;
+ _Py_IDENTIFIER(__bytes__);
+ /* is it a bytes object? */
+ if (PyBytes_Check(v)) {
+ *pbuf = PyBytes_AS_STRING(v);
+ *plen = PyBytes_GET_SIZE(v);
+ Py_INCREF(v);
+ return v;
+ }
+ if (PyByteArray_Check(v)) {
+ *pbuf = PyByteArray_AS_STRING(v);
+ *plen = PyByteArray_GET_SIZE(v);
+ Py_INCREF(v);
+ return v;
+ }
+ /* does it support __bytes__? */
+ func = _PyObject_LookupSpecial(v, &PyId___bytes__);
+ if (func != NULL) {
+ result = PyObject_CallFunctionObjArgs(func, NULL);
+ Py_DECREF(func);
+ if (result == NULL)
+ return NULL;
+ if (!PyBytes_Check(result)) {
+ PyErr_Format(PyExc_TypeError,
+ "__bytes__ returned non-bytes (type %.200s)",
+ Py_TYPE(result)->tp_name);
+ Py_DECREF(result);
+ return NULL;
+ }
+ *pbuf = PyBytes_AS_STRING(result);
+ *plen = PyBytes_GET_SIZE(result);
+ return result;
+ }
+ PyErr_Format(PyExc_TypeError,
+ "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
+ Py_TYPE(v)->tp_name);
+ return NULL;
+}
+
+/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
+
+ FORMATBUFLEN is the length of the buffer in which the ints &
+ chars are formatted. XXX This is a magic number. Each formatting
+ routine does bounds checking to ensure no overflow, but a better
+ solution may be to malloc a buffer of appropriate size for each
+ format. For now, the current solution is sufficient.
+*/
+#define FORMATBUFLEN (size_t)120
+
+PyObject *
+_PyBytes_Format(PyObject *format, PyObject *args)
+{
+ char *fmt, *res;
+ Py_ssize_t arglen, argidx;
+ Py_ssize_t reslen, rescnt, fmtcnt;
+ int args_owned = 0;
+ PyObject *result;
+ PyObject *dict = NULL;
+ if (format == NULL || !PyBytes_Check(format) || args == NULL) {
+ PyErr_BadInternalCall();
+ return NULL;
+ }
+ fmt = PyBytes_AS_STRING(format);
+ fmtcnt = PyBytes_GET_SIZE(format);
+ reslen = rescnt = fmtcnt + 100;
+ result = PyBytes_FromStringAndSize((char *)NULL, reslen);
+ if (result == NULL)
+ return NULL;
+ res = PyBytes_AsString(result);
+ if (PyTuple_Check(args)) {
+ arglen = PyTuple_GET_SIZE(args);
+ argidx = 0;
+ }
+ else {
+ arglen = -1;
+ argidx = -2;
+ }
+ if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
+ !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
+ !PyByteArray_Check(args)) {
+ dict = args;
+ }
+ while (--fmtcnt >= 0) {
+ if (*fmt != '%') {
+ if (--rescnt < 0) {
+ rescnt = fmtcnt + 100;
+ reslen += rescnt;
+ if (_PyBytes_Resize(&result, reslen))
+ return NULL;
+ res = PyBytes_AS_STRING(result)
+ + reslen - rescnt;
+ --rescnt;
+ }
+ *res++ = *fmt++;
+ }
+ else {
+ /* Got a format specifier */
+ int flags = 0;
+ Py_ssize_t width = -1;
+ int prec = -1;
+ int c = '\0';
+ int fill;
+ PyObject *v = NULL;
+ PyObject *temp = NULL;
+ const char *pbuf = NULL;
+ int sign;
+ Py_ssize_t len = 0;
+ char onechar; /* For byte_converter() */
+
+ fmt++;
+ if (*fmt == '(') {
+ char *keystart;
+ Py_ssize_t keylen;
+ PyObject *key;
+ int pcount = 1;
+
+ if (dict == NULL) {
+ PyErr_SetString(PyExc_TypeError,
+ "format requires a mapping");
+ goto error;
+ }
+ ++fmt;
+ --fmtcnt;
+ keystart = fmt;
+ /* Skip over balanced parentheses */
+ while (pcount > 0 && --fmtcnt >= 0) {
+ if (*fmt == ')')
+ --pcount;
+ else if (*fmt == '(')
+ ++pcount;
+ fmt++;
+ }
+ keylen = fmt - keystart - 1;
+ if (fmtcnt < 0 || pcount > 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "incomplete format key");
+ goto error;
+ }
+ key = PyBytes_FromStringAndSize(keystart,
+ keylen);
+ if (key == NULL)
+ goto error;
+ if (args_owned) {
+ Py_DECREF(args);
+ args_owned = 0;
+ }
+ args = PyObject_GetItem(dict, key);
+ Py_DECREF(key);
+ if (args == NULL) {
+ goto error;
+ }
+ args_owned = 1;
+ arglen = -1;
+ argidx = -2;
+ }
+ while (--fmtcnt >= 0) {
+ switch (c = *fmt++) {
+ case '-': flags |= F_LJUST; continue;
+ case '+': flags |= F_SIGN; continue;
+ case ' ': flags |= F_BLANK; continue;
+ case '#': flags |= F_ALT; continue;
+ case '0': flags |= F_ZERO; continue;
+ }
+ break;
+ }
+ if (c == '*') {
+ v = getnextarg(args, arglen, &argidx);
+ if (v == NULL)
+ goto error;
+ if (!PyLong_Check(v)) {
+ PyErr_SetString(PyExc_TypeError,
+ "* wants int");
+ goto error;
+ }
+ width = PyLong_AsSsize_t(v);
+ if (width == -1 && PyErr_Occurred())
+ goto error;
+ if (width < 0) {
+ flags |= F_LJUST;
+ width = -width;
+ }
+ if (--fmtcnt >= 0)
+ c = *fmt++;
+ }
+ else if (c >= 0 && isdigit(c)) {
+ width = c - '0';
+ while (--fmtcnt >= 0) {
+ c = Py_CHARMASK(*fmt++);
+ if (!isdigit(c))
+ break;
+ if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
+ PyErr_SetString(
+ PyExc_ValueError,
+ "width too big");
+ goto error;
+ }
+ width = width*10 + (c - '0');
+ }
+ }
+ if (c == '.') {
+ prec = 0;
+ if (--fmtcnt >= 0)
+ c = *fmt++;
+ if (c == '*') {
+ v = getnextarg(args, arglen, &argidx);
+ if (v == NULL)
+ goto error;
+ if (!PyLong_Check(v)) {
+ PyErr_SetString(
+ PyExc_TypeError,
+ "* wants int");
+ goto error;
+ }
+ prec = _PyLong_AsInt(v);
+ if (prec == -1 && PyErr_Occurred())
+ goto error;
+ if (prec < 0)
+ prec = 0;
+ if (--fmtcnt >= 0)
+ c = *fmt++;
+ }
+ else if (c >= 0 && isdigit(c)) {
+ prec = c - '0';
+ while (--fmtcnt >= 0) {
+ c = Py_CHARMASK(*fmt++);
+ if (!isdigit(c))
+ break;
+ if (prec > (INT_MAX - ((int)c - '0')) / 10) {
+ PyErr_SetString(
+ PyExc_ValueError,
+ "prec too big");
+ goto error;
+ }
+ prec = prec*10 + (c - '0');
+ }
+ }
+ } /* prec */
+ if (fmtcnt >= 0) {
+ if (c == 'h' || c == 'l' || c == 'L') {
+ if (--fmtcnt >= 0)
+ c = *fmt++;
+ }
+ }
+ if (fmtcnt < 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "incomplete format");
+ goto error;
+ }
+ if (c != '%') {
+ v = getnextarg(args, arglen, &argidx);
+ if (v == NULL)
+ goto error;
+ }
+ sign = 0;
+ fill = ' ';
+ switch (c) {
+ case '%':
+ pbuf = "%";
+ len = 1;
+ break;
+ case 'r':
+ // %r is only for 2/3 code; 3 only code should use %a
+ case 'a':
+ temp = PyObject_ASCII(v);
+ if (temp == NULL)
+ goto error;
+ assert(PyUnicode_IS_ASCII(temp));
+ pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
+ len = PyUnicode_GET_LENGTH(temp);
+ if (prec >= 0 && len > prec)
+ len = prec;
+ break;
+ case 's':
+ // %s is only for 2/3 code; 3 only code should use %b
+ case 'b':
+ temp = format_obj(v, &pbuf, &len);
+ if (temp == NULL)
+ goto error;
+ if (prec >= 0 && len > prec)
+ len = prec;
+ break;
+ case 'i':
+ case 'd':
+ case 'u':
+ case 'o':
+ case 'x':
+ case 'X':
+ temp = formatlong(v, flags, prec, c);
+ if (!temp)
+ goto error;
+ assert(PyUnicode_IS_ASCII(temp));
+ pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
+ len = PyUnicode_GET_LENGTH(temp);
+ sign = 1;
+ if (flags & F_ZERO)
+ fill = '0';
+ break;
+ case 'e':
+ case 'E':
+ case 'f':
+ case 'F':
+ case 'g':
+ case 'G':
+ temp = formatfloat(v, flags, prec, c);
+ if (temp == NULL)
+ goto error;
+ pbuf = PyBytes_AS_STRING(temp);
+ len = PyBytes_GET_SIZE(temp);
+ sign = 1;
+ if (flags & F_ZERO)
+ fill = '0';
+ break;
+ case 'c':
+ pbuf = &onechar;
+ len = byte_converter(v, &onechar);
+ if (!len)
+ goto error;
+ break;
+ default:
+ PyErr_Format(PyExc_ValueError,
+ "unsupported format character '%c' (0x%x) "
+ "at index %zd",
+ c, c,
+ (Py_ssize_t)(fmt - 1 -
+ PyBytes_AsString(format)));
+ goto error;
+ }
+ if (sign) {
+ if (*pbuf == '-' || *pbuf == '+') {
+ sign = *pbuf++;
+ len--;
+ }
+ else if (flags & F_SIGN)
+ sign = '+';
+ else if (flags & F_BLANK)
+ sign = ' ';
+ else
+ sign = 0;
+ }
+ if (width < len)
+ width = len;
+ if (rescnt - (sign != 0) < width) {
+ reslen -= rescnt;
+ rescnt = width + fmtcnt + 100;
+ reslen += rescnt;
+ if (reslen < 0) {
+ Py_DECREF(result);
+ Py_XDECREF(temp);
+ return PyErr_NoMemory();
+ }
+ if (_PyBytes_Resize(&result, reslen)) {
+ Py_XDECREF(temp);
+ return NULL;
+ }
+ res = PyBytes_AS_STRING(result)
+ + reslen - rescnt;
+ }
+ if (sign) {
+ if (fill != ' ')
+ *res++ = sign;
+ rescnt--;
+ if (width > len)
+ width--;
+ }
+ if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
+ assert(pbuf[0] == '0');
+ assert(pbuf[1] == c);
+ if (fill != ' ') {
+ *res++ = *pbuf++;
+ *res++ = *pbuf++;
+ }
+ rescnt -= 2;
+ width -= 2;
+ if (width < 0)
+ width = 0;
+ len -= 2;
+ }
+ if (width > len && !(flags & F_LJUST)) {
+ do {
+ --rescnt;
+ *res++ = fill;
+ } while (--width > len);
+ }
+ if (fill == ' ') {
+ if (sign)
+ *res++ = sign;
+ if ((flags & F_ALT) &&
+ (c == 'x' || c == 'X')) {
+ assert(pbuf[0] == '0');
+ assert(pbuf[1] == c);
+ *res++ = *pbuf++;
+ *res++ = *pbuf++;
+ }
+ }
+ Py_MEMCPY(res, pbuf, len);
+ res += len;
+ rescnt -= len;
+ while (--width >= len) {
+ --rescnt;
+ *res++ = ' ';
+ }
+ if (dict && (argidx < arglen) && c != '%') {
+ PyErr_SetString(PyExc_TypeError,
+ "not all arguments converted during bytes formatting");
+ Py_XDECREF(temp);
+ goto error;
+ }
+ Py_XDECREF(temp);
+ } /* '%' */
+ } /* until end */
+ if (argidx < arglen && !dict) {
+ PyErr_SetString(PyExc_TypeError,
+ "not all arguments converted during bytes formatting");
+ goto error;
+ }
+ if (args_owned) {
+ Py_DECREF(args);
+ }
+ if (_PyBytes_Resize(&result, reslen - rescnt))
+ return NULL;
+ return result;
+
+ error:
+ Py_DECREF(result);
+ if (args_owned) {
+ Py_DECREF(args);
+ }
+ return NULL;
+}
+
+/* =-= */
+
static void
bytes_dealloc(PyObject *op)
{
@@ -540,8 +1142,8 @@ PyBytes_AsStringAndSize(PyObject *obj,
if (len != NULL)
*len = PyBytes_GET_SIZE(obj);
else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
- PyErr_SetString(PyExc_TypeError,
- "expected bytes with no null");
+ PyErr_SetString(PyExc_ValueError,
+ "embedded null byte");
return -1;
}
return 0;
@@ -819,14 +1421,23 @@ bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
/* Make sure both arguments are strings. */
if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
- if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
- (PyObject_IsInstance((PyObject*)a,
- (PyObject*)&PyUnicode_Type) ||
- PyObject_IsInstance((PyObject*)b,
- (PyObject*)&PyUnicode_Type))) {
- if (PyErr_WarnEx(PyExc_BytesWarning,
- "Comparison between bytes and string", 1))
- return NULL;
+ if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
+ if (PyObject_IsInstance((PyObject*)a,
+ (PyObject*)&PyUnicode_Type) ||
+ PyObject_IsInstance((PyObject*)b,
+ (PyObject*)&PyUnicode_Type)) {
+ if (PyErr_WarnEx(PyExc_BytesWarning,
+ "Comparison between bytes and string", 1))
+ return NULL;
+ }
+ else if (PyObject_IsInstance((PyObject*)a,
+ (PyObject*)&PyLong_Type) ||
+ PyObject_IsInstance((PyObject*)b,
+ (PyObject*)&PyLong_Type)) {
+ if (PyErr_WarnEx(PyExc_BytesWarning,
+ "Comparison between bytes and int", 1))
+ return NULL;
+ }
}
result = Py_NotImplemented;
}
@@ -951,7 +1562,7 @@ bytes_subscript(PyBytesObject* self, PyObject* item)
}
else {
PyErr_Format(PyExc_TypeError,
- "byte indices must be integers, not %.200s",
+ "byte indices must be integers or slices, not %.200s",
Py_TYPE(item)->tp_name);
return NULL;
}
@@ -991,37 +1602,34 @@ static PyBufferProcs bytes_as_buffer = {
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
-/* Arrays indexed by above */
-static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
+/*[clinic input]
+bytes.split
-#define STRIPNAME(i) (stripformat[i]+3)
+ sep: object = None
+ The delimiter according which to split the bytes.
+ None (the default value) means split on ASCII whitespace characters
+ (space, tab, return, newline, formfeed, vertical tab).
+ maxsplit: Py_ssize_t = -1
+ Maximum number of splits to do.
+ -1 (the default value) means no limit.
-PyDoc_STRVAR(split__doc__,
-"B.split(sep=None, maxsplit=-1) -> list of bytes\n\
-\n\
-Return a list of the sections in B, using sep as the delimiter.\n\
-If sep is not specified or is None, B is split on ASCII whitespace\n\
-characters (space, tab, return, newline, formfeed, vertical tab).\n\
-If maxsplit is given, at most maxsplit splits are done.");
+Return a list of the sections in the bytes, using sep as the delimiter.
+[clinic start generated code]*/
static PyObject *
-bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds)
+bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
+/*[clinic end generated code: output=8bde44dacb36ef2e input=8b809b39074abbfa]*/
{
- static char *kwlist[] = {"sep", "maxsplit", 0};
Py_ssize_t len = PyBytes_GET_SIZE(self), n;
- Py_ssize_t maxsplit = -1;
const char *s = PyBytes_AS_STRING(self), *sub;
Py_buffer vsub;
- PyObject *list, *subobj = Py_None;
+ PyObject *list;
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split",
- kwlist, &subobj, &maxsplit))
- return NULL;
if (maxsplit < 0)
maxsplit = PY_SSIZE_T_MAX;
- if (subobj == Py_None)
+ if (sep == Py_None)
return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
- if (PyObject_GetBuffer(subobj, &vsub, PyBUF_SIMPLE) != 0)
+ if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
return NULL;
sub = vsub.buf;
n = vsub.len;
@@ -1031,85 +1639,84 @@ bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds)
return list;
}
-PyDoc_STRVAR(partition__doc__,
-"B.partition(sep) -> (head, sep, tail)\n\
-\n\
-Search for the separator sep in B, and return the part before it,\n\
-the separator itself, and the part after it. If the separator is not\n\
-found, returns B and two empty bytes objects.");
+/*[clinic input]
+bytes.partition
-static PyObject *
-bytes_partition(PyBytesObject *self, PyObject *sep_obj)
-{
- Py_buffer sep = {NULL, NULL};
- PyObject *res;
+ self: self(type="PyBytesObject *")
+ sep: Py_buffer
+ /
- if (PyObject_GetBuffer(sep_obj, &sep, PyBUF_SIMPLE) != 0)
- return NULL;
+Partition the bytes into three parts using the given separator.
+
+This will search for the separator sep in the bytes. If the separator is found,
+returns a 3-tuple containing the part before the separator, the separator
+itself, and the part after it.
+
+If the separator is not found, returns a 3-tuple containing the original bytes
+object and two empty bytes objects.
+[clinic start generated code]*/
- res = stringlib_partition(
+static PyObject *
+bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
+/*[clinic end generated code: output=f532b392a17ff695 input=bc855dc63ca949de]*/
+{
+ return stringlib_partition(
(PyObject*) self,
PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
- sep_obj, sep.buf, sep.len
+ sep->obj, (const char *)sep->buf, sep->len
);
- PyBuffer_Release(&sep);
- return res;
}
-PyDoc_STRVAR(rpartition__doc__,
-"B.rpartition(sep) -> (head, sep, tail)\n\
-\n\
-Search for the separator sep in B, starting at the end of B,\n\
-and return the part before it, the separator itself, and the\n\
-part after it. If the separator is not found, returns two empty\n\
-bytes objects and B.");
+/*[clinic input]
+bytes.rpartition
-static PyObject *
-bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
-{
- Py_buffer sep = {NULL, NULL};
- PyObject *res;
+ self: self(type="PyBytesObject *")
+ sep: Py_buffer
+ /
- if (PyObject_GetBuffer(sep_obj, &sep, PyBUF_SIMPLE) != 0)
- return NULL;
+Partition the bytes into three parts using the given separator.
+
+This will search for the separator sep in the bytes, starting and the end. If
+the separator is found, returns a 3-tuple containing the part before the
+separator, the separator itself, and the part after it.
- res = stringlib_rpartition(
+If the separator is not found, returns a 3-tuple containing two empty bytes
+objects and the original bytes object.
+[clinic start generated code]*/
+
+static PyObject *
+bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
+/*[clinic end generated code: output=191b114cbb028e50 input=6588fff262a9170e]*/
+{
+ return stringlib_rpartition(
(PyObject*) self,
PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
- sep_obj, sep.buf, sep.len
+ sep->obj, (const char *)sep->buf, sep->len
);
- PyBuffer_Release(&sep);
- return res;
}
-PyDoc_STRVAR(rsplit__doc__,
-"B.rsplit(sep=None, maxsplit=-1) -> list of bytes\n\
-\n\
-Return a list of the sections in B, using sep as the delimiter,\n\
-starting at the end of B and working to the front.\n\
-If sep is not given, B is split on ASCII whitespace characters\n\
-(space, tab, return, newline, formfeed, vertical tab).\n\
-If maxsplit is given, at most maxsplit splits are done.");
+/*[clinic input]
+bytes.rsplit = bytes.split
+Return a list of the sections in the bytes, using sep as the delimiter.
+
+Splitting is done starting at the end of the bytes and working to the front.
+[clinic start generated code]*/
static PyObject *
-bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds)
+bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
+/*[clinic end generated code: output=0b6570b977911d88 input=0f86c9f28f7d7b7b]*/
{
- static char *kwlist[] = {"sep", "maxsplit", 0};
Py_ssize_t len = PyBytes_GET_SIZE(self), n;
- Py_ssize_t maxsplit = -1;
const char *s = PyBytes_AS_STRING(self), *sub;
Py_buffer vsub;
- PyObject *list, *subobj = Py_None;
+ PyObject *list;
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit",
- kwlist, &subobj, &maxsplit))
- return NULL;
if (maxsplit < 0)
maxsplit = PY_SSIZE_T_MAX;
- if (subobj == Py_None)
+ if (sep == Py_None)
return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
- if (PyObject_GetBuffer(subobj, &vsub, PyBUF_SIMPLE) != 0)
+ if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
return NULL;
sub = vsub.buf;
n = vsub.len;
@@ -1120,16 +1727,26 @@ bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds)
}
-PyDoc_STRVAR(join__doc__,
-"B.join(iterable_of_bytes) -> bytes\n\
-\n\
-Concatenate any number of bytes objects, with B in between each pair.\n\
-Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
+/*[clinic input]
+bytes.join
+
+ iterable_of_bytes: object
+ /
+
+Concatenate any number of bytes objects.
+
+The bytes whose method is called is inserted in between each pair.
+
+The result is returned as a new bytes object.
+
+Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
+[clinic start generated code]*/
static PyObject *
-bytes_join(PyObject *self, PyObject *iterable)
+bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
+/*[clinic end generated code: output=634aff14764ff997 input=7fe377b95bd549d2]*/
{
- return stringlib_bytes_join(self, iterable);
+ return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
}
PyObject *
@@ -1137,7 +1754,7 @@ _PyBytes_Join(PyObject *sep, PyObject *x)
{
assert(sep != NULL && PyBytes_Check(sep));
assert(x != NULL);
- return bytes_join(sep, x);
+ return bytes_join((PyBytesObject*)sep, x);
}
/* helper macro to fixup start/end slice values */
@@ -1162,7 +1779,7 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
char byte;
Py_buffer subbuf;
const char *sub;
- Py_ssize_t sub_len;
+ Py_ssize_t len, sub_len;
Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Py_ssize_t res;
@@ -1181,15 +1798,30 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
sub = &byte;
sub_len = 1;
}
+ len = PyBytes_GET_SIZE(self);
- if (dir > 0)
- res = stringlib_find_slice(
- PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
- sub, sub_len, start, end);
- else
- res = stringlib_rfind_slice(
- PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
- sub, sub_len, start, end);
+ ADJUST_INDICES(start, end, len);
+ if (end - start < sub_len)
+ res = -1;
+ /* Issue #23573: FIXME, windows has no memrchr() */
+ else if (sub_len == 1 && dir > 0) {
+ unsigned char needle = *sub;
+ res = stringlib_fastsearch_memchr_1char(
+ PyBytes_AS_STRING(self) + start, end - start,
+ needle, needle, FAST_SEARCH);
+ if (res >= 0)
+ res += start;
+ }
+ else {
+ if (dir > 0)
+ res = stringlib_find_slice(
+ PyBytes_AS_STRING(self), len,
+ sub, sub_len, start, end);
+ else
+ res = stringlib_rfind_slice(
+ PyBytes_AS_STRING(self), len,
+ sub, sub_len, start, end);
+ }
if (subobj)
PyBuffer_Release(&subbuf);
@@ -1348,62 +1980,69 @@ do_strip(PyBytesObject *self, int striptype)
Py_LOCAL_INLINE(PyObject *)
-do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
+do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
{
- PyObject *sep = NULL;
-
- if (!PyArg_ParseTuple(args, stripformat[striptype], &sep))
- return NULL;
-
- if (sep != NULL && sep != Py_None) {
- return do_xstrip(self, striptype, sep);
+ if (bytes != NULL && bytes != Py_None) {
+ return do_xstrip(self, striptype, bytes);
}
return do_strip(self, striptype);
}
+/*[clinic input]
+bytes.strip
+
+ self: self(type="PyBytesObject *")
+ bytes: object = None
+ /
+
+Strip leading and trailing bytes contained in the argument.
+
+If the argument is omitted or None, strip leading and trailing ASCII whitespace.
+[clinic start generated code]*/
-PyDoc_STRVAR(strip__doc__,
-"B.strip([bytes]) -> bytes\n\
-\n\
-Strip leading and trailing bytes contained in the argument.\n\
-If the argument is omitted, strip leading and trailing ASCII whitespace.");
static PyObject *
-bytes_strip(PyBytesObject *self, PyObject *args)
+bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
+/*[clinic end generated code: output=c7c228d3bd104a1b input=37daa5fad1395d95]*/
{
- if (PyTuple_GET_SIZE(args) == 0)
- return do_strip(self, BOTHSTRIP); /* Common case */
- else
- return do_argstrip(self, BOTHSTRIP, args);
+ return do_argstrip(self, BOTHSTRIP, bytes);
}
+/*[clinic input]
+bytes.lstrip
+
+ self: self(type="PyBytesObject *")
+ bytes: object = None
+ /
+
+Strip leading bytes contained in the argument.
+
+If the argument is omitted or None, strip leading ASCII whitespace.
+[clinic start generated code]*/
-PyDoc_STRVAR(lstrip__doc__,
-"B.lstrip([bytes]) -> bytes\n\
-\n\
-Strip leading bytes contained in the argument.\n\
-If the argument is omitted, strip leading ASCII whitespace.");
static PyObject *
-bytes_lstrip(PyBytesObject *self, PyObject *args)
+bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
+/*[clinic end generated code: output=28602e586f524e82 input=88811b09dfbc2988]*/
{
- if (PyTuple_GET_SIZE(args) == 0)
- return do_strip(self, LEFTSTRIP); /* Common case */
- else
- return do_argstrip(self, LEFTSTRIP, args);
+ return do_argstrip(self, LEFTSTRIP, bytes);
}
+/*[clinic input]
+bytes.rstrip
+
+ self: self(type="PyBytesObject *")
+ bytes: object = None
+ /
+
+Strip trailing bytes contained in the argument.
+
+If the argument is omitted or None, strip trailing ASCII whitespace.
+[clinic start generated code]*/
-PyDoc_STRVAR(rstrip__doc__,
-"B.rstrip([bytes]) -> bytes\n\
-\n\
-Strip trailing bytes contained in the argument.\n\
-If the argument is omitted, strip trailing ASCII whitespace.");
static PyObject *
-bytes_rstrip(PyBytesObject *self, PyObject *args)
+bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
+/*[clinic end generated code: output=547e3815c95447da input=8f93c9cd361f0140]*/
{
- if (PyTuple_GET_SIZE(args) == 0)
- return do_strip(self, RIGHTSTRIP); /* Common case */
- else
- return do_argstrip(self, RIGHTSTRIP, args);
+ return do_argstrip(self, RIGHTSTRIP, bytes);
}
@@ -1455,45 +2094,51 @@ bytes_count(PyBytesObject *self, PyObject *args)
}
-PyDoc_STRVAR(translate__doc__,
-"B.translate(table[, deletechars]) -> bytes\n\
-\n\
-Return a copy of B, where all characters occurring in the\n\
-optional argument deletechars are removed, and the remaining\n\
-characters have been mapped through the given translation\n\
-table, which must be a bytes object of length 256.");
+/*[clinic input]
+bytes.translate
+
+ self: self(type="PyBytesObject *")
+ table: object
+ Translation table, which must be a bytes object of length 256.
+ [
+ deletechars: object
+ ]
+ /
+
+Return a copy with each character mapped by the given translation table.
+
+All characters occurring in the optional argument deletechars are removed.
+The remaining characters are mapped through the given translation table.
+[clinic start generated code]*/
static PyObject *
-bytes_translate(PyBytesObject *self, PyObject *args)
+bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1,
+ PyObject *deletechars)
+/*[clinic end generated code: output=233df850eb50bf8d input=d8fa5519d7cc4be7]*/
{
char *input, *output;
Py_buffer table_view = {NULL, NULL};
Py_buffer del_table_view = {NULL, NULL};
- const char *table;
+ const char *table_chars;
Py_ssize_t i, c, changed = 0;
PyObject *input_obj = (PyObject*)self;
- const char *output_start, *del_table=NULL;
+ const char *output_start, *del_table_chars=NULL;
Py_ssize_t inlen, tablen, dellen = 0;
PyObject *result;
int trans_table[256];
- PyObject *tableobj, *delobj = NULL;
- if (!PyArg_UnpackTuple(args, "translate", 1, 2,
- &tableobj, &delobj))
- return NULL;
-
- if (PyBytes_Check(tableobj)) {
- table = PyBytes_AS_STRING(tableobj);
- tablen = PyBytes_GET_SIZE(tableobj);
+ if (PyBytes_Check(table)) {
+ table_chars = PyBytes_AS_STRING(table);
+ tablen = PyBytes_GET_SIZE(table);
}
- else if (tableobj == Py_None) {
- table = NULL;
+ else if (table == Py_None) {
+ table_chars = NULL;
tablen = 256;
}
else {
- if (PyObject_GetBuffer(tableobj, &table_view, PyBUF_SIMPLE) != 0)
+ if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
return NULL;
- table = table_view.buf;
+ table_chars = table_view.buf;
tablen = table_view.len;
}
@@ -1504,22 +2149,22 @@ bytes_translate(PyBytesObject *self, PyObject *args)
return NULL;
}
- if (delobj != NULL) {
- if (PyBytes_Check(delobj)) {
- del_table = PyBytes_AS_STRING(delobj);
- dellen = PyBytes_GET_SIZE(delobj);
+ if (deletechars != NULL) {
+ if (PyBytes_Check(deletechars)) {
+ del_table_chars = PyBytes_AS_STRING(deletechars);
+ dellen = PyBytes_GET_SIZE(deletechars);
}
else {
- if (PyObject_GetBuffer(delobj, &del_table_view, PyBUF_SIMPLE) != 0) {
+ if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
PyBuffer_Release(&table_view);
return NULL;
}
- del_table = del_table_view.buf;
+ del_table_chars = del_table_view.buf;
dellen = del_table_view.len;
}
}
else {
- del_table = NULL;
+ del_table_chars = NULL;
dellen = 0;
}
@@ -1533,11 +2178,11 @@ bytes_translate(PyBytesObject *self, PyObject *args)
output_start = output = PyBytes_AsString(result);
input = PyBytes_AS_STRING(input_obj);
- if (dellen == 0 && table != NULL) {
+ if (dellen == 0 && table_chars != NULL) {
/* If no deletions are required, use faster code */
for (i = inlen; --i >= 0; ) {
c = Py_CHARMASK(*input++);
- if (Py_CHARMASK((*output++ = table[c])) != c)
+ if (Py_CHARMASK((*output++ = table_chars[c])) != c)
changed = 1;
}
if (!changed && PyBytes_CheckExact(input_obj)) {
@@ -1550,17 +2195,17 @@ bytes_translate(PyBytesObject *self, PyObject *args)
return result;
}
- if (table == NULL) {
+ if (table_chars == NULL) {
for (i = 0; i < 256; i++)
trans_table[i] = Py_CHARMASK(i);
} else {
for (i = 0; i < 256; i++)
- trans_table[i] = Py_CHARMASK(table[i]);
+ trans_table[i] = Py_CHARMASK(table_chars[i]);
}
PyBuffer_Release(&table_view);
for (i = 0; i < dellen; i++)
- trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
+ trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
PyBuffer_Release(&del_table_view);
for (i = inlen; --i >= 0; ) {
@@ -1582,10 +2227,28 @@ bytes_translate(PyBytesObject *self, PyObject *args)
}
+/*[clinic input]
+
+@staticmethod
+bytes.maketrans
+
+ frm: Py_buffer
+ to: Py_buffer
+ /
+
+Return a translation table useable for the bytes or bytearray translate method.
+
+The returned table will be one where each byte in frm is mapped to the byte at
+the same position in to.
+
+The bytes objects frm and to must be of the same length.
+[clinic start generated code]*/
+
static PyObject *
-bytes_maketrans(PyObject *null, PyObject *args)
+bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
+/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
{
- return _Py_bytes_maketrans(args);
+ return _Py_bytes_maketrans(frm, to);
}
/* find and count characters and substrings */
@@ -2080,31 +2743,31 @@ replace(PyBytesObject *self,
}
}
-PyDoc_STRVAR(replace__doc__,
-"B.replace(old, new[, count]) -> bytes\n\
-\n\
-Return a copy of B with all occurrences of subsection\n\
-old replaced by new. If the optional argument count is\n\
-given, only first count occurances are replaced.");
-static PyObject *
-bytes_replace(PyBytesObject *self, PyObject *args)
-{
- PyObject *res;
- Py_buffer old = {NULL, NULL};
- Py_buffer new = {NULL, NULL};
- Py_ssize_t count = -1;
+/*[clinic input]
+bytes.replace
- if (!PyArg_ParseTuple(args, "y*y*|n:replace", &old, &new, &count))
- return NULL;
+ old: Py_buffer
+ new: Py_buffer
+ count: Py_ssize_t = -1
+ Maximum number of occurrences to replace.
+ -1 (the default value) means replace all occurrences.
+ /
- res = (PyObject *)replace((PyBytesObject *) self,
- (const char *)old.buf, old.len,
- (const char *)new.buf, new.len, count);
+Return a copy with all occurrences of substring old replaced by new.
- PyBuffer_Release(&old);
- PyBuffer_Release(&new);
- return res;
+If the optional argument count is given, only the first count occurrences are
+replaced.
+[clinic start generated code]*/
+
+static PyObject *
+bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new,
+ Py_ssize_t count)
+/*[clinic end generated code: output=403dc9d7a83c5a1d input=b2fbbf0bf04de8e5]*/
+{
+ return (PyObject *)replace((PyBytesObject *) self,
+ (const char *)old->buf, old->len,
+ (const char *)new->buf, new->len, count);
}
/** End DALKE **/
@@ -2251,60 +2914,51 @@ bytes_endswith(PyBytesObject *self, PyObject *args)
}
-PyDoc_STRVAR(decode__doc__,
-"B.decode(encoding='utf-8', errors='strict') -> str\n\
-\n\
-Decode B using the codec registered for encoding. Default encoding\n\
-is 'utf-8'. errors may be given to set a different error\n\
-handling scheme. Default is 'strict' meaning that encoding errors raise\n\
-a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
-as well as any other name registerd with codecs.register_error that is\n\
-able to handle UnicodeDecodeErrors.");
+/*[clinic input]
+bytes.decode
+
+ encoding: str(c_default="NULL") = 'utf-8'
+ The encoding with which to decode the bytes.
+ errors: str(c_default="NULL") = 'strict'
+ The error handling scheme to use for the handling of decoding errors.
+ The default is 'strict' meaning that decoding errors raise a
+ UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
+ as well as any other name registered with codecs.register_error that
+ can handle UnicodeDecodeErrors.
+
+Decode the bytes using the codec registered for encoding.
+[clinic start generated code]*/
static PyObject *
-bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
+bytes_decode_impl(PyBytesObject*self, const char *encoding,
+ const char *errors)
+/*[clinic end generated code: output=2d2016ff8e0bb176 input=958174769d2a40ca]*/
{
- const char *encoding = NULL;
- const char *errors = NULL;
- static char *kwlist[] = {"encoding", "errors", 0};
-
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
- return NULL;
- return PyUnicode_FromEncodedObject(self, encoding, errors);
+ return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
}
-PyDoc_STRVAR(splitlines__doc__,
-"B.splitlines([keepends]) -> list of lines\n\
-\n\
-Return a list of the lines in B, breaking at line boundaries.\n\
-Line breaks are not included in the resulting list unless keepends\n\
-is given and true.");
+/*[clinic input]
+bytes.splitlines
-static PyObject*
-bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds)
-{
- static char *kwlist[] = {"keepends", 0};
- int keepends = 0;
+ keepends: int(py_default="False") = 0
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines",
- kwlist, &keepends))
- return NULL;
+Return a list of the lines in the bytes, breaking at line boundaries.
+Line breaks are not included in the resulting list unless keepends is given and
+true.
+[clinic start generated code]*/
+
+static PyObject *
+bytes_splitlines_impl(PyBytesObject*self, int keepends)
+/*[clinic end generated code: output=995c3598f7833cad input=ddb93e3351080c8c]*/
+{
return stringlib_splitlines(
(PyObject*) self, PyBytes_AS_STRING(self),
PyBytes_GET_SIZE(self), keepends
);
}
-
-PyDoc_STRVAR(fromhex_doc,
-"bytes.fromhex(string) -> bytes\n\
-\n\
-Create a bytes object from a string of hexadecimal numbers.\n\
-Spaces between two numbers are accepted.\n\
-Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
-
static int
hex_digit_to_int(Py_UCS4 c)
{
@@ -2321,24 +2975,36 @@ hex_digit_to_int(Py_UCS4 c)
return -1;
}
+/*[clinic input]
+@classmethod
+bytes.fromhex
+
+ string: unicode
+ /
+
+Create a bytes object from a string of hexadecimal numbers.
+
+Spaces between two numbers are accepted.
+Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
+[clinic start generated code]*/
+
static PyObject *
-bytes_fromhex(PyObject *cls, PyObject *args)
+bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
+/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
{
- PyObject *newstring, *hexobj;
+ PyObject *newstring;
char *buf;
Py_ssize_t hexlen, byteslen, i, j;
int top, bot;
void *data;
unsigned int kind;
- if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
+ assert(PyUnicode_Check(string));
+ if (PyUnicode_READY(string))
return NULL;
- assert(PyUnicode_Check(hexobj));
- if (PyUnicode_READY(hexobj))
- return NULL;
- kind = PyUnicode_KIND(hexobj);
- data = PyUnicode_DATA(hexobj);
- hexlen = PyUnicode_GET_LENGTH(hexobj);
+ kind = PyUnicode_KIND(string);
+ data = PyUnicode_DATA(string);
+ hexlen = PyUnicode_GET_LENGTH(string);
byteslen = hexlen/2; /* This overestimates if there are spaces */
newstring = PyBytes_FromStringAndSize(NULL, byteslen);
@@ -2384,14 +3050,13 @@ bytes_methods[] = {
_Py_capitalize__doc__},
{"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
{"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
- {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
+ BYTES_DECODE_METHODDEF
{"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
endswith__doc__},
{"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
expandtabs__doc__},
{"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
- {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
- fromhex_doc},
+ BYTES_FROMHEX_METHODDEF
{"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
{"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
_Py_isalnum__doc__},
@@ -2407,38 +3072,49 @@ bytes_methods[] = {
_Py_istitle__doc__},
{"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
_Py_isupper__doc__},
- {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
+ BYTES_JOIN_METHODDEF
{"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
{"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
- {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
- {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
- _Py_maketrans__doc__},
- {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
- {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
+ BYTES_LSTRIP_METHODDEF
+ BYTES_MAKETRANS_METHODDEF
+ BYTES_PARTITION_METHODDEF
+ BYTES_REPLACE_METHODDEF
{"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
{"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
{"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
- {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
- rpartition__doc__},
- {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__},
- {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
- {"split", (PyCFunction)bytes_split, METH_VARARGS | METH_KEYWORDS, split__doc__},
- {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,
- splitlines__doc__},
+ BYTES_RPARTITION_METHODDEF
+ BYTES_RSPLIT_METHODDEF
+ BYTES_RSTRIP_METHODDEF
+ BYTES_SPLIT_METHODDEF
+ BYTES_SPLITLINES_METHODDEF
{"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
startswith__doc__},
- {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
+ BYTES_STRIP_METHODDEF
{"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
_Py_swapcase__doc__},
{"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
- {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
- translate__doc__},
+ BYTES_TRANSLATE_METHODDEF
{"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
{"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
{NULL, NULL} /* sentinel */
};
static PyObject *
+bytes_mod(PyObject *v, PyObject *w)
+{
+ if (!PyBytes_Check(v))
+ Py_RETURN_NOTIMPLEMENTED;
+ return _PyBytes_Format(v, w);
+}
+
+static PyNumberMethods bytes_as_number = {
+ 0, /*nb_add*/
+ 0, /*nb_subtract*/
+ 0, /*nb_multiply*/
+ bytes_mod, /*nb_remainder*/
+};
+
+static PyObject *
str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
static PyObject *
@@ -2465,7 +3141,7 @@ bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
"argument");
return NULL;
}
- return PyBytes_FromString("");
+ return PyBytes_FromStringAndSize(NULL, 0);
}
if (PyUnicode_Check(x)) {
@@ -2522,11 +3198,9 @@ bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return NULL;
}
else {
- new = PyBytes_FromStringAndSize(NULL, size);
+ new = _PyBytes_FromSize(size, 1);
if (new == NULL)
return NULL;
- if (size > 0)
- memset(((PyBytesObject*)new)->ob_sval, 0, size);
return new;
}
@@ -2624,10 +3298,12 @@ PyBytes_FromObject(PyObject *x)
returning a shared empty bytes string. This required because we
want to call _PyBytes_Resize() the returned object, which we can
only do on bytes objects with refcount == 1. */
- size += 1;
+ if (size == 0)
+ size = 1;
new = PyBytes_FromStringAndSize(NULL, size);
if (new == NULL)
return NULL;
+ assert(Py_REFCNT(new) == 1);
/* Get the iterator */
it = PyObject_GetIter(x);
@@ -2729,7 +3405,7 @@ PyTypeObject PyBytes_Type = {
0, /* tp_setattr */
0, /* tp_reserved */
(reprfunc)bytes_repr, /* tp_repr */
- 0, /* tp_as_number */
+ &bytes_as_number, /* tp_as_number */
&bytes_as_sequence, /* tp_as_sequence */
&bytes_as_mapping, /* tp_as_mapping */
(hashfunc)bytes_hash, /* tp_hash */
@@ -2764,7 +3440,6 @@ PyTypeObject PyBytes_Type = {
void
PyBytes_Concat(PyObject **pv, PyObject *w)
{
- PyObject *v;
assert(pv != NULL);
if (*pv == NULL)
return;
@@ -2772,9 +3447,45 @@ PyBytes_Concat(PyObject **pv, PyObject *w)
Py_CLEAR(*pv);
return;
}
- v = bytes_concat(*pv, w);
- Py_DECREF(*pv);
- *pv = v;
+
+ if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
+ /* Only one reference, so we can resize in place */
+ Py_ssize_t oldsize;
+ Py_buffer wb;
+
+ wb.len = -1;
+ if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
+ PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
+ Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
+ Py_CLEAR(*pv);
+ return;
+ }
+
+ oldsize = PyBytes_GET_SIZE(*pv);
+ if (oldsize > PY_SSIZE_T_MAX - wb.len) {
+ PyErr_NoMemory();
+ goto error;
+ }
+ if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
+ goto error;
+
+ memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
+ PyBuffer_Release(&wb);
+ return;
+
+ error:
+ PyBuffer_Release(&wb);
+ Py_CLEAR(*pv);
+ return;
+ }
+
+ else {
+ /* Multiple references, need to create new object */
+ PyObject *v;
+ v = bytes_concat(*pv, w);
+ Py_DECREF(*pv);
+ *pv = v;
+ }
}
void
@@ -2785,14 +3496,14 @@ PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
}
-/* The following function breaks the notion that strings are immutable:
- it changes the size of a string. We get away with this only if there
+/* The following function breaks the notion that bytes are immutable:
+ it changes the size of a bytes object. We get away with this only if there
is only one module referencing the object. You can also think of it
- as creating a new string object and destroying the old one, only
- more efficiently. In any case, don't use this if the string may
+ as creating a new bytes object and destroying the old one, only
+ more efficiently. In any case, don't use this if the bytes object may
already be known to some other part of the code...
- Note that if there's not enough memory to resize the string, the original
- string object at *pv is deallocated, *pv is set to NULL, an "out of
+ Note that if there's not enough memory to resize the bytes object, the
+ original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
memory" exception is set, and -1 is returned. Else (on success) 0 is
returned, and the value in *pv may or may not be the same as on input.
As always, an extra byte is allocated for a trailing \0 byte (newsize
@@ -2815,7 +3526,7 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
_Py_DEC_REFTOTAL;
_Py_ForgetReference(v);
*pv = (PyObject *)
- PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
+ PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
if (*pv == NULL) {
PyObject_Del(v);
PyErr_NoMemory();