diff options
author | Ethan Furman <ethan@stoneleaf.us> | 2015-01-24 04:05:18 (GMT) |
---|---|---|
committer | Ethan Furman <ethan@stoneleaf.us> | 2015-01-24 04:05:18 (GMT) |
commit | b95b56150fc3e7834783b54acdddeaed4fe44e27 (patch) | |
tree | c1994946e84b457841024402b50f8a9640211cb4 /Objects/bytesobject.c | |
parent | 8861502e0746465c4124548681f05969c08f4cae (diff) | |
download | cpython-b95b56150fc3e7834783b54acdddeaed4fe44e27.zip cpython-b95b56150fc3e7834783b54acdddeaed4fe44e27.tar.gz cpython-b95b56150fc3e7834783b54acdddeaed4fe44e27.tar.bz2 |
Issue20284: Implement PEP461
Diffstat (limited to 'Objects/bytesobject.c')
-rw-r--r-- | Objects/bytesobject.c | 657 |
1 files changed, 650 insertions, 7 deletions
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index a5b9feb..bf919b5 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -400,6 +400,634 @@ PyBytes_FromFormat(const char *format, ...) return ret; } +/* Helpers for formatstring */ + +Py_LOCAL_INLINE(PyObject *) +getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) +{ + Py_ssize_t argidx = *p_argidx; + if (argidx < arglen) { + (*p_argidx)++; + if (arglen < 0) + return args; + else + return PyTuple_GetItem(args, argidx); + } + PyErr_SetString(PyExc_TypeError, + "not enough arguments for format string"); + return NULL; +} + +/* Format codes + * F_LJUST '-' + * F_SIGN '+' + * F_BLANK ' ' + * F_ALT '#' + * F_ZERO '0' + */ +#define F_LJUST (1<<0) +#define F_SIGN (1<<1) +#define F_BLANK (1<<2) +#define F_ALT (1<<3) +#define F_ZERO (1<<4) + +/* Returns a new reference to a PyBytes object, or NULL on failure. */ + +static PyObject * +formatfloat(PyObject *v, int flags, int prec, int type) +{ + char *p; + PyObject *result; + double x; + + x = PyFloat_AsDouble(v); + if (x == -1.0 && PyErr_Occurred()) { + PyErr_Format(PyExc_TypeError, "float argument required, " + "not %.200s", Py_TYPE(v)->tp_name); + return NULL; + } + + if (prec < 0) + prec = 6; + + p = PyOS_double_to_string(x, type, prec, + (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL); + + if (p == NULL) + return NULL; + result = PyBytes_FromStringAndSize(p, strlen(p)); + PyMem_Free(p); + return result; +} + +/* format_long emulates the format codes d, u, o, x and X, and + * the F_ALT flag, for Python's long (unbounded) ints. It's not used for + * Python's regular ints. + * Return value: a new PyBytes*, or NULL if error. + * . *pbuf is set to point into it, + * *plen set to the # of chars following that. + * Caller must decref it when done using pbuf. + * The string starting at *pbuf is of the form + * "-"? ("0x" | "0X")? digit+ + * "0x"/"0X" are present only for x and X conversions, with F_ALT + * set in flags. The case of hex digits will be correct, + * There will be at least prec digits, zero-filled on the left if + * necessary to get that many. + * val object to be converted + * flags bitmask of format flags; only F_ALT is looked at + * prec minimum number of digits; 0-fill on left if needed + * type a character in [duoxX]; u acts the same as d + * + * CAUTION: o, x and X conversions on regular ints can never + * produce a '-' sign, but can for Python's unbounded ints. + */ + +static PyObject * +format_long(PyObject *val, int flags, int prec, int type, + char **pbuf, int *plen) +{ + PyObject *s; + PyObject *result = NULL; + + s = _PyUnicode_FormatLong(val, flags & F_ALT, prec, type); + if (!s) + return NULL; + result = _PyUnicode_AsASCIIString(s, "strict"); + Py_DECREF(s); + if (!result) + return NULL; + *pbuf = PyBytes_AS_STRING(result); + *plen = PyBytes_GET_SIZE(result); + return result; +} + +Py_LOCAL_INLINE(int) +formatchar(char *buf, size_t buflen, PyObject *v) +{ + PyObject *w = NULL; + /* convert bytearray to bytes */ + if (PyByteArray_Check(v)) { + w = PyBytes_FromObject(v); + if (w == NULL) + goto error; + v = w; + } + /* presume that the buffer is at least 2 characters long */ + if (PyBytes_Check(v)) { + if (!PyArg_Parse(v, "c;%c requires an integer in range(256) or a single byte", &buf[0])) + goto error; + } + else { + long ival = PyLong_AsLong(v); + if (ival == -1 && PyErr_Occurred()) { + PyErr_SetString(PyExc_TypeError, + "%c requires an integer in range(256) or a single byte"); + goto error; + } + if (ival < 0 || ival > 255) { + PyErr_SetString(PyExc_TypeError, + "%c requires an integer in range(256) or a single byte"); + goto error; + } + buf[0] = ival; + } + Py_XDECREF(w); + buf[1] = '\0'; + return 1; + + error: + Py_XDECREF(w); + return -1; +} + +static PyObject * +format_obj(PyObject *v) +{ + PyObject *result = NULL, *w = NULL; + PyObject *func; + _Py_IDENTIFIER(__bytes__); + /* convert bytearray to bytes */ + if (PyByteArray_Check(v)) { + w = PyBytes_FromObject(v); + if (w == NULL) + return NULL; + v = w; + } + /* is it a bytes object? */ + if (PyBytes_Check(v)) { + result = v; + Py_INCREF(v); + Py_XDECREF(w); + return result; + } + /* does it support __bytes__? */ + func = _PyObject_LookupSpecial(v, &PyId___bytes__); + if (func != NULL) { + result = PyObject_CallFunctionObjArgs(func, NULL); + Py_DECREF(func); + if (result == NULL) + return NULL; + if (!PyBytes_Check(result)) { + PyErr_Format(PyExc_TypeError, + "__bytes__ returned non-bytes (type %.200s)", + Py_TYPE(result)->tp_name); + Py_DECREF(result); + return NULL; + } + return result; + } + PyErr_Format(PyExc_TypeError, + "%%b requires bytes, or an object that implements __bytes__, not '%.100s'", + Py_TYPE(v)->tp_name); + return NULL; +} + +/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) + + FORMATBUFLEN is the length of the buffer in which the ints & + chars are formatted. XXX This is a magic number. Each formatting + routine does bounds checking to ensure no overflow, but a better + solution may be to malloc a buffer of appropriate size for each + format. For now, the current solution is sufficient. +*/ +#define FORMATBUFLEN (size_t)120 + +PyObject * +_PyBytes_Format(PyObject *format, PyObject *args) +{ + char *fmt, *res; + Py_ssize_t arglen, argidx; + Py_ssize_t reslen, rescnt, fmtcnt; + int args_owned = 0; + PyObject *result; + PyObject *repr; + PyObject *dict = NULL; + if (format == NULL || !PyBytes_Check(format) || args == NULL) { + PyErr_BadInternalCall(); + return NULL; + } + fmt = PyBytes_AS_STRING(format); + fmtcnt = PyBytes_GET_SIZE(format); + reslen = rescnt = fmtcnt + 100; + result = PyBytes_FromStringAndSize((char *)NULL, reslen); + if (result == NULL) + return NULL; + res = PyBytes_AsString(result); + if (PyTuple_Check(args)) { + arglen = PyTuple_GET_SIZE(args); + argidx = 0; + } + else { + arglen = -1; + argidx = -2; + } + if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript && + !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) && + !PyByteArray_Check(args)) { + dict = args; + } + while (--fmtcnt >= 0) { + if (*fmt != '%') { + if (--rescnt < 0) { + rescnt = fmtcnt + 100; + reslen += rescnt; + if (_PyBytes_Resize(&result, reslen)) + return NULL; + res = PyBytes_AS_STRING(result) + + reslen - rescnt; + --rescnt; + } + *res++ = *fmt++; + } + else { + /* Got a format specifier */ + int flags = 0; + Py_ssize_t width = -1; + int prec = -1; + int c = '\0'; + int fill; + int isnumok; + PyObject *v = NULL; + PyObject *temp = NULL; + Py_buffer buf; + char *pbuf; + int sign; + Py_ssize_t len; + char formatbuf[FORMATBUFLEN]; + /* For format{int,char}() */ + + buf.obj = NULL; + fmt++; + if (*fmt == '(') { + char *keystart; + Py_ssize_t keylen; + PyObject *key; + int pcount = 1; + + if (dict == NULL) { + PyErr_SetString(PyExc_TypeError, + "format requires a mapping"); + goto error; + } + ++fmt; + --fmtcnt; + keystart = fmt; + /* Skip over balanced parentheses */ + while (pcount > 0 && --fmtcnt >= 0) { + if (*fmt == ')') + --pcount; + else if (*fmt == '(') + ++pcount; + fmt++; + } + keylen = fmt - keystart - 1; + if (fmtcnt < 0 || pcount > 0) { + PyErr_SetString(PyExc_ValueError, + "incomplete format key"); + goto error; + } + key = PyBytes_FromStringAndSize(keystart, + keylen); + if (key == NULL) + goto error; + if (args_owned) { + Py_DECREF(args); + args_owned = 0; + } + args = PyObject_GetItem(dict, key); + Py_DECREF(key); + if (args == NULL) { + goto error; + } + args_owned = 1; + arglen = -1; + argidx = -2; + } + while (--fmtcnt >= 0) { + switch (c = *fmt++) { + case '-': flags |= F_LJUST; continue; + case '+': flags |= F_SIGN; continue; + case ' ': flags |= F_BLANK; continue; + case '#': flags |= F_ALT; continue; + case '0': flags |= F_ZERO; continue; + } + break; + } + if (c == '*') { + v = getnextarg(args, arglen, &argidx); + if (v == NULL) + goto error; + if (!PyLong_Check(v)) { + PyErr_SetString(PyExc_TypeError, + "* wants int"); + goto error; + } + width = PyLong_AsSsize_t(v); + if (width == -1 && PyErr_Occurred()) + goto error; + if (width < 0) { + flags |= F_LJUST; + width = -width; + } + if (--fmtcnt >= 0) + c = *fmt++; + } + else if (c >= 0 && isdigit(c)) { + width = c - '0'; + while (--fmtcnt >= 0) { + c = Py_CHARMASK(*fmt++); + if (!isdigit(c)) + break; + if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) { + PyErr_SetString( + PyExc_ValueError, + "width too big"); + goto error; + } + width = width*10 + (c - '0'); + } + } + if (c == '.') { + prec = 0; + if (--fmtcnt >= 0) + c = *fmt++; + if (c == '*') { + v = getnextarg(args, arglen, &argidx); + if (v == NULL) + goto error; + if (!PyLong_Check(v)) { + PyErr_SetString( + PyExc_TypeError, + "* wants int"); + goto error; + } + prec = PyLong_AsSsize_t(v); + if (prec == -1 && PyErr_Occurred()) + goto error; + if (prec < 0) + prec = 0; + if (--fmtcnt >= 0) + c = *fmt++; + } + else if (c >= 0 && isdigit(c)) { + prec = c - '0'; + while (--fmtcnt >= 0) { + c = Py_CHARMASK(*fmt++); + if (!isdigit(c)) + break; + if (prec > (INT_MAX - ((int)c - '0')) / 10) { + PyErr_SetString( + PyExc_ValueError, + "prec too big"); + goto error; + } + prec = prec*10 + (c - '0'); + } + } + } /* prec */ + if (fmtcnt >= 0) { + if (c == 'h' || c == 'l' || c == 'L') { + if (--fmtcnt >= 0) + c = *fmt++; + } + } + if (fmtcnt < 0) { + PyErr_SetString(PyExc_ValueError, + "incomplete format"); + goto error; + } + if (c != '%') { + v = getnextarg(args, arglen, &argidx); + if (v == NULL) + goto error; + } + sign = 0; + fill = ' '; + switch (c) { + case '%': + pbuf = "%"; + len = 1; + break; + case 'a': + temp = PyObject_Repr(v); + if (temp == NULL) + goto error; + repr = PyUnicode_AsEncodedObject(temp, "ascii", "backslashreplace"); + if (repr == NULL) { + Py_DECREF(temp); + goto error; + } + if (_getbuffer(repr, &buf) < 0) { + temp = format_obj(repr); + if (temp == NULL) { + Py_DECREF(repr); + goto error; + } + Py_DECREF(repr); + repr = temp; + } + pbuf = PyBytes_AS_STRING(repr); + len = PyBytes_GET_SIZE(repr); + Py_DECREF(repr); + if (prec >= 0 && len > prec) + len = prec; + break; + case 's': + // %s is only for 2/3 code; 3 only code should use %b + case 'b': + temp = format_obj(v); + if (temp == NULL) + goto error; + pbuf = PyBytes_AS_STRING(temp); + len = PyBytes_GET_SIZE(temp); + if (prec >= 0 && len > prec) + len = prec; + break; + case 'i': + case 'd': + case 'u': + case 'o': + case 'x': + case 'X': + if (c == 'i') + c = 'd'; + isnumok = 0; + if (PyNumber_Check(v)) { + PyObject *iobj=NULL; + + if ((PyLong_Check(v))) { + iobj = v; + Py_INCREF(iobj); + } + else { + iobj = PyNumber_Long(v); + } + if (iobj!=NULL) { + if (PyLong_Check(iobj)) { + int ilen; + + isnumok = 1; + temp = format_long(iobj, flags, prec, c, + &pbuf, &ilen); + Py_DECREF(iobj); + len = ilen; + if (!temp) + goto error; + sign = 1; + } + else { + Py_DECREF(iobj); + } + } + } + if (!isnumok) { + PyErr_Format(PyExc_TypeError, + "%%%c format: a number is required, " + "not %.200s", c, Py_TYPE(v)->tp_name); + goto error; + } + if (flags & F_ZERO) + fill = '0'; + break; + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + temp = formatfloat(v, flags, prec, c); + if (temp == NULL) + goto error; + pbuf = PyBytes_AS_STRING(temp); + len = PyBytes_GET_SIZE(temp); + sign = 1; + if (flags & F_ZERO) + fill = '0'; + break; + case 'c': + pbuf = formatbuf; + len = formatchar(pbuf, sizeof(formatbuf), v); + if (len < 0) + goto error; + break; + default: + PyErr_Format(PyExc_ValueError, + "unsupported format character '%c' (0x%x) " + "at index %zd", + c, c, + (Py_ssize_t)(fmt - 1 - + PyBytes_AsString(format))); + goto error; + } + if (sign) { + if (*pbuf == '-' || *pbuf == '+') { + sign = *pbuf++; + len--; + } + else if (flags & F_SIGN) + sign = '+'; + else if (flags & F_BLANK) + sign = ' '; + else + sign = 0; + } + if (width < len) + width = len; + if (rescnt - (sign != 0) < width) { + reslen -= rescnt; + rescnt = width + fmtcnt + 100; + reslen += rescnt; + if (reslen < 0) { + Py_DECREF(result); + PyBuffer_Release(&buf); + Py_XDECREF(temp); + return PyErr_NoMemory(); + } + if (_PyBytes_Resize(&result, reslen)) { + PyBuffer_Release(&buf); + Py_XDECREF(temp); + return NULL; + } + res = PyBytes_AS_STRING(result) + + reslen - rescnt; + } + if (sign) { + if (fill != ' ') + *res++ = sign; + rescnt--; + if (width > len) + width--; + } + if ((flags & F_ALT) && (c == 'x' || c == 'X')) { + assert(pbuf[0] == '0'); + assert(pbuf[1] == c); + if (fill != ' ') { + *res++ = *pbuf++; + *res++ = *pbuf++; + } + rescnt -= 2; + width -= 2; + if (width < 0) + width = 0; + len -= 2; + } + if (width > len && !(flags & F_LJUST)) { + do { + --rescnt; + *res++ = fill; + } while (--width > len); + } + if (fill == ' ') { + if (sign) + *res++ = sign; + if ((flags & F_ALT) && + (c == 'x' || c == 'X')) { + assert(pbuf[0] == '0'); + assert(pbuf[1] == c); + *res++ = *pbuf++; + *res++ = *pbuf++; + } + } + Py_MEMCPY(res, pbuf, len); + res += len; + rescnt -= len; + while (--width >= len) { + --rescnt; + *res++ = ' '; + } + if (dict && (argidx < arglen) && c != '%') { + PyErr_SetString(PyExc_TypeError, + "not all arguments converted during bytes formatting"); + PyBuffer_Release(&buf); + Py_XDECREF(temp); + goto error; + } + PyBuffer_Release(&buf); + Py_XDECREF(temp); + } /* '%' */ + } /* until end */ + if (argidx < arglen && !dict) { + PyErr_SetString(PyExc_TypeError, + "not all arguments converted during bytes formatting"); + goto error; + } + if (args_owned) { + Py_DECREF(args); + } + if (_PyBytes_Resize(&result, reslen - rescnt)) + return NULL; + return result; + + error: + Py_DECREF(result); + if (args_owned) { + Py_DECREF(args); + } + return NULL; +} + +/* =-= */ + static void bytes_dealloc(PyObject *op) { @@ -2996,6 +3624,21 @@ bytes_methods[] = { }; static PyObject * +bytes_mod(PyObject *v, PyObject *w) +{ + if (!PyBytes_Check(v)) + Py_RETURN_NOTIMPLEMENTED; + return _PyBytes_Format(v, w); +} + +static PyNumberMethods bytes_as_number = { + 0, /*nb_add*/ + 0, /*nb_subtract*/ + 0, /*nb_multiply*/ + bytes_mod, /*nb_remainder*/ +}; + +static PyObject * str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds); static PyObject * @@ -3286,7 +3929,7 @@ PyTypeObject PyBytes_Type = { 0, /* tp_setattr */ 0, /* tp_reserved */ (reprfunc)bytes_repr, /* tp_repr */ - 0, /* tp_as_number */ + &bytes_as_number, /* tp_as_number */ &bytes_as_sequence, /* tp_as_sequence */ &bytes_as_mapping, /* tp_as_mapping */ (hashfunc)bytes_hash, /* tp_hash */ @@ -3377,14 +4020,14 @@ PyBytes_ConcatAndDel(PyObject **pv, PyObject *w) } -/* The following function breaks the notion that strings are immutable: - it changes the size of a string. We get away with this only if there +/* The following function breaks the notion that bytes are immutable: + it changes the size of a bytes object. We get away with this only if there is only one module referencing the object. You can also think of it - as creating a new string object and destroying the old one, only - more efficiently. In any case, don't use this if the string may + as creating a new bytes object and destroying the old one, only + more efficiently. In any case, don't use this if the bytes object may already be known to some other part of the code... - Note that if there's not enough memory to resize the string, the original - string object at *pv is deallocated, *pv is set to NULL, an "out of + Note that if there's not enough memory to resize the bytes object, the + original bytes object at *pv is deallocated, *pv is set to NULL, an "out of memory" exception is set, and -1 is returned. Else (on success) 0 is returned, and the value in *pv may or may not be the same as on input. As always, an extra byte is allocated for a trailing \0 byte (newsize |