summaryrefslogtreecommitdiffstats
path: root/Objects/bytesobject.c
diff options
context:
space:
mode:
author Ethan Furman <ethan@stoneleaf.us> 2015-01-24 04:05:18 (GMT)
committer Ethan Furman <ethan@stoneleaf.us> 2015-01-24 04:05:18 (GMT)
commit b95b56150fc3e7834783b54acdddeaed4fe44e27 (patch)
tree c1994946e84b457841024402b50f8a9640211cb4 /Objects/bytesobject.c
parent 8861502e0746465c4124548681f05969c08f4cae (diff)
downloadcpython-b95b56150fc3e7834783b54acdddeaed4fe44e27.zip
cpython-b95b56150fc3e7834783b54acdddeaed4fe44e27.tar.gz
cpython-b95b56150fc3e7834783b54acdddeaed4fe44e27.tar.bz2
Issue20284: Implement PEP461
Diffstat (limited to 'Objects/bytesobject.c')
-rw-r--r-- Objects/bytesobject.c 657
1 files changed, 650 insertions, 7 deletions
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index a5b9feb..bf919b5 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -400,6 +400,634 @@ PyBytes_FromFormat(const char *format, ...)
return ret;
}
+/* Helpers for formatstring */
+
+/* Return the next argument to be consumed by a format specifier and advance
+ * *p_argidx by one.
+ *
+ * When arglen is negative, args itself is handed back unchanged; otherwise
+ * args is indexed with PyTuple_GetItem() (a borrowed reference).  If
+ * *p_argidx is not below arglen, a TypeError is set and NULL is returned
+ * without advancing the index.
+ */
+Py_LOCAL_INLINE(PyObject *)
+getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
+{
+    const Py_ssize_t current = *p_argidx;
+
+    if (current >= arglen) {
+        PyErr_SetString(PyExc_TypeError,
+                        "not enough arguments for format string");
+        return NULL;
+    }
+    *p_argidx = current + 1;
+    return (arglen < 0) ? args : PyTuple_GetItem(args, current);
+}
+
+/* Format codes
+ * F_LJUST '-'
+ * F_SIGN '+'
+ * F_BLANK ' '
+ * F_ALT '#'
+ * F_ZERO '0'
+ */
+#define F_LJUST (1<<0) /* '-' flag or a negative '*' width: pad on the right instead of the left */
+#define F_SIGN (1<<1) /* '+' flag: emit '+' in front of numbers that carry no sign of their own */
+#define F_BLANK (1<<2) /* ' ' flag: emit ' ' in that position when F_SIGN is not set */
+#define F_ALT (1<<3) /* '#' flag: alternate form, forwarded to the number formatters */
+#define F_ZERO (1<<4) /* '0' flag: numeric conversions pad with '0' instead of ' ' */
+
+/* Format v for the e, E, f, F, g and G conversions.
+ *
+ *   v      object converted to a C double with PyFloat_AsDouble()
+ *   flags  bitmask of format flags; only F_ALT is looked at
+ *   prec   precision; a negative value selects the default of 6
+ *   type   format code passed through to PyOS_double_to_string()
+ *
+ * Returns a new reference to a PyBytes object, or NULL on failure.  When the
+ * conversion to double fails, the pending error is replaced by a TypeError
+ * naming the type of v.
+ */
+
+static PyObject *
+formatfloat(PyObject *v, int flags, int prec, int type)
+{
+    PyObject *bytes;
+    char *text;
+    int dtoa_flags = (flags & F_ALT) ? Py_DTSF_ALT : 0;
+    double value = PyFloat_AsDouble(v);
+
+    if (value == -1.0 && PyErr_Occurred()) {
+        PyErr_Format(PyExc_TypeError, "float argument required, "
+                     "not %.200s", Py_TYPE(v)->tp_name);
+        return NULL;
+    }
+
+    if (prec < 0)
+        prec = 6;
+
+    text = PyOS_double_to_string(value, type, prec, dtoa_flags, NULL);
+    if (text == NULL)
+        return NULL;
+
+    /* The formatted text is copied into the bytes object, so the temporary
+       string can be released right away. */
+    bytes = PyBytes_FromStringAndSize(text, strlen(text));
+    PyMem_Free(text);
+    return bytes;
+}
+
+/* format_long emulates the format codes d, u, o, x and X, and the F_ALT
+ * flag, for Python ints.  The digits are produced as text by
+ * _PyUnicode_FormatLong() and then converted to bytes with the "strict"
+ * ASCII codec.
+ *
+ *   val    object to be converted
+ *   flags  bitmask of format flags; only F_ALT is looked at
+ *   prec   minimum number of digits; 0-fill on left if needed
+ *   type   a character in [duoxX]; u acts the same as d
+ *   pbuf   on success, set to point at the data inside the returned object
+ *   plen   on success, set to the number of chars following *pbuf
+ *
+ * Return value: a new PyBytes*, or NULL if error (in which case *pbuf and
+ * *plen are left untouched).  The caller must decref the returned object
+ * when done using *pbuf.
+ *
+ * The string starting at *pbuf is of the form
+ *     "-"? ("0x" | "0X")? digit+
+ * "0x"/"0X" are present only for x and X conversions, with F_ALT set in
+ * flags.  The case of hex digits will be correct.  There will be at least
+ * prec digits, zero-filled on the left if necessary to get that many.
+ */
+
+static PyObject *
+format_long(PyObject *val, int flags, int prec, int type,
+            char **pbuf, int *plen)
+{
+    PyObject *encoded;
+    PyObject *text = _PyUnicode_FormatLong(val, flags & F_ALT, prec, type);
+
+    if (text == NULL)
+        return NULL;
+
+    encoded = _PyUnicode_AsASCIIString(text, "strict");
+    Py_DECREF(text);
+    if (encoded != NULL) {
+        *pbuf = PyBytes_AS_STRING(encoded);
+        *plen = PyBytes_GET_SIZE(encoded);
+    }
+    return encoded;
+}
+
+/* Store the single byte described by v in buf[0] for the %c conversion.
+ *
+ * v may be a bytes or bytearray object (parsed with the "c" format of
+ * PyArg_Parse) or an integer in range(256).  On success buf[1] is set to
+ * '\0' and 1 is returned; on failure -1 is returned with an exception set.
+ * buflen is not consulted: the buffer is presumed to be at least 2
+ * characters long.
+ */
+Py_LOCAL_INLINE(int)
+formatchar(char *buf, size_t buflen, PyObject *v)
+{
+    PyObject *converted = NULL;   /* bytes copy of a bytearray argument */
+    int status = -1;
+
+    /* convert bytearray to bytes */
+    if (PyByteArray_Check(v)) {
+        converted = PyBytes_FromObject(v);
+        if (converted == NULL)
+            return -1;
+        v = converted;
+    }
+
+    if (PyBytes_Check(v)) {
+        if (PyArg_Parse(v, "c;%c requires an integer in range(256) or a single byte", &buf[0]))
+            status = 1;
+    }
+    else {
+        long ival = PyLong_AsLong(v);
+        /* A failed conversion yields -1, so it is caught by the same range
+           test; either way the error reported is the TypeError below. */
+        if (ival < 0 || ival > 255) {
+            PyErr_SetString(PyExc_TypeError,
+                            "%c requires an integer in range(256) or a single byte");
+        }
+        else {
+            buf[0] = ival;
+            status = 1;
+        }
+    }
+
+    if (status == 1)
+        buf[1] = '\0';
+    Py_XDECREF(converted);
+    return status;
+}
+
+/* Convert v to a bytes object for the %b (and %s) conversion.
+ *
+ * A bytearray is first copied into a bytes object; a bytes object is
+ * returned with its reference count incremented; any other object is
+ * converted by calling its __bytes__ special method, which must return
+ * bytes.
+ *
+ * Returns a new reference, or NULL with an exception set.
+ */
+static PyObject *
+format_obj(PyObject *v)
+{
+    PyObject *owned = NULL;   /* bytes copy of a bytearray argument */
+    PyObject *method;
+    PyObject *converted;
+    _Py_IDENTIFIER(__bytes__);
+
+    /* convert bytearray to bytes */
+    if (PyByteArray_Check(v)) {
+        owned = PyBytes_FromObject(v);
+        if (owned == NULL)
+            return NULL;
+        v = owned;
+    }
+
+    /* is it a bytes object? */
+    if (PyBytes_Check(v)) {
+        Py_INCREF(v);
+        Py_XDECREF(owned);
+        return v;
+    }
+
+    /* does it support __bytes__? */
+    method = _PyObject_LookupSpecial(v, &PyId___bytes__);
+    if (method == NULL) {
+        PyErr_Format(PyExc_TypeError,
+                     "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
+                     Py_TYPE(v)->tp_name);
+        return NULL;
+    }
+
+    converted = PyObject_CallFunctionObjArgs(method, NULL);
+    Py_DECREF(method);
+    if (converted != NULL && !PyBytes_Check(converted)) {
+        PyErr_Format(PyExc_TypeError,
+                     "__bytes__ returned non-bytes (type %.200s)",
+                     Py_TYPE(converted)->tp_name);
+        Py_DECREF(converted);
+        converted = NULL;
+    }
+    return converted;
+}
+
+/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
+
+   FORMATBUFLEN is the length of the buffer in which the ints &
+   chars are formatted. XXX This is a magic number. Each formatting
+   routine does bounds checking to ensure no overflow, but a better
+   solution may be to malloc a buffer of appropriate size for each
+   format. For now, the current solution is sufficient.
+*/
+#define FORMATBUFLEN (size_t)120
+
+/* Build the result of "format % args" for a bytes format string.
+ *
+ *   format  must be a bytes object holding the format string
+ *   args    a tuple of arguments, a single non-tuple argument, or a
+ *           mapping consulted by %(key) specifiers
+ *
+ * Returns a new reference to a bytes object, or NULL with an exception set.
+ *
+ * Output buffer bookkeeping:
+ *   result  the bytes object being filled in; grown with _PyBytes_Resize(),
+ *           which sets it to NULL when it fails
+ *   res     write cursor inside result
+ *   reslen  currently allocated size of result
+ *   rescnt  number of bytes still free at res
+ *
+ * Argument bookkeeping:
+ *   arglen, argidx  tuple length and index of the next item; they are -1
+ *           and -2 while args is a single object rather than a tuple
+ *   args_owned      non-zero while args holds a reference obtained from a
+ *           mapping lookup, which this function must release
+ */
+PyObject *
+_PyBytes_Format(PyObject *format, PyObject *args)
+{
+    char *fmt, *res;
+    Py_ssize_t arglen, argidx;
+    Py_ssize_t reslen, rescnt, fmtcnt;
+    int args_owned = 0;
+    PyObject *result;
+    PyObject *dict = NULL;
+    if (format == NULL || !PyBytes_Check(format) || args == NULL) {
+        PyErr_BadInternalCall();
+        return NULL;
+    }
+    fmt = PyBytes_AS_STRING(format);
+    fmtcnt = PyBytes_GET_SIZE(format);
+    reslen = rescnt = fmtcnt + 100;
+    result = PyBytes_FromStringAndSize((char *)NULL, reslen);
+    if (result == NULL)
+        return NULL;
+    res = PyBytes_AsString(result);
+    if (PyTuple_Check(args)) {
+        arglen = PyTuple_GET_SIZE(args);
+        argidx = 0;
+    }
+    else {
+        arglen = -1;
+        argidx = -2;
+    }
+    /* Anything subscriptable that is not a tuple, bytes, str or bytearray
+       may serve as the mapping for %(key) specifiers. */
+    if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
+        !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
+        !PyByteArray_Check(args)) {
+        dict = args;
+    }
+    while (--fmtcnt >= 0) {
+        if (*fmt != '%') {
+            /* Literal byte: copy it through, growing the result if needed. */
+            if (--rescnt < 0) {
+                rescnt = fmtcnt + 100;
+                reslen += rescnt;
+                /* On failure result is NULL; the error exit still has to
+                   release an owned args reference. */
+                if (_PyBytes_Resize(&result, reslen))
+                    goto error;
+                res = PyBytes_AS_STRING(result)
+                    + reslen - rescnt;
+                --rescnt;
+            }
+            *res++ = *fmt++;
+        }
+        else {
+            /* Got a format specifier */
+            int flags = 0;
+            Py_ssize_t width = -1;
+            int prec = -1;
+            int c = '\0';
+            int fill;
+            int isnumok;
+            PyObject *v = NULL;
+            PyObject *temp = NULL;  /* owns the bytes that pbuf points into */
+            char *pbuf;
+            int sign;
+            Py_ssize_t len;
+            char formatbuf[FORMATBUFLEN];
+            /* For format{int,char}() */
+
+            fmt++;
+            if (*fmt == '(') {
+                char *keystart;
+                Py_ssize_t keylen;
+                PyObject *key;
+                int pcount = 1;
+
+                if (dict == NULL) {
+                    PyErr_SetString(PyExc_TypeError,
+                                    "format requires a mapping");
+                    goto error;
+                }
+                ++fmt;
+                --fmtcnt;
+                keystart = fmt;
+                /* Skip over balanced parentheses */
+                while (pcount > 0 && --fmtcnt >= 0) {
+                    if (*fmt == ')')
+                        --pcount;
+                    else if (*fmt == '(')
+                        ++pcount;
+                    fmt++;
+                }
+                keylen = fmt - keystart - 1;
+                if (fmtcnt < 0 || pcount > 0) {
+                    PyErr_SetString(PyExc_ValueError,
+                                    "incomplete format key");
+                    goto error;
+                }
+                key = PyBytes_FromStringAndSize(keystart,
+                                                keylen);
+                if (key == NULL)
+                    goto error;
+                if (args_owned) {
+                    Py_DECREF(args);
+                    args_owned = 0;
+                }
+                /* The looked-up value becomes the single argument for this
+                   specifier. */
+                args = PyObject_GetItem(dict, key);
+                Py_DECREF(key);
+                if (args == NULL) {
+                    goto error;
+                }
+                args_owned = 1;
+                arglen = -1;
+                argidx = -2;
+            }
+            /* Conversion flags */
+            while (--fmtcnt >= 0) {
+                switch (c = *fmt++) {
+                case '-': flags |= F_LJUST; continue;
+                case '+': flags |= F_SIGN; continue;
+                case ' ': flags |= F_BLANK; continue;
+                case '#': flags |= F_ALT; continue;
+                case '0': flags |= F_ZERO; continue;
+                }
+                break;
+            }
+            /* Field width */
+            if (c == '*') {
+                v = getnextarg(args, arglen, &argidx);
+                if (v == NULL)
+                    goto error;
+                if (!PyLong_Check(v)) {
+                    PyErr_SetString(PyExc_TypeError,
+                                    "* wants int");
+                    goto error;
+                }
+                width = PyLong_AsSsize_t(v);
+                if (width == -1 && PyErr_Occurred())
+                    goto error;
+                if (width < 0) {
+                    flags |= F_LJUST;
+                    width = -width;
+                }
+                if (--fmtcnt >= 0)
+                    c = *fmt++;
+            }
+            else if (c >= 0 && isdigit(c)) {
+                width = c - '0';
+                while (--fmtcnt >= 0) {
+                    c = Py_CHARMASK(*fmt++);
+                    if (!isdigit(c))
+                        break;
+                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
+                        PyErr_SetString(
+                            PyExc_ValueError,
+                            "width too big");
+                        goto error;
+                    }
+                    width = width*10 + (c - '0');
+                }
+            }
+            /* Precision */
+            if (c == '.') {
+                prec = 0;
+                if (--fmtcnt >= 0)
+                    c = *fmt++;
+                if (c == '*') {
+                    Py_ssize_t sprec;
+
+                    v = getnextarg(args, arglen, &argidx);
+                    if (v == NULL)
+                        goto error;
+                    if (!PyLong_Check(v)) {
+                        PyErr_SetString(
+                            PyExc_TypeError,
+                            "* wants int");
+                        goto error;
+                    }
+                    /* Range-check before narrowing to int, so a huge
+                       precision is rejected instead of being silently
+                       truncated. */
+                    sprec = PyLong_AsSsize_t(v);
+                    if (sprec == -1 && PyErr_Occurred())
+                        goto error;
+                    if (sprec > INT_MAX) {
+                        PyErr_SetString(
+                            PyExc_ValueError,
+                            "prec too big");
+                        goto error;
+                    }
+                    prec = (sprec < 0) ? 0 : (int)sprec;
+                    if (--fmtcnt >= 0)
+                        c = *fmt++;
+                }
+                else if (c >= 0 && isdigit(c)) {
+                    prec = c - '0';
+                    while (--fmtcnt >= 0) {
+                        c = Py_CHARMASK(*fmt++);
+                        if (!isdigit(c))
+                            break;
+                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
+                            PyErr_SetString(
+                                PyExc_ValueError,
+                                "prec too big");
+                            goto error;
+                        }
+                        prec = prec*10 + (c - '0');
+                    }
+                }
+            } /* prec */
+            /* A length modifier is accepted and skipped. */
+            if (fmtcnt >= 0) {
+                if (c == 'h' || c == 'l' || c == 'L') {
+                    if (--fmtcnt >= 0)
+                        c = *fmt++;
+                }
+            }
+            if (fmtcnt < 0) {
+                PyErr_SetString(PyExc_ValueError,
+                                "incomplete format");
+                goto error;
+            }
+            if (c != '%') {
+                v = getnextarg(args, arglen, &argidx);
+                if (v == NULL)
+                    goto error;
+            }
+            sign = 0;
+            fill = ' ';
+            switch (c) {
+            case '%':
+                pbuf = "%";
+                len = 1;
+                break;
+            case 'a': {
+                /* ascii(v) as bytes.  temp keeps the bytes object alive
+                   until pbuf has been copied into the result below. */
+                PyObject *ascii = PyObject_ASCII(v);
+                if (ascii == NULL)
+                    goto error;
+                temp = PyUnicode_AsASCIIString(ascii);
+                Py_DECREF(ascii);
+                if (temp == NULL)
+                    goto error;
+                pbuf = PyBytes_AS_STRING(temp);
+                len = PyBytes_GET_SIZE(temp);
+                if (prec >= 0 && len > prec)
+                    len = prec;
+                break;
+            }
+            case 's':
+                /* %s is only for 2/3 code; 3 only code should use %b */
+            case 'b':
+                temp = format_obj(v);
+                if (temp == NULL)
+                    goto error;
+                pbuf = PyBytes_AS_STRING(temp);
+                len = PyBytes_GET_SIZE(temp);
+                if (prec >= 0 && len > prec)
+                    len = prec;
+                break;
+            case 'i':
+            case 'd':
+            case 'u':
+            case 'o':
+            case 'x':
+            case 'X':
+                if (c == 'i')
+                    c = 'd';
+                isnumok = 0;
+                if (PyNumber_Check(v)) {
+                    PyObject *iobj=NULL;
+
+                    if ((PyLong_Check(v))) {
+                        iobj = v;
+                        Py_INCREF(iobj);
+                    }
+                    else {
+                        iobj = PyNumber_Long(v);
+                    }
+                    if (iobj!=NULL) {
+                        if (PyLong_Check(iobj)) {
+                            int ilen;
+
+                            isnumok = 1;
+                            temp = format_long(iobj, flags, prec, c,
+                                               &pbuf, &ilen);
+                            Py_DECREF(iobj);
+                            len = ilen;
+                            if (!temp)
+                                goto error;
+                            sign = 1;
+                        }
+                        else {
+                            Py_DECREF(iobj);
+                        }
+                    }
+                }
+                if (!isnumok) {
+                    PyErr_Format(PyExc_TypeError,
+                                 "%%%c format: a number is required, "
+                                 "not %.200s", c, Py_TYPE(v)->tp_name);
+                    goto error;
+                }
+                if (flags & F_ZERO)
+                    fill = '0';
+                break;
+            case 'e':
+            case 'E':
+            case 'f':
+            case 'F':
+            case 'g':
+            case 'G':
+                temp = formatfloat(v, flags, prec, c);
+                if (temp == NULL)
+                    goto error;
+                pbuf = PyBytes_AS_STRING(temp);
+                len = PyBytes_GET_SIZE(temp);
+                sign = 1;
+                if (flags & F_ZERO)
+                    fill = '0';
+                break;
+            case 'c':
+                pbuf = formatbuf;
+                len = formatchar(pbuf, sizeof(formatbuf), v);
+                if (len < 0)
+                    goto error;
+                break;
+            default:
+                PyErr_Format(PyExc_ValueError,
+                             "unsupported format character '%c' (0x%x) "
+                             "at index %zd",
+                             c, c,
+                             (Py_ssize_t)(fmt - 1 -
+                                          PyBytes_AsString(format)));
+                goto error;
+            }
+            /* Split an explicit sign off the digits, or pick the one
+               requested by the flags; sign ends up 0 when none is
+               emitted. */
+            if (sign) {
+                if (*pbuf == '-' || *pbuf == '+') {
+                    sign = *pbuf++;
+                    len--;
+                }
+                else if (flags & F_SIGN)
+                    sign = '+';
+                else if (flags & F_BLANK)
+                    sign = ' ';
+                else
+                    sign = 0;
+            }
+            if (width < len)
+                width = len;
+            if (rescnt - (sign != 0) < width) {
+                reslen -= rescnt;
+                rescnt = width + fmtcnt + 100;
+                reslen += rescnt;
+                if (reslen < 0) {
+                    Py_XDECREF(temp);
+                    PyErr_NoMemory();
+                    goto error;
+                }
+                if (_PyBytes_Resize(&result, reslen)) {
+                    /* result is NULL now; the error exit copes with that
+                       and releases an owned args reference. */
+                    Py_XDECREF(temp);
+                    goto error;
+                }
+                res = PyBytes_AS_STRING(result)
+                    + reslen - rescnt;
+            }
+            if (sign) {
+                /* With '0' fill the sign precedes the padding; with ' '
+                   fill it is written after the padding, further down. */
+                if (fill != ' ')
+                    *res++ = sign;
+                rescnt--;
+                if (width > len)
+                    width--;
+            }
+            if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
+                assert(pbuf[0] == '0');
+                assert(pbuf[1] == c);
+                if (fill != ' ') {
+                    *res++ = *pbuf++;
+                    *res++ = *pbuf++;
+                }
+                rescnt -= 2;
+                width -= 2;
+                if (width < 0)
+                    width = 0;
+                len -= 2;
+            }
+            /* Left padding */
+            if (width > len && !(flags & F_LJUST)) {
+                do {
+                    --rescnt;
+                    *res++ = fill;
+                } while (--width > len);
+            }
+            if (fill == ' ') {
+                if (sign)
+                    *res++ = sign;
+                if ((flags & F_ALT) &&
+                    (c == 'x' || c == 'X')) {
+                    assert(pbuf[0] == '0');
+                    assert(pbuf[1] == c);
+                    *res++ = *pbuf++;
+                    *res++ = *pbuf++;
+                }
+            }
+            Py_MEMCPY(res, pbuf, len);
+            res += len;
+            rescnt -= len;
+            /* Right padding (only reached with width > len when F_LJUST
+               is set) */
+            while (--width >= len) {
+                --rescnt;
+                *res++ = ' ';
+            }
+            if (dict && (argidx < arglen) && c != '%') {
+                PyErr_SetString(PyExc_TypeError,
+                                "not all arguments converted during bytes formatting");
+                Py_XDECREF(temp);
+                goto error;
+            }
+            Py_XDECREF(temp);
+        } /* '%' */
+    } /* until end */
+    if (argidx < arglen && !dict) {
+        PyErr_SetString(PyExc_TypeError,
+                        "not all arguments converted during bytes formatting");
+        goto error;
+    }
+    if (args_owned) {
+        Py_DECREF(args);
+    }
+    /* Trim the unused tail of the buffer. */
+    if (_PyBytes_Resize(&result, reslen - rescnt))
+        return NULL;
+    return result;
+
+ error:
+    /* result may be NULL here after a failed _PyBytes_Resize(). */
+    Py_XDECREF(result);
+    if (args_owned) {
+        Py_DECREF(args);
+    }
+    return NULL;
+}
+
+/* =-= */
+
static void
bytes_dealloc(PyObject *op)
{
@@ -2996,6 +3624,21 @@ bytes_methods[] = {
};
static PyObject *
+bytes_mod(PyObject *v, PyObject *w)
+{
+    /* nb_remainder slot: format v with w when v is a bytes object;
+       otherwise return NotImplemented. */
+    if (PyBytes_Check(v))
+        return _PyBytes_Format(v, w);
+    Py_RETURN_NOTIMPLEMENTED;
+}
+
+static PyNumberMethods bytes_as_number = { /* number slots for bytes; members not listed here are zero-initialized */
+ 0, /*nb_add: not provided*/
+ 0, /*nb_subtract: not provided*/
+ 0, /*nb_multiply: not provided*/
+ bytes_mod, /*nb_remainder: the % operator, handled by bytes_mod*/
+};
+
+static PyObject *
str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
static PyObject *
@@ -3286,7 +3929,7 @@ PyTypeObject PyBytes_Type = {
0, /* tp_setattr */
0, /* tp_reserved */
(reprfunc)bytes_repr, /* tp_repr */
- 0, /* tp_as_number */
+ &bytes_as_number, /* tp_as_number */
&bytes_as_sequence, /* tp_as_sequence */
&bytes_as_mapping, /* tp_as_mapping */
(hashfunc)bytes_hash, /* tp_hash */
@@ -3377,14 +4020,14 @@ PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
}
-/* The following function breaks the notion that strings are immutable:
- it changes the size of a string. We get away with this only if there
+/* The following function breaks the notion that bytes are immutable:
+ it changes the size of a bytes object. We get away with this only if there
is only one module referencing the object. You can also think of it
- as creating a new string object and destroying the old one, only
- more efficiently. In any case, don't use this if the string may
+ as creating a new bytes object and destroying the old one, only
+ more efficiently. In any case, don't use this if the bytes object may
already be known to some other part of the code...
- Note that if there's not enough memory to resize the string, the original
- string object at *pv is deallocated, *pv is set to NULL, an "out of
+ Note that if there's not enough memory to resize the bytes object, the
+ original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
memory" exception is set, and -1 is returned. Else (on success) 0 is
returned, and the value in *pv may or may not be the same as on input.
As always, an extra byte is allocated for a trailing \0 byte (newsize