diff options
Diffstat (limited to 'Objects/bytesobject.c')
-rw-r--r-- | Objects/bytesobject.c | 440 |
1 files changed, 167 insertions, 273 deletions
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 796e400..efe6ef8 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -41,10 +41,6 @@ static PyBytesObject *nullstring; #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1) /* - For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the - parameter `size' denotes number of characters to allocate, not counting any - null terminating character. - For PyBytes_FromString(), the parameter `str' points to a null-terminated string containing exactly `size' bytes. @@ -61,8 +57,8 @@ static PyBytesObject *nullstring; The PyObject member `op->ob_size', which denotes the number of "extra items" in a variable-size object, will contain the number of bytes - allocated for string data, not counting the null terminating character. It - is therefore equal to the equal to the `size' parameter (for + allocated for string data, not counting the null terminating character. + It is therefore equal to the `size' parameter (for PyBytes_FromStringAndSize()) or the length of the string in the `str' parameter (for PyBytes_FromString()). */ @@ -568,76 +564,70 @@ PyBytes_AsStringAndSize(register PyObject *obj, PyObject * PyBytes_Repr(PyObject *obj, int smartquotes) { - static const char *hexdigits = "0123456789abcdef"; register PyBytesObject* op = (PyBytesObject*) obj; - Py_ssize_t length = Py_SIZE(op); - size_t newsize; + Py_ssize_t i, length = Py_SIZE(op); + size_t newsize, squotes, dquotes; PyObject *v; - if (length > (PY_SSIZE_T_MAX - 3) / 4) { + unsigned char quote, *s, *p; + + /* Compute size of output string */ + squotes = dquotes = 0; + newsize = 3; /* b'' */ + s = (unsigned char*)op->ob_sval; + for (i = 0; i < length; i++) { + switch(s[i]) { + case '\'': squotes++; newsize++; break; + case '"': dquotes++; newsize++; break; + case '\\': case '\t': case '\n': case '\r': + newsize += 2; break; /* \C */ + default: + if (s[i] < ' ' || s[i] >= 0x7f) + newsize += 4; /* \xHH */ + else + newsize++; + } + } + quote = '\''; + if (smartquotes && squotes && !dquotes) + quote = '"'; + if (squotes && quote == '\'') + newsize += squotes; + + if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) { PyErr_SetString(PyExc_OverflowError, "bytes object is too large to make repr"); return NULL; } - newsize = 3 + 4 * length; - v = PyUnicode_FromUnicode(NULL, newsize); + + v = PyUnicode_New(newsize, 127); if (v == NULL) { return NULL; } - else { - register Py_ssize_t i; - register Py_UNICODE c; - register Py_UNICODE *p = PyUnicode_AS_UNICODE(v); - int quote; - - /* Figure out which quote to use; single is preferred */ - quote = '\''; - if (smartquotes) { - char *test, *start; - start = PyBytes_AS_STRING(op); - for (test = start; test < start+length; ++test) { - if (*test == '"') { - quote = '\''; /* back to single */ - goto decided; - } - else if (*test == '\'') - quote = '"'; - } - decided: - ; - } - - *p++ = 'b', *p++ = quote; - for (i = 0; i < length; i++) { - /* There's at least enough room for a hex escape - and a closing quote. */ - assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5); - c = op->ob_sval[i]; - if (c == quote || c == '\\') - *p++ = '\\', *p++ = c; - else if (c == '\t') - *p++ = '\\', *p++ = 't'; - else if (c == '\n') - *p++ = '\\', *p++ = 'n'; - else if (c == '\r') - *p++ = '\\', *p++ = 'r'; - else if (c < ' ' || c >= 0x7f) { - *p++ = '\\'; - *p++ = 'x'; - *p++ = hexdigits[(c & 0xf0) >> 4]; - *p++ = hexdigits[c & 0xf]; - } - else - *p++ = c; - } - assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1); - *p++ = quote; - *p = '\0'; - if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) { - Py_DECREF(v); - return NULL; + p = PyUnicode_1BYTE_DATA(v); + + *p++ = 'b', *p++ = quote; + for (i = 0; i < length; i++) { + unsigned char c = op->ob_sval[i]; + if (c == quote || c == '\\') + *p++ = '\\', *p++ = c; + else if (c == '\t') + *p++ = '\\', *p++ = 't'; + else if (c == '\n') + *p++ = '\\', *p++ = 'n'; + else if (c == '\r') + *p++ = '\\', *p++ = 'r'; + else if (c < ' ' || c >= 0x7f) { + *p++ = '\\'; + *p++ = 'x'; + *p++ = Py_hexdigits[(c & 0xf0) >> 4]; + *p++ = Py_hexdigits[c & 0xf]; } - return v; + else + *p++ = c; } + *p++ = quote; + assert(_PyUnicode_CheckConsistency(v, 1)); + return v; } static PyObject * @@ -871,35 +861,11 @@ bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op) static Py_hash_t bytes_hash(PyBytesObject *a) { - register Py_ssize_t len; - register unsigned char *p; - register Py_hash_t x; - -#ifdef Py_DEBUG - assert(_Py_HashSecret_Initialized); -#endif - if (a->ob_shash != -1) - return a->ob_shash; - len = Py_SIZE(a); - /* - We make the hash of the empty string be 0, rather than using - (prefix ^ suffix), since this slightly obfuscates the hash secret - */ - if (len == 0) { - a->ob_shash = 0; - return 0; - } - p = (unsigned char *) a->ob_sval; - x = _Py_HashSecret.prefix; - x ^= *p << 7; - while (--len >= 0) - x = (_PyHASH_MULTIPLIER*x) ^ *p++; - x ^= Py_SIZE(a); - x ^= _Py_HashSecret.suffix; - if (x == -1) - x = -2; - a->ob_shash = x; - return x; + if (a->ob_shash == -1) { + /* Can't fail */ + a->ob_shash = _Py_HashBytes((unsigned char *) a->ob_sval, Py_SIZE(a)); + } + return a->ob_shash; } static PyObject* @@ -1007,7 +973,7 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"}; #define STRIPNAME(i) (stripformat[i]+3) PyDoc_STRVAR(split__doc__, -"B.split([sep[, maxsplit]]) -> list of bytes\n\ +"B.split(sep=None, maxsplit=-1) -> list of bytes\n\ \n\ Return a list of the sections in B, using sep as the delimiter.\n\ If sep is not specified or is None, B is split on ASCII whitespace\n\ @@ -1015,15 +981,17 @@ characters (space, tab, return, newline, formfeed, vertical tab).\n\ If maxsplit is given, at most maxsplit splits are done."); static PyObject * -bytes_split(PyBytesObject *self, PyObject *args) +bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds) { + static char *kwlist[] = {"sep", "maxsplit", 0}; Py_ssize_t len = PyBytes_GET_SIZE(self), n; Py_ssize_t maxsplit = -1; const char *s = PyBytes_AS_STRING(self), *sub; Py_buffer vsub; PyObject *list, *subobj = Py_None; - if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split", + kwlist, &subobj, &maxsplit)) return NULL; if (maxsplit < 0) maxsplit = PY_SSIZE_T_MAX; @@ -1095,7 +1063,7 @@ bytes_rpartition(PyBytesObject *self, PyObject *sep_obj) } PyDoc_STRVAR(rsplit__doc__, -"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\ +"B.rsplit(sep=None, maxsplit=-1) -> list of bytes\n\ \n\ Return a list of the sections in B, using sep as the delimiter,\n\ starting at the end of B and working to the front.\n\ @@ -1105,15 +1073,17 @@ If maxsplit is given, at most maxsplit splits are done."); static PyObject * -bytes_rsplit(PyBytesObject *self, PyObject *args) +bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds) { + static char *kwlist[] = {"sep", "maxsplit", 0}; Py_ssize_t len = PyBytes_GET_SIZE(self), n; Py_ssize_t maxsplit = -1; const char *s = PyBytes_AS_STRING(self), *sub; Py_buffer vsub; PyObject *list, *subobj = Py_None; - if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit", + kwlist, &subobj, &maxsplit)) return NULL; if (maxsplit < 0) maxsplit = PY_SSIZE_T_MAX; @@ -1254,31 +1224,42 @@ Py_LOCAL_INLINE(Py_ssize_t) bytes_find_internal(PyBytesObject *self, PyObject *args, int dir) { PyObject *subobj; + char byte; + Py_buffer subbuf; const char *sub; Py_ssize_t sub_len; Py_ssize_t start=0, end=PY_SSIZE_T_MAX; + Py_ssize_t res; - if (!stringlib_parse_args_finds("find/rfind/index/rindex", - args, &subobj, &start, &end)) + if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex", + args, &subobj, &byte, &start, &end)) return -2; - if (PyBytes_Check(subobj)) { - sub = PyBytes_AS_STRING(subobj); - sub_len = PyBytes_GET_SIZE(subobj); + if (subobj) { + if (_getbuffer(subobj, &subbuf) < 0) + return -2; + + sub = subbuf.buf; + sub_len = subbuf.len; + } + else { + sub = &byte; + sub_len = 1; } - else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len)) - /* XXX - the "expected a character buffer object" is pretty - confusing for a non-expert. remap to something else ? */ - return -2; if (dir > 0) - return stringlib_find_slice( + res = stringlib_find_slice( PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), sub, sub_len, start, end); else - return stringlib_rfind_slice( + res = stringlib_rfind_slice( PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), sub, sub_len, start, end); + + if (subobj) + PyBuffer_Release(&subbuf); + + return res; } @@ -1504,23 +1485,38 @@ bytes_count(PyBytesObject *self, PyObject *args) PyObject *sub_obj; const char *str = PyBytes_AS_STRING(self), *sub; Py_ssize_t sub_len; + char byte; Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; - if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end)) + Py_buffer vsub; + PyObject *count_obj; + + if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte, + &start, &end)) return NULL; - if (PyBytes_Check(sub_obj)) { - sub = PyBytes_AS_STRING(sub_obj); - sub_len = PyBytes_GET_SIZE(sub_obj); + if (sub_obj) { + if (_getbuffer(sub_obj, &vsub) < 0) + return NULL; + + sub = vsub.buf; + sub_len = vsub.len; + } + else { + sub = &byte; + sub_len = 1; } - else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len)) - return NULL; ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self)); - return PyLong_FromSsize_t( + count_obj = PyLong_FromSsize_t( stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX) ); + + if (sub_obj) + PyBuffer_Release(&vsub); + + return count_obj; } @@ -2329,11 +2325,13 @@ Line breaks are not included in the resulting list unless keepends\n\ is given and true."); static PyObject* -bytes_splitlines(PyObject *self, PyObject *args) +bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds) { + static char *kwlist[] = {"keepends", 0}; int keepends = 0; - if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines", + kwlist, &keepends)) return NULL; return stringlib_splitlines( @@ -2351,7 +2349,7 @@ Spaces between two numbers are accepted.\n\ Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'."); static int -hex_digit_to_int(Py_UNICODE c) +hex_digit_to_int(Py_UCS4 c) { if (c >= 128) return -1; @@ -2371,15 +2369,20 @@ bytes_fromhex(PyObject *cls, PyObject *args) { PyObject *newstring, *hexobj; char *buf; - Py_UNICODE *hex; Py_ssize_t hexlen, byteslen, i, j; int top, bot; + void *data; + unsigned int kind; if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj)) return NULL; assert(PyUnicode_Check(hexobj)); - hexlen = PyUnicode_GET_SIZE(hexobj); - hex = PyUnicode_AS_UNICODE(hexobj); + if (PyUnicode_READY(hexobj)) + return NULL; + kind = PyUnicode_KIND(hexobj); + data = PyUnicode_DATA(hexobj); + hexlen = PyUnicode_GET_LENGTH(hexobj); + byteslen = hexlen/2; /* This overestimates if there are spaces */ newstring = PyBytes_FromStringAndSize(NULL, byteslen); if (!newstring) @@ -2387,12 +2390,12 @@ bytes_fromhex(PyObject *cls, PyObject *args) buf = PyBytes_AS_STRING(newstring); for (i = j = 0; i < hexlen; i += 2) { /* skip over spaces in the input */ - while (hex[i] == ' ') + while (PyUnicode_READ(kind, data, i) == ' ') i++; if (i >= hexlen) break; - top = hex_digit_to_int(hex[i]); - bot = hex_digit_to_int(hex[i+1]); + top = hex_digit_to_int(PyUnicode_READ(kind, data, i)); + bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1)); if (top == -1 || bot == -1) { PyErr_Format(PyExc_ValueError, "non-hexadecimal number found in " @@ -2472,10 +2475,10 @@ bytes_methods[] = { {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__}, {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__}, - {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__}, + {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__}, {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__}, - {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__}, - {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS, + {"split", (PyCFunction)bytes_split, METH_VARARGS | METH_KEYWORDS, split__doc__}, + {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS, splitlines__doc__}, {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS, startswith__doc__}, @@ -2857,149 +2860,6 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize) return 0; } -/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and - * the F_ALT flag, for Python's long (unbounded) ints. It's not used for - * Python's regular ints. - * Return value: a new PyBytes*, or NULL if error. - * . *pbuf is set to point into it, - * *plen set to the # of chars following that. - * Caller must decref it when done using pbuf. - * The string starting at *pbuf is of the form - * "-"? ("0x" | "0X")? digit+ - * "0x"/"0X" are present only for x and X conversions, with F_ALT - * set in flags. The case of hex digits will be correct, - * There will be at least prec digits, zero-filled on the left if - * necessary to get that many. - * val object to be converted - * flags bitmask of format flags; only F_ALT is looked at - * prec minimum number of digits; 0-fill on left if needed - * type a character in [duoxX]; u acts the same as d - * - * CAUTION: o, x and X conversions on regular ints can never - * produce a '-' sign, but can for Python's unbounded ints. - */ -PyObject* -_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type, - char **pbuf, int *plen) -{ - PyObject *result = NULL; - char *buf; - Py_ssize_t i; - int sign; /* 1 if '-', else 0 */ - int len; /* number of characters */ - Py_ssize_t llen; - int numdigits; /* len == numnondigits + numdigits */ - int numnondigits = 0; - - /* Avoid exceeding SSIZE_T_MAX */ - if (prec > INT_MAX-3) { - PyErr_SetString(PyExc_OverflowError, - "precision too large"); - return NULL; - } - - switch (type) { - case 'd': - case 'u': - /* Special-case boolean: we want 0/1 */ - if (PyBool_Check(val)) - result = PyNumber_ToBase(val, 10); - else - result = Py_TYPE(val)->tp_str(val); - break; - case 'o': - numnondigits = 2; - result = PyNumber_ToBase(val, 8); - break; - case 'x': - case 'X': - numnondigits = 2; - result = PyNumber_ToBase(val, 16); - break; - default: - assert(!"'type' not in [duoxX]"); - } - if (!result) - return NULL; - - buf = _PyUnicode_AsString(result); - if (!buf) { - Py_DECREF(result); - return NULL; - } - - /* To modify the string in-place, there can only be one reference. */ - if (Py_REFCNT(result) != 1) { - PyErr_BadInternalCall(); - return NULL; - } - llen = PyUnicode_GetSize(result); - if (llen > INT_MAX) { - PyErr_SetString(PyExc_ValueError, - "string too large in _PyBytes_FormatLong"); - return NULL; - } - len = (int)llen; - if (buf[len-1] == 'L') { - --len; - buf[len] = '\0'; - } - sign = buf[0] == '-'; - numnondigits += sign; - numdigits = len - numnondigits; - assert(numdigits > 0); - - /* Get rid of base marker unless F_ALT */ - if (((flags & F_ALT) == 0 && - (type == 'o' || type == 'x' || type == 'X'))) { - assert(buf[sign] == '0'); - assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' || - buf[sign+1] == 'o'); - numnondigits -= 2; - buf += 2; - len -= 2; - if (sign) - buf[0] = '-'; - assert(len == numnondigits + numdigits); - assert(numdigits > 0); - } - - /* Fill with leading zeroes to meet minimum width. */ - if (prec > numdigits) { - PyObject *r1 = PyBytes_FromStringAndSize(NULL, - numnondigits + prec); - char *b1; - if (!r1) { - Py_DECREF(result); - return NULL; - } - b1 = PyBytes_AS_STRING(r1); - for (i = 0; i < numnondigits; ++i) - *b1++ = *buf++; - for (i = 0; i < prec - numdigits; i++) - *b1++ = '0'; - for (i = 0; i < numdigits; i++) - *b1++ = *buf++; - *b1 = '\0'; - Py_DECREF(result); - result = r1; - buf = PyBytes_AS_STRING(result); - len = numnondigits + prec; - } - - /* Fix up case for hex conversions. */ - if (type == 'X') { - /* Need to convert all lower case letters to upper case. - and need to convert 0x to 0X (and -0x to -0X). */ - for (i = 0; i < len; i++) - if (buf[i] >= 'a' && buf[i] <= 'x') - buf[i] -= 'a'-'A'; - } - *pbuf = buf; - *plen = len; - return result; -} - void PyBytes_Fini(void) { @@ -3072,9 +2932,43 @@ striter_len(striterobject *it) PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it))."); +static PyObject * +striter_reduce(striterobject *it) +{ + if (it->it_seq != NULL) { + return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"), + it->it_seq, it->it_index); + } else { + PyObject *u = PyUnicode_FromUnicode(NULL, 0); + if (u == NULL) + return NULL; + return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u); + } +} + +PyDoc_STRVAR(reduce_doc, "Return state information for pickling."); + +static PyObject * +striter_setstate(striterobject *it, PyObject *state) +{ + Py_ssize_t index = PyLong_AsSsize_t(state); + if (index == -1 && PyErr_Occurred()) + return NULL; + if (index < 0) + index = 0; + it->it_index = index; + Py_RETURN_NONE; +} + +PyDoc_STRVAR(setstate_doc, "Set state information for unpickling."); + static PyMethodDef striter_methods[] = { {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS, length_hint_doc}, + {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS, + reduce_doc}, + {"__setstate__", (PyCFunction)striter_setstate, METH_O, + setstate_doc}, {NULL, NULL} /* sentinel */ }; |