diff options
Diffstat (limited to 'Objects/bytesobject.c')
| -rw-r--r-- | Objects/bytesobject.c | 447 | 
1 files changed, 173 insertions, 274 deletions
| diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 796e400..bf9259f 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -41,10 +41,6 @@ static PyBytesObject *nullstring;  #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)  /* -   For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the -   parameter `size' denotes number of characters to allocate, not counting any -   null terminating character. -     For PyBytes_FromString(), the parameter `str' points to a null-terminated     string containing exactly `size' bytes. @@ -61,8 +57,8 @@ static PyBytesObject *nullstring;     The PyObject member `op->ob_size', which denotes the number of "extra     items" in a variable-size object, will contain the number of bytes -   allocated for string data, not counting the null terminating character.  It -   is therefore equal to the equal to the `size' parameter (for +   allocated for string data, not counting the null terminating character. +   It is therefore equal to the `size' parameter (for     PyBytes_FromStringAndSize()) or the length of the string in the `str'     parameter (for PyBytes_FromString()).  */ @@ -568,76 +564,70 @@ PyBytes_AsStringAndSize(register PyObject *obj,  PyObject *  PyBytes_Repr(PyObject *obj, int smartquotes)  { -    static const char *hexdigits = "0123456789abcdef";      register PyBytesObject* op = (PyBytesObject*) obj; -    Py_ssize_t length = Py_SIZE(op); -    size_t newsize; +    Py_ssize_t i, length = Py_SIZE(op); +    size_t newsize, squotes, dquotes;      PyObject *v; -    if (length > (PY_SSIZE_T_MAX - 3) / 4) { +    unsigned char quote, *s, *p; + +    /* Compute size of output string */ +    squotes = dquotes = 0; +    newsize = 3; /* b'' */ +    s = (unsigned char*)op->ob_sval; +    for (i = 0; i < length; i++) { +        switch(s[i]) { +        case '\'': squotes++; newsize++; break; +        case '"':  dquotes++; newsize++; break; +        case '\\': case '\t': case '\n': case '\r': +            newsize += 2; break; /* \C */ +        default: +            if (s[i] < ' ' || s[i] >= 0x7f) +                newsize += 4; /* \xHH */ +            else +                newsize++; +        } +    } +    quote = '\''; +    if (smartquotes && squotes && !dquotes) +        quote = '"'; +    if (squotes && quote == '\'') +        newsize += squotes; + +    if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) {          PyErr_SetString(PyExc_OverflowError,              "bytes object is too large to make repr");          return NULL;      } -    newsize = 3 + 4 * length; -    v = PyUnicode_FromUnicode(NULL, newsize); + +    v = PyUnicode_New(newsize, 127);      if (v == NULL) {          return NULL;      } -    else { -        register Py_ssize_t i; -        register Py_UNICODE c; -        register Py_UNICODE *p = PyUnicode_AS_UNICODE(v); -        int quote; - -        /* Figure out which quote to use; single is preferred */ -        quote = '\''; -        if (smartquotes) { -            char *test, *start; -            start = PyBytes_AS_STRING(op); -            for (test = start; test < start+length; ++test) { -                if (*test == '"') { -                    quote = '\''; /* back to single */ -                    goto decided; -                } -                else if (*test == '\'') -                    quote = '"'; -            } -            decided: -            ; -        } - -        *p++ = 'b', *p++ = quote; -        for (i = 0; i < length; i++) { -            /* There's at least enough room for a hex escape -               and a closing quote. */ -            assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5); -            c = op->ob_sval[i]; -            if (c == quote || c == '\\') -                *p++ = '\\', *p++ = c; -            else if (c == '\t') -                *p++ = '\\', *p++ = 't'; -            else if (c == '\n') -                *p++ = '\\', *p++ = 'n'; -            else if (c == '\r') -                *p++ = '\\', *p++ = 'r'; -            else if (c < ' ' || c >= 0x7f) { -                *p++ = '\\'; -                *p++ = 'x'; -                *p++ = hexdigits[(c & 0xf0) >> 4]; -                *p++ = hexdigits[c & 0xf]; -            } -            else -                *p++ = c; -        } -        assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1); -        *p++ = quote; -        *p = '\0'; -        if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) { -            Py_DECREF(v); -            return NULL; +    p = PyUnicode_1BYTE_DATA(v); + +    *p++ = 'b', *p++ = quote; +    for (i = 0; i < length; i++) { +        unsigned char c = op->ob_sval[i]; +        if (c == quote || c == '\\') +            *p++ = '\\', *p++ = c; +        else if (c == '\t') +            *p++ = '\\', *p++ = 't'; +        else if (c == '\n') +            *p++ = '\\', *p++ = 'n'; +        else if (c == '\r') +            *p++ = '\\', *p++ = 'r'; +        else if (c < ' ' || c >= 0x7f) { +            *p++ = '\\'; +            *p++ = 'x'; +            *p++ = Py_hexdigits[(c & 0xf0) >> 4]; +            *p++ = Py_hexdigits[c & 0xf];          } -        return v; +        else +            *p++ = c;      } +    *p++ = quote; +    assert(_PyUnicode_CheckConsistency(v, 1)); +    return v;  }  static PyObject * @@ -871,35 +861,11 @@ bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)  static Py_hash_t  bytes_hash(PyBytesObject *a)  { -    register Py_ssize_t len; -    register unsigned char *p; -    register Py_hash_t x; - -#ifdef Py_DEBUG -    assert(_Py_HashSecret_Initialized); -#endif -    if (a->ob_shash != -1) -        return a->ob_shash; -    len = Py_SIZE(a); -    /* -      We make the hash of the empty string be 0, rather than using -      (prefix ^ suffix), since this slightly obfuscates the hash secret -    */ -    if (len == 0) { -        a->ob_shash = 0; -        return 0; -    } -    p = (unsigned char *) a->ob_sval; -    x = _Py_HashSecret.prefix; -    x ^= *p << 7; -    while (--len >= 0) -        x = (_PyHASH_MULTIPLIER*x) ^ *p++; -    x ^= Py_SIZE(a); -    x ^= _Py_HashSecret.suffix; -    if (x == -1) -        x = -2; -    a->ob_shash = x; -    return x; +    if (a->ob_shash == -1) { +        /* Can't fail */ +        a->ob_shash = _Py_HashBytes((unsigned char *) a->ob_sval, Py_SIZE(a)); +    } +    return a->ob_shash;  }  static PyObject* @@ -1007,7 +973,7 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};  #define STRIPNAME(i) (stripformat[i]+3)  PyDoc_STRVAR(split__doc__, -"B.split([sep[, maxsplit]]) -> list of bytes\n\ +"B.split(sep=None, maxsplit=-1) -> list of bytes\n\  \n\  Return a list of the sections in B, using sep as the delimiter.\n\  If sep is not specified or is None, B is split on ASCII whitespace\n\ @@ -1015,15 +981,17 @@ characters (space, tab, return, newline, formfeed, vertical tab).\n\  If maxsplit is given, at most maxsplit splits are done.");  static PyObject * -bytes_split(PyBytesObject *self, PyObject *args) +bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds)  { +    static char *kwlist[] = {"sep", "maxsplit", 0};      Py_ssize_t len = PyBytes_GET_SIZE(self), n;      Py_ssize_t maxsplit = -1;      const char *s = PyBytes_AS_STRING(self), *sub;      Py_buffer vsub;      PyObject *list, *subobj = Py_None; -    if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit)) +    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split", +                                     kwlist, &subobj, &maxsplit))          return NULL;      if (maxsplit < 0)          maxsplit = PY_SSIZE_T_MAX; @@ -1095,7 +1063,7 @@ bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)  }  PyDoc_STRVAR(rsplit__doc__, -"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\ +"B.rsplit(sep=None, maxsplit=-1) -> list of bytes\n\  \n\  Return a list of the sections in B, using sep as the delimiter,\n\  starting at the end of B and working to the front.\n\ @@ -1105,15 +1073,17 @@ If maxsplit is given, at most maxsplit splits are done.");  static PyObject * -bytes_rsplit(PyBytesObject *self, PyObject *args) +bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds)  { +    static char *kwlist[] = {"sep", "maxsplit", 0};      Py_ssize_t len = PyBytes_GET_SIZE(self), n;      Py_ssize_t maxsplit = -1;      const char *s = PyBytes_AS_STRING(self), *sub;      Py_buffer vsub;      PyObject *list, *subobj = Py_None; -    if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit)) +    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit", +                                     kwlist, &subobj, &maxsplit))          return NULL;      if (maxsplit < 0)          maxsplit = PY_SSIZE_T_MAX; @@ -1254,31 +1224,42 @@ Py_LOCAL_INLINE(Py_ssize_t)  bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)  {      PyObject *subobj; +    char byte; +    Py_buffer subbuf;      const char *sub;      Py_ssize_t sub_len;      Py_ssize_t start=0, end=PY_SSIZE_T_MAX; +    Py_ssize_t res; -    if (!stringlib_parse_args_finds("find/rfind/index/rindex", -                                    args, &subobj, &start, &end)) +    if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex", +                                         args, &subobj, &byte, &start, &end))          return -2; -    if (PyBytes_Check(subobj)) { -        sub = PyBytes_AS_STRING(subobj); -        sub_len = PyBytes_GET_SIZE(subobj); +    if (subobj) { +        if (_getbuffer(subobj, &subbuf) < 0) +            return -2; + +        sub = subbuf.buf; +        sub_len = subbuf.len; +    } +    else { +        sub = &byte; +        sub_len = 1;      } -    else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len)) -        /* XXX - the "expected a character buffer object" is pretty -           confusing for a non-expert.  remap to something else ? */ -        return -2;      if (dir > 0) -        return stringlib_find_slice( +        res = stringlib_find_slice(              PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),              sub, sub_len, start, end);      else -        return stringlib_rfind_slice( +        res = stringlib_rfind_slice(              PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),              sub, sub_len, start, end); + +    if (subobj) +        PyBuffer_Release(&subbuf); + +    return res;  } @@ -1504,23 +1485,38 @@ bytes_count(PyBytesObject *self, PyObject *args)      PyObject *sub_obj;      const char *str = PyBytes_AS_STRING(self), *sub;      Py_ssize_t sub_len; +    char byte;      Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; -    if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end)) +    Py_buffer vsub; +    PyObject *count_obj; + +    if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte, +                                         &start, &end))          return NULL; -    if (PyBytes_Check(sub_obj)) { -        sub = PyBytes_AS_STRING(sub_obj); -        sub_len = PyBytes_GET_SIZE(sub_obj); +    if (sub_obj) { +        if (_getbuffer(sub_obj, &vsub) < 0) +            return NULL; + +        sub = vsub.buf; +        sub_len = vsub.len; +    } +    else { +        sub = &byte; +        sub_len = 1;      } -    else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len)) -        return NULL;      ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self)); -    return PyLong_FromSsize_t( +    count_obj = PyLong_FromSsize_t(          stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)          ); + +    if (sub_obj) +        PyBuffer_Release(&vsub); + +    return count_obj;  } @@ -2329,11 +2325,13 @@ Line breaks are not included in the resulting list unless keepends\n\  is given and true.");  static PyObject* -bytes_splitlines(PyObject *self, PyObject *args) +bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds)  { +    static char *kwlist[] = {"keepends", 0};      int keepends = 0; -    if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends)) +    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines", +                                     kwlist, &keepends))          return NULL;      return stringlib_splitlines( @@ -2351,7 +2349,7 @@ Spaces between two numbers are accepted.\n\  Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");  static int -hex_digit_to_int(Py_UNICODE c) +hex_digit_to_int(Py_UCS4 c)  {      if (c >= 128)          return -1; @@ -2371,15 +2369,20 @@ bytes_fromhex(PyObject *cls, PyObject *args)  {      PyObject *newstring, *hexobj;      char *buf; -    Py_UNICODE *hex;      Py_ssize_t hexlen, byteslen, i, j;      int top, bot; +    void *data; +    unsigned int kind;      if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))          return NULL;      assert(PyUnicode_Check(hexobj)); -    hexlen = PyUnicode_GET_SIZE(hexobj); -    hex = PyUnicode_AS_UNICODE(hexobj); +    if (PyUnicode_READY(hexobj)) +        return NULL; +    kind = PyUnicode_KIND(hexobj); +    data = PyUnicode_DATA(hexobj); +    hexlen = PyUnicode_GET_LENGTH(hexobj); +      byteslen = hexlen/2; /* This overestimates if there are spaces */      newstring = PyBytes_FromStringAndSize(NULL, byteslen);      if (!newstring) @@ -2387,12 +2390,12 @@ bytes_fromhex(PyObject *cls, PyObject *args)      buf = PyBytes_AS_STRING(newstring);      for (i = j = 0; i < hexlen; i += 2) {          /* skip over spaces in the input */ -        while (hex[i] == ' ') +        while (PyUnicode_READ(kind, data, i) == ' ')              i++;          if (i >= hexlen)              break; -        top = hex_digit_to_int(hex[i]); -        bot = hex_digit_to_int(hex[i+1]); +        top = hex_digit_to_int(PyUnicode_READ(kind, data, i)); +        bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));          if (top == -1 || bot == -1) {              PyErr_Format(PyExc_ValueError,                           "non-hexadecimal number found in " @@ -2472,10 +2475,10 @@ bytes_methods[] = {      {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},      {"rpartition", (PyCFunction)bytes_rpartition, METH_O,       rpartition__doc__}, -    {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__}, +    {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__},      {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__}, -    {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__}, -    {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS, +    {"split", (PyCFunction)bytes_split, METH_VARARGS | METH_KEYWORDS, split__doc__}, +    {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,       splitlines__doc__},      {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,       startswith__doc__}, @@ -2574,6 +2577,12 @@ PyBytes_FromObject(PyObject *x)          PyErr_BadInternalCall();          return NULL;      } + +    if (PyBytes_CheckExact(x)) { +        Py_INCREF(x); +        return x; +    } +      /* Use the modern buffer interface */      if (PyObject_CheckBuffer(x)) {          Py_buffer view; @@ -2582,7 +2591,6 @@ PyBytes_FromObject(PyObject *x)          new = PyBytes_FromStringAndSize(NULL, view.len);          if (!new)              goto fail; -        /* XXX(brett.cannon): Better way to get to internal buffer? */          if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,                                    &view, view.len, 'C') < 0)              goto fail; @@ -2857,149 +2865,6 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)      return 0;  } -/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and - * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for - * Python's regular ints. - * Return value:  a new PyBytes*, or NULL if error. - *  .  *pbuf is set to point into it, - *     *plen set to the # of chars following that. - *     Caller must decref it when done using pbuf. - *     The string starting at *pbuf is of the form - *         "-"? ("0x" | "0X")? digit+ - *     "0x"/"0X" are present only for x and X conversions, with F_ALT - *         set in flags.  The case of hex digits will be correct, - *     There will be at least prec digits, zero-filled on the left if - *         necessary to get that many. - * val          object to be converted - * flags        bitmask of format flags; only F_ALT is looked at - * prec         minimum number of digits; 0-fill on left if needed - * type         a character in [duoxX]; u acts the same as d - * - * CAUTION:  o, x and X conversions on regular ints can never - * produce a '-' sign, but can for Python's unbounded ints. - */ -PyObject* -_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type, -                     char **pbuf, int *plen) -{ -    PyObject *result = NULL; -    char *buf; -    Py_ssize_t i; -    int sign;           /* 1 if '-', else 0 */ -    int len;            /* number of characters */ -    Py_ssize_t llen; -    int numdigits;      /* len == numnondigits + numdigits */ -    int numnondigits = 0; - -    /* Avoid exceeding SSIZE_T_MAX */ -    if (prec > INT_MAX-3) { -        PyErr_SetString(PyExc_OverflowError, -                        "precision too large"); -        return NULL; -    } - -    switch (type) { -    case 'd': -    case 'u': -        /* Special-case boolean: we want 0/1 */ -        if (PyBool_Check(val)) -            result = PyNumber_ToBase(val, 10); -        else -            result = Py_TYPE(val)->tp_str(val); -        break; -    case 'o': -        numnondigits = 2; -        result = PyNumber_ToBase(val, 8); -        break; -    case 'x': -    case 'X': -        numnondigits = 2; -        result = PyNumber_ToBase(val, 16); -        break; -    default: -        assert(!"'type' not in [duoxX]"); -    } -    if (!result) -        return NULL; - -    buf = _PyUnicode_AsString(result); -    if (!buf) { -        Py_DECREF(result); -        return NULL; -    } - -    /* To modify the string in-place, there can only be one reference. */ -    if (Py_REFCNT(result) != 1) { -        PyErr_BadInternalCall(); -        return NULL; -    } -    llen = PyUnicode_GetSize(result); -    if (llen > INT_MAX) { -        PyErr_SetString(PyExc_ValueError, -                        "string too large in _PyBytes_FormatLong"); -        return NULL; -    } -    len = (int)llen; -    if (buf[len-1] == 'L') { -        --len; -        buf[len] = '\0'; -    } -    sign = buf[0] == '-'; -    numnondigits += sign; -    numdigits = len - numnondigits; -    assert(numdigits > 0); - -    /* Get rid of base marker unless F_ALT */ -    if (((flags & F_ALT) == 0 && -        (type == 'o' || type == 'x' || type == 'X'))) { -        assert(buf[sign] == '0'); -        assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' || -               buf[sign+1] == 'o'); -        numnondigits -= 2; -        buf += 2; -        len -= 2; -        if (sign) -            buf[0] = '-'; -        assert(len == numnondigits + numdigits); -        assert(numdigits > 0); -    } - -    /* Fill with leading zeroes to meet minimum width. */ -    if (prec > numdigits) { -        PyObject *r1 = PyBytes_FromStringAndSize(NULL, -                                numnondigits + prec); -        char *b1; -        if (!r1) { -            Py_DECREF(result); -            return NULL; -        } -        b1 = PyBytes_AS_STRING(r1); -        for (i = 0; i < numnondigits; ++i) -            *b1++ = *buf++; -        for (i = 0; i < prec - numdigits; i++) -            *b1++ = '0'; -        for (i = 0; i < numdigits; i++) -            *b1++ = *buf++; -        *b1 = '\0'; -        Py_DECREF(result); -        result = r1; -        buf = PyBytes_AS_STRING(result); -        len = numnondigits + prec; -    } - -    /* Fix up case for hex conversions. */ -    if (type == 'X') { -        /* Need to convert all lower case letters to upper case. -           and need to convert 0x to 0X (and -0x to -0X). */ -        for (i = 0; i < len; i++) -            if (buf[i] >= 'a' && buf[i] <= 'x') -                buf[i] -= 'a'-'A'; -    } -    *pbuf = buf; -    *plen = len; -    return result; -} -  void  PyBytes_Fini(void)  { @@ -3072,9 +2937,43 @@ striter_len(striterobject *it)  PyDoc_STRVAR(length_hint_doc,               "Private method returning an estimate of len(list(it))."); +static PyObject * +striter_reduce(striterobject *it) +{ +    if (it->it_seq != NULL) { +        return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"), +                             it->it_seq, it->it_index); +    } else { +        PyObject *u = PyUnicode_FromUnicode(NULL, 0); +        if (u == NULL) +            return NULL; +        return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u); +    } +} + +PyDoc_STRVAR(reduce_doc, "Return state information for pickling."); + +static PyObject * +striter_setstate(striterobject *it, PyObject *state) +{ +    Py_ssize_t index = PyLong_AsSsize_t(state); +    if (index == -1 && PyErr_Occurred()) +        return NULL; +    if (index < 0) +        index = 0; +    it->it_index = index; +    Py_RETURN_NONE; +} + +PyDoc_STRVAR(setstate_doc, "Set state information for unpickling."); +  static PyMethodDef striter_methods[] = {      {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,       length_hint_doc}, +    {"__reduce__",      (PyCFunction)striter_reduce, METH_NOARGS, +     reduce_doc}, +    {"__setstate__",    (PyCFunction)striter_setstate, METH_O, +     setstate_doc},      {NULL,              NULL}           /* sentinel */  }; | 
