From 10a60b3ec0cdf7eeac98258fc53a33b7026f8ff3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20v=2E=20L=C3=B6wis?= Date: Wed, 18 Jul 2007 02:28:27 +0000 Subject: Change Py_BuildValue to generate Unicode objects for 's' and 'c' codes. Change pickle to dump bytes objects using the 'S' code, and to load the 'S' code as byte objects. Change datetime and array to generate and expect bytes objects in reduce/unreduce. --- Lib/pickle.py | 20 +++++++-- Lib/test/test_datetime.py | 3 +- Modules/arraymodule.c | 2 +- Modules/cPickle.c | 109 +++++++++++++++++++++++++++++++++++++++++++--- Modules/datetimemodule.c | 50 +++++++++++---------- Objects/bytesobject.c | 3 ++ Objects/exceptions.c | 20 +++++---- Python/modsupport.c | 4 +- 8 files changed, 166 insertions(+), 45 deletions(-) diff --git a/Lib/pickle.py b/Lib/pickle.py index 27f7eca..c158b8d 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -506,6 +506,20 @@ class Pickler: self.memoize(obj) dispatch[str8] = save_string + def save_bytes(self, obj): + # Like save_string + if self.bin: + n = len(obj) + if n < 256: + self.write(SHORT_BINSTRING + bytes([n]) + bytes(obj)) + else: + self.write(BINSTRING + pack("ob_size > 0) { - result = Py_BuildValue("O(cs#)O", + result = Py_BuildValue("O(cy#)O", array->ob_type, array->ob_descr->typecode, array->ob_item, diff --git a/Modules/cPickle.c b/Modules/cPickle.c index d50c743..ff29b67 100644 --- a/Modules/cPickle.c +++ b/Modules/cPickle.c @@ -1151,6 +1151,92 @@ save_string(Picklerobject *self, PyObject *args, int doput) } +static int +save_bytes(Picklerobject *self, PyObject *args, int doput) +{ + int size, len; + PyObject *repr=0; + + if ((size = PyBytes_Size(args)) < 0) + return -1; + + if (!self->bin) { + char *repr_str; + + static char string = STRING; + + if (!( repr = PyObject_ReprStr8(args))) + return -1; + + if ((len = PyString_Size(repr)) < 0) + goto err; + repr_str = PyString_AS_STRING((PyStringObject *)repr); + + /* Strip leading 'b' due to repr() of bytes() returning b'...' 
*/ + if (repr_str[0] == 'b') { + repr_str++; + len--; + } + + if (self->write_func(self, &string, 1) < 0) + goto err; + + if (self->write_func(self, repr_str, len) < 0) + goto err; + + if (self->write_func(self, "\n", 1) < 0) + goto err; + + Py_XDECREF(repr); + } + else { + int i; + char c_str[5]; + + if ((size = PyBytes_Size(args)) < 0) + return -1; + + if (size < 256) { + c_str[0] = SHORT_BINSTRING; + c_str[1] = size; + len = 2; + } + else if (size <= INT_MAX) { + c_str[0] = BINSTRING; + for (i = 1; i < 5; i++) + c_str[i] = (int)(size >> ((i - 1) * 8)); + len = 5; + } + else + return -1; /* string too large */ + + if (self->write_func(self, c_str, len) < 0) + return -1; + + if (size > 128 && Pdata_Check(self->file)) { + if (write_other(self, NULL, 0) < 0) return -1; + PDATA_APPEND(self->file, args, -1); + } + else { + if (self->write_func(self, + PyBytes_AsString(args), + size) < 0) + return -1; + } + } + + if (doput) + if (put(self, args) < 0) + return -1; + + return 0; + + err: + Py_XDECREF(repr); + return -1; +} + + /* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates backslash and newline characters to \uXXXX escapes. */ static PyObject * @@ -2086,11 +2172,11 @@ save(Picklerobject *self, PyObject *args, int pers_save) type = args->ob_type; switch (type->tp_name[0]) { - case 'b': + case 'b': /* XXX may want to save short byte strings here. */ if (args == Py_False || args == Py_True) { res = save_bool(self, args); goto finally; - } + } break; case 'i': if (type == &PyLong_Type) { @@ -2197,6 +2283,11 @@ save(Picklerobject *self, PyObject *args, int pers_save) res = save_global(self, args, NULL); goto finally; } + else if (type == &PyBytes_Type) { + res = save_bytes(self, args, 1); + goto finally; + } + break; } if (!pers_save && self->inst_pers_func) { @@ -3131,11 +3222,17 @@ load_string(Unpicklerobject *self) goto insecure; /********************************************/ + /* XXX avoid going through str8 here. 
*/ str = PyString_DecodeEscape(p, len, NULL, 0, NULL); free(s); if (str) { - PDATA_PUSH(self->stack, str, -1); - res = 0; + PyObject *str2 = PyBytes_FromStringAndSize( + PyString_AsString(str), PyString_Size(str)); + Py_DECREF(str); + if (str2) { + PDATA_PUSH(self->stack, str2, -1); + res = 0; + } } return res; @@ -3160,7 +3257,7 @@ load_binstring(Unpicklerobject *self) if (self->read_func(self, &s, l) < 0) return -1; - if (!( py_string = PyString_FromStringAndSize(s, l))) + if (!( py_string = PyBytes_FromStringAndSize(s, l))) return -1; PDATA_PUSH(self->stack, py_string, -1); @@ -3182,7 +3279,7 @@ load_short_binstring(Unpicklerobject *self) if (self->read_func(self, &s, l) < 0) return -1; - if (!( py_string = PyString_FromStringAndSize(s, l))) return -1; + if (!( py_string = PyBytes_FromStringAndSize(s, l))) return -1; PDATA_PUSH(self->stack, py_string, -1); return 0; diff --git a/Modules/datetimemodule.c b/Modules/datetimemodule.c index 61f9ab7..aa037c2 100644 --- a/Modules/datetimemodule.c +++ b/Modules/datetimemodule.c @@ -2183,15 +2183,15 @@ date_new(PyTypeObject *type, PyObject *args, PyObject *kw) /* Check for invocation from pickle with __getstate__ state */ if (PyTuple_GET_SIZE(args) == 1 && - PyString_Check(state = PyTuple_GET_ITEM(args, 0)) && - PyString_GET_SIZE(state) == _PyDateTime_DATE_DATASIZE && - MONTH_IS_SANE(PyString_AS_STRING(state)[2])) + PyBytes_Check(state = PyTuple_GET_ITEM(args, 0)) && + PyBytes_GET_SIZE(state) == _PyDateTime_DATE_DATASIZE && + MONTH_IS_SANE(PyBytes_AS_STRING(state)[2])) { PyDateTime_Date *me; me = (PyDateTime_Date *) (type->tp_alloc(type, 0)); if (me != NULL) { - char *pdata = PyString_AS_STRING(state); + char *pdata = PyBytes_AS_STRING(state); memcpy(me->data, pdata, _PyDateTime_DATE_DATASIZE); me->hashcode = -1; } @@ -2509,13 +2509,13 @@ date_replace(PyDateTime_Date *self, PyObject *args, PyObject *kw) return clone; } -static PyObject *date_getstate(PyDateTime_Date *self); +static PyObject *date_getstate(PyDateTime_Date 
*self, int hashable); static long date_hash(PyDateTime_Date *self) { if (self->hashcode == -1) { - PyObject *temp = date_getstate(self); + PyObject *temp = date_getstate(self, 1); if (temp != NULL) { self->hashcode = PyObject_Hash(temp); Py_DECREF(temp); @@ -2543,18 +2543,22 @@ date_weekday(PyDateTime_Date *self) /* __getstate__ isn't exposed */ static PyObject * -date_getstate(PyDateTime_Date *self) +date_getstate(PyDateTime_Date *self, int hashable) { - return Py_BuildValue( - "(N)", - PyString_FromStringAndSize((char *)self->data, - _PyDateTime_DATE_DATASIZE)); + PyObject* field; + if (hashable) + field = PyString_FromStringAndSize( + (char*)self->data, _PyDateTime_DATE_DATASIZE); + else + field = PyBytes_FromStringAndSize( + (char*)self->data, _PyDateTime_DATE_DATASIZE); + return Py_BuildValue("(N)", field); } static PyObject * date_reduce(PyDateTime_Date *self, PyObject *arg) { - return Py_BuildValue("(ON)", self->ob_type, date_getstate(self)); + return Py_BuildValue("(ON)", self->ob_type, date_getstate(self, 0)); } static PyMethodDef date_methods[] = { @@ -2998,9 +3002,9 @@ time_new(PyTypeObject *type, PyObject *args, PyObject *kw) /* Check for invocation from pickle with __getstate__ state */ if (PyTuple_GET_SIZE(args) >= 1 && PyTuple_GET_SIZE(args) <= 2 && - PyString_Check(state = PyTuple_GET_ITEM(args, 0)) && - PyString_GET_SIZE(state) == _PyDateTime_TIME_DATASIZE && - ((unsigned char) (PyString_AS_STRING(state)[0])) < 24) + PyBytes_Check(state = PyTuple_GET_ITEM(args, 0)) && + PyBytes_GET_SIZE(state) == _PyDateTime_TIME_DATASIZE && + ((unsigned char) (PyBytes_AS_STRING(state)[0])) < 24) { PyDateTime_Time *me; char aware; @@ -3016,7 +3020,7 @@ time_new(PyTypeObject *type, PyObject *args, PyObject *kw) aware = (char)(tzinfo != Py_None); me = (PyDateTime_Time *) (type->tp_alloc(type, aware)); if (me != NULL) { - char *pdata = PyString_AS_STRING(state); + char *pdata = PyBytes_AS_STRING(state); memcpy(me->data, pdata, _PyDateTime_TIME_DATASIZE); me->hashcode 
= -1; @@ -3331,7 +3335,7 @@ time_getstate(PyDateTime_Time *self) PyObject *basestate; PyObject *result = NULL; - basestate = PyString_FromStringAndSize((char *)self->data, + basestate = PyBytes_FromStringAndSize((char *)self->data, _PyDateTime_TIME_DATASIZE); if (basestate != NULL) { if (! HASTZINFO(self) || self->tzinfo == Py_None) @@ -3513,9 +3517,9 @@ datetime_new(PyTypeObject *type, PyObject *args, PyObject *kw) /* Check for invocation from pickle with __getstate__ state */ if (PyTuple_GET_SIZE(args) >= 1 && PyTuple_GET_SIZE(args) <= 2 && - PyString_Check(state = PyTuple_GET_ITEM(args, 0)) && - PyString_GET_SIZE(state) == _PyDateTime_DATETIME_DATASIZE && - MONTH_IS_SANE(PyString_AS_STRING(state)[2])) + PyBytes_Check(state = PyTuple_GET_ITEM(args, 0)) && + PyBytes_GET_SIZE(state) == _PyDateTime_DATETIME_DATASIZE && + MONTH_IS_SANE(PyBytes_AS_STRING(state)[2])) { PyDateTime_DateTime *me; char aware; @@ -3531,7 +3535,7 @@ datetime_new(PyTypeObject *type, PyObject *args, PyObject *kw) aware = (char)(tzinfo != Py_None); me = (PyDateTime_DateTime *) (type->tp_alloc(type , aware)); if (me != NULL) { - char *pdata = PyString_AS_STRING(state); + char *pdata = PyBytes_AS_STRING(state); memcpy(me->data, pdata, _PyDateTime_DATETIME_DATASIZE); me->hashcode = -1; @@ -4375,8 +4379,8 @@ datetime_getstate(PyDateTime_DateTime *self) PyObject *basestate; PyObject *result = NULL; - basestate = PyString_FromStringAndSize((char *)self->data, - _PyDateTime_DATETIME_DATASIZE); + basestate = PyBytes_FromStringAndSize((char *)self->data, + _PyDateTime_DATETIME_DATASIZE); if (basestate != NULL) { if (! 
HASTZINFO(self) || self->tzinfo == Py_None) result = PyTuple_Pack(1, basestate); diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 532e637..6340b46 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2724,6 +2724,9 @@ PyDoc_STRVAR(reduce_doc, "Return state information for pickling."); static PyObject * bytes_reduce(PyBytesObject *self) { + /* XXX: This currently returns a Py_UNICODE-widened string + in the tuple which is completely useless. Pickle stopped + using it for that reason. */ return Py_BuildValue("(O(s#))", self->ob_type, self->ob_bytes == NULL ? "" : self->ob_bytes, diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 1df0ea0..a401806 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -831,28 +831,32 @@ my_basename(char *name) static PyObject * SyntaxError_str(PySyntaxErrorObject *self) { - int have_filename = 0; int have_lineno = 0; + char *filename = 0; /* XXX -- do all the additional formatting with filename and lineno here */ - have_filename = (self->filename != NULL) && - PyString_Check(self->filename); + if (self->filename) { + if (PyString_Check(self->filename)) + filename = PyString_AsString(self->filename); + else if (PyUnicode_Check(self->filename)) + filename = PyUnicode_AsString(self->filename); + } have_lineno = (self->lineno != NULL) && PyInt_CheckExact(self->lineno); - if (!have_filename && !have_lineno) + if (!filename && !have_lineno) return PyObject_Unicode(self->msg ? self->msg : Py_None); - if (have_filename && have_lineno) + if (filename && have_lineno) return PyUnicode_FromFormat("%S (%s, line %ld)", self->msg ? self->msg : Py_None, - my_basename(PyString_AS_STRING(self->filename)), + my_basename(filename), PyInt_AsLong(self->lineno)); - else if (have_filename) + else if (filename) return PyUnicode_FromFormat("%S (%s)", self->msg ? 
self->msg : Py_None, - my_basename(PyString_AS_STRING(self->filename))); + my_basename(filename)); else /* only have_lineno */ return PyUnicode_FromFormat("%S (line %ld)", self->msg ? self->msg : Py_None, diff --git a/Python/modsupport.c b/Python/modsupport.c index 330da5f..d29fe9b 100644 --- a/Python/modsupport.c +++ b/Python/modsupport.c @@ -387,7 +387,7 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags) { char p[1]; p[0] = (char)va_arg(*p_va, int); - return PyString_FromStringAndSize(p, 1); + return PyUnicode_FromStringAndSize(p, 1); } case 'C': { @@ -438,7 +438,7 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags) } n = (Py_ssize_t)m; } - v = PyString_FromStringAndSize(str, n); + v = PyUnicode_FromStringAndSize(str, n); } return v; } -- cgit v0.12