diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2012-08-05 22:46:05 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@gmail.com> | 2012-08-05 22:46:05 (GMT) |
commit | 62bb394729a167a46d950954c4aed5f3ba7b8a69 (patch) | |
tree | 734100e548a373ceaed15000bc94cd2fed6c82c6 /Modules/arraymodule.c | |
parent | 3af26174016fc5547a36fba37cacfaeaac9a9b6e (diff) | |
download | cpython-62bb394729a167a46d950954c4aed5f3ba7b8a69.zip cpython-62bb394729a167a46d950954c4aed5f3ba7b8a69.tar.gz cpython-62bb394729a167a46d950954c4aed5f3ba7b8a69.tar.bz2 |
Close #13072: Restore the pre-PEP 393 code for the array module
The 'u' format of the array module uses the Py_UNICODE type again, for backward
compatibility with Python 3.2.
The only change from Python 3.2 is that the result of PyUnicode_AsUnicode() is now
checked for NULL.
Diffstat (limited to 'Modules/arraymodule.c')
-rw-r--r-- | Modules/arraymodule.c | 86 |
1 files changed, 48 insertions, 38 deletions
diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index b0921c8..f0615c9 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -174,25 +174,24 @@ BB_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) static PyObject * u_getitem(arrayobject *ap, Py_ssize_t i) { - return PyUnicode_FromOrdinal(((Py_UCS4 *) ap->ob_item)[i]); + return PyUnicode_FromUnicode(&((Py_UNICODE *) ap->ob_item)[i], 1); } static int u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) { - PyObject *p; + Py_UNICODE *p; + Py_ssize_t len; - if (!PyArg_Parse(v, "U;array item must be unicode character", &p)) - return -1; - if (PyUnicode_READY(p)) + if (!PyArg_Parse(v, "u#;array item must be unicode character", &p, &len)) return -1; - if (PyUnicode_GET_LENGTH(p) != 1) { + if (len != 1) { PyErr_SetString(PyExc_TypeError, "array item must be unicode character"); return -1; } if (i >= 0) - ((Py_UCS4 *)ap->ob_item)[i] = PyUnicode_READ_CHAR(p, 0); + ((Py_UNICODE *)ap->ob_item)[i] = p[0]; return 0; } @@ -444,13 +443,6 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) return 0; } -#if SIZEOF_INT == 4 -# define STRUCT_LONG_FORMAT "I" -#elif SIZEOF_LONG == 4 -# define STRUCT_LONG_FORMAT "L" -#else -# error "Unable to get struct format for Py_UCS4" -#endif /* Description of types. * @@ -460,7 +452,7 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) static struct arraydescr descriptors[] = { {'b', 1, b_getitem, b_setitem, "b", 1, 1}, {'B', 1, BB_getitem, BB_setitem, "B", 1, 0}, - {'u', sizeof(Py_UCS4), u_getitem, u_setitem, STRUCT_LONG_FORMAT, 0, 0}, + {'u', sizeof(Py_UNICODE), u_getitem, u_setitem, "u", 0, 0}, {'h', sizeof(short), h_getitem, h_setitem, "h", 1, 1}, {'H', sizeof(short), HH_getitem, HH_setitem, "H", 1, 0}, {'i', sizeof(int), i_getitem, i_setitem, "i", 1, 1}, @@ -1519,26 +1511,25 @@ This method is deprecated. 
Use tobytes instead."); static PyObject * array_fromunicode(arrayobject *self, PyObject *args) { - PyObject *ustr; + Py_UNICODE *ustr; Py_ssize_t n; + char typecode; - if (!PyArg_ParseTuple(args, "U:fromunicode", &ustr)) + if (!PyArg_ParseTuple(args, "u#:fromunicode", &ustr, &n)) return NULL; - if (self->ob_descr->typecode != 'u') { + typecode = self->ob_descr->typecode; + if ((typecode != 'u')) { PyErr_SetString(PyExc_ValueError, "fromunicode() may only be called on " "unicode type arrays"); return NULL; } - if (PyUnicode_READY(ustr)) - return NULL; - n = PyUnicode_GET_LENGTH(ustr); if (n > 0) { Py_ssize_t old_size = Py_SIZE(self); if (array_resize(self, old_size + n) == -1) return NULL; - if (!PyUnicode_AsUCS4(ustr, (Py_UCS4 *)self->ob_item + old_size, n, 0)) - return NULL; + memcpy(self->ob_item + old_size * sizeof(Py_UNICODE), + ustr, n * sizeof(Py_UNICODE)); } Py_INCREF(Py_None); @@ -1557,14 +1548,14 @@ append Unicode data to an array of some other type."); static PyObject * array_tounicode(arrayobject *self, PyObject *unused) { - if (self->ob_descr->typecode != 'u') { + char typecode; + typecode = self->ob_descr->typecode; + if ((typecode != 'u')) { PyErr_SetString(PyExc_ValueError, "tounicode() may only be called on unicode type arrays"); return NULL; } - return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, - (Py_UCS4 *) self->ob_item, - Py_SIZE(self)); + return PyUnicode_FromUnicode((Py_UNICODE *) self->ob_item, Py_SIZE(self)); } PyDoc_STRVAR(tounicode_doc, @@ -1671,7 +1662,13 @@ typecode_to_mformat_code(char typecode) return UNSIGNED_INT8; case 'u': - return UTF32_LE + is_big_endian; + if (sizeof(Py_UNICODE) == 2) { + return UTF16_LE + is_big_endian; + } + if (sizeof(Py_UNICODE) == 4) { + return UTF32_LE + is_big_endian; + } + return UNKNOWN_FORMAT; case 'f': if (sizeof(float) == 4) { @@ -2419,8 +2416,14 @@ array_buffer_getbuf(arrayobject *self, Py_buffer *view, int flags) view->strides = &(view->itemsize); view->format = NULL; view->internal = NULL; - 
if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) + if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) { view->format = self->ob_descr->formats; +#ifdef Py_UNICODE_WIDE + if (self->ob_descr->typecode == 'u') { + view->format = "w"; + } +#endif + } finish: self->ob_exports++; @@ -2534,25 +2537,29 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) Py_DECREF(v); } else if (initial != NULL && PyUnicode_Check(initial)) { + Py_UNICODE *ustr; Py_ssize_t n; - if (PyUnicode_READY(initial)) { + + ustr = PyUnicode_AsUnicode(initial); + if (ustr == NULL) { + PyErr_NoMemory(); Py_DECREF(a); return NULL; } - n = PyUnicode_GET_LENGTH(initial); + + n = PyUnicode_GET_DATA_SIZE(initial); if (n > 0) { arrayobject *self = (arrayobject *)a; - Py_UCS4 *item = (Py_UCS4 *)self->ob_item; - item = (Py_UCS4 *)PyMem_Realloc(item, n * sizeof(Py_UCS4)); + char *item = self->ob_item; + item = (char *)PyMem_Realloc(item, n); if (item == NULL) { PyErr_NoMemory(); Py_DECREF(a); return NULL; } - self->ob_item = (char*)item; - Py_SIZE(self) = n; - if (!PyUnicode_AsUCS4(initial, item, n, 0)) - return NULL; + self->ob_item = item; + Py_SIZE(self) = n / sizeof(Py_UNICODE); + memcpy(item, ustr, n); self->allocated = Py_SIZE(self); } } @@ -2593,7 +2600,7 @@ is a single character. The following type codes are defined:\n\ Type code C Type Minimum size in bytes \n\ 'b' signed integer 1 \n\ 'B' unsigned integer 1 \n\ - 'u' Unicode character 4 \n\ + 'u' Unicode character 2 (see note) \n\ 'h' signed integer 2 \n\ 'H' unsigned integer 2 \n\ 'i' signed integer 2 \n\ @@ -2605,6 +2612,9 @@ is a single character. The following type codes are defined:\n\ 'f' floating point 4 \n\ 'd' floating point 8 \n\ \n\ +NOTE: The 'u' typecode corresponds to Python's unicode character. On \n\ +narrow builds this is 2-bytes on wide builds this is 4-bytes.\n\ +\n\ NOTE: The 'q' and 'Q' type codes are only available if the platform \n\ C compiler used to build Python supports 'long long', or, on Windows, \n\ '__int64'.\n\ |