diff options
author | Alexander Belopolsky <alexander.belopolsky@gmail.com> | 2010-12-04 03:38:46 (GMT) |
---|---|---|
committer | Alexander Belopolsky <alexander.belopolsky@gmail.com> | 2010-12-04 03:38:46 (GMT) |
commit | 942af5a9a45b7b4976bea2e794eccaaf2b3b5c09 (patch) | |
tree | f621bdffa16dd0b04d7bf60d6a32f198fc7b3ec8 /Objects | |
parent | 36526bf3d95763afa6d4efe402b8840b1532d637 (diff) | |
download | cpython-942af5a9a45b7b4976bea2e794eccaaf2b3b5c09.zip cpython-942af5a9a45b7b4976bea2e794eccaaf2b3b5c09.tar.gz cpython-942af5a9a45b7b4976bea2e794eccaaf2b3b5c09.tar.bz2 |
Issue #10557: Fixed error messages from float() and other numeric
types. Added a new API function, PyUnicode_TransformDecimalToASCII(),
which transforms non-ASCII decimal digits in a Unicode string to their
ASCII equivalents.
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/complexobject.c | 30 | ||||
-rw-r--r-- | Objects/floatobject.c | 58 | ||||
-rw-r--r-- | Objects/longobject.c | 31 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 35 |
4 files changed, 106 insertions, 48 deletions
diff --git a/Objects/complexobject.c b/Objects/complexobject.c index 5999796..ec529d5 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -766,20 +766,26 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v) char *end; double x=0.0, y=0.0, z; int got_bracket=0; - char *s_buffer = NULL; + PyObject *s_buffer = NULL; Py_ssize_t len; if (PyUnicode_Check(v)) { - s_buffer = (char *)PyMem_MALLOC(PyUnicode_GET_SIZE(v) + 1); + Py_ssize_t i, buflen = PyUnicode_GET_SIZE(v); + Py_UNICODE *bufptr; + s_buffer = PyUnicode_TransformDecimalToASCII( + PyUnicode_AS_UNICODE(v), buflen); if (s_buffer == NULL) - return PyErr_NoMemory(); - if (PyUnicode_EncodeDecimal(PyUnicode_AS_UNICODE(v), - PyUnicode_GET_SIZE(v), - s_buffer, - NULL)) + return NULL; + /* Replace non-ASCII whitespace with ' ' */ + bufptr = PyUnicode_AS_UNICODE(s_buffer); + for (i = 0; i < buflen; i++) { + Py_UNICODE ch = bufptr[i]; + if (ch > 127 && Py_UNICODE_ISSPACE(ch)) + bufptr[i] = ' '; + } + s = _PyUnicode_AsStringAndSize(s_buffer, &len); + if (s == NULL) goto error; - s = s_buffer; - len = strlen(s); } else if (PyObject_AsCharBuffer(v, &s, &len)) { PyErr_SetString(PyExc_TypeError, @@ -894,16 +900,14 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v) if (s-start != len) goto parse_error; - if (s_buffer) - PyMem_FREE(s_buffer); + Py_XDECREF(s_buffer); return complex_subtype_from_doubles(type, x, y); parse_error: PyErr_SetString(PyExc_ValueError, "complex() arg is a malformed string"); error: - if (s_buffer) - PyMem_FREE(s_buffer); + Py_XDECREF(s_buffer); return NULL; } diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 4decb0b..8409f0a 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -174,22 +174,30 @@ PyFloat_FromString(PyObject *v) { const char *s, *last, *end; double x; - char buffer[256]; /* for errors */ - char *s_buffer = NULL; + PyObject *s_buffer = NULL; Py_ssize_t len; PyObject *result = NULL; if (PyUnicode_Check(v)) { - s_buffer = (char *)PyMem_MALLOC(PyUnicode_GET_SIZE(v)+1); + Py_ssize_t i, buflen = PyUnicode_GET_SIZE(v); + Py_UNICODE *bufptr; + s_buffer = PyUnicode_TransformDecimalToASCII( + PyUnicode_AS_UNICODE(v), buflen); if (s_buffer == NULL) - return PyErr_NoMemory(); - if (PyUnicode_EncodeDecimal(PyUnicode_AS_UNICODE(v), - PyUnicode_GET_SIZE(v), - s_buffer, - NULL)) - goto error; - s = s_buffer; - len = strlen(s); + return NULL; + /* Replace non-ASCII whitespace with ' ' */ + bufptr = PyUnicode_AS_UNICODE(s_buffer); + for (i = 0; i < buflen; i++) { + Py_UNICODE ch = bufptr[i]; + if (ch > 127 && Py_UNICODE_ISSPACE(ch)) + bufptr[i] = ' '; + } + s = _PyUnicode_AsStringAndSize(s_buffer, &len); + if (s == NULL) { + Py_DECREF(s_buffer); + return NULL; + } + last = s + len; } else if (PyObject_AsCharBuffer(v, &s, &len)) { PyErr_SetString(PyExc_TypeError, @@ -197,29 +205,27 @@ PyFloat_FromString(PyObject *v) return NULL; } last = s + len; - - while (Py_ISSPACE(*s)) + /* strip space */ + while (s < last && Py_ISSPACE(*s)) s++; + while (s < last - 1 && Py_ISSPACE(last[-1])) + last--; /* We don't care about overflow or underflow. If the platform * supports them, infinities and signed zeroes (on underflow) are * fine. */ x = PyOS_string_to_double(s, (char **)&end, NULL); - if (x == -1.0 && PyErr_Occurred()) - goto error; - while (Py_ISSPACE(*end)) - end++; - if (end == last) - result = PyFloat_FromDouble(x); - else { - PyOS_snprintf(buffer, sizeof(buffer), - "invalid literal for float(): %.200s", s); - PyErr_SetString(PyExc_ValueError, buffer); + if (end != last) { + PyErr_Format(PyExc_ValueError, + "could not convert string to float: " + "%R", v); result = NULL; } + else if (x == -1.0 && PyErr_Occurred()) + result = NULL; + else + result = PyFloat_FromDouble(x); - error: - if (s_buffer) - PyMem_FREE(s_buffer); + Py_XDECREF(s_buffer); return result; } diff --git a/Objects/longobject.c b/Objects/longobject.c index e8a7284..534e52d 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -2133,17 +2133,34 @@ PyObject * PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base) { PyObject *result; - char *buffer = (char *)PyMem_MALLOC(length+1); + PyObject *asciidig; + char *buffer, *end; + Py_ssize_t i, buflen; + Py_UNICODE *ptr; - if (buffer == NULL) + asciidig = PyUnicode_TransformDecimalToASCII(u, length); + if (asciidig == NULL) return NULL; - - if (PyUnicode_EncodeDecimal(u, length, buffer, NULL)) { - PyMem_FREE(buffer); + /* Replace non-ASCII whitespace with ' ' */ + ptr = PyUnicode_AS_UNICODE(asciidig); + for (i = 0; i < length; i++) { + Py_UNICODE ch = ptr[i]; + if (ch > 127 && Py_UNICODE_ISSPACE(ch)) + ptr[i] = ' '; + } + buffer = _PyUnicode_AsStringAndSize(asciidig, &buflen); + if (buffer == NULL) { + Py_DECREF(asciidig); return NULL; } - result = PyLong_FromString(buffer, NULL, base); - PyMem_FREE(buffer); + result = PyLong_FromString(buffer, &end, base); + if (result != NULL && end != buffer + buflen) { + PyErr_SetString(PyExc_ValueError, + "null byte in argument for int()"); + Py_DECREF(result); + result = NULL; + } + Py_DECREF(asciidig); return result; } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index d3a2d1b..751da30 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6206,6 +6206,30 @@ PyObject *PyUnicode_Translate(PyObject *str, return NULL; } +PyObject * +PyUnicode_TransformDecimalToASCII(Py_UNICODE *s, + Py_ssize_t length) +{ + PyObject *result; + Py_UNICODE *p; /* write pointer into result */ + Py_ssize_t i; + /* Copy to a new string */ + result = (PyObject *)_PyUnicode_New(length); + Py_UNICODE_COPY(PyUnicode_AS_UNICODE(result), s, length); + if (result == NULL) + return result; + p = PyUnicode_AS_UNICODE(result); + /* Iterate over code points */ + for (i = 0; i < length; i++) { + Py_UNICODE ch =s[i]; + if (ch > 127) { + int decimal = Py_UNICODE_TODECIMAL(ch); + if (decimal >= 0) + p[i] = '0' + decimal; + } + } + return result; +} /* --- Decimal Encoder ---------------------------------------------------- */ int PyUnicode_EncodeDecimal(Py_UNICODE *s, @@ -8967,6 +8991,13 @@ unicode_freelistsize(PyUnicodeObject *self) { return PyLong_FromLong(numfree); } + +static PyObject * +unicode__decimal2ascii(PyObject *self) +{ + return PyUnicode_TransformDecimalToASCII(PyUnicode_AS_UNICODE(self), + PyUnicode_GET_SIZE(self)); +} #endif PyDoc_STRVAR(startswith__doc__, @@ -9108,7 +9139,6 @@ unicode_getnewargs(PyUnicodeObject *v) return Py_BuildValue("(u#)", v->str, v->length); } - static PyMethodDef unicode_methods[] = { /* Order is according to common usage: often used methods should @@ -9170,8 +9200,9 @@ static PyMethodDef unicode_methods[] = { #endif #if 0 - /* This one is just used for debugging the implementation. */ + /* These methods are just used for debugging the implementation. */ {"freelistsize", (PyCFunction) unicode_freelistsize, METH_NOARGS}, + {"_decimal2ascii", (PyCFunction) unicode__decimal2ascii, METH_NOARGS}, #endif {"__getnewargs__", (PyCFunction)unicode_getnewargs, METH_NOARGS}, |