summary | refs | log | tree | commit | diff | stats
path: root/Objects
diff options
context:
space:
mode:
author: Alexander Belopolsky <alexander.belopolsky@gmail.com> 2010-12-04 03:38:46 (GMT)
committer: Alexander Belopolsky <alexander.belopolsky@gmail.com> 2010-12-04 03:38:46 (GMT)
commit: 942af5a9a45b7b4976bea2e794eccaaf2b3b5c09 (patch)
tree: f621bdffa16dd0b04d7bf60d6a32f198fc7b3ec8 /Objects
parent: 36526bf3d95763afa6d4efe402b8840b1532d637 (diff)
download: cpython-942af5a9a45b7b4976bea2e794eccaaf2b3b5c09.zip
cpython-942af5a9a45b7b4976bea2e794eccaaf2b3b5c09.tar.gz
cpython-942af5a9a45b7b4976bea2e794eccaaf2b3b5c09.tar.bz2
Issue #10557: Fixed error messages from float() and other numeric
types. Added a new API function, PyUnicode_TransformDecimalToASCII(), which transforms non-ASCII decimal digits in a Unicode string to their ASCII equivalents.
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/complexobject.c 30
-rw-r--r-- Objects/floatobject.c 58
-rw-r--r-- Objects/longobject.c 31
-rw-r--r-- Objects/unicodeobject.c 35
4 files changed, 106 insertions, 48 deletions
diff --git a/Objects/complexobject.c b/Objects/complexobject.c
index 5999796..ec529d5 100644
--- a/Objects/complexobject.c
+++ b/Objects/complexobject.c
@@ -766,20 +766,26 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
char *end;
double x=0.0, y=0.0, z;
int got_bracket=0;
- char *s_buffer = NULL;
+ PyObject *s_buffer = NULL;
Py_ssize_t len;
if (PyUnicode_Check(v)) {
- s_buffer = (char *)PyMem_MALLOC(PyUnicode_GET_SIZE(v) + 1);
+ Py_ssize_t i, buflen = PyUnicode_GET_SIZE(v);
+ Py_UNICODE *bufptr;
+ s_buffer = PyUnicode_TransformDecimalToASCII(
+ PyUnicode_AS_UNICODE(v), buflen);
if (s_buffer == NULL)
- return PyErr_NoMemory();
- if (PyUnicode_EncodeDecimal(PyUnicode_AS_UNICODE(v),
- PyUnicode_GET_SIZE(v),
- s_buffer,
- NULL))
+ return NULL;
+ /* Replace non-ASCII whitespace with ' ' */
+ bufptr = PyUnicode_AS_UNICODE(s_buffer);
+ for (i = 0; i < buflen; i++) {
+ Py_UNICODE ch = bufptr[i];
+ if (ch > 127 && Py_UNICODE_ISSPACE(ch))
+ bufptr[i] = ' ';
+ }
+ s = _PyUnicode_AsStringAndSize(s_buffer, &len);
+ if (s == NULL)
goto error;
- s = s_buffer;
- len = strlen(s);
}
else if (PyObject_AsCharBuffer(v, &s, &len)) {
PyErr_SetString(PyExc_TypeError,
@@ -894,16 +900,14 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
if (s-start != len)
goto parse_error;
- if (s_buffer)
- PyMem_FREE(s_buffer);
+ Py_XDECREF(s_buffer);
return complex_subtype_from_doubles(type, x, y);
parse_error:
PyErr_SetString(PyExc_ValueError,
"complex() arg is a malformed string");
error:
- if (s_buffer)
- PyMem_FREE(s_buffer);
+ Py_XDECREF(s_buffer);
return NULL;
}
diff --git a/Objects/floatobject.c b/Objects/floatobject.c
index 4decb0b..8409f0a 100644
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -174,22 +174,30 @@ PyFloat_FromString(PyObject *v)
{
const char *s, *last, *end;
double x;
- char buffer[256]; /* for errors */
- char *s_buffer = NULL;
+ PyObject *s_buffer = NULL;
Py_ssize_t len;
PyObject *result = NULL;
if (PyUnicode_Check(v)) {
- s_buffer = (char *)PyMem_MALLOC(PyUnicode_GET_SIZE(v)+1);
+ Py_ssize_t i, buflen = PyUnicode_GET_SIZE(v);
+ Py_UNICODE *bufptr;
+ s_buffer = PyUnicode_TransformDecimalToASCII(
+ PyUnicode_AS_UNICODE(v), buflen);
if (s_buffer == NULL)
- return PyErr_NoMemory();
- if (PyUnicode_EncodeDecimal(PyUnicode_AS_UNICODE(v),
- PyUnicode_GET_SIZE(v),
- s_buffer,
- NULL))
- goto error;
- s = s_buffer;
- len = strlen(s);
+ return NULL;
+ /* Replace non-ASCII whitespace with ' ' */
+ bufptr = PyUnicode_AS_UNICODE(s_buffer);
+ for (i = 0; i < buflen; i++) {
+ Py_UNICODE ch = bufptr[i];
+ if (ch > 127 && Py_UNICODE_ISSPACE(ch))
+ bufptr[i] = ' ';
+ }
+ s = _PyUnicode_AsStringAndSize(s_buffer, &len);
+ if (s == NULL) {
+ Py_DECREF(s_buffer);
+ return NULL;
+ }
+ last = s + len;
}
else if (PyObject_AsCharBuffer(v, &s, &len)) {
PyErr_SetString(PyExc_TypeError,
@@ -197,29 +205,27 @@ PyFloat_FromString(PyObject *v)
return NULL;
}
last = s + len;
-
- while (Py_ISSPACE(*s))
+ /* strip space */
+ while (s < last && Py_ISSPACE(*s))
s++;
+ while (s < last - 1 && Py_ISSPACE(last[-1]))
+ last--;
/* We don't care about overflow or underflow. If the platform
* supports them, infinities and signed zeroes (on underflow) are
* fine. */
x = PyOS_string_to_double(s, (char **)&end, NULL);
- if (x == -1.0 && PyErr_Occurred())
- goto error;
- while (Py_ISSPACE(*end))
- end++;
- if (end == last)
- result = PyFloat_FromDouble(x);
- else {
- PyOS_snprintf(buffer, sizeof(buffer),
- "invalid literal for float(): %.200s", s);
- PyErr_SetString(PyExc_ValueError, buffer);
+ if (end != last) {
+ PyErr_Format(PyExc_ValueError,
+ "could not convert string to float: "
+ "%R", v);
result = NULL;
}
+ else if (x == -1.0 && PyErr_Occurred())
+ result = NULL;
+ else
+ result = PyFloat_FromDouble(x);
- error:
- if (s_buffer)
- PyMem_FREE(s_buffer);
+ Py_XDECREF(s_buffer);
return result;
}
diff --git a/Objects/longobject.c b/Objects/longobject.c
index e8a7284..534e52d 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -2133,17 +2133,34 @@ PyObject *
PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base)
{
PyObject *result;
- char *buffer = (char *)PyMem_MALLOC(length+1);
+ PyObject *asciidig;
+ char *buffer, *end;
+ Py_ssize_t i, buflen;
+ Py_UNICODE *ptr;
- if (buffer == NULL)
+ asciidig = PyUnicode_TransformDecimalToASCII(u, length); /* new string: non-ASCII decimal digits mapped to '0'..'9' */
+ if (asciidig == NULL)
return NULL;
-
- if (PyUnicode_EncodeDecimal(u, length, buffer, NULL)) {
- PyMem_FREE(buffer);
+ /* Replace non-ASCII whitespace with ' ' in the temporary copy, so the string handed to PyLong_FromString() below contains plain spaces instead */
+ ptr = PyUnicode_AS_UNICODE(asciidig);
+ for (i = 0; i < length; i++) {
+ Py_UNICODE ch = ptr[i];
+ if (ch > 127 && Py_UNICODE_ISSPACE(ch))
+ ptr[i] = ' ';
+ }
+ buffer = _PyUnicode_AsStringAndSize(asciidig, &buflen);
+ if (buffer == NULL) {
+ Py_DECREF(asciidig);
return NULL;
}
- result = PyLong_FromString(buffer, NULL, base);
- PyMem_FREE(buffer);
+ result = PyLong_FromString(buffer, &end, base);
+ if (result != NULL && end != buffer + buflen) { /* parse succeeded but stopped short of the end of the buffer */
+ PyErr_SetString(PyExc_ValueError,
+ "null byte in argument for int()");
+ Py_DECREF(result);
+ result = NULL;
+ }
+ Py_DECREF(asciidig); /* drop our reference to the temporary string on both success and failure */
return result;
}
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index d3a2d1b..751da30 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6206,6 +6206,30 @@ PyObject *PyUnicode_Translate(PyObject *str,
return NULL;
}
+/* Return a new Unicode object holding a copy of the `length` code units at
+ `s`, in which every non-ASCII character for which Py_UNICODE_TODECIMAL()
+ yields a non-negative value is replaced by the ASCII digit '0' + value.
+ All other characters are copied unchanged. Returns NULL if the result
+ string cannot be allocated. */
+PyObject *
+PyUnicode_TransformDecimalToASCII(Py_UNICODE *s,
+ Py_ssize_t length)
+{
+ PyObject *result;
+ Py_UNICODE *p; /* write pointer into result */
+ Py_ssize_t i;
+ /* Allocate the new string and check for failure *before* touching its
+ buffer: PyUnicode_AS_UNICODE() on a NULL result would dereference NULL. */
+ result = (PyObject *)_PyUnicode_New(length);
+ if (result == NULL)
+ return NULL;
+ p = PyUnicode_AS_UNICODE(result);
+ Py_UNICODE_COPY(p, s, length);
+ /* Overwrite non-ASCII decimal digits with their ASCII equivalents */
+ for (i = 0; i < length; i++) {
+ Py_UNICODE ch = s[i];
+ if (ch > 127) {
+ int decimal = Py_UNICODE_TODECIMAL(ch);
+ if (decimal >= 0)
+ p[i] = '0' + decimal;
+ }
+ }
+ return result;
+}
/* --- Decimal Encoder ---------------------------------------------------- */
int PyUnicode_EncodeDecimal(Py_UNICODE *s,
@@ -8967,6 +8991,13 @@ unicode_freelistsize(PyUnicodeObject *self)
{
return PyLong_FromLong(numfree);
}
+
+static PyObject *
+unicode__decimal2ascii(PyObject *self)
+{
+ /* Debugging helper: pass this string's buffer and length to
+ PyUnicode_TransformDecimalToASCII() and return whatever it returns. */
+ Py_UNICODE *chars = PyUnicode_AS_UNICODE(self);
+ Py_ssize_t nchars = PyUnicode_GET_SIZE(self);
+ return PyUnicode_TransformDecimalToASCII(chars, nchars);
+}
#endif
PyDoc_STRVAR(startswith__doc__,
@@ -9108,7 +9139,6 @@ unicode_getnewargs(PyUnicodeObject *v)
return Py_BuildValue("(u#)", v->str, v->length);
}
-
static PyMethodDef unicode_methods[] = {
/* Order is according to common usage: often used methods should
@@ -9170,8 +9200,9 @@ static PyMethodDef unicode_methods[] = {
#endif
#if 0
- /* This one is just used for debugging the implementation. */
+ /* These methods are just used for debugging the implementation. */
{"freelistsize", (PyCFunction) unicode_freelistsize, METH_NOARGS},
+ {"_decimal2ascii", (PyCFunction) unicode__decimal2ascii, METH_NOARGS},
#endif
{"__getnewargs__", (PyCFunction)unicode_getnewargs, METH_NOARGS},