summary | refs | log | tree | commit | diff | stats
path: root/Objects
diff options
context:
space:
mode:
author Brett Cannon <brett@python.org> 2016-09-09 21:57:09 (GMT)
committer Brett Cannon <brett@python.org> 2016-09-09 21:57:09 (GMT)
commit a721abac299bb6529021000a71847486d531b41a (patch)
tree 8355a69b891cfcdaad8a5fd62870231b7f940696 /Objects
parent ee73a657455a908102379d3c9bc254676418e10c (diff)
download cpython-a721abac299bb6529021000a71847486d531b41a.zip
cpython-a721abac299bb6529021000a71847486d531b41a.tar.gz
cpython-a721abac299bb6529021000a71847486d531b41a.tar.bz2
Issue #26331: Implement the parsing part of PEP 515.
Thanks to Georg Brandl for the patch.
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/complexobject.c 63
-rw-r--r-- Objects/floatobject.c 59
-rw-r--r-- Objects/longobject.c 169
3 files changed, 205 insertions, 86 deletions
diff --git a/Objects/complexobject.c b/Objects/complexobject.c
index a5bfb66..a9d5ec3 100644
--- a/Objects/complexobject.c
+++ b/Objects/complexobject.c
@@ -759,29 +759,12 @@ static PyMemberDef complex_members[] = {
};
static PyObject *
-complex_subtype_from_string(PyTypeObject *type, PyObject *v)
+complex_from_string_inner(const char *s, Py_ssize_t len, void *type)
{
- const char *s, *start;
- char *end;
double x=0.0, y=0.0, z;
int got_bracket=0;
- PyObject *s_buffer = NULL;
- Py_ssize_t len;
-
- if (PyUnicode_Check(v)) {
- s_buffer = _PyUnicode_TransformDecimalAndSpaceToASCII(v);
- if (s_buffer == NULL)
- return NULL;
- s = PyUnicode_AsUTF8AndSize(s_buffer, &len);
- if (s == NULL)
- goto error;
- }
- else {
- PyErr_Format(PyExc_TypeError,
- "complex() argument must be a string or a number, not '%.200s'",
- Py_TYPE(v)->tp_name);
- return NULL;
- }
+ const char *start;
+ char *end;
/* position on first nonblank */
start = s;
@@ -822,7 +805,7 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
if (PyErr_ExceptionMatches(PyExc_ValueError))
PyErr_Clear();
else
- goto error;
+ return NULL;
}
if (end != s) {
/* all 4 forms starting with <float> land here */
@@ -835,7 +818,7 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
if (PyErr_ExceptionMatches(PyExc_ValueError))
PyErr_Clear();
else
- goto error;
+ return NULL;
}
if (end != s)
/* <float><signed-float>j */
@@ -890,18 +873,46 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
if (s-start != len)
goto parse_error;
- Py_XDECREF(s_buffer);
- return complex_subtype_from_doubles(type, x, y);
+ return complex_subtype_from_doubles((PyTypeObject *)type, x, y);
parse_error:
PyErr_SetString(PyExc_ValueError,
"complex() arg is a malformed string");
- error:
- Py_XDECREF(s_buffer);
return NULL;
}
static PyObject *
+complex_subtype_from_string(PyTypeObject *type, PyObject *v)
+{ /* Convert the str object v to a complex of the given type; any non-str v raises TypeError. Returns NULL with an exception set on failure. */
+ const char *s;
+ PyObject *s_buffer = NULL, *result = NULL;
+ Py_ssize_t len;
+
+ if (PyUnicode_Check(v)) {
+ s_buffer = _PyUnicode_TransformDecimalAndSpaceToASCII(v);
+ if (s_buffer == NULL) {
+ return NULL;
+ }
+ s = PyUnicode_AsUTF8AndSize(s_buffer, &len);
+ if (s == NULL) {
+ goto exit; /* s_buffer is held here, so leave through the release at exit; result is still NULL */
+ }
+ }
+ else {
+ PyErr_Format(PyExc_TypeError,
+ "complex() argument must be a string or a number, not '%.200s'",
+ Py_TYPE(v)->tp_name);
+ return NULL; /* nothing was acquired on this path */
+ }
+
+ result = _Py_string_to_number_with_underscores(s, len, "complex", v, type,
+ complex_from_string_inner); /* type is passed as the opaque argument, presumably forwarded to complex_from_string_inner, which casts its void * back to PyTypeObject * */
+ exit:
+ Py_DECREF(s_buffer); /* every path that reaches exit has a non-NULL s_buffer */
+ return result;
+}
+
+static PyObject *
complex_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
PyObject *r, *i, *tmp;
diff --git a/Objects/floatobject.c b/Objects/floatobject.c
index 0642b16..0f37618 100644
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -124,11 +124,43 @@ PyFloat_FromDouble(double fval)
return (PyObject *) op;
}
+static PyObject *
+float_from_string_inner(const char *s, Py_ssize_t len, void *obj)
+{ /* Parse the len bytes starting at s as a float object; obj is used only for the %R in the ValueError message. Returns NULL with an exception set on failure. */
+ double x;
+ const char *end;
+ const char *last = s + len; /* one past the final input byte */
+ /* strip leading whitespace */
+ while (s < last && Py_ISSPACE(*s)) {
+ s++;
+ }
+
+ while (s < last - 1 && Py_ISSPACE(last[-1])) { /* strip trailing whitespace, but only while more than one byte remains */
+ last--;
+ }
+
+ /* We don't care about overflow or underflow. If the platform
+ * supports them, infinities and signed zeroes (on underflow) are
+ * fine. */
+ x = PyOS_string_to_double(s, (char **)&end, NULL);
+ if (end != last) { /* the parse did not stop exactly at the stripped end of the input: reject it */
+ PyErr_Format(PyExc_ValueError,
+ "could not convert string to float: "
+ "%R", obj);
+ return NULL;
+ }
+ else if (x == -1.0 && PyErr_Occurred()) { /* -1.0 with an exception pending: propagate the conversion error */
+ return NULL;
+ }
+ else {
+ return PyFloat_FromDouble(x);
+ }
+}
+
PyObject *
PyFloat_FromString(PyObject *v)
{
- const char *s, *last, *end;
- double x;
+ const char *s;
PyObject *s_buffer = NULL;
Py_ssize_t len;
Py_buffer view = {NULL, NULL};
@@ -169,27 +201,8 @@ PyFloat_FromString(PyObject *v)
Py_TYPE(v)->tp_name);
return NULL;
}
- last = s + len;
- /* strip space */
- while (s < last && Py_ISSPACE(*s))
- s++;
- while (s < last - 1 && Py_ISSPACE(last[-1]))
- last--;
- /* We don't care about overflow or underflow. If the platform
- * supports them, infinities and signed zeroes (on underflow) are
- * fine. */
- x = PyOS_string_to_double(s, (char **)&end, NULL);
- if (end != last) {
- PyErr_Format(PyExc_ValueError,
- "could not convert string to float: "
- "%R", v);
- result = NULL;
- }
- else if (x == -1.0 && PyErr_Occurred())
- result = NULL;
- else
- result = PyFloat_FromDouble(x);
-
+ result = _Py_string_to_number_with_underscores(s, len, "float", v, v,
+ float_from_string_inner);
PyBuffer_Release(&view);
Py_XDECREF(s_buffer);
return result;
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 740b7f5..bbf7e71 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -2004,12 +2004,18 @@ unsigned char _PyLong_DigitValue[256] = {
* non-digit (which may be *str!). A normalized int is returned.
* The point to this routine is that it takes time linear in the number of
* string characters.
+ *
+ * Return values:
+ * -1 on syntax error (exception needs to be set, *res is untouched)
+ * 0 else (exception may be set, in that case *res is set to NULL)
*/
-static PyLongObject *
-long_from_binary_base(const char **str, int base)
+static int
+long_from_binary_base(const char **str, int base, PyLongObject **res)
{
const char *p = *str;
const char *start = p;
+ char prev = 0;
+ int digits = 0;
int bits_per_char;
Py_ssize_t n;
PyLongObject *z;
@@ -2019,23 +2025,43 @@ long_from_binary_base(const char **str, int base)
assert(base >= 2 && base <= 32 && (base & (base - 1)) == 0);
n = base;
- for (bits_per_char = -1; n; ++bits_per_char)
+ for (bits_per_char = -1; n; ++bits_per_char) {
n >>= 1;
- /* n <- total # of bits needed, while setting p to end-of-string */
- while (_PyLong_DigitValue[Py_CHARMASK(*p)] < base)
+ }
+ /* count digits and set p to end-of-string */
+ while (_PyLong_DigitValue[Py_CHARMASK(*p)] < base || *p == '_') {
+ if (*p == '_') {
+ if (prev == '_') {
+ *str = p - 1;
+ return -1;
+ }
+ } else {
+ ++digits;
+ }
+ prev = *p;
++p;
+ }
+ if (prev == '_') {
+ /* Trailing underscore not allowed. */
+ *str = p - 1;
+ return -1;
+ }
+
*str = p;
/* n <- # of Python digits needed, = ceiling(n/PyLong_SHIFT). */
- n = (p - start) * bits_per_char + PyLong_SHIFT - 1;
+ n = digits * bits_per_char + PyLong_SHIFT - 1;
if (n / bits_per_char < p - start) {
PyErr_SetString(PyExc_ValueError,
"int string too large to convert");
- return NULL;
+ *res = NULL;
+ return 0;
}
n = n / PyLong_SHIFT;
z = _PyLong_New(n);
- if (z == NULL)
- return NULL;
+ if (z == NULL) {
+ *res = NULL;
+ return 0;
+ }
/* Read string from right, and fill in int from left; i.e.,
* from least to most significant in both.
*/
@@ -2043,7 +2069,11 @@ long_from_binary_base(const char **str, int base)
bits_in_accum = 0;
pdigit = z->ob_digit;
while (--p >= start) {
- int k = (int)_PyLong_DigitValue[Py_CHARMASK(*p)];
+ int k;
+ if (*p == '_') {
+ continue;
+ }
+ k = (int)_PyLong_DigitValue[Py_CHARMASK(*p)];
assert(k >= 0 && k < base);
accum |= (twodigits)k << bits_in_accum;
bits_in_accum += bits_per_char;
@@ -2062,7 +2092,8 @@ long_from_binary_base(const char **str, int base)
}
while (pdigit - z->ob_digit < n)
*pdigit++ = 0;
- return long_normalize(z);
+ *res = long_normalize(z);
+ return 0;
}
/* Parses an int from a bytestring. Leading and trailing whitespace will be
@@ -2087,23 +2118,29 @@ PyLong_FromString(const char *str, char **pend, int base)
"int() arg 2 must be >= 2 and <= 36");
return NULL;
}
- while (*str != '\0' && Py_ISSPACE(Py_CHARMASK(*str)))
+ while (*str != '\0' && Py_ISSPACE(Py_CHARMASK(*str))) {
str++;
- if (*str == '+')
+ }
+ if (*str == '+') {
++str;
+ }
else if (*str == '-') {
++str;
sign = -1;
}
if (base == 0) {
- if (str[0] != '0')
+ if (str[0] != '0') {
base = 10;
- else if (str[1] == 'x' || str[1] == 'X')
+ }
+ else if (str[1] == 'x' || str[1] == 'X') {
base = 16;
- else if (str[1] == 'o' || str[1] == 'O')
+ }
+ else if (str[1] == 'o' || str[1] == 'O') {
base = 8;
- else if (str[1] == 'b' || str[1] == 'B')
+ }
+ else if (str[1] == 'b' || str[1] == 'B') {
base = 2;
+ }
else {
/* "old" (C-style) octal literal, now invalid.
it might still be zero though */
@@ -2114,12 +2151,26 @@ PyLong_FromString(const char *str, char **pend, int base)
if (str[0] == '0' &&
((base == 16 && (str[1] == 'x' || str[1] == 'X')) ||
(base == 8 && (str[1] == 'o' || str[1] == 'O')) ||
- (base == 2 && (str[1] == 'b' || str[1] == 'B'))))
+ (base == 2 && (str[1] == 'b' || str[1] == 'B')))) {
str += 2;
+ /* One underscore allowed here. */
+ if (*str == '_') {
+ ++str;
+ }
+ }
+ if (str[0] == '_') {
+ /* May not start with underscores. */
+ goto onError;
+ }
start = str;
- if ((base & (base - 1)) == 0)
- z = long_from_binary_base(&str, base);
+ if ((base & (base - 1)) == 0) {
+ int res = long_from_binary_base(&str, base, &z);
+ if (res < 0) {
+ /* Syntax error. */
+ goto onError;
+ }
+ }
else {
/***
Binary bases can be converted in time linear in the number of digits, because
@@ -2208,11 +2259,13 @@ digit beyond the first.
***/
twodigits c; /* current input character */
Py_ssize_t size_z;
+ int digits = 0;
int i;
int convwidth;
twodigits convmultmax, convmult;
digit *pz, *pzstop;
- const char* scan;
+ const char *scan, *lastdigit;
+ char prev = 0;
static double log_base_BASE[37] = {0.0e0,};
static int convwidth_base[37] = {0,};
@@ -2226,8 +2279,9 @@ digit beyond the first.
log((double)PyLong_BASE));
for (;;) {
twodigits next = convmax * base;
- if (next > PyLong_BASE)
+ if (next > PyLong_BASE) {
break;
+ }
convmax = next;
++i;
}
@@ -2238,21 +2292,43 @@ digit beyond the first.
/* Find length of the string of numeric characters. */
scan = str;
- while (_PyLong_DigitValue[Py_CHARMASK(*scan)] < base)
+ lastdigit = str;
+
+ while (_PyLong_DigitValue[Py_CHARMASK(*scan)] < base || *scan == '_') {
+ if (*scan == '_') {
+ if (prev == '_') {
+ /* Only one underscore allowed. */
+ str = lastdigit + 1;
+ goto onError;
+ }
+ }
+ else {
+ ++digits;
+ lastdigit = scan;
+ }
+ prev = *scan;
++scan;
+ }
+ if (prev == '_') {
+ /* Trailing underscore not allowed. */
+ /* Set error pointer to first underscore. */
+ str = lastdigit + 1;
+ goto onError;
+ }
/* Create an int object that can contain the largest possible
* integer with this base and length. Note that there's no
* need to initialize z->ob_digit -- no slot is read up before
* being stored into.
*/
- size_z = (Py_ssize_t)((scan - str) * log_base_BASE[base]) + 1;
+ size_z = (Py_ssize_t)(digits * log_base_BASE[base]) + 1;
/* Uncomment next line to test exceedingly rare copy code */
/* size_z = 1; */
assert(size_z > 0);
z = _PyLong_New(size_z);
- if (z == NULL)
+ if (z == NULL) {
return NULL;
+ }
Py_SIZE(z) = 0;
/* `convwidth` consecutive input digits are treated as a single
@@ -2263,9 +2339,17 @@ digit beyond the first.
/* Work ;-) */
while (str < scan) {
+ if (*str == '_') {
+ str++;
+ continue;
+ }
/* grab up to convwidth digits from the input string */
c = (digit)_PyLong_DigitValue[Py_CHARMASK(*str++)];
- for (i = 1; i < convwidth && str != scan; ++i, ++str) {
+ for (i = 1; i < convwidth && str != scan; ++str) {
+ if (*str == '_') {
+ continue;
+ }
+ i++;
c = (twodigits)(c * base +
(int)_PyLong_DigitValue[Py_CHARMASK(*str)]);
assert(c < PyLong_BASE);
@@ -2277,8 +2361,9 @@ digit beyond the first.
*/
if (i != convwidth) {
convmult = base;
- for ( ; i > 1; --i)
+ for ( ; i > 1; --i) {
convmult *= base;
+ }
}
/* Multiply z by convmult, and add c. */
@@ -2316,41 +2401,51 @@ digit beyond the first.
}
}
}
- if (z == NULL)
+ if (z == NULL) {
return NULL;
+ }
if (error_if_nonzero) {
/* reset the base to 0, else the exception message
doesn't make too much sense */
base = 0;
- if (Py_SIZE(z) != 0)
+ if (Py_SIZE(z) != 0) {
goto onError;
+ }
/* there might still be other problems, therefore base
remains zero here for the same reason */
}
- if (str == start)
+ if (str == start) {
goto onError;
- if (sign < 0)
+ }
+ if (sign < 0) {
Py_SIZE(z) = -(Py_SIZE(z));
- while (*str && Py_ISSPACE(Py_CHARMASK(*str)))
+ }
+ while (*str && Py_ISSPACE(Py_CHARMASK(*str))) {
str++;
- if (*str != '\0')
+ }
+ if (*str != '\0') {
goto onError;
+ }
long_normalize(z);
z = maybe_small_long(z);
- if (z == NULL)
+ if (z == NULL) {
return NULL;
- if (pend != NULL)
+ }
+ if (pend != NULL) {
*pend = (char *)str;
+ }
return (PyObject *) z;
onError:
- if (pend != NULL)
+ if (pend != NULL) {
*pend = (char *)str;
+ }
Py_XDECREF(z);
slen = strlen(orig_str) < 200 ? strlen(orig_str) : 200;
strobj = PyUnicode_FromStringAndSize(orig_str, slen);
- if (strobj == NULL)
+ if (strobj == NULL) {
return NULL;
+ }
PyErr_Format(PyExc_ValueError,
"invalid literal for int() with base %d: %.200R",
base, strobj);