Issue #26331: Implement the parsing part of PEP 515.

Thanks to Georg Brandl for the patch.
author: Brett Cannon <brett@python.org> 2016-09-09 21:57:09 (GMT)
committer: Brett Cannon <brett@python.org> 2016-09-09 21:57:09 (GMT)
commit: a721abac299bb6529021000a71847486d531b41a (patch)
tree: 8355a69b891cfcdaad8a5fd62870231b7f940696 /Objects
parent: ee73a657455a908102379d3c9bc254676418e10c (diff)
download: cpython-a721abac299bb6529021000a71847486d531b41a.zip
cpython-a721abac299bb6529021000a71847486d531b41a.tar.gz
cpython-a721abac299bb6529021000a71847486d531b41a.tar.bz2
3 files changed, 205 insertions, 86 deletions
diff --git a/Objects/complexobject.c b/Objects/complexobject.c
index a5bfb66..a9d5ec3 100644
--- a/Objects/complexobject.c
+++ b/Objects/complexobject.c
@@ -759,29 +759,12 @@ static PyMemberDef complex_members[] = {
 };
 
 static PyObject *
-complex_subtype_from_string(PyTypeObject *type, PyObject *v)
+complex_from_string_inner(const char *s, Py_ssize_t len, void *type)
 {
-    const char *s, *start;
-    char *end;
     double x=0.0, y=0.0, z;
     int got_bracket=0;
-    PyObject *s_buffer = NULL;
-    Py_ssize_t len;
-
-    if (PyUnicode_Check(v)) {
-        s_buffer = _PyUnicode_TransformDecimalAndSpaceToASCII(v);
-        if (s_buffer == NULL)
-            return NULL;
-        s = PyUnicode_AsUTF8AndSize(s_buffer, &len);
-        if (s == NULL)
-            goto error;
-    }
-    else {
-        PyErr_Format(PyExc_TypeError,
-            "complex() argument must be a string or a number, not '%.200s'",
-            Py_TYPE(v)->tp_name);
-        return NULL;
-    }
+    const char *start;
+    char *end;
 
     /* position on first nonblank */
     start = s;
@@ -822,7 +805,7 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
         if (PyErr_ExceptionMatches(PyExc_ValueError))
             PyErr_Clear();
         else
-            goto error;
+            return NULL;
     }
     if (end != s) {
         /* all 4 forms starting with <float> land here */
@@ -835,7 +818,7 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
                 if (PyErr_ExceptionMatches(PyExc_ValueError))
                     PyErr_Clear();
                 else
-                    goto error;
+                    return NULL;
             }
             if (end != s)
                 /* <float><signed-float>j */
@@ -890,18 +873,46 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
     if (s-start != len)
         goto parse_error;
 
-    Py_XDECREF(s_buffer);
-    return complex_subtype_from_doubles(type, x, y);
+    return complex_subtype_from_doubles((PyTypeObject *)type, x, y);
 
   parse_error:
     PyErr_SetString(PyExc_ValueError,
                     "complex() arg is a malformed string");
-  error:
-    Py_XDECREF(s_buffer);
     return NULL;
 }
 
 static PyObject *
+complex_subtype_from_string(PyTypeObject *type, PyObject *v)
+{
+    const char *s;
+    PyObject *s_buffer = NULL, *result = NULL;
+    Py_ssize_t len;
+
+    if (PyUnicode_Check(v)) {
+        s_buffer = _PyUnicode_TransformDecimalAndSpaceToASCII(v);
+        if (s_buffer == NULL) {
+            return NULL;
+        }
+        s = PyUnicode_AsUTF8AndSize(s_buffer, &len);
+        if (s == NULL) {
+            goto exit;
+        }
+    }
+    else {
+        PyErr_Format(PyExc_TypeError,
+            "complex() argument must be a string or a number, not '%.200s'",
+            Py_TYPE(v)->tp_name);
+        return NULL;
+    }
+
+    result = _Py_string_to_number_with_underscores(s, len, "complex", v, type,
+                                                   complex_from_string_inner);
+  exit:
+    Py_DECREF(s_buffer);
+    return result;
+}
+
+static PyObject *
 complex_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {
     PyObject *r, *i, *tmp;
diff --git a/Objects/floatobject.c b/Objects/floatobject.c
index 0642b16..0f37618 100644
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -124,11 +124,43 @@ PyFloat_FromDouble(double fval)
     return (PyObject *) op;
 }
 
+static PyObject *
+float_from_string_inner(const char *s, Py_ssize_t len, void *obj)
+{
+    double x;
+    const char *end;
+    const char *last = s + len;
+    /* strip space */
+    while (s < last && Py_ISSPACE(*s)) {
+        s++;
+    }
+
+    while (s < last - 1 && Py_ISSPACE(last[-1])) {
+        last--;
+    }
+
+    /* We don't care about overflow or underflow.  If the platform
+     * supports them, infinities and signed zeroes (on underflow) are
+     * fine. */
+    x = PyOS_string_to_double(s, (char **)&end, NULL);
+    if (end != last) {
+        PyErr_Format(PyExc_ValueError,
+                     "could not convert string to float: "
+                     "%R", obj);
+        return NULL;
+    }
+    else if (x == -1.0 && PyErr_Occurred()) {
+        return NULL;
+    }
+    else {
+        return PyFloat_FromDouble(x);
+    }
+}
+
 PyObject *
 PyFloat_FromString(PyObject *v)
 {
-    const char *s, *last, *end;
-    double x;
+    const char *s;
     PyObject *s_buffer = NULL;
     Py_ssize_t len;
     Py_buffer view = {NULL, NULL};
@@ -169,27 +201,8 @@ PyFloat_FromString(PyObject *v)
             Py_TYPE(v)->tp_name);
         return NULL;
     }
-    last = s + len;
-    /* strip space */
-    while (s < last && Py_ISSPACE(*s))
-        s++;
-    while (s < last - 1 && Py_ISSPACE(last[-1]))
-        last--;
-    /* We don't care about overflow or underflow.  If the platform
-     * supports them, infinities and signed zeroes (on underflow) are
-     * fine. */
-    x = PyOS_string_to_double(s, (char **)&end, NULL);
-    if (end != last) {
-        PyErr_Format(PyExc_ValueError,
-                     "could not convert string to float: "
-                     "%R", v);
-        result = NULL;
-    }
-    else if (x == -1.0 && PyErr_Occurred())
-        result = NULL;
-    else
-        result = PyFloat_FromDouble(x);
-
+    result = _Py_string_to_number_with_underscores(s, len, "float", v, v,
+                                                   float_from_string_inner);
     PyBuffer_Release(&view);
     Py_XDECREF(s_buffer);
     return result;
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 740b7f5..bbf7e71 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -2004,12 +2004,18 @@ unsigned char _PyLong_DigitValue[256] = {
  * non-digit (which may be *str!).  A normalized int is returned.
  * The point to this routine is that it takes time linear in the number of
  * string characters.
+ *
+ * Return values:
+ *   -1 on syntax error (exception needs to be set, *res is untouched)
+ *   0 else (exception may be set, in that case *res is set to NULL)
  */
-static PyLongObject *
-long_from_binary_base(const char **str, int base)
+static int
+long_from_binary_base(const char **str, int base, PyLongObject **res)
 {
     const char *p = *str;
     const char *start = p;
+    char prev = 0;
+    int digits = 0;
     int bits_per_char;
     Py_ssize_t n;
     PyLongObject *z;
@@ -2019,23 +2025,43 @@ long_from_binary_base(const char **str, int base)
 
     assert(base >= 2 && base <= 32 && (base & (base - 1)) == 0);
     n = base;
-    for (bits_per_char = -1; n; ++bits_per_char)
+    for (bits_per_char = -1; n; ++bits_per_char) {
         n >>= 1;
-    /* n <- total # of bits needed, while setting p to end-of-string */
-    while (_PyLong_DigitValue[Py_CHARMASK(*p)] < base)
+    }
+    /* count digits and set p to end-of-string */
+    while (_PyLong_DigitValue[Py_CHARMASK(*p)] < base || *p == '_') {
+        if (*p == '_') {
+            if (prev == '_') {
+                *str = p - 1;
+                return -1;
+            }
+        } else {
+            ++digits;
+        }
+        prev = *p;
         ++p;
+    }
+    if (prev == '_') {
+        /* Trailing underscore not allowed. */
+        *str = p - 1;
+        return -1;
+    }
+
     *str = p;
     /* n <- # of Python digits needed, = ceiling(n/PyLong_SHIFT). */
-    n = (p - start) * bits_per_char + PyLong_SHIFT - 1;
+    n = digits * bits_per_char + PyLong_SHIFT - 1;
     if (n / bits_per_char < p - start) {
         PyErr_SetString(PyExc_ValueError,
                         "int string too large to convert");
-        return NULL;
+        *res = NULL;
+        return 0;
     }
     n = n / PyLong_SHIFT;
     z = _PyLong_New(n);
-    if (z == NULL)
-        return NULL;
+    if (z == NULL) {
+        *res = NULL;
+        return 0;
+    }
     /* Read string from right, and fill in int from left; i.e.,
      * from least to most significant in both.
      */
@@ -2043,7 +2069,11 @@ long_from_binary_base(const char **str, int base)
     bits_in_accum = 0;
     pdigit = z->ob_digit;
     while (--p >= start) {
-        int k = (int)_PyLong_DigitValue[Py_CHARMASK(*p)];
+        int k;
+        if (*p == '_') {
+            continue;
+        }
+        k = (int)_PyLong_DigitValue[Py_CHARMASK(*p)];
         assert(k >= 0 && k < base);
         accum |= (twodigits)k << bits_in_accum;
         bits_in_accum += bits_per_char;
@@ -2062,7 +2092,8 @@ long_from_binary_base(const char **str, int base)
     }
     while (pdigit - z->ob_digit < n)
         *pdigit++ = 0;
-    return long_normalize(z);
+    *res = long_normalize(z);
+    return 0;
 }
 
 /* Parses an int from a bytestring. Leading and trailing whitespace will be
@@ -2087,23 +2118,29 @@ PyLong_FromString(const char *str, char **pend, int base)
                         "int() arg 2 must be >= 2 and <= 36");
         return NULL;
     }
-    while (*str != '\0' && Py_ISSPACE(Py_CHARMASK(*str)))
+    while (*str != '\0' && Py_ISSPACE(Py_CHARMASK(*str))) {
         str++;
-    if (*str == '+')
+    }
+    if (*str == '+') {
         ++str;
+    }
     else if (*str == '-') {
         ++str;
         sign = -1;
     }
     if (base == 0) {
-        if (str[0] != '0')
+        if (str[0] != '0') {
             base = 10;
-        else if (str[1] == 'x' || str[1] == 'X')
+        }
+        else if (str[1] == 'x' || str[1] == 'X') {
             base = 16;
-        else if (str[1] == 'o' || str[1] == 'O')
+        }
+        else if (str[1] == 'o' || str[1] == 'O') {
             base = 8;
-        else if (str[1] == 'b' || str[1] == 'B')
+        }
+        else if (str[1] == 'b' || str[1] == 'B') {
             base = 2;
+        }
         else {
             /* "old" (C-style) octal literal, now invalid.
                it might still be zero though */
@@ -2114,12 +2151,26 @@ PyLong_FromString(const char *str, char **pend, int base)
     if (str[0] == '0' &&
         ((base == 16 && (str[1] == 'x' || str[1] == 'X')) ||
          (base == 8  && (str[1] == 'o' || str[1] == 'O')) ||
-         (base == 2  && (str[1] == 'b' || str[1] == 'B'))))
+         (base == 2  && (str[1] == 'b' || str[1] == 'B')))) {
         str += 2;
+        /* One underscore allowed here. */
+        if (*str == '_') {
+            ++str;
+        }
+    }
+    if (str[0] == '_') {
+	    /* May not start with underscores. */
+	    goto onError;
+    }
 
     start = str;
-    if ((base & (base - 1)) == 0)
-        z = long_from_binary_base(&str, base);
+    if ((base & (base - 1)) == 0) {
+        int res = long_from_binary_base(&str, base, &z);
+        if (res < 0) {
+            /* Syntax error. */
+            goto onError;
+        }
+    }
     else {
 /***
 Binary bases can be converted in time linear in the number of digits, because
@@ -2208,11 +2259,13 @@ digit beyond the first.
 ***/
         twodigits c;           /* current input character */
         Py_ssize_t size_z;
+        int digits = 0;
         int i;
         int convwidth;
         twodigits convmultmax, convmult;
         digit *pz, *pzstop;
-        const char* scan;
+        const char *scan, *lastdigit;
+        char prev = 0;
 
         static double log_base_BASE[37] = {0.0e0,};
         static int convwidth_base[37] = {0,};
@@ -2226,8 +2279,9 @@ digit beyond the first.
                                    log((double)PyLong_BASE));
             for (;;) {
                 twodigits next = convmax * base;
-                if (next > PyLong_BASE)
+                if (next > PyLong_BASE) {
                     break;
+                }
                 convmax = next;
                 ++i;
             }
@@ -2238,21 +2292,43 @@ digit beyond the first.
 
         /* Find length of the string of numeric characters. */
         scan = str;
-        while (_PyLong_DigitValue[Py_CHARMASK(*scan)] < base)
+        lastdigit = str;
+
+        while (_PyLong_DigitValue[Py_CHARMASK(*scan)] < base || *scan == '_') {
+            if (*scan == '_') {
+                if (prev == '_') {
+                    /* Only one underscore allowed. */
+                    str = lastdigit + 1;
+                    goto onError;
+                }
+            }
+            else {
+                ++digits;
+                lastdigit = scan;
+            }
+            prev = *scan;
             ++scan;
+        }
+        if (prev == '_') {
+            /* Trailing underscore not allowed. */
+            /* Set error pointer to first underscore. */
+            str = lastdigit + 1;
+            goto onError;
+        }
 
         /* Create an int object that can contain the largest possible
          * integer with this base and length.  Note that there's no
          * need to initialize z->ob_digit -- no slot is read up before
          * being stored into.
          */
-        size_z = (Py_ssize_t)((scan - str) * log_base_BASE[base]) + 1;
+        size_z = (Py_ssize_t)(digits * log_base_BASE[base]) + 1;
         /* Uncomment next line to test exceedingly rare copy code */
         /* size_z = 1; */
         assert(size_z > 0);
         z = _PyLong_New(size_z);
-        if (z == NULL)
+        if (z == NULL) {
             return NULL;
+        }
         Py_SIZE(z) = 0;
 
         /* `convwidth` consecutive input digits are treated as a single
@@ -2263,9 +2339,17 @@ digit beyond the first.
 
         /* Work ;-) */
         while (str < scan) {
+            if (*str == '_') {
+                str++;
+                continue;
+            }
             /* grab up to convwidth digits from the input string */
             c = (digit)_PyLong_DigitValue[Py_CHARMASK(*str++)];
-            for (i = 1; i < convwidth && str != scan; ++i, ++str) {
+            for (i = 1; i < convwidth && str != scan; ++str) {
+                if (*str == '_') {
+                    continue;
+                }
+                i++;
                 c = (twodigits)(c *  base +
                                 (int)_PyLong_DigitValue[Py_CHARMASK(*str)]);
                 assert(c < PyLong_BASE);
@@ -2277,8 +2361,9 @@ digit beyond the first.
              */
             if (i != convwidth) {
                 convmult = base;
-                for ( ; i > 1; --i)
+                for ( ; i > 1; --i) {
                     convmult *= base;
+                }
             }
 
             /* Multiply z by convmult, and add c. */
@@ -2316,41 +2401,51 @@ digit beyond the first.
             }
         }
     }
-    if (z == NULL)
+    if (z == NULL) {
         return NULL;
+    }
     if (error_if_nonzero) {
         /* reset the base to 0, else the exception message
            doesn't make too much sense */
         base = 0;
-        if (Py_SIZE(z) != 0)
+        if (Py_SIZE(z) != 0) {
             goto onError;
+        }
         /* there might still be other problems, therefore base
            remains zero here for the same reason */
     }
-    if (str == start)
+    if (str == start) {
         goto onError;
-    if (sign < 0)
+    }
+    if (sign < 0) {
         Py_SIZE(z) = -(Py_SIZE(z));
-    while (*str && Py_ISSPACE(Py_CHARMASK(*str)))
+    }
+    while (*str && Py_ISSPACE(Py_CHARMASK(*str))) {
         str++;
-    if (*str != '\0')
+    }
+    if (*str != '\0') {
         goto onError;
+    }
     long_normalize(z);
     z = maybe_small_long(z);
-    if (z == NULL)
+    if (z == NULL) {
         return NULL;
-    if (pend != NULL)
+    }
+    if (pend != NULL) {
         *pend = (char *)str;
+    }
     return (PyObject *) z;
 
   onError:
-    if (pend != NULL)
+    if (pend != NULL) {
         *pend = (char *)str;
+    }
     Py_XDECREF(z);
     slen = strlen(orig_str) < 200 ? strlen(orig_str) : 200;
     strobj = PyUnicode_FromStringAndSize(orig_str, slen);
-    if (strobj == NULL)
+    if (strobj == NULL) {
         return NULL;
+    }
     PyErr_Format(PyExc_ValueError,
                  "invalid literal for int() with base %d: %.200R",
                  base, strobj);
author	Brett Cannon <brett@python.org>	2016-09-09 21:57:09 (GMT)
committer	Brett Cannon <brett@python.org>	2016-09-09 21:57:09 (GMT)
commit	a721abac299bb6529021000a71847486d531b41a (patch)
tree	8355a69b891cfcdaad8a5fd62870231b7f940696 /Objects
parent	ee73a657455a908102379d3c9bc254676418e10c (diff)
download	cpython-a721abac299bb6529021000a71847486d531b41a.zip cpython-a721abac299bb6529021000a71847486d531b41a.tar.gz cpython-a721abac299bb6529021000a71847486d531b41a.tar.bz2