diff options
Diffstat (limited to 'Python/pystrtod.c')
| -rw-r--r-- | Python/pystrtod.c | 616 | 
1 files changed, 429 insertions, 187 deletions
| diff --git a/Python/pystrtod.c b/Python/pystrtod.c index 6c19b45..9004cf4 100644 --- a/Python/pystrtod.c +++ b/Python/pystrtod.c @@ -5,9 +5,8 @@  /* ascii character tests (as opposed to locale tests) */  #define ISSPACE(c)  ((c) == ' ' || (c) == '\f' || (c) == '\n' || \ -                     (c) == '\r' || (c) == '\t' || (c) == '\v') +             (c) == '\r' || (c) == '\t' || (c) == '\v')  #define ISDIGIT(c)  ((c) >= '0' && (c) <= '9') -#define ISXDIGIT(c) (ISDIGIT(c) || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))  /** @@ -15,7 +14,7 @@   * @nptr:    the string to convert to a numeric value.   * @endptr:  if non-%NULL, it returns the character after   *           the last character used in the conversion. - *  + *   * Converts a string to a #gdouble value.   * This function behaves like the standard strtod() function   * does in the C locale. It does this without actually @@ -32,7 +31,7 @@   * stored in %errno. If the correct value would cause underflow,   * zero is returned and %ERANGE is stored in %errno.   * If memory allocation fails, %ENOMEM is stored in %errno. - *  + *   * This function resets %errno before calling strtod() so that   * you can reliably detect overflow and underflow.   * @@ -41,208 +40,451 @@  double  PyOS_ascii_strtod(const char *nptr, char **endptr)  { -	char *fail_pos; -	double val = -1.0; -	struct lconv *locale_data; -	const char *decimal_point; -	size_t decimal_point_len; -	const char *p, *decimal_point_pos; -	const char *end = NULL; /* Silence gcc */ - -	assert(nptr != NULL); - -	fail_pos = NULL; - -	locale_data = localeconv(); -	decimal_point = locale_data->decimal_point; -	decimal_point_len = strlen(decimal_point); - -	assert(decimal_point_len != 0); - -	decimal_point_pos = NULL; -	if (decimal_point[0] != '.' ||  -	    decimal_point[1] != 0) -	{ -		p = nptr; -		  /* Skip leading space */ -		while (ISSPACE(*p)) -			p++; - -		  /* Skip leading optional sign */ -		if (*p == '+' || *p == '-') -			p++; - -		while (ISDIGIT(*p)) -			p++; - -		if (*p == '.') -		{ -			decimal_point_pos = p++; - -			while (ISDIGIT(*p)) -				p++; - -			if (*p == 'e' || *p == 'E') -				p++; -			if (*p == '+' || *p == '-') -				p++; -			while (ISDIGIT(*p)) -				p++; -			end = p; -		} -		else if (strncmp(p, decimal_point, decimal_point_len) == 0) -		{ -			/* Python bug #1417699 */ -			*endptr = (char*)nptr; -			errno = EINVAL; -			return val; -		} -		/* For the other cases, we need not convert the decimal point */ -	} - -	/* Set errno to zero, so that we can distinguish zero results -	   and underflows */ -	errno = 0; - -	if (decimal_point_pos) -	{ -		char *copy, *c; - -		/* We need to convert the '.' to the locale specific decimal point */ -		copy = (char *)PyMem_MALLOC(end - nptr + 1 + decimal_point_len); -		if (copy == NULL) { -			if (endptr) -				*endptr = (char *)nptr; -			errno = ENOMEM; -			return val; -		} - -		c = copy; -		memcpy(c, nptr, decimal_point_pos - nptr); -		c += decimal_point_pos - nptr; -		memcpy(c, decimal_point, decimal_point_len); -		c += decimal_point_len; -		memcpy(c, decimal_point_pos + 1, end - (decimal_point_pos + 1)); -		c += end - (decimal_point_pos + 1); -		*c = 0; - -		val = strtod(copy, &fail_pos); - -		if (fail_pos) -		{ -			if (fail_pos > decimal_point_pos) -				fail_pos = (char *)nptr + (fail_pos - copy) - (decimal_point_len - 1); -			else -				fail_pos = (char *)nptr + (fail_pos - copy); -		} - -		PyMem_FREE(copy); - -	} -	else { -		unsigned i = 0; -		if (nptr[i] == '-') -			i++; -		if (nptr[i] == '0' && (nptr[i+1] == 'x' || nptr[i+1] == 'X')) -			fail_pos = (char*)nptr; -		else -			val = strtod(nptr, &fail_pos); -	} - -	if (endptr) -		*endptr = fail_pos; - -	return val; +    char *fail_pos; +    double val = -1.0; +    struct lconv *locale_data; +    const char *decimal_point; +    size_t decimal_point_len; +    const char *p, *decimal_point_pos; +    const char *end = NULL; /* Silence gcc */ +    const char *digits_pos = NULL; +    int negate = 0; + +    assert(nptr != NULL); + +    fail_pos = NULL; + +    locale_data = localeconv(); +    decimal_point = locale_data->decimal_point; +    decimal_point_len = strlen(decimal_point); + +    assert(decimal_point_len != 0); + +    decimal_point_pos = NULL; + +    /* We process any leading whitespace and the optional sign manually, +       then pass the remainder to the system strtod.  This ensures that +       the result of an underflow has the correct sign. (bug #1725)  */ + +    p = nptr; +    /* Skip leading space */ +    while (ISSPACE(*p)) +        p++; + +    /* Process leading sign, if present */ +    if (*p == '-') { +        negate = 1; +        p++; +    } else if (*p == '+') { +        p++; +    } + +    /* What's left should begin with a digit, a decimal point, or one of +       the letters i, I, n, N. It should not begin with 0x or 0X */ +    if ((!ISDIGIT(*p) && +         *p != '.' && *p != 'i' && *p != 'I' && *p != 'n' && *p != 'N') +        || +        (*p == '0' && (p[1] == 'x' || p[1] == 'X'))) +    { +        if (endptr) +            *endptr = (char*)nptr; +        errno = EINVAL; +        return val; +    } +    digits_pos = p; + +    if (decimal_point[0] != '.' || +        decimal_point[1] != 0) +    { +        while (ISDIGIT(*p)) +            p++; + +        if (*p == '.') +        { +            decimal_point_pos = p++; + +            while (ISDIGIT(*p)) +                p++; + +            if (*p == 'e' || *p == 'E') +                p++; +            if (*p == '+' || *p == '-') +                p++; +            while (ISDIGIT(*p)) +                p++; +            end = p; +        } +        else if (strncmp(p, decimal_point, decimal_point_len) == 0) +        { +            /* Python bug #1417699 */ +            if (endptr) +                *endptr = (char*)nptr; +            errno = EINVAL; +            return val; +        } +        /* For the other cases, we need not convert the decimal +           point */ +    } + +    /* Set errno to zero, so that we can distinguish zero results +       and underflows */ +    errno = 0; + +    if (decimal_point_pos) +    { +        char *copy, *c; + +        /* We need to convert the '.' to the locale specific decimal +           point */ +        copy = (char *)PyMem_MALLOC(end - digits_pos + +                                    1 + decimal_point_len); +        if (copy == NULL) { +            if (endptr) +                *endptr = (char *)nptr; +            errno = ENOMEM; +            return val; +        } + +        c = copy; +        memcpy(c, digits_pos, decimal_point_pos - digits_pos); +        c += decimal_point_pos - digits_pos; +        memcpy(c, decimal_point, decimal_point_len); +        c += decimal_point_len; +        memcpy(c, decimal_point_pos + 1, +               end - (decimal_point_pos + 1)); +        c += end - (decimal_point_pos + 1); +        *c = 0; + +        val = strtod(copy, &fail_pos); + +        if (fail_pos) +        { +            if (fail_pos > decimal_point_pos) +                fail_pos = (char *)digits_pos + +                    (fail_pos - copy) - +                    (decimal_point_len - 1); +            else +                fail_pos = (char *)digits_pos + +                    (fail_pos - copy); +        } + +        PyMem_FREE(copy); + +    } +    else { +        val = strtod(digits_pos, &fail_pos); +    } + +    if (fail_pos == digits_pos) +        fail_pos = (char *)nptr; + +    if (negate && fail_pos != nptr) +        val = -val; + +    if (endptr) +        *endptr = fail_pos; + +    return val; +} + +/* Given a string that may have a decimal point in the current +   locale, change it back to a dot.  Since the string cannot get +   longer, no need for a maximum buffer size parameter. */ +Py_LOCAL_INLINE(void) +change_decimal_from_locale_to_dot(char* buffer) +{ +    struct lconv *locale_data = localeconv(); +    const char *decimal_point = locale_data->decimal_point; + +    if (decimal_point[0] != '.' || decimal_point[1] != 0) { +        size_t decimal_point_len = strlen(decimal_point); + +        if (*buffer == '+' || *buffer == '-') +            buffer++; +        while (isdigit(Py_CHARMASK(*buffer))) +            buffer++; +        if (strncmp(buffer, decimal_point, decimal_point_len) == 0) { +            *buffer = '.'; +            buffer++; +            if (decimal_point_len > 1) { +                /* buffer needs to get smaller */ +                size_t rest_len = strlen(buffer + +                                     (decimal_point_len - 1)); +                memmove(buffer, +                    buffer + (decimal_point_len - 1), +                    rest_len); +                buffer[rest_len] = 0; +            } +        } +    }  } +/* From the C99 standard, section 7.19.6: +The exponent always contains at least two digits, and only as many more digits +as necessary to represent the exponent. +*/ +#define MIN_EXPONENT_DIGITS 2 + +/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS +   in length. */ +Py_LOCAL_INLINE(void) +ensure_minimum_exponent_length(char* buffer, size_t buf_size) +{ +    char *p = strpbrk(buffer, "eE"); +    if (p && (*(p + 1) == '-' || *(p + 1) == '+')) { +        char *start = p + 2; +        int exponent_digit_cnt = 0; +        int leading_zero_cnt = 0; +        int in_leading_zeros = 1; +        int significant_digit_cnt; + +        /* Skip over the exponent and the sign. */ +        p += 2; + +        /* Find the end of the exponent, keeping track of leading +           zeros. */ +        while (*p && isdigit(Py_CHARMASK(*p))) { +            if (in_leading_zeros && *p == '0') +                ++leading_zero_cnt; +            if (*p != '0') +                in_leading_zeros = 0; +            ++p; +            ++exponent_digit_cnt; +        } + +        significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt; +        if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) { +            /* If there are 2 exactly digits, we're done, +               regardless of what they contain */ +        } +        else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) { +            int extra_zeros_cnt; + +            /* There are more than 2 digits in the exponent.  See +               if we can delete some of the leading zeros */ +            if (significant_digit_cnt < MIN_EXPONENT_DIGITS) +                significant_digit_cnt = MIN_EXPONENT_DIGITS; +            extra_zeros_cnt = exponent_digit_cnt - +                significant_digit_cnt; + +            /* Delete extra_zeros_cnt worth of characters from the +               front of the exponent */ +            assert(extra_zeros_cnt >= 0); + +            /* Add one to significant_digit_cnt to copy the +               trailing 0 byte, thus setting the length */ +            memmove(start, +                start + extra_zeros_cnt, +                significant_digit_cnt + 1); +        } +        else { +            /* If there are fewer than 2 digits, add zeros +               until there are 2, if there's enough room */ +            int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt; +            if (start + zeros + exponent_digit_cnt + 1 +                  < buffer + buf_size) { +                memmove(start + zeros, start, +                    exponent_digit_cnt + 1); +                memset(start, '0', zeros); +            } +        } +    } +} + +/* Ensure that buffer has a decimal point in it.  The decimal point +   will not be in the current locale, it will always be '.' */ +Py_LOCAL_INLINE(void) +ensure_decimal_point(char* buffer, size_t buf_size) +{ +    int insert_count = 0; +    char* chars_to_insert; + +    /* search for the first non-digit character */ +    char *p = buffer; +    if (*p == '-' || *p == '+') +        /* Skip leading sign, if present.  I think this could only +           ever be '-', but it can't hurt to check for both. */ +        ++p; +    while (*p && isdigit(Py_CHARMASK(*p))) +        ++p; + +    if (*p == '.') { +        if (isdigit(Py_CHARMASK(*(p+1)))) { +            /* Nothing to do, we already have a decimal +               point and a digit after it */ +        } +        else { +            /* We have a decimal point, but no following +               digit.  Insert a zero after the decimal. */ +            ++p; +            chars_to_insert = "0"; +            insert_count = 1; +        } +    } +    else { +        chars_to_insert = ".0"; +        insert_count = 2; +    } +    if (insert_count) { +        size_t buf_len = strlen(buffer); +        if (buf_len + insert_count + 1 >= buf_size) { +            /* If there is not enough room in the buffer +               for the additional text, just skip it.  It's +               not worth generating an error over. */ +        } +        else { +            memmove(p + insert_count, p, +                buffer + strlen(buffer) - p + 1); +            memcpy(p, chars_to_insert, insert_count); +        } +    } +} + +/* Add the locale specific grouping characters to buffer.  Note +   that any decimal point (if it's present) in buffer is already +   locale-specific.  Return 0 on error, else 1. */ +Py_LOCAL_INLINE(int) +add_thousands_grouping(char* buffer, size_t buf_size) +{ +    Py_ssize_t len = strlen(buffer); +    struct lconv *locale_data = localeconv(); +    const char *decimal_point = locale_data->decimal_point; + +    /* Find the decimal point, if any.  We're only concerned +       about the characters to the left of the decimal when +       adding grouping. */ +    char *p = strstr(buffer, decimal_point); +    if (!p) { +        /* No decimal, use the entire string. */ + +        /* If any exponent, adjust p. */ +        p = strpbrk(buffer, "eE"); +        if (!p) +            /* No exponent and no decimal.  Use the entire +               string. */ +            p = buffer + len; +    } +    /* At this point, p points just past the right-most character we +       want to format.  We need to add the grouping string for the +       characters between buffer and p. */ +    return _PyString_InsertThousandsGrouping(buffer, len, p-buffer, +                                             buf_size, NULL, 1); +} + +/* see FORMATBUFLEN in unicodeobject.c */ +#define FLOAT_FORMATBUFLEN 120 +  /**   * PyOS_ascii_formatd:   * @buffer: A buffer to place the resulting string in - * @buf_len: The length of the buffer. + * @buf_size: The length of the buffer.   * @format: The printf()-style format to use for the - *          code to use for converting.  + *          code to use for converting.   * @d: The #gdouble to convert   *   * Converts a #gdouble to a string, using the '.' as   * decimal point. To format the number you pass in   * a printf()-style format string. Allowed conversion - * specifiers are 'e', 'E', 'f', 'F', 'g' and 'G'.  - *  + * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'n'. + * + * 'n' is the same as 'g', except it uses the current locale. + * 'Z' is the same as 'g', except it always has a decimal and + *     at least one digit after the decimal. + *   * Return value: The pointer to the buffer with the converted string.   **/  char * -PyOS_ascii_formatd(char       *buffer,  -		   size_t      buf_len,  -		   const char *format,  -		   double      d) +PyOS_ascii_formatd(char       *buffer, +                   size_t      buf_size, +                   const char *format, +                   double      d)  { -	struct lconv *locale_data; -	const char *decimal_point; -	size_t decimal_point_len, rest_len; -	char *p; -	char format_char; - -/* 	g_return_val_if_fail (buffer != NULL, NULL); */ -/* 	g_return_val_if_fail (format[0] == '%', NULL); */ -/* 	g_return_val_if_fail (strpbrk (format + 1, "'l%") == NULL, NULL); */ - -	format_char = format[strlen(format) - 1]; - -/* 	g_return_val_if_fail (format_char == 'e' || format_char == 'E' || */ -/* 			      format_char == 'f' || format_char == 'F' || */ -/* 			      format_char == 'g' || format_char == 'G', */ -/* 			      NULL); */ - -	if (format[0] != '%') -		return NULL; - -	if (strpbrk(format + 1, "'l%")) -		return NULL; - -	if (!(format_char == 'e' || format_char == 'E' ||  -	      format_char == 'f' || format_char == 'F' ||  -	      format_char == 'g' || format_char == 'G')) -		return NULL; - - -	PyOS_snprintf(buffer, buf_len, format, d); - -	locale_data = localeconv(); -	decimal_point = locale_data->decimal_point; -	decimal_point_len = strlen(decimal_point); - -	assert(decimal_point_len != 0); - -	if (decimal_point[0] != '.' ||  -	    decimal_point[1] != 0) -	{ -		p = buffer; - -		if (*p == '+' || *p == '-') -			p++; - -		while (isdigit((unsigned char)*p)) -			p++; - -		if (strncmp(p, decimal_point, decimal_point_len) == 0) -		{ -			*p = '.'; -			p++; -			if (decimal_point_len > 1) { -				rest_len = strlen(p + (decimal_point_len - 1)); -				memmove(p, p + (decimal_point_len - 1),  -					rest_len); -				p[rest_len] = 0; -			} -		} -	} - -	return buffer; +    char format_char; +    size_t format_len = strlen(format); + +    /* For type 'n', we need to make a copy of the format string, because +       we're going to modify 'n' -> 'g', and format is const char*, so we +       can't modify it directly.  FLOAT_FORMATBUFLEN should be longer than +       we ever need this to be.  There's an upcoming check to ensure it's +       big enough. */ +    /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but +       also with at least one character past the decimal. */ +    char tmp_format[FLOAT_FORMATBUFLEN]; + +    /* The last character in the format string must be the format char */ +    format_char = format[format_len - 1]; + +    if (format[0] != '%') +        return NULL; + +    /* I'm not sure why this test is here.  It's ensuring that the format +       string after the first character doesn't have a single quote, a +       lowercase l, or a percent. This is the reverse of the commented-out +       test about 10 lines ago. */ +    if (strpbrk(format + 1, "'l%")) +        return NULL; + +    /* Also curious about this function is that it accepts format strings +       like "%xg", which are invalid for floats.  In general, the +       interface to this function is not very good, but changing it is +       difficult because it's a public API. */ + +    if (!(format_char == 'e' || format_char == 'E' || +          format_char == 'f' || format_char == 'F' || +          format_char == 'g' || format_char == 'G' || +          format_char == 'n' || format_char == 'Z')) +        return NULL; + +    /* Map 'n' or 'Z' format_char to 'g', by copying the format string and +       replacing the final char with a 'g' */ +    if (format_char == 'n' || format_char == 'Z') { +        if (format_len + 1 >= sizeof(tmp_format)) { +            /* The format won't fit in our copy.  Error out.  In +               practice, this will never happen and will be +               detected by returning NULL */ +            return NULL; +        } +        strcpy(tmp_format, format); +        tmp_format[format_len - 1] = 'g'; +        format = tmp_format; +    } + + +    /* Have PyOS_snprintf do the hard work */ +    PyOS_snprintf(buffer, buf_size, format, d); + +    /* Do various fixups on the return string */ + +    /* Get the current locale, and find the decimal point string. +       Convert that string back to a dot.  Do not do this if using the +       'n' (number) format code, since we want to keep the localized +       decimal point in that case. */ +    if (format_char != 'n') +        change_decimal_from_locale_to_dot(buffer); + +    /* If an exponent exists, ensure that the exponent is at least +       MIN_EXPONENT_DIGITS digits, providing the buffer is large enough +       for the extra zeros.  Also, if there are more than +       MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get +       back to MIN_EXPONENT_DIGITS */ +    ensure_minimum_exponent_length(buffer, buf_size); + +    /* If format_char is 'Z', make sure we have at least one character +       after the decimal point (and make sure we have a decimal point). */ +    if (format_char == 'Z') +        ensure_decimal_point(buffer, buf_size); + +    /* If format_char is 'n', add the thousands grouping. */ +    if (format_char == 'n') +        if (!add_thousands_grouping(buffer, buf_size)) +            return NULL; + +    return buffer;  }  double  PyOS_ascii_atof(const char *nptr)  { -	return PyOS_ascii_strtod(nptr, NULL); +    return PyOS_ascii_strtod(nptr, NULL);  } | 
