diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2004-06-08 18:52:54 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2004-06-08 18:52:54 (GMT) |
commit | 737ea82a5abd448b3e214b44d7d3c579b77e8155 (patch) | |
tree | b0f710ba716db5247dbb6ac88e0a20ea8623f716 | |
parent | 6ccc9a99dfbb2575daa8e01a8e8e3531b61a9d60 (diff) | |
download | cpython-737ea82a5abd448b3e214b44d7d3c579b77e8155.zip cpython-737ea82a5abd448b3e214b44d7d3c579b77e8155.tar.gz cpython-737ea82a5abd448b3e214b44d7d3c579b77e8155.tar.bz2 |
Patch #774665: Make Python LC_NUMERIC agnostic.
-rw-r--r-- | Doc/lib/liblocale.tex | 19 | ||||
-rw-r--r-- | Include/Python.h | 2 | ||||
-rw-r--r-- | Include/pystrtod.h | 18 | ||||
-rw-r--r-- | Makefile.pre.in | 1 | ||||
-rw-r--r-- | Misc/NEWS | 4 | ||||
-rw-r--r-- | Modules/_localemodule.c | 70 | ||||
-rw-r--r-- | Modules/cPickle.c | 2 | ||||
-rw-r--r-- | Modules/stropmodule.c | 3 | ||||
-rw-r--r-- | Objects/complexobject.c | 23 | ||||
-rw-r--r-- | Objects/floatobject.c | 8 | ||||
-rw-r--r-- | Objects/stringobject.c | 2 | ||||
-rw-r--r-- | Python/compile.c | 4 | ||||
-rw-r--r-- | Python/marshal.c | 6 | ||||
-rw-r--r-- | Python/pystrtod.c | 258 |
14 files changed, 317 insertions, 103 deletions
diff --git a/Doc/lib/liblocale.tex b/Doc/lib/liblocale.tex index 967c759..bc96189 100644 --- a/Doc/lib/liblocale.tex +++ b/Doc/lib/liblocale.tex @@ -456,25 +456,6 @@ find out what the current locale is. But since the return value can only be used portably to restore it, that is not very useful (except perhaps to find out whether or not the locale is \samp{C}). -When Python is embedded in an application, if the application sets the -locale to something specific before initializing Python, that is -generally okay, and Python will use whatever locale is set, -\emph{except} that the \constant{LC_NUMERIC} locale should always be -\samp{C}. - -The \function{setlocale()} function in the \module{locale} module -gives the Python programmer the impression that you can manipulate the -\constant{LC_NUMERIC} locale setting, but this not the case at the C -level: C code will always find that the \constant{LC_NUMERIC} locale -setting is \samp{C}. This is because too much would break when the -decimal point character is set to something else than a period -(e.g. the Python parser would break). Caveat: threads that run -without holding Python's global interpreter lock may occasionally find -that the numeric locale setting differs; this is because the only -portable way to implement this feature is to set the numeric locale -settings to what the user requests, extract the relevant -characteristics, and then restore the \samp{C} numeric locale. - When Python code uses the \module{locale} module to change the locale, this also affects the embedding application. If the embedding application doesn't want this to happen, it should remove the diff --git a/Include/Python.h b/Include/Python.h index f332836..0d9a797 100644 --- a/Include/Python.h +++ b/Include/Python.h @@ -119,6 +119,8 @@ #include "compile.h" #include "eval.h" +#include "pystrtod.h" + /* _Py_Mangle is defined in compile.c */ PyAPI_FUNC(int) _Py_Mangle(char *p, char *name, \ char *buffer, size_t maxlen); diff --git a/Include/pystrtod.h b/Include/pystrtod.h new file mode 100644 index 0000000..e4e5e52 --- /dev/null +++ b/Include/pystrtod.h @@ -0,0 +1,18 @@ +#ifndef Py_STRTOD_H +#define Py_STRTOD_H + +#ifdef __cplusplus +extern "C" { +#endif + + +double PyOS_ascii_strtod(const char *str, char **ptr); +double PyOS_ascii_atof(const char *str); +char * PyOS_ascii_formatd(char *buffer, int buf_len, const char *format, double d); + + +#ifdef __cplusplus +} +#endif + +#endif /* !Py_STRTOD_H */ diff --git a/Makefile.pre.in b/Makefile.pre.in index e4a12b0..8ac3143 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -247,6 +247,7 @@ PYTHON_OBJS= \ Python/sysmodule.o \ Python/traceback.o \ Python/getopt.o \ + Python/pystrtod.o \ Python/$(DYNLOADFILE) \ $(MACHDEP_OBJS) \ $(THREADOBJ) @@ -12,6 +12,10 @@ What's New in Python 2.4 alpha 1? Core and builtins ----------------- +- Python no longer relies on the LC_NUMERIC locale setting to be + the "C" locale; as a result, it no longer tries to prevent changing + the LC_NUMERIC category. + - Bug #952807: Unpickling pickled instances of subclasses of datetime.date, datetime.datetime and datetime.time could yield insane objects. Thanks to Jiwon Seo for a fix. diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index 5edb7f3..0ab79cb 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -51,13 +51,6 @@ static PyObject *Error; PyDoc_STRVAR(setlocale__doc__, "(integer,string=None) -> string. Activates/queries locale processing."); -/* to record the LC_NUMERIC settings */ -static PyObject* grouping = NULL; -static PyObject* thousands_sep = NULL; -static PyObject* decimal_point = NULL; -/* if non-null, indicates that LC_NUMERIC is different from "C" */ -static char* saved_numeric = NULL; - /* the grouping is terminated by either 0 or CHAR_MAX */ static PyObject* copy_grouping(char* s) @@ -167,7 +160,6 @@ PyLocale_setlocale(PyObject* self, PyObject* args) int category; char *locale = NULL, *result; PyObject *result_object; - struct lconv *lc; if (!PyArg_ParseTuple(args, "i|z:setlocale", &category, &locale)) return NULL; @@ -183,29 +175,6 @@ PyLocale_setlocale(PyObject* self, PyObject* args) result_object = PyString_FromString(result); if (!result_object) return NULL; - /* record changes to LC_NUMERIC */ - if (category == LC_NUMERIC || category == LC_ALL) { - if (strcmp(locale, "C") == 0 || strcmp(locale, "POSIX") == 0) { - /* user just asked for default numeric locale */ - if (saved_numeric) - free(saved_numeric); - saved_numeric = NULL; - } else { - /* remember values */ - lc = localeconv(); - Py_XDECREF(grouping); - grouping = copy_grouping(lc->grouping); - Py_XDECREF(thousands_sep); - thousands_sep = PyString_FromString(lc->thousands_sep); - Py_XDECREF(decimal_point); - decimal_point = PyString_FromString(lc->decimal_point); - if (saved_numeric) - free(saved_numeric); - saved_numeric = strdup(locale); - /* restore to "C" */ - setlocale(LC_NUMERIC, "C"); - } - } /* record changes to LC_CTYPE */ if (category == LC_CTYPE || category == LC_ALL) fixup_ulcase(); @@ -213,18 +182,12 @@ PyLocale_setlocale(PyObject* self, PyObject* args) PyErr_Clear(); } else { /* get locale */ - /* restore LC_NUMERIC first, if appropriate */ - if (saved_numeric) - setlocale(LC_NUMERIC, saved_numeric); result = setlocale(category, NULL); if (!result) { PyErr_SetString(Error, "locale query failed"); return NULL; } result_object = PyString_FromString(result); - /* restore back to "C" */ - if (saved_numeric) - setlocale(LC_NUMERIC, "C"); } return result_object; } @@ -262,20 +225,13 @@ PyLocale_localeconv(PyObject* self) Py_XDECREF(x) /* Numeric information */ - if (saved_numeric){ - /* cannot use localeconv results */ - PyDict_SetItemString(result, "decimal_point", decimal_point); - PyDict_SetItemString(result, "grouping", grouping); - PyDict_SetItemString(result, "thousands_sep", thousands_sep); - } else { - RESULT_STRING(decimal_point); - RESULT_STRING(thousands_sep); - x = copy_grouping(l->grouping); - if (!x) - goto failed; - PyDict_SetItemString(result, "grouping", x); - Py_XDECREF(x); - } + RESULT_STRING(decimal_point); + RESULT_STRING(thousands_sep); + x = copy_grouping(l->grouping); + if (!x) + goto failed; + PyDict_SetItemString(result, "grouping", x); + Py_XDECREF(x); /* Monetary information */ RESULT_STRING(int_curr_symbol); @@ -579,18 +535,6 @@ PyLocale_nl_langinfo(PyObject* self, PyObject* args) /* Check whether this is a supported constant. GNU libc sometimes returns numeric values in the char* return value, which would crash PyString_FromString. */ -#ifdef RADIXCHAR - if (saved_numeric) { - if(item == RADIXCHAR) { - Py_INCREF(decimal_point); - return decimal_point; - } - if(item == THOUSEP) { - Py_INCREF(thousands_sep); - return thousands_sep; - } - } -#endif for (i = 0; langinfo_constants[i].name; i++) if (langinfo_constants[i].value == item) { /* Check NULL as a workaround for GNU libc's returning NULL diff --git a/Modules/cPickle.c b/Modules/cPickle.c index 105a2e9..e5e20cb 100644 --- a/Modules/cPickle.c +++ b/Modules/cPickle.c @@ -3319,7 +3319,7 @@ load_float(Unpicklerobject *self) if (!( s=pystrndup(s,len))) return -1; errno = 0; - d = strtod(s, &endptr); + d = PyOS_ascii_strtod(s, &endptr); if (errno || (endptr[0] != '\n') || (endptr[1] != '\0')) { PyErr_SetString(PyExc_ValueError, diff --git a/Modules/stropmodule.c b/Modules/stropmodule.c index 8eb64a0..ce19a05 100644 --- a/Modules/stropmodule.c +++ b/Modules/stropmodule.c @@ -838,7 +838,6 @@ PyDoc_STRVAR(atof__doc__, static PyObject * strop_atof(PyObject *self, PyObject *args) { - extern double strtod(const char *, char **); char *s, *end; double x; char buffer[256]; /* For errors */ @@ -854,7 +853,7 @@ strop_atof(PyObject *self, PyObject *args) } errno = 0; PyFPE_START_PROTECT("strop_atof", return 0) - x = strtod(s, &end); + x = PyOS_ascii_strtod(s, &end); PyFPE_END_PROTECT(x) while (*end && isspace(Py_CHARMASK(*end))) end++; diff --git a/Objects/complexobject.c b/Objects/complexobject.c index c29d48d..4023fa0 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -272,13 +272,19 @@ complex_dealloc(PyObject *op) static void complex_to_buf(char *buf, int bufsz, PyComplexObject *v, int precision) { - if (v->cval.real == 0.) - PyOS_snprintf(buf, bufsz, "%.*gj", - precision, v->cval.imag); - else - PyOS_snprintf(buf, bufsz, "(%.*g%+.*gj)", - precision, v->cval.real, - precision, v->cval.imag); + char format[32]; + if (v->cval.real == 0.) { + PyOS_snprintf(format, 32, "%%.%ig", precision); + PyOS_ascii_formatd(buf, bufsz, format, v->cval.imag); + strncat(buf, "j", bufsz); + } else { + char re[64], im[64]; + + PyOS_snprintf(format, 32, "%%.%ig", precision); + PyOS_ascii_formatd(re, 64, format, v->cval.real); + PyOS_ascii_formatd(im, 64, format, v->cval.imag); + PyOS_snprintf(buf, bufsz, "(%s+%sj)", re, im); + } } static int @@ -662,7 +668,6 @@ static PyMemberDef complex_members[] = { static PyObject * complex_subtype_from_string(PyTypeObject *type, PyObject *v) { - extern double strtod(const char *, char **); const char *s, *start; char *end; double x=0.0, y=0.0, z; @@ -774,7 +779,7 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v) } errno = 0; PyFPE_START_PROTECT("strtod", return 0) - z = strtod(s, &end) ; + z = PyOS_ascii_strtod(s, &end) ; PyFPE_END_PROTECT(z) if (errno != 0) { PyOS_snprintf(buffer, sizeof(buffer), diff --git a/Objects/floatobject.c b/Objects/floatobject.c index f1c8e42..bbf56c6 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -132,7 +132,7 @@ PyFloat_FromString(PyObject *v, char **pend) * key off errno. */ PyFPE_START_PROTECT("strtod", return NULL) - x = strtod(s, (char **)&end); + x = PyOS_ascii_strtod(s, (char **)&end); PyFPE_END_PROTECT(x) errno = 0; /* Believe it or not, Solaris 2.6 can move end *beyond* the null @@ -164,7 +164,7 @@ PyFloat_FromString(PyObject *v, char **pend) /* See above -- may have been strtod being anal about denorms. */ PyFPE_START_PROTECT("atof", return NULL) - x = atof(s); + x = PyOS_ascii_atof(s); PyFPE_END_PROTECT(x) errno = 0; /* whether atof ever set errno is undefined */ } @@ -223,6 +223,7 @@ static void format_float(char *buf, size_t buflen, PyFloatObject *v, int precision) { register char *cp; + char format[32]; /* Subroutine for float_repr and float_print. We want float numbers to be recognizable as such, i.e., they should contain a decimal point or an exponent. @@ -230,7 +231,8 @@ format_float(char *buf, size_t buflen, PyFloatObject *v, int precision) in such cases, we append ".0" to the string. */ assert(PyFloat_Check(v)); - PyOS_snprintf(buf, buflen, "%.*g", precision, v->ob_fval); + PyOS_snprintf(format, 32, "%%.%ig", precision); + PyOS_ascii_formatd(buf, buflen, format, v->ob_fval); cp = buf; if (*cp == '-') cp++; diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 361d84d..b14dc51 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -3582,7 +3582,7 @@ formatfloat(char *buf, size_t buflen, int flags, PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type); - PyOS_snprintf(buf, buflen, fmt, x); + PyOS_ascii_formatd(buf, buflen, fmt, x); return strlen(buf); } diff --git a/Python/compile.c b/Python/compile.c index 15159f8..dd80ae4 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -1379,7 +1379,7 @@ parsenumber(struct compiling *c, char *s) Py_complex z; z.real = 0.; PyFPE_START_PROTECT("atof", return 0) - z.imag = atof(s); + z.imag = PyOS_ascii_atof(s); PyFPE_END_PROTECT(z) return PyComplex_FromCComplex(z); } @@ -1387,7 +1387,7 @@ parsenumber(struct compiling *c, char *s) #endif { PyFPE_START_PROTECT("atof", return 0) - dx = atof(s); + dx = PyOS_ascii_atof(s); PyFPE_END_PROTECT(dx) return PyFloat_FromDouble(dx); } diff --git a/Python/marshal.c b/Python/marshal.c index d3cd659..c253119 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -457,7 +457,7 @@ r_object(RFILE *p) } buf[n] = '\0'; PyFPE_START_PROTECT("atof", return 0) - dx = atof(buf); + dx = PyOS_ascii_atof(buf); PyFPE_END_PROTECT(dx) return PyFloat_FromDouble(dx); } @@ -475,7 +475,7 @@ r_object(RFILE *p) } buf[n] = '\0'; PyFPE_START_PROTECT("atof", return 0) - c.real = atof(buf); + c.real = PyOS_ascii_atof(buf); PyFPE_END_PROTECT(c) n = r_byte(p); if (n == EOF || r_string(buf, (int)n, p) != n) { @@ -485,7 +485,7 @@ r_object(RFILE *p) } buf[n] = '\0'; PyFPE_START_PROTECT("atof", return 0) - c.imag = atof(buf); + c.imag = PyOS_ascii_atof(buf); PyFPE_END_PROTECT(c) return PyComplex_FromCComplex(c); } diff --git a/Python/pystrtod.c b/Python/pystrtod.c new file mode 100644 index 0000000..ab25799 --- /dev/null +++ b/Python/pystrtod.c @@ -0,0 +1,258 @@ +/* -*- Mode: C; c-file-style: "python" -*- */ + +#include <Python.h> +#include <locale.h> + +/* ascii character tests (as opposed to locale tests) */ +#define ISSPACE(c) ((c) == ' ' || (c) == '\f' || (c) == '\n' || \ + (c) == '\r' || (c) == '\t' || (c) == '\v') +#define ISDIGIT(c) ((c) >= '0' && (c) <= '9') +#define ISXDIGIT(c) (ISDIGIT(c) || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F')) + + +/** + * PyOS_ascii_strtod: + * @nptr: the string to convert to a numeric value. + * @endptr: if non-%NULL, it returns the character after + * the last character used in the conversion. + * + * Converts a string to a #gdouble value. + * This function behaves like the standard strtod() function + * does in the C locale. It does this without actually + * changing the current locale, since that would not be + * thread-safe. + * + * This function is typically used when reading configuration + * files or other non-user input that should be locale independent. + * To handle input from the user you should normally use the + * locale-sensitive system strtod() function. + * + * If the correct value would cause overflow, plus or minus %HUGE_VAL + * is returned (according to the sign of the value), and %ERANGE is + * stored in %errno. If the correct value would cause underflow, + * zero is returned and %ERANGE is stored in %errno. + * + * This function resets %errno before calling strtod() so that + * you can reliably detect overflow and underflow. + * + * Return value: the #gdouble value. + **/ +double +PyOS_ascii_strtod(const char *nptr, + char **endptr) +{ + char *fail_pos; + double val; + struct lconv *locale_data; + const char *decimal_point; + int decimal_point_len; + const char *p, *decimal_point_pos; + const char *end = NULL; /* Silence gcc */ + +/* g_return_val_if_fail (nptr != NULL, 0); */ + assert(nptr != NULL); + + fail_pos = NULL; + + locale_data = localeconv(); + decimal_point = locale_data->decimal_point; + decimal_point_len = strlen(decimal_point); + + assert(decimal_point_len != 0); + + decimal_point_pos = NULL; + if (decimal_point[0] != '.' || + decimal_point[1] != 0) + { + p = nptr; + /* Skip leading space */ + while (ISSPACE(*p)) + p++; + + /* Skip leading optional sign */ + if (*p == '+' || *p == '-') + p++; + + if (p[0] == '0' && + (p[1] == 'x' || p[1] == 'X')) + { + p += 2; + /* HEX - find the (optional) decimal point */ + + while (ISXDIGIT(*p)) + p++; + + if (*p == '.') + { + decimal_point_pos = p++; + + while (ISXDIGIT(*p)) + p++; + + if (*p == 'p' || *p == 'P') + p++; + if (*p == '+' || *p == '-') + p++; + while (ISDIGIT(*p)) + p++; + end = p; + } + } + else + { + while (ISDIGIT(*p)) + p++; + + if (*p == '.') + { + decimal_point_pos = p++; + + while (ISDIGIT(*p)) + p++; + + if (*p == 'e' || *p == 'E') + p++; + if (*p == '+' || *p == '-') + p++; + while (ISDIGIT(*p)) + p++; + end = p; + } + } + /* For the other cases, we need not convert the decimal point */ + } + + /* Set errno to zero, so that we can distinguish zero results + and underflows */ + errno = 0; + + if (decimal_point_pos) + { + char *copy, *c; + + /* We need to convert the '.' to the locale specific decimal point */ + copy = malloc(end - nptr + 1 + decimal_point_len); + + c = copy; + memcpy(c, nptr, decimal_point_pos - nptr); + c += decimal_point_pos - nptr; + memcpy(c, decimal_point, decimal_point_len); + c += decimal_point_len; + memcpy(c, decimal_point_pos + 1, end - (decimal_point_pos + 1)); + c += end - (decimal_point_pos + 1); + *c = 0; + + val = strtod(copy, &fail_pos); + + if (fail_pos) + { + if (fail_pos > decimal_point_pos) + fail_pos = (char *)nptr + (fail_pos - copy) - (decimal_point_len - 1); + else + fail_pos = (char *)nptr + (fail_pos - copy); + } + + free(copy); + + } + else + val = strtod(nptr, &fail_pos); + + if (endptr) + *endptr = fail_pos; + + return val; +} + + +/** + * PyOS_ascii_formatd: + * @buffer: A buffer to place the resulting string in + * @buf_len: The length of the buffer. + * @format: The printf()-style format to use for the + * code to use for converting. + * @d: The #gdouble to convert + * + * Converts a #gdouble to a string, using the '.' as + * decimal point. To format the number you pass in + * a printf()-style format string. Allowed conversion + * specifiers are 'e', 'E', 'f', 'F', 'g' and 'G'. + * + * Return value: The pointer to the buffer with the converted string. + **/ +char * +PyOS_ascii_formatd(char *buffer, + int buf_len, + const char *format, + double d) +{ + struct lconv *locale_data; + const char *decimal_point; + int decimal_point_len; + char *p; + int rest_len; + char format_char; + +/* g_return_val_if_fail (buffer != NULL, NULL); */ +/* g_return_val_if_fail (format[0] == '%', NULL); */ +/* g_return_val_if_fail (strpbrk (format + 1, "'l%") == NULL, NULL); */ + + format_char = format[strlen(format) - 1]; + +/* g_return_val_if_fail (format_char == 'e' || format_char == 'E' || */ +/* format_char == 'f' || format_char == 'F' || */ +/* format_char == 'g' || format_char == 'G', */ +/* NULL); */ + + if (format[0] != '%') + return NULL; + + if (strpbrk(format + 1, "'l%")) + return NULL; + + if (!(format_char == 'e' || format_char == 'E' || + format_char == 'f' || format_char == 'F' || + format_char == 'g' || format_char == 'G')) + return NULL; + + + PyOS_snprintf(buffer, buf_len, format, d); + + locale_data = localeconv(); + decimal_point = locale_data->decimal_point; + decimal_point_len = strlen(decimal_point); + + assert(decimal_point_len != 0); + + if (decimal_point[0] != '.' || + decimal_point[1] != 0) + { + p = buffer; + + if (*p == '+' || *p == '-') + p++; + + while (isdigit((unsigned char)*p)) + p++; + + if (strncmp(p, decimal_point, decimal_point_len) == 0) + { + *p = '.'; + p++; + if (decimal_point_len > 1) { + rest_len = strlen(p + (decimal_point_len - 1)); + memmove(p, p + (decimal_point_len - 1), + rest_len); + p[rest_len] = 0; + } + } + } + + return buffer; +} + +double +PyOS_ascii_atof(const char *nptr) +{ + return PyOS_ascii_strtod(nptr, NULL); +} |