From 6edda14b2968ca07b94a2fdde4dd43215222fcd8 Mon Sep 17 00:00:00 2001 From: Stefan Krah Date: Wed, 29 May 2013 15:45:38 +0200 Subject: Issue #17768: Support newline fill character in decimal.py and NUL fill character in _decimal.c. --- Lib/decimal.py | 2 +- Lib/test/test_decimal.py | 4 +++ Modules/_decimal/_decimal.c | 64 +++++++++++++++++++++++++++++----- Modules/_decimal/tests/deccheck.py | 4 +-- Modules/_decimal/tests/formathelper.py | 6 ++-- 5 files changed, 64 insertions(+), 16 deletions(-) diff --git a/Lib/decimal.py b/Lib/decimal.py index 746b34a..1826deb 100644 --- a/Lib/decimal.py +++ b/Lib/decimal.py @@ -6140,7 +6140,7 @@ _parse_format_specifier_regex = re.compile(r"""\A (?:\.(?P<precision>0|(?!0)\d+))? (?P<type>[eEfFgGn%])? \Z -""", re.VERBOSE) +""", re.VERBOSE|re.DOTALL) del re diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py index e8256a5..2969394 100644 --- a/Lib/test/test_decimal.py +++ b/Lib/test/test_decimal.py @@ -1021,6 +1021,10 @@ class FormatTest(unittest.TestCase): ('/=10', '-45.6', '-/////45.6'), ('/=+10', '45.6', '+/////45.6'), ('/= 10', '45.6', ' /////45.6'), + ('\x00=10', '-inf', '-\x00Infinity'), + ('\x00^16', '-inf', '\x00\x00\x00-Infinity\x00\x00\x00\x00'), + ('\x00>10', '1.2345', '\x00\x00\x00\x001.2345'), + ('\x00<10', '1.2345', '1.2345\x00\x00\x00\x00'), # thousands separator (',', '1234567', '1,234,567'), diff --git a/Modules/_decimal/_decimal.c b/Modules/_decimal/_decimal.c index 89386ff..98d49e6 100644 --- a/Modules/_decimal/_decimal.c +++ b/Modules/_decimal/_decimal.c @@ -3096,6 +3096,29 @@ dec_repr(PyObject *dec) return res; } +/* Return a duplicate of src, copy embedded null characters. 
*/ +static char * +dec_strdup(const char *src, Py_ssize_t size) +{ + char *dest = PyMem_Malloc(size+1); + if (dest == NULL) { + return NULL; + } + + memcpy(dest, src, size); + dest[size] = '\0'; + return dest; +} + +static void +dec_replace_fillchar(char *dest) +{ + while (*dest != '\0') { + if (*dest == '\xff') *dest = '\0'; + dest++; + } +} + /* Convert decimal_point or thousands_sep, which may be multibyte or in the range [128, 255], to a UTF8 string. */ static PyObject * @@ -3131,13 +3154,14 @@ dec_format(PyObject *dec, PyObject *args) PyObject *dot = NULL; PyObject *sep = NULL; PyObject *grouping = NULL; - PyObject *fmt = NULL; PyObject *fmtarg; PyObject *context; mpd_spec_t spec; - char *decstring= NULL; + char *fmt; + char *decstring = NULL; uint32_t status = 0; - size_t n; + int replace_fillchar = 0; + Py_ssize_t size; CURRENT_CONTEXT(context); @@ -3146,10 +3170,20 @@ dec_format(PyObject *dec, PyObject *args) } if (PyUnicode_Check(fmtarg)) { - fmt = PyUnicode_AsUTF8String(fmtarg); + fmt = PyUnicode_AsUTF8AndSize(fmtarg, &size); if (fmt == NULL) { return NULL; } + if (size > 0 && fmt[0] == '\0') { + /* NUL fill character: must be replaced with a valid UTF-8 char + before calling mpd_parse_fmt_str(). */ + replace_fillchar = 1; + fmt = dec_strdup(fmt, size); + if (fmt == NULL) { + return NULL; + } + fmt[0] = '_'; + } } else { PyErr_SetString(PyExc_TypeError, @@ -3157,12 +3191,19 @@ dec_format(PyObject *dec, PyObject *args) return NULL; } - if (!mpd_parse_fmt_str(&spec, PyBytes_AS_STRING(fmt), - CtxCaps(context))) { + if (!mpd_parse_fmt_str(&spec, fmt, CtxCaps(context))) { PyErr_SetString(PyExc_ValueError, "invalid format string"); goto finish; } + if (replace_fillchar) { + /* In order to avoid clobbering parts of UTF-8 thousands separators or + decimal points when the substitution is reversed later, the actual + placeholder must be an invalid UTF-8 byte. 
*/ + spec.fill[0] = '\xff'; + spec.fill[1] = '\0'; + } + if (override) { /* Values for decimal_point, thousands_sep and grouping can be explicitly specified in the override dict. These values @@ -3199,7 +3240,7 @@ dec_format(PyObject *dec, PyObject *args) } } else { - n = strlen(spec.dot); + size_t n = strlen(spec.dot); if (n > 1 || (n == 1 && !isascii((uchar)spec.dot[0]))) { /* fix locale dependent non-ascii characters */ dot = dotsep_as_utf8(spec.dot); @@ -3231,14 +3272,19 @@ dec_format(PyObject *dec, PyObject *args) } goto finish; } - result = PyUnicode_DecodeUTF8(decstring, strlen(decstring), NULL); + size = strlen(decstring); + if (replace_fillchar) { + dec_replace_fillchar(decstring); + } + + result = PyUnicode_DecodeUTF8(decstring, size, NULL); finish: Py_XDECREF(grouping); Py_XDECREF(sep); Py_XDECREF(dot); - Py_XDECREF(fmt); + if (replace_fillchar) PyMem_Free(fmt); if (decstring) mpd_free(decstring); return result; } diff --git a/Modules/_decimal/tests/deccheck.py b/Modules/_decimal/tests/deccheck.py index a2853ad..7a6b410 100644 --- a/Modules/_decimal/tests/deccheck.py +++ b/Modules/_decimal/tests/deccheck.py @@ -891,7 +891,7 @@ def test_ternary(method, prec, exp_range, restricted_range, itr, stat): def test_format(method, prec, exp_range, restricted_range, itr, stat): """Iterate the __format__ method through many test cases.""" for op in all_unary(prec, exp_range, itr): - fmt1 = rand_format(chr(random.randrange(32, 128)), 'EeGgn') + fmt1 = rand_format(chr(random.randrange(0, 128)), 'EeGgn') fmt2 = rand_locale() for fmt in (fmt1, fmt2): fmtop = (op[0], fmt) @@ -904,7 +904,7 @@ def test_format(method, prec, exp_range, restricted_range, itr, stat): except VerifyError as err: log(err) for op in all_unary(prec, 9999, itr): - fmt1 = rand_format(chr(random.randrange(32, 128)), 'Ff%') + fmt1 = rand_format(chr(random.randrange(0, 128)), 'Ff%') fmt2 = rand_locale() for fmt in (fmt1, fmt2): fmtop = (op[0], fmt) diff --git a/Modules/_decimal/tests/formathelper.py 
b/Modules/_decimal/tests/formathelper.py index 9da2b37..19b2aad 100644 --- a/Modules/_decimal/tests/formathelper.py +++ b/Modules/_decimal/tests/formathelper.py @@ -215,8 +215,6 @@ def check_fillchar(i): c = chr(i) c.encode('utf-8').decode() format(P.Decimal(0), c + '<19g') - if c in ("'", '"', '\\'): - return None return c except: return None @@ -224,14 +222,14 @@ def check_fillchar(i): # Generate all unicode characters that are accepted as # fill characters by decimal.py. def all_fillchars(): - for i in range(32, 0x110002): + for i in range(0, 0x110002): c = check_fillchar(i) if c: yield c # Return random fill character. def rand_fillchar(): while 1: - i = random.randrange(32, 0x110002) + i = random.randrange(0, 0x110002) c = check_fillchar(i) if c: return c -- cgit v0.12