diff options
author | Paul Ganssle <pganssle@users.noreply.github.com> | 2018-08-23 15:06:20 (GMT) |
---|---|---|
committer | Tal Einat <taleinat+github@gmail.com> | 2018-08-23 15:06:20 (GMT) |
commit | 096329f0b2bf5e3f0a16363aa631d993ce078737 (patch) | |
tree | 503291191bdb7ecd0d42d02bd8a35b72af468d81 /Modules/_datetimemodule.c | |
parent | c33bb5d4016fb2fc8f3b6d4b0c14b73b33cdb3cf (diff) | |
download | cpython-096329f0b2bf5e3f0a16363aa631d993ce078737.zip cpython-096329f0b2bf5e3f0a16363aa631d993ce078737.tar.gz cpython-096329f0b2bf5e3f0a16363aa631d993ce078737.tar.bz2 |
bpo-34454: fix .fromisoformat() methods crashing on inputs with surrogate code points (GH-8862)
The current C implementations **crash** if the input includes a surrogate
Unicode code point, which is not possible to encode in UTF-8.
Important notes:
1. It is possible to pass a separator that cannot be encoded in UTF-8 (for
example, a lone surrogate code point) to the `.isoformat()` methods.
2. The pure-Python `datetime.fromisoformat()` implementation accepts
strings with a surrogate as the separator.
In `datetime.fromisoformat()`, in the special case of a separator that cannot
be encoded in UTF-8, this implementation will take a performance hit by making
a copy of the input string and replacing the separator with 'T'.
Co-authored-by: Alexey Izbyshev <izbyshev@ispras.ru>
Co-authored-by: Paul Ganssle <paul@ganssle.io>
Diffstat (limited to 'Modules/_datetimemodule.c')
-rw-r--r-- | Modules/_datetimemodule.c | 81 |
1 files changed, 72 insertions, 9 deletions
diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 076912d..2522b65 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -2883,6 +2883,9 @@ date_fromisoformat(PyObject *cls, PyObject *dtstr) { Py_ssize_t len; const char * dt_ptr = PyUnicode_AsUTF8AndSize(dtstr, &len); + if (dt_ptr == NULL) { + goto invalid_string_error; + } int year = 0, month = 0, day = 0; @@ -2894,12 +2897,15 @@ date_fromisoformat(PyObject *cls, PyObject *dtstr) { } if (rv < 0) { - PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s", - dt_ptr); - return NULL; + goto invalid_string_error; } return new_date_subclass_ex(year, month, day, cls); + +invalid_string_error: + PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %R", + dtstr); + return NULL; } @@ -4258,6 +4264,10 @@ time_fromisoformat(PyObject *cls, PyObject *tstr) { Py_ssize_t len; const char *p = PyUnicode_AsUTF8AndSize(tstr, &len); + if (p == NULL) { + goto invalid_string_error; + } + int hour = 0, minute = 0, second = 0, microsecond = 0; int tzoffset, tzimicrosecond = 0; int rv = parse_isoformat_time(p, len, @@ -4265,8 +4275,7 @@ time_fromisoformat(PyObject *cls, PyObject *tstr) { &tzoffset, &tzimicrosecond); if (rv < 0) { - PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s", p); - return NULL; + goto invalid_string_error; } PyObject *tzinfo = tzinfo_from_isoformat_results(rv, tzoffset, @@ -4286,6 +4295,10 @@ time_fromisoformat(PyObject *cls, PyObject *tstr) { Py_DECREF(tzinfo); return t; + +invalid_string_error: + PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %R", tstr); + return NULL; } @@ -4840,6 +4853,33 @@ datetime_combine(PyObject *cls, PyObject *args, PyObject *kw) } static PyObject * +_sanitize_isoformat_str(PyObject *dtstr, int *needs_decref) { + // `fromisoformat` allows surrogate characters in exactly one position, + // the separator; to allow datetime_fromisoformat to make the simplifying + // assumption that all valid strings can be encoded in 
UTF-8, this function + // replaces any surrogate character separators with `T`. + Py_ssize_t len = PyUnicode_GetLength(dtstr); + *needs_decref = 0; + if (len <= 10 || !Py_UNICODE_IS_SURROGATE(PyUnicode_READ_CHAR(dtstr, 10))) { + return dtstr; + } + + PyObject *str_out = PyUnicode_New(len, PyUnicode_MAX_CHAR_VALUE(dtstr)); + if (str_out == NULL) { + return NULL; + } + + if (PyUnicode_CopyCharacters(str_out, 0, dtstr, 0, len) == -1 || + PyUnicode_WriteChar(str_out, 10, (Py_UCS4)'T')) { + Py_DECREF(str_out); + return NULL; + } + + *needs_decref = 1; + return str_out; +} + +static PyObject * datetime_fromisoformat(PyObject* cls, PyObject *dtstr) { assert(dtstr != NULL); @@ -4848,9 +4888,20 @@ datetime_fromisoformat(PyObject* cls, PyObject *dtstr) { return NULL; } + int needs_decref = 0; + dtstr = _sanitize_isoformat_str(dtstr, &needs_decref); + if (dtstr == NULL) { + goto error; + } + Py_ssize_t len; const char * dt_ptr = PyUnicode_AsUTF8AndSize(dtstr, &len); - const char * p = dt_ptr; + + if (dt_ptr == NULL) { + goto invalid_string_error; + } + + const char *p = dt_ptr; int year = 0, month = 0, day = 0; int hour = 0, minute = 0, second = 0, microsecond = 0; @@ -4883,20 +4934,32 @@ datetime_fromisoformat(PyObject* cls, PyObject *dtstr) { &tzoffset, &tzusec); } if (rv < 0) { - PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s", dt_ptr); - return NULL; + goto invalid_string_error; } PyObject* tzinfo = tzinfo_from_isoformat_results(rv, tzoffset, tzusec); if (tzinfo == NULL) { - return NULL; + goto error; } PyObject *dt = new_datetime_subclass_ex(year, month, day, hour, minute, second, microsecond, tzinfo, cls); Py_DECREF(tzinfo); + if (needs_decref) { + Py_DECREF(dtstr); + } return dt; + +invalid_string_error: + PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %R", dtstr); + +error: + if (needs_decref) { + Py_DECREF(dtstr); + } + + return NULL; } |