summaryrefslogtreecommitdiffstats
path: root/Modules/_datetimemodule.c
diff options
context:
space:
mode:
authorPaul Ganssle <pganssle@users.noreply.github.com>2018-08-23 15:06:20 (GMT)
committerTal Einat <taleinat+github@gmail.com>2018-08-23 15:06:20 (GMT)
commit096329f0b2bf5e3f0a16363aa631d993ce078737 (patch)
tree503291191bdb7ecd0d42d02bd8a35b72af468d81 /Modules/_datetimemodule.c
parentc33bb5d4016fb2fc8f3b6d4b0c14b73b33cdb3cf (diff)
downloadcpython-096329f0b2bf5e3f0a16363aa631d993ce078737.zip
cpython-096329f0b2bf5e3f0a16363aa631d993ce078737.tar.gz
cpython-096329f0b2bf5e3f0a16363aa631d993ce078737.tar.bz2
bpo-34454: fix .fromisoformat() methods crashing on inputs with surrogate code points (GH-8862)
The current C implementations **crash** if the input includes a surrogate Unicode code point, which is not possible to encode in UTF-8. Important notes: 1. It is possible to pass a non-UTF-8 string as a separator to the `.isoformat()` methods. 2. The pure-Python `datetime.fromisoformat()` implementation accepts strings with a surrogate as the separator. In `datetime.fromisoformat()`, in the special case of non-UTF-8 separators, this implementation will take a performance hit by making a copy of the input string and replacing the separator with 'T'. Co-authored-by: Alexey Izbyshev <izbyshev@ispras.ru> Co-authored-by: Paul Ganssle <paul@ganssle.io>
Diffstat (limited to 'Modules/_datetimemodule.c')
-rw-r--r--Modules/_datetimemodule.c81
1 files changed, 72 insertions, 9 deletions
diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c
index 076912d..2522b65 100644
--- a/Modules/_datetimemodule.c
+++ b/Modules/_datetimemodule.c
@@ -2883,6 +2883,9 @@ date_fromisoformat(PyObject *cls, PyObject *dtstr) {
Py_ssize_t len;
const char * dt_ptr = PyUnicode_AsUTF8AndSize(dtstr, &len);
+ if (dt_ptr == NULL) {
+ goto invalid_string_error;
+ }
int year = 0, month = 0, day = 0;
@@ -2894,12 +2897,15 @@ date_fromisoformat(PyObject *cls, PyObject *dtstr) {
}
if (rv < 0) {
- PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s",
- dt_ptr);
- return NULL;
+ goto invalid_string_error;
}
return new_date_subclass_ex(year, month, day, cls);
+
+invalid_string_error:
+ PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %R",
+ dtstr);
+ return NULL;
}
@@ -4258,6 +4264,10 @@ time_fromisoformat(PyObject *cls, PyObject *tstr) {
Py_ssize_t len;
const char *p = PyUnicode_AsUTF8AndSize(tstr, &len);
+ if (p == NULL) {
+ goto invalid_string_error;
+ }
+
int hour = 0, minute = 0, second = 0, microsecond = 0;
int tzoffset, tzimicrosecond = 0;
int rv = parse_isoformat_time(p, len,
@@ -4265,8 +4275,7 @@ time_fromisoformat(PyObject *cls, PyObject *tstr) {
&tzoffset, &tzimicrosecond);
if (rv < 0) {
- PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s", p);
- return NULL;
+ goto invalid_string_error;
}
PyObject *tzinfo = tzinfo_from_isoformat_results(rv, tzoffset,
@@ -4286,6 +4295,10 @@ time_fromisoformat(PyObject *cls, PyObject *tstr) {
Py_DECREF(tzinfo);
return t;
+
+invalid_string_error:
+ PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %R", tstr);
+ return NULL;
}
@@ -4840,6 +4853,33 @@ datetime_combine(PyObject *cls, PyObject *args, PyObject *kw)
}
static PyObject *
+_sanitize_isoformat_str(PyObject *dtstr, int *needs_decref) {
+ // `fromisoformat` allows surrogate characters in exactly one position,
+ // the separator; to allow datetime_fromisoformat to make the simplifying
+ // assumption that all valid strings can be encoded in UTF-8, this function
+ // replaces any surrogate character separators with `T`.
+ Py_ssize_t len = PyUnicode_GetLength(dtstr);
+ *needs_decref = 0;
+ if (len <= 10 || !Py_UNICODE_IS_SURROGATE(PyUnicode_READ_CHAR(dtstr, 10))) {
+ return dtstr;
+ }
+
+ PyObject *str_out = PyUnicode_New(len, PyUnicode_MAX_CHAR_VALUE(dtstr));
+ if (str_out == NULL) {
+ return NULL;
+ }
+
+ if (PyUnicode_CopyCharacters(str_out, 0, dtstr, 0, len) == -1 ||
+ PyUnicode_WriteChar(str_out, 10, (Py_UCS4)'T')) {
+ Py_DECREF(str_out);
+ return NULL;
+ }
+
+ *needs_decref = 1;
+ return str_out;
+}
+
+static PyObject *
datetime_fromisoformat(PyObject* cls, PyObject *dtstr) {
assert(dtstr != NULL);
@@ -4848,9 +4888,20 @@ datetime_fromisoformat(PyObject* cls, PyObject *dtstr) {
return NULL;
}
+ int needs_decref = 0;
+ dtstr = _sanitize_isoformat_str(dtstr, &needs_decref);
+ if (dtstr == NULL) {
+ goto error;
+ }
+
Py_ssize_t len;
const char * dt_ptr = PyUnicode_AsUTF8AndSize(dtstr, &len);
- const char * p = dt_ptr;
+
+ if (dt_ptr == NULL) {
+ goto invalid_string_error;
+ }
+
+ const char *p = dt_ptr;
int year = 0, month = 0, day = 0;
int hour = 0, minute = 0, second = 0, microsecond = 0;
@@ -4883,20 +4934,32 @@ datetime_fromisoformat(PyObject* cls, PyObject *dtstr) {
&tzoffset, &tzusec);
}
if (rv < 0) {
- PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s", dt_ptr);
- return NULL;
+ goto invalid_string_error;
}
PyObject* tzinfo = tzinfo_from_isoformat_results(rv, tzoffset, tzusec);
if (tzinfo == NULL) {
- return NULL;
+ goto error;
}
PyObject *dt = new_datetime_subclass_ex(year, month, day, hour, minute,
second, microsecond, tzinfo, cls);
Py_DECREF(tzinfo);
+ if (needs_decref) {
+ Py_DECREF(dtstr);
+ }
return dt;
+
+invalid_string_error:
+ PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %R", dtstr);
+
+error:
+ if (needs_decref) {
+ Py_DECREF(dtstr);
+ }
+
+ return NULL;
}