diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2015-10-02 10:14:03 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2015-10-02 10:14:03 (GMT) |
commit | 29e68edbf47b708c1f7082c2235d82f41e747635 (patch) | |
tree | 554941fc129b51181ea2aa7799a967a3042ebcec /Objects/unicodeobject.c | |
parent | 5dbe245ef238762c8e1100885e8671bf2e089157 (diff) | |
parent | 58c8f2bb6de115b620cec3cf995f04005573765c (diff) | |
download | cpython-29e68edbf47b708c1f7082c2235d82f41e747635.zip cpython-29e68edbf47b708c1f7082c2235d82f41e747635.tar.gz cpython-29e68edbf47b708c1f7082c2235d82f41e747635.tar.bz2 |
Issue #24848: Fixed bugs in UTF-7 decoding of malformed data:
1. Non-ASCII bytes were accepted after a shift sequence.
2. A low surrogate could be emitted in case of an error in the high surrogate.
3. In some circumstances the '\xfd' character was produced instead of the
replacement character '\ufffd' (due to a bug in _PyUnicodeWriter).
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 21 |
1 file changed, 12 insertions, 9 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 93c4ad9..4fd0430 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4360,31 +4360,31 @@ PyUnicode_DecodeUTF7Stateful(const char *s, } else { /* now leaving a base-64 section */ inShift = 0; - s++; - if (surrogate) { - if (_PyUnicodeWriter_WriteCharInline(&writer, surrogate) < 0) - goto onError; - surrogate = 0; - } if (base64bits > 0) { /* left-over bits */ if (base64bits >= 6) { /* We've seen at least one base-64 character */ + s++; errmsg = "partial character in shift sequence"; goto utf7Error; } else { /* Some bits remain; they should be zero */ if (base64buffer != 0) { + s++; errmsg = "non-zero padding bits in shift sequence"; goto utf7Error; } } } - if (ch != '-') { + if (surrogate && DECODE_DIRECT(ch)) { + if (_PyUnicodeWriter_WriteCharInline(&writer, surrogate) < 0) + goto onError; + } + surrogate = 0; + if (ch == '-') { /* '-' is absorbed; other terminating characters are preserved */ - if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0) - goto onError; + s++; } } } @@ -4398,6 +4398,7 @@ PyUnicode_DecodeUTF7Stateful(const char *s, } else { /* begin base64-encoded section */ inShift = 1; + surrogate = 0; shiftOutStart = writer.pos; base64bits = 0; base64buffer = 0; @@ -4429,6 +4430,7 @@ utf7Error: if (inShift && !consumed) { /* in shift sequence, no more to follow */ /* if we're in an inconsistent state, that's an error */ + inShift = 0; if (surrogate || (base64bits >= 6) || (base64bits > 0 && base64buffer != 0)) { @@ -13366,6 +13368,7 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, if (maxchar > writer->maxchar || writer->readonly) { /* resize + widen */ + maxchar = Py_MAX(maxchar, writer->maxchar); newbuffer = PyUnicode_New(newlen, maxchar); if (newbuffer == NULL) return -1; |