Issue #24848: Fixed bugs in UTF-7 decoding of malformed data:

1. Non-ASCII bytes were accepted after a shift sequence.
2. A low surrogate could be emitted in case of an error in the high surrogate.
3. In some circumstances the '\xfd' character was produced instead of the replacement character '\ufffd' (due to a bug in _PyUnicodeWriter).
author: Serhiy Storchaka <storchaka@gmail.com> 2015-10-02 10:14:03 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com> 2015-10-02 10:14:03 (GMT)
commit: 29e68edbf47b708c1f7082c2235d82f41e747635 (patch)
tree: 554941fc129b51181ea2aa7799a967a3042ebcec /Objects/unicodeobject.c
parent: 5dbe245ef238762c8e1100885e8671bf2e089157 (diff)
parent: 58c8f2bb6de115b620cec3cf995f04005573765c (diff)
download: cpython-29e68edbf47b708c1f7082c2235d82f41e747635.zip
cpython-29e68edbf47b708c1f7082c2235d82f41e747635.tar.gz
cpython-29e68edbf47b708c1f7082c2235d82f41e747635.tar.bz2
1 files changed, 12 insertions, 9 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 93c4ad9..4fd0430 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -4360,31 +4360,31 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
             }
             else { /* now leaving a base-64 section */
                 inShift = 0;
-                s++;
-                if (surrogate) {
-                    if (_PyUnicodeWriter_WriteCharInline(&writer, surrogate) < 0)
-                        goto onError;
-                    surrogate = 0;
-                }
                 if (base64bits > 0) { /* left-over bits */
                     if (base64bits >= 6) {
                         /* We've seen at least one base-64 character */
+                        s++;
                         errmsg = "partial character in shift sequence";
                         goto utf7Error;
                     }
                     else {
                         /* Some bits remain; they should be zero */
                         if (base64buffer != 0) {
+                            s++;
                             errmsg = "non-zero padding bits in shift sequence";
                             goto utf7Error;
                         }
                     }
                 }
-                if (ch != '-') {
+                if (surrogate && DECODE_DIRECT(ch)) {
+                    if (_PyUnicodeWriter_WriteCharInline(&writer, surrogate) < 0)
+                        goto onError;
+                }
+                surrogate = 0;
+                if (ch == '-') {
                     /* '-' is absorbed; other terminating
                        characters are preserved */
-                    if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
-                        goto onError;
+                    s++;
                 }
             }
         }
@@ -4398,6 +4398,7 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
             }
             else { /* begin base64-encoded section */
                 inShift = 1;
+                surrogate = 0;
                 shiftOutStart = writer.pos;
                 base64bits = 0;
                 base64buffer = 0;
@@ -4429,6 +4430,7 @@ utf7Error:
 
     if (inShift && !consumed) { /* in shift sequence, no more to follow */
         /* if we're in an inconsistent state, that's an error */
+        inShift = 0;
         if (surrogate ||
                 (base64bits >= 6) ||
                 (base64bits > 0 && base64buffer != 0)) {
@@ -13366,6 +13368,7 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
 
         if (maxchar > writer->maxchar || writer->readonly) {
             /* resize + widen */
+            maxchar = Py_MAX(maxchar, writer->maxchar);
             newbuffer = PyUnicode_New(newlen, maxchar);
             if (newbuffer == NULL)
                 return -1;
author	Serhiy Storchaka <storchaka@gmail.com>	2015-10-02 10:14:03 (GMT)
committer	Serhiy Storchaka <storchaka@gmail.com>	2015-10-02 10:14:03 (GMT)
commit	29e68edbf47b708c1f7082c2235d82f41e747635 (patch)
tree	554941fc129b51181ea2aa7799a967a3042ebcec /Objects/unicodeobject.c
parent	5dbe245ef238762c8e1100885e8671bf2e089157 (diff)
parent	58c8f2bb6de115b620cec3cf995f04005573765c (diff)
download	cpython-29e68edbf47b708c1f7082c2235d82f41e747635.zip cpython-29e68edbf47b708c1f7082c2235d82f41e747635.tar.gz cpython-29e68edbf47b708c1f7082c2235d82f41e747635.tar.bz2