Fix for #1444: utf_8_sig.StreamReader was (indirectly through decode())

calling codecs.utf_8_decode() with final==True, which failed with incomplete byte sequences. Fix and test by James G. Sack.
author: Walter Dörwald <walter@livinglogic.de> 2007-11-19 12:41:10 (GMT)
committer: Walter Dörwald <walter@livinglogic.de> 2007-11-19 12:41:10 (GMT)
commit: 183744d6b9d45e77c3bd2dc30a6eb41c9f1c58f1 (patch)
tree: 9999590e5cb293ba30ebc9133ac0860f3999bcc9 /Lib/encodings
parent: fc7e72d1c6f9977d027dcea827707f8cbd5fa4bc (diff)
download: cpython-183744d6b9d45e77c3bd2dc30a6eb41c9f1c58f1.zip
cpython-183744d6b9d45e77c3bd2dc30a6eb41c9f1c58f1.tar.gz
cpython-183744d6b9d45e77c3bd2dc30a6eb41c9f1c58f1.tar.bz2
1 files changed, 11 insertions, 5 deletions
diff --git a/Lib/encodings/utf_8_sig.py b/Lib/encodings/utf_8_sig.py
index 92678d2..697ba95 100644
--- a/Lib/encodings/utf_8_sig.py
+++ b/Lib/encodings/utf_8_sig.py
@@ -84,12 +84,18 @@ class StreamReader(codecs.StreamReader):
             pass
 
     def decode(self, input, errors='strict'):
-        if len(input) < 3 and codecs.BOM_UTF8.startswith(input):
-            # not enough data to decide if this is a BOM
-            # => try again on the next call
-            return (u"", 0)
+        if len(input) < 3:
+            if codecs.BOM_UTF8.startswith(input):
+                # not enough data to decide if this is a BOM
+                # => try again on the next call
+                return (u"", 0)
+        elif input[:3] == codecs.BOM_UTF8:
+            self.decode = codecs.utf_8_decode
+            (output, consumed) = codecs.utf_8_decode(input[3:],errors)
+            return (output, consumed+3)
+        # (else) no BOM present
         self.decode = codecs.utf_8_decode
-        return decode(input, errors)
+        return codecs.utf_8_decode(input, errors)
 
 ### encodings module API
author	Walter Dörwald <walter@livinglogic.de>	2007-11-19 12:41:10 (GMT)
committer	Walter Dörwald <walter@livinglogic.de>	2007-11-19 12:41:10 (GMT)
commit	183744d6b9d45e77c3bd2dc30a6eb41c9f1c58f1 (patch)
tree	9999590e5cb293ba30ebc9133ac0860f3999bcc9 /Lib/encodings
parent	fc7e72d1c6f9977d027dcea827707f8cbd5fa4bc (diff)
download	cpython-183744d6b9d45e77c3bd2dc30a6eb41c9f1c58f1.zip cpython-183744d6b9d45e77c3bd2dc30a6eb41c9f1c58f1.tar.gz cpython-183744d6b9d45e77c3bd2dc30a6eb41c9f1c58f1.tar.bz2