diff options
author | Walter Dörwald <walter@livinglogic.de> | 2007-11-19 12:41:10 (GMT) |
---|---|---|
committer | Walter Dörwald <walter@livinglogic.de> | 2007-11-19 12:41:10 (GMT) |
commit | 183744d6b9d45e77c3bd2dc30a6eb41c9f1c58f1 (patch) | |
tree | 9999590e5cb293ba30ebc9133ac0860f3999bcc9 /Lib/encodings | |
parent | fc7e72d1c6f9977d027dcea827707f8cbd5fa4bc (diff) | |
download | cpython-183744d6b9d45e77c3bd2dc30a6eb41c9f1c58f1.zip cpython-183744d6b9d45e77c3bd2dc30a6eb41c9f1c58f1.tar.gz cpython-183744d6b9d45e77c3bd2dc30a6eb41c9f1c58f1.tar.bz2 |
Fix for #1444: utf_8_sig.StreamReader was (indirectly through decode())
calling codecs.utf_8_decode() with final==True, which failed with incomplete
byte sequences. Fix and test by James G. Sack.
Diffstat (limited to 'Lib/encodings')
-rw-r--r-- | Lib/encodings/utf_8_sig.py | 16 |
1 files changed, 11 insertions, 5 deletions
diff --git a/Lib/encodings/utf_8_sig.py b/Lib/encodings/utf_8_sig.py
index 92678d2..697ba95 100644
--- a/Lib/encodings/utf_8_sig.py
+++ b/Lib/encodings/utf_8_sig.py
@@ -84,12 +84,18 @@ class StreamReader(codecs.StreamReader):
             pass
 
     def decode(self, input, errors='strict'):
-        if len(input) < 3 and codecs.BOM_UTF8.startswith(input):
-            # not enough data to decide if this is a BOM
-            # => try again on the next call
-            return (u"", 0)
+        if len(input) < 3:
+            if codecs.BOM_UTF8.startswith(input):
+                # not enough data to decide if this is a BOM
+                # => try again on the next call
+                return (u"", 0)
+        elif input[:3] == codecs.BOM_UTF8:
+            self.decode = codecs.utf_8_decode
+            (output, consumed) = codecs.utf_8_decode(input[3:],errors)
+            return (output, consumed+3)
+        # (else) no BOM present
         self.decode = codecs.utf_8_decode
-        return decode(input, errors)
+        return codecs.utf_8_decode(input, errors)
 
 ### encodings module API
 