diff options
author | Walter Dörwald <walter@livinglogic.de> | 2007-04-16 22:10:50 (GMT) |
---|---|---|
committer | Walter Dörwald <walter@livinglogic.de> | 2007-04-16 22:10:50 (GMT) |
commit | 3abcb013b8195aea38f80968d4111b5ac7e68c0b (patch) | |
tree | dffc08076dd91cbb7860e8115685b939d067a27f /Lib/encodings/utf_8_sig.py | |
parent | 8981ad05c0f9c0edc8c2c7aeaad3615805abe907 (diff) | |
download | cpython-3abcb013b8195aea38f80968d4111b5ac7e68c0b.zip cpython-3abcb013b8195aea38f80968d4111b5ac7e68c0b.tar.gz cpython-3abcb013b8195aea38f80968d4111b5ac7e68c0b.tar.bz2 |
Apply SF patch #1698994: Add getstate() and setstate()
methods to incremental codecs.
Also forward port r54786 (fix the incremental
utf_8_sig decoder).
Diffstat (limited to 'Lib/encodings/utf_8_sig.py')
-rw-r--r-- | Lib/encodings/utf_8_sig.py | 52 |
1 files changed, 38 insertions, 14 deletions
diff --git a/Lib/encodings/utf_8_sig.py b/Lib/encodings/utf_8_sig.py index d751da6..a0cc1af 100644 --- a/Lib/encodings/utf_8_sig.py +++ b/Lib/encodings/utf_8_sig.py @@ -12,7 +12,8 @@ import codecs ### Codec APIs def encode(input, errors='strict'): - return (codecs.BOM_UTF8 + codecs.utf_8_encode(input, errors)[0], len(input)) + return (codecs.BOM_UTF8 + codecs.utf_8_encode(input, errors)[0], + len(input)) def decode(input, errors='strict'): prefix = 0 @@ -25,38 +26,61 @@ def decode(input, errors='strict'): class IncrementalEncoder(codecs.IncrementalEncoder): def __init__(self, errors='strict'): codecs.IncrementalEncoder.__init__(self, errors) - self.first = True + self.first = 1 def encode(self, input, final=False): if self.first: - self.first = False - return codecs.BOM_UTF8 + codecs.utf_8_encode(input, self.errors)[0] + self.first = 0 + return codecs.BOM_UTF8 + \ + codecs.utf_8_encode(input, self.errors)[0] else: return codecs.utf_8_encode(input, self.errors)[0] def reset(self): codecs.IncrementalEncoder.reset(self) - self.first = True + self.first = 1 + + def getstate(self): + return self.first + + def setstate(self, state): + self.first = state class IncrementalDecoder(codecs.BufferedIncrementalDecoder): def __init__(self, errors='strict'): codecs.BufferedIncrementalDecoder.__init__(self, errors) - self.first = True + self.first = 1 def _buffer_decode(self, input, errors, final): - if self.first and codecs.BOM_UTF8.startswith(input): # might be a BOM + if self.first: if len(input) < 3: - # not enough data to decide if this really is a BOM - # => try again on the next call - return (u"", 0) - (output, consumed) = codecs.utf_8_decode(input[3:], errors, final) - self.first = False - return (output, consumed+3) + if codecs.BOM_UTF8.startswith(input): + # not enough data to decide if this really is a BOM + # => try again on the next call + return (u"", 0) + else: + self.first = 0 + else: + self.first = 0 + if input[:3] == codecs.BOM_UTF8: + (output, consumed) = \ + 
codecs.utf_8_decode(input[3:], errors, final) + return (output, consumed+3) return codecs.utf_8_decode(input, errors, final) def reset(self): codecs.BufferedIncrementalDecoder.reset(self) - self.first = True + self.first = 1 + + def getstate(self): + state = codecs.BufferedIncrementalDecoder.getstate(self) + # state[1] must be 0 here, as it isn't passed along to the caller + return (state[0], self.first) + + def setstate(self, state): + # state[1] will be ignored by BufferedIncrementalDecoder.setstate() + codecs.BufferedIncrementalDecoder.setstate(self, state) + self.first = state[1] class StreamWriter(codecs.StreamWriter): def reset(self): |