diff options
author | Walter Dörwald <walter@livinglogic.de> | 2007-04-16 22:10:50 (GMT) |
---|---|---|
committer | Walter Dörwald <walter@livinglogic.de> | 2007-04-16 22:10:50 (GMT) |
commit | 3abcb013b8195aea38f80968d4111b5ac7e68c0b (patch) | |
tree | dffc08076dd91cbb7860e8115685b939d067a27f /Lib/encodings | |
parent | 8981ad05c0f9c0edc8c2c7aeaad3615805abe907 (diff) | |
download | cpython-3abcb013b8195aea38f80968d4111b5ac7e68c0b.zip cpython-3abcb013b8195aea38f80968d4111b5ac7e68c0b.tar.gz cpython-3abcb013b8195aea38f80968d4111b5ac7e68c0b.tar.bz2 |
Apply SF patch #1698994: Add getstate() and setstate()
methods to incremental codecs.
Also forward port r54786 (fix the incremental
utf_8_sig decoder).
Diffstat (limited to 'Lib/encodings')
-rw-r--r-- | Lib/encodings/utf_16.py | 45 | ||||
-rw-r--r-- | Lib/encodings/utf_8_sig.py | 52 |
2 files changed, 83 insertions, 14 deletions
```diff
diff --git a/Lib/encodings/utf_16.py b/Lib/encodings/utf_16.py
index eff08f3..cf096b5 100644
--- a/Lib/encodings/utf_16.py
+++ b/Lib/encodings/utf_16.py
@@ -34,6 +34,22 @@ class IncrementalEncoder(codecs.IncrementalEncoder):
         codecs.IncrementalEncoder.reset(self)
         self.encoder = None
 
+    def getstate(self):
+        # state info we return to the caller:
+        # 0: stream is in natural order for this platform
+        # 2: endianness hasn't been determined yet
+        # (we're never writing in unnatural order)
+        return (2 if self.encoder is None else 0)
+
+    def setstate(self, state):
+        if state:
+            self.encoder = None
+        else:
+            if sys.byteorder == 'little':
+                self.encoder = codecs.utf_16_le_encode
+            else:
+                self.encoder = codecs.utf_16_be_encode
+
 class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
     def __init__(self, errors='strict'):
         codecs.BufferedIncrementalDecoder.__init__(self, errors)
@@ -56,6 +72,35 @@ class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
         codecs.BufferedIncrementalDecoder.reset(self)
         self.decoder = None
 
+    def getstate(self):
+        # additonal state info from the base class must be None here,
+        # as it isn't passed along to the caller
+        state = codecs.BufferedIncrementalDecoder.getstate(self)[0]
+        # additional state info we pass to the caller:
+        # 0: stream is in natural order for this platform
+        # 1: stream is in unnatural order
+        # 2: endianness hasn't been determined yet
+        if self.decoder is None:
+            return (state, 2)
+        addstate = int((sys.byteorder == "big") !=
+                       (self.decoder is codecs.utf_16_be_decode))
+        return (state, addstate)
+
+    def setstate(self, state):
+        # state[1] will be ignored by BufferedIncrementalDecoder.setstate()
+        codecs.BufferedIncrementalDecoder.setstate(self, state)
+        state = state[1]
+        if state == 0:
+            self.decoder = (codecs.utf_16_be_decode
+                            if sys.byteorder == "big"
+                            else codecs.utf_16_le_decode)
+        elif state == 1:
+            self.decoder = (codecs.utf_16_le_decode
+                            if sys.byteorder == "big"
+                            else codecs.utf_16_be_decode)
+        else:
+            self.decoder = None
+
 class StreamWriter(codecs.StreamWriter):
     def __init__(self, stream, errors='strict'):
         self.bom_written = False
diff --git a/Lib/encodings/utf_8_sig.py b/Lib/encodings/utf_8_sig.py
index d751da6..a0cc1af 100644
--- a/Lib/encodings/utf_8_sig.py
+++ b/Lib/encodings/utf_8_sig.py
@@ -12,7 +12,8 @@ import codecs
 ### Codec APIs
 
 def encode(input, errors='strict'):
-    return (codecs.BOM_UTF8 + codecs.utf_8_encode(input, errors)[0], len(input))
+    return (codecs.BOM_UTF8 + codecs.utf_8_encode(input, errors)[0],
+            len(input))
 
 def decode(input, errors='strict'):
     prefix = 0
@@ -25,38 +26,61 @@ def decode(input, errors='strict'):
 class IncrementalEncoder(codecs.IncrementalEncoder):
     def __init__(self, errors='strict'):
         codecs.IncrementalEncoder.__init__(self, errors)
-        self.first = True
+        self.first = 1
 
     def encode(self, input, final=False):
         if self.first:
-            self.first = False
-            return codecs.BOM_UTF8 + codecs.utf_8_encode(input, self.errors)[0]
+            self.first = 0
+            return codecs.BOM_UTF8 + \
+                   codecs.utf_8_encode(input, self.errors)[0]
         else:
             return codecs.utf_8_encode(input, self.errors)[0]
 
     def reset(self):
         codecs.IncrementalEncoder.reset(self)
-        self.first = True
+        self.first = 1
+
+    def getstate(self):
+        return self.first
+
+    def setstate(self, state):
+        self.first = state
 
 class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
     def __init__(self, errors='strict'):
         codecs.BufferedIncrementalDecoder.__init__(self, errors)
-        self.first = True
+        self.first = 1
 
     def _buffer_decode(self, input, errors, final):
-        if self.first and codecs.BOM_UTF8.startswith(input): # might be a BOM
+        if self.first:
             if len(input) < 3:
-                # not enough data to decide if this really is a BOM
-                # => try again on the next call
-                return (u"", 0)
-            (output, consumed) = codecs.utf_8_decode(input[3:], errors, final)
-            self.first = False
-            return (output, consumed+3)
+                if codecs.BOM_UTF8.startswith(input):
+                    # not enough data to decide if this really is a BOM
+                    # => try again on the next call
+                    return (u"", 0)
+                else:
+                    self.first = 0
+            else:
+                self.first = 0
+                if input[:3] == codecs.BOM_UTF8:
+                    (output, consumed) = \
+                       codecs.utf_8_decode(input[3:], errors, final)
+                    return (output, consumed+3)
         return codecs.utf_8_decode(input, errors, final)
 
     def reset(self):
         codecs.BufferedIncrementalDecoder.reset(self)
-        self.first = True
+        self.first = 1
+
+    def getstate(self):
+        state = codecs.BufferedIncrementalDecoder.getstate(self)
+        # state[1] must be 0 here, as it isn't passed along to the caller
+        return (state[0], self.first)
+
+    def setstate(self, state):
+        # state[1] will be ignored by BufferedIncrementalDecoder.setstate()
+        codecs.BufferedIncrementalDecoder.setstate(self, state)
+        self.first = state[1]
 
 class StreamWriter(codecs.StreamWriter):
     def reset(self):
```