diff options
| -rw-r--r-- | Lib/encodings/utf_16.py | 16 | ||||
| -rw-r--r-- | Lib/encodings/utf_8_sig.py | 12 | ||||
| -rw-r--r-- | Misc/NEWS | 3 |
3 files changed, 28 insertions, 3 deletions
diff --git a/Lib/encodings/utf_16.py b/Lib/encodings/utf_16.py
index b145a5d..f3fadff 100644
--- a/Lib/encodings/utf_16.py
+++ b/Lib/encodings/utf_16.py
@@ -34,6 +34,22 @@ class IncrementalEncoder(codecs.IncrementalEncoder):
         codecs.IncrementalEncoder.reset(self)
         self.encoder = None
 
+    def getstate(self):
+        # state info we return to the caller:
+        # 0: stream is in natural order for this platform
+        # 2: endianness hasn't been determined yet
+        # (we're never writing in unnatural order)
+        return (2 if self.encoder is None else 0)
+
+    def setstate(self, state):
+        if state:
+            self.encoder = None
+        else:
+            if sys.byteorder == 'little':
+                self.encoder = codecs.utf_16_le_encode
+            else:
+                self.encoder = codecs.utf_16_be_encode
+
 class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
     def __init__(self, errors='strict'):
         codecs.BufferedIncrementalDecoder.__init__(self, errors)
diff --git a/Lib/encodings/utf_8_sig.py b/Lib/encodings/utf_8_sig.py
index 697ba95..8784694 100644
--- a/Lib/encodings/utf_8_sig.py
+++ b/Lib/encodings/utf_8_sig.py
@@ -25,18 +25,24 @@ def decode(input, errors='strict'):
 class IncrementalEncoder(codecs.IncrementalEncoder):
     def __init__(self, errors='strict'):
         codecs.IncrementalEncoder.__init__(self, errors)
-        self.first = True
+        self.first = 1
 
     def encode(self, input, final=False):
         if self.first:
-            self.first = False
+            self.first = 0
             return codecs.BOM_UTF8 + codecs.utf_8_encode(input, self.errors)[0]
         else:
             return codecs.utf_8_encode(input, self.errors)[0]
 
     def reset(self):
         codecs.IncrementalEncoder.reset(self)
-        self.first = True
+        self.first = 1
+
+    def getstate(self):
+        return self.first
+
+    def setstate(self, state):
+        self.first = state
 
 class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
     def __init__(self, errors='strict'):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@ What's New in Python 2.6.6 alpha 1?
 Core and Builtins
 -----------------
 
+- Issue #6213: Implement getstate() and setstate() methods of utf-8-sig and
+  utf-16 incremental encoders.
+
 - Issue #8271: during the decoding of an invalid UTF-8 byte sequence, only the
   start byte and the continuation byte(s) are now considered invalid, instead
   of the number of bytes specified by the start byte.
