summaryrefslogtreecommitdiffstats
path: root/Lib/encodings
diff options
context:
space:
mode:
authorWalter Dörwald <walter@livinglogic.de>2007-04-16 22:10:50 (GMT)
committerWalter Dörwald <walter@livinglogic.de>2007-04-16 22:10:50 (GMT)
commit3abcb013b8195aea38f80968d4111b5ac7e68c0b (patch)
treedffc08076dd91cbb7860e8115685b939d067a27f /Lib/encodings
parent8981ad05c0f9c0edc8c2c7aeaad3615805abe907 (diff)
downloadcpython-3abcb013b8195aea38f80968d4111b5ac7e68c0b.zip
cpython-3abcb013b8195aea38f80968d4111b5ac7e68c0b.tar.gz
cpython-3abcb013b8195aea38f80968d4111b5ac7e68c0b.tar.bz2
Apply SF patch #1698994: Add getstate() and setstate()
methods to incrementalcodecs. Also forward port r54786 (fix the incremental utf_8_sig decoder).
Diffstat (limited to 'Lib/encodings')
-rw-r--r--Lib/encodings/utf_16.py45
-rw-r--r--Lib/encodings/utf_8_sig.py52
2 files changed, 83 insertions, 14 deletions
diff --git a/Lib/encodings/utf_16.py b/Lib/encodings/utf_16.py
index eff08f3..cf096b5 100644
--- a/Lib/encodings/utf_16.py
+++ b/Lib/encodings/utf_16.py
@@ -34,6 +34,22 @@ class IncrementalEncoder(codecs.IncrementalEncoder):
codecs.IncrementalEncoder.reset(self)
self.encoder = None
+ def getstate(self):
+ # state info we return to the caller:
+ # 0: stream is in natural order for this platform
+ # 2: endianness hasn't been determined yet
+ # (we're never writing in unnatural order)
+ return (2 if self.encoder is None else 0)
+
+ def setstate(self, state):
+ if state:
+ self.encoder = None
+ else:
+ if sys.byteorder == 'little':
+ self.encoder = codecs.utf_16_le_encode
+ else:
+ self.encoder = codecs.utf_16_be_encode
+
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
def __init__(self, errors='strict'):
codecs.BufferedIncrementalDecoder.__init__(self, errors)
@@ -56,6 +72,35 @@ class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
codecs.BufferedIncrementalDecoder.reset(self)
self.decoder = None
+ def getstate(self):
+ # additonal state info from the base class must be None here,
+ # as it isn't passed along to the caller
+ state = codecs.BufferedIncrementalDecoder.getstate(self)[0]
+ # additional state info we pass to the caller:
+ # 0: stream is in natural order for this platform
+ # 1: stream is in unnatural order
+ # 2: endianness hasn't been determined yet
+ if self.decoder is None:
+ return (state, 2)
+ addstate = int((sys.byteorder == "big") !=
+ (self.decoder is codecs.utf_16_be_decode))
+ return (state, addstate)
+
+ def setstate(self, state):
+ # state[1] will be ignored by BufferedIncrementalDecoder.setstate()
+ codecs.BufferedIncrementalDecoder.setstate(self, state)
+ state = state[1]
+ if state == 0:
+ self.decoder = (codecs.utf_16_be_decode
+ if sys.byteorder == "big"
+ else codecs.utf_16_le_decode)
+ elif state == 1:
+ self.decoder = (codecs.utf_16_le_decode
+ if sys.byteorder == "big"
+ else codecs.utf_16_be_decode)
+ else:
+ self.decoder = None
+
class StreamWriter(codecs.StreamWriter):
def __init__(self, stream, errors='strict'):
self.bom_written = False
diff --git a/Lib/encodings/utf_8_sig.py b/Lib/encodings/utf_8_sig.py
index d751da6..a0cc1af 100644
--- a/Lib/encodings/utf_8_sig.py
+++ b/Lib/encodings/utf_8_sig.py
@@ -12,7 +12,8 @@ import codecs
### Codec APIs
def encode(input, errors='strict'):
- return (codecs.BOM_UTF8 + codecs.utf_8_encode(input, errors)[0], len(input))
+ return (codecs.BOM_UTF8 + codecs.utf_8_encode(input, errors)[0],
+ len(input))
def decode(input, errors='strict'):
prefix = 0
@@ -25,38 +26,61 @@ def decode(input, errors='strict'):
class IncrementalEncoder(codecs.IncrementalEncoder):
def __init__(self, errors='strict'):
codecs.IncrementalEncoder.__init__(self, errors)
- self.first = True
+ self.first = 1
def encode(self, input, final=False):
if self.first:
- self.first = False
- return codecs.BOM_UTF8 + codecs.utf_8_encode(input, self.errors)[0]
+ self.first = 0
+ return codecs.BOM_UTF8 + \
+ codecs.utf_8_encode(input, self.errors)[0]
else:
return codecs.utf_8_encode(input, self.errors)[0]
def reset(self):
codecs.IncrementalEncoder.reset(self)
- self.first = True
+ self.first = 1
+
+ def getstate(self):
+ return self.first
+
+ def setstate(self, state):
+ self.first = state
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
def __init__(self, errors='strict'):
codecs.BufferedIncrementalDecoder.__init__(self, errors)
- self.first = True
+ self.first = 1
def _buffer_decode(self, input, errors, final):
- if self.first and codecs.BOM_UTF8.startswith(input): # might be a BOM
+ if self.first:
if len(input) < 3:
- # not enough data to decide if this really is a BOM
- # => try again on the next call
- return (u"", 0)
- (output, consumed) = codecs.utf_8_decode(input[3:], errors, final)
- self.first = False
- return (output, consumed+3)
+ if codecs.BOM_UTF8.startswith(input):
+ # not enough data to decide if this really is a BOM
+ # => try again on the next call
+ return (u"", 0)
+ else:
+ self.first = 0
+ else:
+ self.first = 0
+ if input[:3] == codecs.BOM_UTF8:
+ (output, consumed) = \
+ codecs.utf_8_decode(input[3:], errors, final)
+ return (output, consumed+3)
return codecs.utf_8_decode(input, errors, final)
def reset(self):
codecs.BufferedIncrementalDecoder.reset(self)
- self.first = True
+ self.first = 1
+
+ def getstate(self):
+ state = codecs.BufferedIncrementalDecoder.getstate(self)
+ # state[1] must be 0 here, as it isn't passed along to the caller
+ return (state[0], self.first)
+
+ def setstate(self, state):
+ # state[1] will be ignored by BufferedIncrementalDecoder.setstate()
+ codecs.BufferedIncrementalDecoder.setstate(self, state)
+ self.first = state[1]
class StreamWriter(codecs.StreamWriter):
def reset(self):