summary | refs | log | tree | commit | diff | stats
path: root/Lib/encodings/utf_8_sig.py
diff options
context:
space:
mode:
author: Walter Dörwald <walter@livinglogic.de> 2007-04-16 22:10:50 (GMT)
committer: Walter Dörwald <walter@livinglogic.de> 2007-04-16 22:10:50 (GMT)
commit: 3abcb013b8195aea38f80968d4111b5ac7e68c0b (patch)
tree: dffc08076dd91cbb7860e8115685b939d067a27f /Lib/encodings/utf_8_sig.py
parent: 8981ad05c0f9c0edc8c2c7aeaad3615805abe907 (diff)
download: cpython-3abcb013b8195aea38f80968d4111b5ac7e68c0b.zip
          cpython-3abcb013b8195aea38f80968d4111b5ac7e68c0b.tar.gz
          cpython-3abcb013b8195aea38f80968d4111b5ac7e68c0b.tar.bz2
Apply SF patch #1698994: Add getstate() and setstate() methods to
incremental codecs. Also forward port r54786 (fix the incremental utf_8_sig decoder).
Diffstat (limited to 'Lib/encodings/utf_8_sig.py')
-rw-r--r-- Lib/encodings/utf_8_sig.py 52
1 files changed, 38 insertions, 14 deletions
diff --git a/Lib/encodings/utf_8_sig.py b/Lib/encodings/utf_8_sig.py
index d751da6..a0cc1af 100644
--- a/Lib/encodings/utf_8_sig.py
+++ b/Lib/encodings/utf_8_sig.py
@@ -12,7 +12,8 @@ import codecs
### Codec APIs
def encode(input, errors='strict'):
- return (codecs.BOM_UTF8 + codecs.utf_8_encode(input, errors)[0], len(input))
+ return (codecs.BOM_UTF8 + codecs.utf_8_encode(input, errors)[0],
+ len(input))
def decode(input, errors='strict'):
prefix = 0
@@ -25,38 +26,61 @@ def decode(input, errors='strict'):
class IncrementalEncoder(codecs.IncrementalEncoder):
def __init__(self, errors='strict'):
codecs.IncrementalEncoder.__init__(self, errors)
- self.first = True
+ self.first = 1
def encode(self, input, final=False):
if self.first:
- self.first = False
- return codecs.BOM_UTF8 + codecs.utf_8_encode(input, self.errors)[0]
+ self.first = 0
+ return codecs.BOM_UTF8 + \
+ codecs.utf_8_encode(input, self.errors)[0]
else:
return codecs.utf_8_encode(input, self.errors)[0]
def reset(self):
codecs.IncrementalEncoder.reset(self)
- self.first = True
+ self.first = 1
+
+ def getstate(self):
+ return self.first
+
+ def setstate(self, state):
+ self.first = state
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
def __init__(self, errors='strict'):
codecs.BufferedIncrementalDecoder.__init__(self, errors)
- self.first = True
+ self.first = 1
def _buffer_decode(self, input, errors, final):
- if self.first and codecs.BOM_UTF8.startswith(input): # might be a BOM
+ if self.first:
if len(input) < 3:
- # not enough data to decide if this really is a BOM
- # => try again on the next call
- return (u"", 0)
- (output, consumed) = codecs.utf_8_decode(input[3:], errors, final)
- self.first = False
- return (output, consumed+3)
+ if codecs.BOM_UTF8.startswith(input):
+ # not enough data to decide if this really is a BOM
+ # => try again on the next call
+ return (u"", 0)
+ else:
+ self.first = 0
+ else:
+ self.first = 0
+ if input[:3] == codecs.BOM_UTF8:
+ (output, consumed) = \
+ codecs.utf_8_decode(input[3:], errors, final)
+ return (output, consumed+3)
return codecs.utf_8_decode(input, errors, final)
def reset(self):
codecs.BufferedIncrementalDecoder.reset(self)
- self.first = True
+ self.first = 1
+
+ def getstate(self):
+ state = codecs.BufferedIncrementalDecoder.getstate(self)
+ # state[1] must be 0 here, as it isn't passed along to the caller
+ return (state[0], self.first)
+
+ def setstate(self, state):
+ # state[1] will be ignored by BufferedIncrementalDecoder.setstate()
+ codecs.BufferedIncrementalDecoder.setstate(self, state)
+ self.first = state[1]
class StreamWriter(codecs.StreamWriter):
def reset(self):