summary | refs | log | tree | commit | diff | stats
path: root/Lib/encodings/utf_8_sig.py
diff options
context:
space:
mode:
author: Walter Dörwald <walter@livinglogic.de> 2006-03-15 11:35:15 (GMT)
committer: Walter Dörwald <walter@livinglogic.de> 2006-03-15 11:35:15 (GMT)
commit: abb02e59946f9ea3076e96e3b03b51d1cebd46b4 (patch)
tree: 165444acd89173a8832547078cbc417d4626116e /Lib/encodings/utf_8_sig.py
parent: e2ebb2d7f777db2de72cfeb0e3c489ac4cc5c400 (diff)
download: cpython-abb02e59946f9ea3076e96e3b03b51d1cebd46b4.zip
download: cpython-abb02e59946f9ea3076e96e3b03b51d1cebd46b4.tar.gz
download: cpython-abb02e59946f9ea3076e96e3b03b51d1cebd46b4.tar.bz2
Patch #1436130: codecs.lookup() now returns a CodecInfo object (a subclass
of tuple) that provides incremental decoders and encoders (a way to use stateful codecs without the stream API). Functions codecs.getincrementaldecoder() and codecs.getincrementalencoder() have been added.
Diffstat (limited to 'Lib/encodings/utf_8_sig.py')
-rw-r--r-- Lib/encodings/utf_8_sig.py 47
1 files changed, 45 insertions, 2 deletions
diff --git a/Lib/encodings/utf_8_sig.py b/Lib/encodings/utf_8_sig.py
index fa437e6..cd14ab0 100644
--- a/Lib/encodings/utf_8_sig.py
+++ b/Lib/encodings/utf_8_sig.py
@@ -22,6 +22,42 @@ def decode(input, errors='strict'):
(output, consumed) = codecs.utf_8_decode(input, errors, True)
return (output, consumed+prefix)
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-8 encoder that prefixes the output with a UTF-8 BOM.

    The signature (``codecs.BOM_UTF8``) is prepended to the result of the
    first ``encode()`` call after construction or ``reset()``; subsequent
    calls return plain UTF-8.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        # True until the BOM has been written for the current stream.
        self.first = True

    def encode(self, input, final=False):
        """Encode *input* to UTF-8 and return the encoded bytes.

        The BOM is prepended only on the first call.  *final* is accepted
        for API compatibility and is not used by this method.
        """
        # The error policy lives on the instance (set by the base class
        # __init__); a bare ``errors`` name does not exist in this scope.
        encoded = codecs.utf_8_encode(input, self.errors)[0]
        if self.first:
            self.first = False
            return codecs.BOM_UTF8 + encoded
        return encoded

    def reset(self):
        """Reset the encoder so the next ``encode()`` emits the BOM again."""
        codecs.IncrementalEncoder.reset(self)
        self.first = True
+
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-8 decoder that skips a leading UTF-8 BOM, if present.

    Only a BOM at the very start of the stream (after construction or
    ``reset()``) is removed; once the start of the stream has been examined,
    all further input is decoded as plain UTF-8.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        # True until the start of the stream has been checked for a BOM.
        self.first = True

    def _buffer_decode(self, input, errors, final):
        """Decode *input* and return ``(output, consumed)``.

        *input* is the buffered, not-yet-consumed data; *errors* and *final*
        are passed through to ``codecs.utf_8_decode``.
        """
        if self.first:
            if len(input) < 3:
                if codecs.BOM_UTF8.startswith(input):
                    # Not enough data to decide whether this really is a
                    # BOM => consume nothing and try again on the next call.
                    # NOTE: this branch is taken regardless of *final*.
                    return (u"", 0)
                # The data cannot be the beginning of a BOM, so the stream
                # has no signature; stop looking for one.
                self.first = False
            else:
                # Three or more bytes are available: the decision is final
                # either way, so a later BOM-like sequence is kept as data.
                self.first = False
                if input[:3] == codecs.BOM_UTF8:
                    (output, consumed) = codecs.utf_8_decode(
                        input[3:], errors, final)
                    # Account for the three skipped signature bytes.
                    return (output, consumed + 3)
        return codecs.utf_8_decode(input, errors, final)

    def reset(self):
        """Reset the decoder so a BOM at the next stream start is skipped."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.first = True
+
class StreamWriter(codecs.StreamWriter):
def reset(self):
codecs.StreamWriter.reset(self)
@@ -53,5 +89,12 @@ class StreamReader(codecs.StreamReader):
### encodings module API
def getregentry():
    """Return the ``codecs.CodecInfo`` record for the utf-8-sig codec."""
    # Gather every entry point of the codec, then build the record at once.
    entry_points = {
        'name': 'utf-8-sig',
        'encode': encode,
        'decode': decode,
        'incrementalencoder': IncrementalEncoder,
        'incrementaldecoder': IncrementalDecoder,
        'streamreader': StreamReader,
        'streamwriter': StreamWriter,
    }
    return codecs.CodecInfo(**entry_points)