diff options
author | Thomas Wouters <thomas@python.org> | 2006-04-21 09:43:23 (GMT) |
---|---|---|
committer | Thomas Wouters <thomas@python.org> | 2006-04-21 09:43:23 (GMT) |
commit | a977329b6fb0e4c95cabb9043794de69b27a1099 (patch) | |
tree | b91552a0578639bd10181ab612039c1bed9bec27 /Lib/encodings/utf_8_sig.py | |
parent | d858f70617a9df8456e89a898ad8f97bd57c09f9 (diff) | |
download | cpython-a977329b6fb0e4c95cabb9043794de69b27a1099.zip cpython-a977329b6fb0e4c95cabb9043794de69b27a1099.tar.gz cpython-a977329b6fb0e4c95cabb9043794de69b27a1099.tar.bz2 |
Merge part of the trunk changes into the p3yk branch. This merges from 43030
(branch-creation time) up to 43067. 43068 and 43069 contain a little
swapping action between re.py and sre.py, and this mightily confuses svn
merge, so later changes are going in separately.
This merge should break no additional tests.
The last-merged revision is going in a 'last_merge' property on '.' (the
branch directory.) Arbitrarily chosen, really; if there's a BCP for this, I
couldn't find it, but we can easily change it afterwards ;)
Diffstat (limited to 'Lib/encodings/utf_8_sig.py')
-rw-r--r-- | Lib/encodings/utf_8_sig.py | 47 |
1 files changed, 45 insertions, 2 deletions
```diff
diff --git a/Lib/encodings/utf_8_sig.py b/Lib/encodings/utf_8_sig.py
index fa437e6..cd14ab0 100644
--- a/Lib/encodings/utf_8_sig.py
+++ b/Lib/encodings/utf_8_sig.py
@@ -22,6 +22,42 @@ def decode(input, errors='strict'):
     (output, consumed) = codecs.utf_8_decode(input, errors, True)
     return (output, consumed+prefix)
 
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    def __init__(self, errors='strict'):
+        codecs.IncrementalEncoder.__init__(self, errors)
+        self.first = True
+
+    def encode(self, input, final=False):
+        if self.first:
+            self.first = False
+            return codecs.BOM_UTF8 + codecs.utf_8_encode(input, errors)[0]
+        else:
+            return codecs.utf_8_encode(input, errors)[0]
+
+    def reset(self):
+        codecs.IncrementalEncoder.reset(self)
+        self.first = True
+
+class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
+    def __init__(self, errors='strict'):
+        codecs.BufferedIncrementalDecoder.__init__(self, errors)
+        self.first = True
+
+    def _buffer_decode(self, input, errors, final):
+        if self.first and codecs.BOM_UTF8.startswith(input): # might be a BOM
+            if len(input) < 3:
+                # not enough data to decide if this really is a BOM
+                # => try again on the next call
+                return (u"", 0)
+            (output, consumed) = codecs.utf_8_decode(input[3:], errors, final)
+            self.first = False
+            return (output, consumed+3)
+        return codecs.utf_8_decode(input, errors, final)
+
+    def reset(self):
+        codecs.BufferedIncrementalDecoder.reset(self)
+        self.first = True
+
 class StreamWriter(codecs.StreamWriter):
     def reset(self):
         codecs.StreamWriter.reset(self)
@@ -53,5 +89,12 @@ class StreamReader(codecs.StreamReader):
 ### encodings module API
 
 def getregentry():
-
-    return (encode,decode,StreamReader,StreamWriter)
+    return codecs.CodecInfo(
+        name='utf-8-sig',
+        encode=encode,
+        decode=decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
```