summaryrefslogtreecommitdiffstats
path: root/Lib/encodings/utf_8_sig.py
diff options
context:
space:
mode:
authorThomas Wouters <thomas@python.org>2006-04-21 09:43:23 (GMT)
committerThomas Wouters <thomas@python.org>2006-04-21 09:43:23 (GMT)
commita977329b6fb0e4c95cabb9043794de69b27a1099 (patch)
treeb91552a0578639bd10181ab612039c1bed9bec27 /Lib/encodings/utf_8_sig.py
parentd858f70617a9df8456e89a898ad8f97bd57c09f9 (diff)
downloadcpython-a977329b6fb0e4c95cabb9043794de69b27a1099.zip
cpython-a977329b6fb0e4c95cabb9043794de69b27a1099.tar.gz
cpython-a977329b6fb0e4c95cabb9043794de69b27a1099.tar.bz2
Merge part of the trunk changes into the p3yk branch. This merges from 43030
(branch-creation time) up to 43067. 43068 and 43069 contain a little swapping action between re.py and sre.py, and this mightily confuses svn merge, so later changes are going in separately. This merge should break no additional tests. The last-merged revision is going in a 'last_merge' property on '.' (the branch directory.) Arbitrarily chosen, really; if there's a BCP for this, I couldn't find it, but we can easily change it afterwards ;)
Diffstat (limited to 'Lib/encodings/utf_8_sig.py')
-rw-r--r--Lib/encodings/utf_8_sig.py47
1 files changed, 45 insertions, 2 deletions
diff --git a/Lib/encodings/utf_8_sig.py b/Lib/encodings/utf_8_sig.py
index fa437e6..cd14ab0 100644
--- a/Lib/encodings/utf_8_sig.py
+++ b/Lib/encodings/utf_8_sig.py
@@ -22,6 +22,42 @@ def decode(input, errors='strict'):
(output, consumed) = codecs.utf_8_decode(input, errors, True)
return (output, consumed+prefix)
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def __init__(self, errors='strict'):
+ codecs.IncrementalEncoder.__init__(self, errors)
+ self.first = True
+
+ def encode(self, input, final=False):
+ if self.first:
+ self.first = False
+ return codecs.BOM_UTF8 + codecs.utf_8_encode(input, errors)[0]
+ else:
+ return codecs.utf_8_encode(input, errors)[0]
+
+ def reset(self):
+ codecs.IncrementalEncoder.reset(self)
+ self.first = True
+
+class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
+ def __init__(self, errors='strict'):
+ codecs.BufferedIncrementalDecoder.__init__(self, errors)
+ self.first = True
+
+ def _buffer_decode(self, input, errors, final):
+ if self.first and codecs.BOM_UTF8.startswith(input): # might be a BOM
+ if len(input) < 3:
+ # not enough data to decide if this really is a BOM
+ # => try again on the next call
+ return (u"", 0)
+ (output, consumed) = codecs.utf_8_decode(input[3:], errors, final)
+ self.first = False
+ return (output, consumed+3)
+ return codecs.utf_8_decode(input, errors, final)
+
+ def reset(self):
+ codecs.BufferedIncrementalDecoder.reset(self)
+ self.first = True
+
class StreamWriter(codecs.StreamWriter):
def reset(self):
codecs.StreamWriter.reset(self)
@@ -53,5 +89,12 @@ class StreamReader(codecs.StreamReader):
### encodings module API
def getregentry():
-
- return (encode,decode,StreamReader,StreamWriter)
+ return codecs.CodecInfo(
+ name='utf-8-sig',
+ encode=encode,
+ decode=decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )