diff options
author | Walter Dörwald <walter@livinglogic.de> | 2007-08-17 16:41:28 (GMT) |
---|---|---|
committer | Walter Dörwald <walter@livinglogic.de> | 2007-08-17 16:41:28 (GMT) |
commit | 6e390806495cf30c836615996b94e5ffa258cbef (patch) | |
tree | eef913ca3061a114ff6d301a042408d4d3243ecc /Lib/encodings | |
parent | 437e6a3b1588ece44abbb4d65f74f9a841638e1d (diff) | |
download | cpython-6e390806495cf30c836615996b94e5ffa258cbef.zip cpython-6e390806495cf30c836615996b94e5ffa258cbef.tar.gz cpython-6e390806495cf30c836615996b94e5ffa258cbef.tar.bz2 |
Backport r57105 and r57145 from the py3k branch: UTF-32 codecs.
Diffstat (limited to 'Lib/encodings')
-rw-r--r-- | Lib/encodings/aliases.py | 10 | ||||
-rw-r--r-- | Lib/encodings/utf_32.py | 144 | ||||
-rw-r--r-- | Lib/encodings/utf_32_be.py | 37 | ||||
-rw-r--r-- | Lib/encodings/utf_32_le.py | 37 |
4 files changed, 228 insertions, 0 deletions
diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py index cefb2ed..c6f5aeb 100644 --- a/Lib/encodings/aliases.py +++ b/Lib/encodings/aliases.py @@ -490,6 +490,16 @@ aliases = { 'unicodelittleunmarked' : 'utf_16_le', 'utf_16le' : 'utf_16_le', + # utf_32 codec + 'u32' : 'utf_32', + 'utf32' : 'utf_32', + + # utf_32_be codec + 'utf_32be' : 'utf_32_be', + + # utf_32_le codec + 'utf_32le' : 'utf_32_le', + # utf_7 codec 'u7' : 'utf_7', 'utf7' : 'utf_7', diff --git a/Lib/encodings/utf_32.py b/Lib/encodings/utf_32.py new file mode 100644 index 0000000..4bbd22a --- /dev/null +++ b/Lib/encodings/utf_32.py @@ -0,0 +1,144 @@ +""" +Python 'utf-32' Codec +""" +import codecs, sys + +### Codec APIs + +encode = codecs.utf_32_encode + +def decode(input, errors='strict'): + return codecs.utf_32_decode(input, errors, True) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def __init__(self, errors='strict'): + codecs.IncrementalEncoder.__init__(self, errors) + self.encoder = None + + def encode(self, input, final=False): + if self.encoder is None: + result = codecs.utf_32_encode(input, self.errors)[0] + if sys.byteorder == 'little': + self.encoder = codecs.utf_32_le_encode + else: + self.encoder = codecs.utf_32_be_encode + return result + return self.encoder(input, self.errors)[0] + + def reset(self): + codecs.IncrementalEncoder.reset(self) + self.encoder = None + + def getstate(self): + # state info we return to the caller: + # 0: stream is in natural order for this platform + # 2: endianness hasn't been determined yet + # (we're never writing in unnatural order) + return (2 if self.encoder is None else 0) + + def setstate(self, state): + if state: + self.encoder = None + else: + if sys.byteorder == 'little': + self.encoder = codecs.utf_32_le_encode + else: + self.encoder = codecs.utf_32_be_encode + +class IncrementalDecoder(codecs.BufferedIncrementalDecoder): + def __init__(self, errors='strict'): + codecs.BufferedIncrementalDecoder.__init__(self, errors) + self.decoder = None + + def _buffer_decode(self, input, errors, final): + if self.decoder is None: + (output, consumed, byteorder) = \ + codecs.utf_32_ex_decode(input, errors, 0, final) + if byteorder == -1: + self.decoder = codecs.utf_32_le_decode + elif byteorder == 1: + self.decoder = codecs.utf_32_be_decode + elif consumed >= 4: + raise UnicodeError("UTF-32 stream does not start with BOM") + return (output, consumed) + return self.decoder(input, self.errors, final) + + def reset(self): + codecs.BufferedIncrementalDecoder.reset(self) + self.decoder = None + + def getstate(self): + # additonal state info from the base class must be None here, + # as it isn't passed along to the caller + state = codecs.BufferedIncrementalDecoder.getstate(self)[0] + # additional state info we pass to the caller: + # 0: stream is in natural order for this platform + # 1: stream is in unnatural order + # 2: endianness hasn't been determined yet + if self.decoder is None: + return (state, 2) + addstate = int((sys.byteorder == "big") != + (self.decoder is codecs.utf_32_be_decode)) + return (state, addstate) + + def setstate(self, state): + # state[1] will be ignored by BufferedIncrementalDecoder.setstate() + codecs.BufferedIncrementalDecoder.setstate(self, state) + state = state[1] + if state == 0: + self.decoder = (codecs.utf_32_be_decode + if sys.byteorder == "big" + else codecs.utf_32_le_decode) + elif state == 1: + self.decoder = (codecs.utf_32_le_decode + if sys.byteorder == "big" + else codecs.utf_32_be_decode) + else: + self.decoder = None + +class StreamWriter(codecs.StreamWriter): + def __init__(self, stream, errors='strict'): + self.bom_written = False + codecs.StreamWriter.__init__(self, stream, errors) + + def encode(self, input, errors='strict'): + self.bom_written = True + result = codecs.utf_32_encode(input, errors) + if sys.byteorder == 'little': + self.encode = codecs.utf_32_le_encode + else: + self.encode = codecs.utf_32_be_encode + return result + +class StreamReader(codecs.StreamReader): + + def reset(self): + codecs.StreamReader.reset(self) + try: + del self.decode + except AttributeError: + pass + + def decode(self, input, errors='strict'): + (object, consumed, byteorder) = \ + codecs.utf_32_ex_decode(input, errors, 0, False) + if byteorder == -1: + self.decode = codecs.utf_32_le_decode + elif byteorder == 1: + self.decode = codecs.utf_32_be_decode + elif consumed>=4: + raise UnicodeError,"UTF-32 stream does not start with BOM" + return (object, consumed) + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='utf-32', + encode=encode, + decode=decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/utf_32_be.py b/Lib/encodings/utf_32_be.py new file mode 100644 index 0000000..fe272b5 --- /dev/null +++ b/Lib/encodings/utf_32_be.py @@ -0,0 +1,37 @@ +""" +Python 'utf-32-be' Codec +""" +import codecs + +### Codec APIs + +encode = codecs.utf_32_be_encode + +def decode(input, errors='strict'): + return codecs.utf_32_be_decode(input, errors, True) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.utf_32_be_encode(input, self.errors)[0] + +class IncrementalDecoder(codecs.BufferedIncrementalDecoder): + _buffer_decode = codecs.utf_32_be_decode + +class StreamWriter(codecs.StreamWriter): + encode = codecs.utf_32_be_encode + +class StreamReader(codecs.StreamReader): + decode = codecs.utf_32_be_decode + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='utf-32-be', + encode=encode, + decode=decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/utf_32_le.py b/Lib/encodings/utf_32_le.py new file mode 100644 index 0000000..9e48210 --- /dev/null +++ b/Lib/encodings/utf_32_le.py @@ -0,0 +1,37 @@ +""" +Python 'utf-32-le' Codec +""" +import codecs + +### Codec APIs + +encode = codecs.utf_32_le_encode + +def decode(input, errors='strict'): + return codecs.utf_32_le_decode(input, errors, True) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.utf_32_le_encode(input, self.errors)[0] + +class IncrementalDecoder(codecs.BufferedIncrementalDecoder): + _buffer_decode = codecs.utf_32_le_decode + +class StreamWriter(codecs.StreamWriter): + encode = codecs.utf_32_le_encode + +class StreamReader(codecs.StreamReader): + decode = codecs.utf_32_le_decode + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='utf-32-le', + encode=encode, + decode=decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) |