summaryrefslogtreecommitdiffstats
path: root/Lib/encodings
diff options
context:
space:
mode:
authorWalter Dörwald <walter@livinglogic.de>2007-08-17 16:41:28 (GMT)
committerWalter Dörwald <walter@livinglogic.de>2007-08-17 16:41:28 (GMT)
commit6e390806495cf30c836615996b94e5ffa258cbef (patch)
treeeef913ca3061a114ff6d301a042408d4d3243ecc /Lib/encodings
parent437e6a3b1588ece44abbb4d65f74f9a841638e1d (diff)
downloadcpython-6e390806495cf30c836615996b94e5ffa258cbef.zip
cpython-6e390806495cf30c836615996b94e5ffa258cbef.tar.gz
cpython-6e390806495cf30c836615996b94e5ffa258cbef.tar.bz2
Backport r57105 and r57145 from the py3k branch: UTF-32 codecs.
Diffstat (limited to 'Lib/encodings')
-rw-r--r--Lib/encodings/aliases.py10
-rw-r--r--Lib/encodings/utf_32.py144
-rw-r--r--Lib/encodings/utf_32_be.py37
-rw-r--r--Lib/encodings/utf_32_le.py37
4 files changed, 228 insertions, 0 deletions
diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py
index cefb2ed..c6f5aeb 100644
--- a/Lib/encodings/aliases.py
+++ b/Lib/encodings/aliases.py
@@ -490,6 +490,16 @@ aliases = {
'unicodelittleunmarked' : 'utf_16_le',
'utf_16le' : 'utf_16_le',
+ # utf_32 codec
+ 'u32' : 'utf_32',
+ 'utf32' : 'utf_32',
+
+ # utf_32_be codec
+ 'utf_32be' : 'utf_32_be',
+
+ # utf_32_le codec
+ 'utf_32le' : 'utf_32_le',
+
# utf_7 codec
'u7' : 'utf_7',
'utf7' : 'utf_7',
diff --git a/Lib/encodings/utf_32.py b/Lib/encodings/utf_32.py
new file mode 100644
index 0000000..4bbd22a
--- /dev/null
+++ b/Lib/encodings/utf_32.py
@@ -0,0 +1,144 @@
+"""
+Python 'utf-32' Codec
+"""
+import codecs, sys
+
+### Codec APIs
+
+encode = codecs.utf_32_encode
+
+def decode(input, errors='strict'):
+ return codecs.utf_32_decode(input, errors, True)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def __init__(self, errors='strict'):
+ codecs.IncrementalEncoder.__init__(self, errors)
+ self.encoder = None
+
+ def encode(self, input, final=False):
+ if self.encoder is None:
+ result = codecs.utf_32_encode(input, self.errors)[0]
+ if sys.byteorder == 'little':
+ self.encoder = codecs.utf_32_le_encode
+ else:
+ self.encoder = codecs.utf_32_be_encode
+ return result
+ return self.encoder(input, self.errors)[0]
+
+ def reset(self):
+ codecs.IncrementalEncoder.reset(self)
+ self.encoder = None
+
+ def getstate(self):
+ # state info we return to the caller:
+ # 0: stream is in natural order for this platform
+ # 2: endianness hasn't been determined yet
+ # (we're never writing in unnatural order)
+ return (2 if self.encoder is None else 0)
+
+ def setstate(self, state):
+ if state:
+ self.encoder = None
+ else:
+ if sys.byteorder == 'little':
+ self.encoder = codecs.utf_32_le_encode
+ else:
+ self.encoder = codecs.utf_32_be_encode
+
+class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
+ def __init__(self, errors='strict'):
+ codecs.BufferedIncrementalDecoder.__init__(self, errors)
+ self.decoder = None
+
+ def _buffer_decode(self, input, errors, final):
+ if self.decoder is None:
+ (output, consumed, byteorder) = \
+ codecs.utf_32_ex_decode(input, errors, 0, final)
+ if byteorder == -1:
+ self.decoder = codecs.utf_32_le_decode
+ elif byteorder == 1:
+ self.decoder = codecs.utf_32_be_decode
+ elif consumed >= 4:
+ raise UnicodeError("UTF-32 stream does not start with BOM")
+ return (output, consumed)
+ return self.decoder(input, self.errors, final)
+
+ def reset(self):
+ codecs.BufferedIncrementalDecoder.reset(self)
+ self.decoder = None
+
+ def getstate(self):
+ # additonal state info from the base class must be None here,
+ # as it isn't passed along to the caller
+ state = codecs.BufferedIncrementalDecoder.getstate(self)[0]
+ # additional state info we pass to the caller:
+ # 0: stream is in natural order for this platform
+ # 1: stream is in unnatural order
+ # 2: endianness hasn't been determined yet
+ if self.decoder is None:
+ return (state, 2)
+ addstate = int((sys.byteorder == "big") !=
+ (self.decoder is codecs.utf_32_be_decode))
+ return (state, addstate)
+
+ def setstate(self, state):
+ # state[1] will be ignored by BufferedIncrementalDecoder.setstate()
+ codecs.BufferedIncrementalDecoder.setstate(self, state)
+ state = state[1]
+ if state == 0:
+ self.decoder = (codecs.utf_32_be_decode
+ if sys.byteorder == "big"
+ else codecs.utf_32_le_decode)
+ elif state == 1:
+ self.decoder = (codecs.utf_32_le_decode
+ if sys.byteorder == "big"
+ else codecs.utf_32_be_decode)
+ else:
+ self.decoder = None
+
+class StreamWriter(codecs.StreamWriter):
+ def __init__(self, stream, errors='strict'):
+ self.bom_written = False
+ codecs.StreamWriter.__init__(self, stream, errors)
+
+ def encode(self, input, errors='strict'):
+ self.bom_written = True
+ result = codecs.utf_32_encode(input, errors)
+ if sys.byteorder == 'little':
+ self.encode = codecs.utf_32_le_encode
+ else:
+ self.encode = codecs.utf_32_be_encode
+ return result
+
+class StreamReader(codecs.StreamReader):
+
+ def reset(self):
+ codecs.StreamReader.reset(self)
+ try:
+ del self.decode
+ except AttributeError:
+ pass
+
+ def decode(self, input, errors='strict'):
+ (object, consumed, byteorder) = \
+ codecs.utf_32_ex_decode(input, errors, 0, False)
+ if byteorder == -1:
+ self.decode = codecs.utf_32_le_decode
+ elif byteorder == 1:
+ self.decode = codecs.utf_32_be_decode
+ elif consumed>=4:
+ raise UnicodeError,"UTF-32 stream does not start with BOM"
+ return (object, consumed)
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='utf-32',
+ encode=encode,
+ decode=decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
diff --git a/Lib/encodings/utf_32_be.py b/Lib/encodings/utf_32_be.py
new file mode 100644
index 0000000..fe272b5
--- /dev/null
+++ b/Lib/encodings/utf_32_be.py
@@ -0,0 +1,37 @@
+"""
+Python 'utf-32-be' Codec
+"""
+import codecs
+
+### Codec APIs
+
+encode = codecs.utf_32_be_encode
+
+def decode(input, errors='strict'):
+ return codecs.utf_32_be_decode(input, errors, True)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.utf_32_be_encode(input, self.errors)[0]
+
+class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
+ _buffer_decode = codecs.utf_32_be_decode
+
+class StreamWriter(codecs.StreamWriter):
+ encode = codecs.utf_32_be_encode
+
+class StreamReader(codecs.StreamReader):
+ decode = codecs.utf_32_be_decode
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='utf-32-be',
+ encode=encode,
+ decode=decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
diff --git a/Lib/encodings/utf_32_le.py b/Lib/encodings/utf_32_le.py
new file mode 100644
index 0000000..9e48210
--- /dev/null
+++ b/Lib/encodings/utf_32_le.py
@@ -0,0 +1,37 @@
+"""
+Python 'utf-32-le' Codec
+"""
+import codecs
+
+### Codec APIs
+
+encode = codecs.utf_32_le_encode
+
+def decode(input, errors='strict'):
+ return codecs.utf_32_le_decode(input, errors, True)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.utf_32_le_encode(input, self.errors)[0]
+
+class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
+ _buffer_decode = codecs.utf_32_le_decode
+
+class StreamWriter(codecs.StreamWriter):
+ encode = codecs.utf_32_le_encode
+
+class StreamReader(codecs.StreamReader):
+ decode = codecs.utf_32_le_decode
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='utf-32-le',
+ encode=encode,
+ decode=decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )