summary | refs | log | tree | commit | diff | stats
path: root/Lib/codecs.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/codecs.py')
-rw-r--r-- Lib/codecs.py 142
1 files changed, 119 insertions, 23 deletions
diff --git a/Lib/codecs.py b/Lib/codecs.py
index 5c669c0..6a61e1a 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -127,14 +127,19 @@ class StreamWriter(Codec):
self.stream = stream
self.errors = errors
- def write(self,object):
+ def write(self, object):
""" Writes the object's contents encoded to self.stream.
"""
data, consumed = self.encode(object,self.errors)
self.stream.write(data)
- # XXX .writelines() ?
+ def writelines(self, list):
+
+ """ Joins the strings in list into a single string and
+ writes it to the stream with one call to .write().
+ """
+ self.write(''.join(list))
def reset(self):
@@ -179,7 +184,7 @@ class StreamReader(Codec):
self.stream = stream
self.errors = errors
- def read(self,size=-1):
+ def read(self, size=-1):
""" Decodes data from the stream self.stream and returns the
resulting object.
@@ -221,8 +226,44 @@ class StreamReader(Codec):
else:
return object
- # XXX .readline() and .readlines() (these are hard to implement
- # without using buffers for keeping read-ahead data)
+ def readline(self, size=None):
+
+ """ Read one line from the input stream and return the
+ decoded data; decoding honours self.errors.
+
+ Note: Unlike the .readlines() method, line breaking must
+ be implemented by the underlying stream's .readline()
+ method -- there is currently no support for line breaking
+ using the codec decoder due to lack of line buffering.
+
+ size, if given, is passed as size argument to the stream's
+ .readline() method.
+
+ """
+ if size is None:
+ line = self.stream.readline()
+ else:
+ line = self.stream.readline(size)
+ return self.decode(line, self.errors)[0]
+
+
+ def readlines(self, sizehint=None):
+
+ """ Read all lines available on the input stream
+ and return them as a list of lines.
+
+ Line breaks are implemented using the codec's decoder
+ method and are included in the list entries.
+
+ sizehint, if given, is passed as size argument to the
+ stream's .read() method; decoding honours self.errors.
+
+ """
+ if sizehint is None:
+ data = self.stream.read()
+ else:
+ data = self.stream.read(sizehint)
+ return self.decode(data, self.errors)[0].splitlines(1)
def reset(self):
@@ -247,6 +288,9 @@ class StreamReader(Codec):
class StreamReaderWriter:
+ # Optional attributes set by the file wrappers below
+ encoding = 'unknown'
+
def __init__(self,stream,Reader,Writer,errors='strict'):
""" Creates a StreamReaderWriter instance.
@@ -269,10 +313,22 @@ class StreamReaderWriter:
return self.reader.read(size)
+ def readline(self, size=None):
+ """ Return one line read via self.reader.readline(size). """
+ return self.reader.readline(size)
+
+ def readlines(self, sizehint=None):
+ """ Return the list of lines from self.reader.readlines(sizehint). """
+ return self.reader.readlines(sizehint)
+
def write(self,data):
return self.writer.write(data)
+ def writelines(self,list):
+ """ Forwards list to self.writer.writelines() and returns its result. """
+ return self.writer.writelines(list)
+
def reset(self):
self.reader.reset()
@@ -290,6 +346,10 @@ class StreamReaderWriter:
class StreamRecoder:
+ # Optional attributes set by the file wrappers below
+ data_encoding = 'unknown'
+ file_encoding = 'unknown'
+
def __init__(self,stream,encode,decode,Reader,Writer,errors='strict'):
""" Creates a StreamRecoder instance which implements a two-way
@@ -328,13 +388,34 @@ class StreamRecoder:
data, bytesencoded = self.encode(data, self.errors)
return data
+ def readline(self,size=None):
+ """ Read one line from self.reader and return it passed through self.encode. """
+ if size is None:
+ data = self.reader.readline()
+ else:
+ data = self.reader.readline(size)
+ data, bytesencoded = self.encode(data, self.errors)
+ return data
+
+ def readlines(self,sizehint=None):
+ """ Read from self.reader, apply self.encode, return the lines (line ends kept). """
+ if sizehint is None:
+ data = self.reader.read()
+ else:
+ data = self.reader.read(sizehint)
+ data, bytesencoded = self.encode(data, self.errors)
+ return data.splitlines(1)
+
def write(self,data):
data, bytesdecoded = self.decode(data, self.errors)
return self.writer.write(data)
- # .writelines(), .readline() and .readlines() ... see notes
- # above.
+ def writelines(self,list):
+ """ Join list, pass it through self.decode, and write it with self.writer. """
+ data = ''.join(list)
+ data, bytesdecoded = self.decode(data, self.errors)
+ return self.writer.write(data)
def reset(self):
@@ -380,33 +461,45 @@ def open(filename, mode, encoding=None, errors='strict', buffering=1):
if encoding is None:
return file
(e,d,sr,sw) = lookup(encoding)
- return StreamReaderWriter(file, sr, sw, errors)
+ srw = StreamReaderWriter(file, sr, sw, errors)
+ # Add attributes to simplify introspection
+ srw.encoding = encoding
+ return srw
-def EncodedFile(file, input, output=None, errors='strict'):
+def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'):
""" Return a wrapped version of file which provides transparent
encoding translation.
Strings written to the wrapped file are interpreted according
- to the given input encoding and then written to the original
- file as string using the output encoding. The intermediate
- encoding will usually be Unicode but depends on the specified
- codecs.
+ to the given data_encoding and then written to the original
+ file as string using file_encoding. The intermediate encoding
+ will usually be Unicode but depends on the specified codecs.
+
+ Strings are read from the file using file_encoding and then
+ passed back to the caller as string using data_encoding.
- If output is not given, it defaults to input.
+ If file_encoding is not given, it defaults to data_encoding.
errors may be given to define the error handling. It defaults
to 'strict' which causes ValueErrors to be raised in case an
encoding error occurs.
+ data_encoding and file_encoding are added to the wrapped file
+ object as attributes .data_encoding and .file_encoding resp.
+
"""
- if output is None:
- output = input
- encode, decode = lookup(input)[:2]
- Reader, Writer = lookup(output)[2:]
- return StreamRecoder(file,
- encode,decode,Reader,Writer,
- errors)
+ if file_encoding is None:
+ file_encoding = data_encoding
+ encode, decode = lookup(data_encoding)[:2]
+ Reader, Writer = lookup(file_encoding)[2:]
+ sr = StreamRecoder(file,
+ encode,decode,Reader,Writer,
+ errors)
+ # Add attributes to simplify introspection
+ sr.data_encoding = data_encoding
+ sr.file_encoding = file_encoding
+ return sr
### Tests
@@ -414,5 +507,8 @@ if __name__ == '__main__':
import sys
- # Make stdout translate Latin-1 into Unicode-Escape
- sys.stdout = EncodedFile(sys.stdout, 'latin-1', 'unicode-escape')
+ # Make stdout translate Latin-1 output into UTF-8 output
+ sys.stdout = EncodedFile(sys.stdout, 'latin-1', 'utf-8')
+
+ # Have stdin translate Latin-1 input into UTF-8 input
+ sys.stdin = EncodedFile(sys.stdin, 'utf-8', 'latin-1')