Merge part of the trunk changes into the p3yk branch. This merges from 43030

(branch-creation time) up to 43067. 43068 and 43069 contain a little swapping action between re.py and sre.py, and this mightily confuses svn merge, so later changes are going in separately. This merge should break no additional tests. The last-merged revision is going in a 'last_merge' property on '.' (the branch directory.) Arbitrarily chosen, really; if there's a BCP for this, I couldn't find it, but we can easily change it afterwards ;)
author: Thomas Wouters <thomas@python.org> 2006-04-21 09:43:23 (GMT)
committer: Thomas Wouters <thomas@python.org> 2006-04-21 09:43:23 (GMT)
commit: a977329b6fb0e4c95cabb9043794de69b27a1099 (patch)
tree: b91552a0578639bd10181ab612039c1bed9bec27 /Lib/codecs.py
parent: d858f70617a9df8456e89a898ad8f97bd57c09f9 (diff)
download: cpython-a977329b6fb0e4c95cabb9043794de69b27a1099.zip
cpython-a977329b6fb0e4c95cabb9043794de69b27a1099.tar.gz
cpython-a977329b6fb0e4c95cabb9043794de69b27a1099.tar.bz2
1 files changed, 172 insertions, 11 deletions
diff --git a/Lib/codecs.py b/Lib/codecs.py
index 6895a22..28856c7 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -73,6 +73,23 @@ BOM64_BE = BOM_UTF32_BE
 
 ### Codec base classes (defining the API)
 
+class CodecInfo(tuple):
+
+    def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
+        incrementalencoder=None, incrementaldecoder=None, name=None):
+        self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
+        self.name = name
+        self.encode = encode
+        self.decode = decode
+        self.incrementalencoder = incrementalencoder
+        self.incrementaldecoder = incrementaldecoder
+        self.streamwriter = streamwriter
+        self.streamreader = streamreader
+        return self
+
+    def __repr__(self):
+        return "<%s.%s object for encoding %s at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, id(self))
+
 class Codec:
 
     """ Defines the interface for stateless encoders/decoders.
@@ -137,6 +154,88 @@ class Codec:
         """
         raise NotImplementedError
 
+class IncrementalEncoder(object):
+    """
+    A IncrementalEncoder encodes an input in multiple steps. The input can be
+    passed piece by piece to the encode() method. The IncrementalEncoder remembers
+    the state of the Encoding process between calls to encode().
+    """
+    def __init__(self, errors='strict'):
+        """
+        Creates a IncrementalEncoder instance.
+
+        The IncrementalEncoder may use different error handling schemes by
+        providing the errors keyword argument. See the module docstring
+        for a list of possible values.
+        """
+        self.errors = errors
+        self.buffer = ""
+
+    def encode(self, input, final=False):
+        """
+        Encodes input and returns the resulting object.
+        """
+        raise NotImplementedError
+
+    def reset(self):
+        """
+        Resets the encoder to the initial state.
+        """
+
+class IncrementalDecoder(object):
+    """
+    An IncrementalDecoder decodes an input in multiple steps. The input can be
+    passed piece by piece to the decode() method. The IncrementalDecoder
+    remembers the state of the decoding process between calls to decode().
+    """
+    def __init__(self, errors='strict'):
+        """
+        Creates a IncrementalDecoder instance.
+
+        The IncrementalDecoder may use different error handling schemes by
+        providing the errors keyword argument. See the module docstring
+        for a list of possible values.
+        """
+        self.errors = errors
+
+    def decode(self, input, final=False):
+        """
+        Decodes input and returns the resulting object.
+        """
+        raise NotImplementedError
+
+    def reset(self):
+        """
+        Resets the decoder to the initial state.
+        """
+
+class BufferedIncrementalDecoder(IncrementalDecoder):
+    """
+    This subclass of IncrementalDecoder can be used as the baseclass for an
+    incremental decoder if the decoder must be able to handle incomplete byte
+    sequences.
+    """
+    def __init__(self, errors='strict'):
+        IncrementalDecoder.__init__(self, errors)
+        self.buffer = "" # undecoded input that is kept between calls to decode()
+
+    def _buffer_decode(self, input, errors, final):
+        # Overwrite this method in subclasses: It must decode input
+        # and return an (output, length consumed) tuple
+        raise NotImplementedError
+
+    def decode(self, input, final=False):
+        # decode input (taking the buffer into account)
+        data = self.buffer + input
+        (result, consumed) = self._buffer_decode(data, self.errors, final)
+        # keep undecoded input until the next call
+        self.buffer = data[consumed:]
+        return result
+
+    def reset(self):
+        IncrementalDecoder.reset(self)
+        self.bytebuffer = ""
+
 #
 # The StreamWriter and StreamReader class provide generic working
 # interfaces which can be used to implement new encoding submodules
@@ -666,8 +765,8 @@ def open(filename, mode='rb', encoding=None, errors='strict', buffering=1):
     file = __builtin__.open(filename, mode, buffering)
     if encoding is None:
         return file
-    (e, d, sr, sw) = lookup(encoding)
-    srw = StreamReaderWriter(file, sr, sw, errors)
+    info = lookup(encoding)
+    srw = StreamReaderWriter(file, info.streamreader, info.streamwriter, errors)
     # Add attributes to simplify introspection
     srw.encoding = encoding
     return srw
@@ -699,11 +798,9 @@ def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'):
     """
     if file_encoding is None:
         file_encoding = data_encoding
-    encode, decode = lookup(data_encoding)[:2]
-    Reader, Writer = lookup(file_encoding)[2:]
-    sr = StreamRecoder(file,
-                       encode, decode, Reader, Writer,
-                       errors)
+    info = lookup(data_encoding)
+    sr = StreamRecoder(file, info.encode, info.decode,
+                       info.streamreader, info.streamwriter, errors)
     # Add attributes to simplify introspection
     sr.data_encoding = data_encoding
     sr.file_encoding = file_encoding
@@ -719,7 +816,7 @@ def getencoder(encoding):
         Raises a LookupError in case the encoding cannot be found.
 
     """
-    return lookup(encoding)[0]
+    return lookup(encoding).encode
 
 def getdecoder(encoding):
 
@@ -729,7 +826,35 @@ def getdecoder(encoding):
         Raises a LookupError in case the encoding cannot be found.
 
     """
-    return lookup(encoding)[1]
+    return lookup(encoding).decode
+
+def getincrementalencoder(encoding):
+
+    """ Lookup up the codec for the given encoding and return
+        its IncrementalEncoder class or factory function.
+
+        Raises a LookupError in case the encoding cannot be found
+        or the codecs doesn't provide an incremental encoder.
+
+    """
+    encoder = lookup(encoding).incrementalencoder
+    if encoder is None:
+        raise LookupError(encoding)
+    return encoder
+
+def getincrementaldecoder(encoding):
+
+    """ Lookup up the codec for the given encoding and return
+        its IncrementalDecoder class or factory function.
+
+        Raises a LookupError in case the encoding cannot be found
+        or the codecs doesn't provide an incremental decoder.
+
+    """
+    decoder = lookup(encoding).incrementaldecoder
+    if decoder is None:
+        raise LookupError(encoding)
+    return decoder
 
 def getreader(encoding):
 
@@ -739,7 +864,7 @@ def getreader(encoding):
         Raises a LookupError in case the encoding cannot be found.
 
     """
-    return lookup(encoding)[2]
+    return lookup(encoding).streamreader
 
 def getwriter(encoding):
 
@@ -749,7 +874,43 @@ def getwriter(encoding):
         Raises a LookupError in case the encoding cannot be found.
 
     """
-    return lookup(encoding)[3]
+    return lookup(encoding).streamwriter
+
+def iterencode(iterator, encoding, errors='strict', **kwargs):
+    """
+    Encoding iterator.
+
+    Encodes the input strings from the iterator using a IncrementalEncoder.
+
+    errors and kwargs are passed through to the IncrementalEncoder
+    constructor.
+    """
+    encoder = getincrementalencoder(encoding)(errors, **kwargs)
+    for input in iterator:
+        output = encoder.encode(input)
+        if output:
+            yield output
+    output = encoder.encode("", True)
+    if output:
+        yield output
+
+def iterdecode(iterator, encoding, errors='strict', **kwargs):
+    """
+    Decoding iterator.
+
+    Decodes the input strings from the iterator using a IncrementalDecoder.
+
+    errors and kwargs are passed through to the IncrementalDecoder
+    constructor.
+    """
+    decoder = getincrementaldecoder(encoding)(errors, **kwargs)
+    for input in iterator:
+        output = decoder.decode(input)
+        if output:
+            yield output
+    output = decoder.decode("", True)
+    if output:
+        yield output
 
 ### Helpers for charmap-based codecs
author	Thomas Wouters <thomas@python.org>	2006-04-21 09:43:23 (GMT)
committer	Thomas Wouters <thomas@python.org>	2006-04-21 09:43:23 (GMT)
commit	a977329b6fb0e4c95cabb9043794de69b27a1099 (patch)
tree	b91552a0578639bd10181ab612039c1bed9bec27 /Lib/codecs.py
parent	d858f70617a9df8456e89a898ad8f97bd57c09f9 (diff)
download	cpython-a977329b6fb0e4c95cabb9043794de69b27a1099.zip cpython-a977329b6fb0e4c95cabb9043794de69b27a1099.tar.gz cpython-a977329b6fb0e4c95cabb9043794de69b27a1099.tar.bz2