diff options
author | Walter Dörwald <walter@livinglogic.de> | 2006-04-14 18:25:39 (GMT) |
---|---|---|
committer | Walter Dörwald <walter@livinglogic.de> | 2006-04-14 18:25:39 (GMT) |
commit | 78a0be6ab373680335e12cb76b3d811afbae32d0 (patch) | |
tree | 2d6e525b7c7abd9ba026eea52a07348f3daa8fa5 /Lib/encodings | |
parent | a40cf31de67c51bae91a897bd007fa36d6d5daf9 (diff) | |
download | cpython-78a0be6ab373680335e12cb76b3d811afbae32d0.zip cpython-78a0be6ab373680335e12cb76b3d811afbae32d0.tar.gz cpython-78a0be6ab373680335e12cb76b3d811afbae32d0.tar.bz2 |
Add a BufferedIncrementalEncoder class that can be used for implementing
an incremental encoder that must retain part of the data between calls
to the encode() method.
Fix the incremental encoder and decoder for the IDNA encoding.
This closes SF patch #1453235.
Diffstat (limited to 'Lib/encodings')
-rw-r--r-- | Lib/encodings/idna.py | 78 |
1 files changed, 72 insertions, 6 deletions
diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py
index 1aa4e96..ea90d67 100644
--- a/Lib/encodings/idna.py
+++ b/Lib/encodings/idna.py
@@ -194,13 +194,79 @@ class Codec(codecs.Codec):
 
         return u".".join(result)+trailing_dot, len(input)
 
-class IncrementalEncoder(codecs.IncrementalEncoder):
-    def encode(self, input, final=False):
-        return Codec().encode(input, self.errors)[0]
+class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
+    def _buffer_encode(self, input, errors, final):
+        if errors != 'strict':
+            # IDNA is quite clear that implementations must be strict
+            raise UnicodeError("unsupported error handling "+errors)
+
+        if not input:
+            return ("", 0)
+
+        labels = dots.split(input)
+        trailing_dot = u''
+        if labels:
+            if not labels[-1]:
+                trailing_dot = '.'
+                del labels[-1]
+            elif not final:
+                # Keep potentially unfinished label until the next call
+                del labels[-1]
+                if labels:
+                    trailing_dot = '.'
+
+        result = []
+        size = 0
+        for label in labels:
+            result.append(ToASCII(label))
+            if size:
+                size += 1
+            size += len(label)
+
+        # Join with U+002E
+        result = ".".join(result) + trailing_dot
+        size += len(trailing_dot)
+        return (result, size)
+
+class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
+    def _buffer_decode(self, input, errors, final):
+        if errors != 'strict':
+            raise UnicodeError("Unsupported error handling "+errors)
+
+        if not input:
+            return (u"", 0)
+
+        # IDNA allows decoding to operate on Unicode strings, too.
+        if isinstance(input, unicode):
+            labels = dots.split(input)
+        else:
+            # Must be ASCII string
+            input = str(input)
+            unicode(input, "ascii")
+            labels = input.split(".")
+
+        trailing_dot = u''
+        if labels:
+            if not labels[-1]:
+                trailing_dot = u'.'
+                del labels[-1]
+            elif not final:
+                # Keep potentially unfinished label until the next call
+                del labels[-1]
+                if labels:
+                    trailing_dot = u'.'
+
+        result = []
+        size = 0
+        for label in labels:
+            result.append(ToUnicode(label))
+            if size:
+                size += 1
+            size += len(label)
 
-class IncrementalDecoder(codecs.IncrementalDecoder):
-    def decode(self, input, final=False):
-        return Codec().decode(input, self.errors)[0]
+        result = u".".join(result) + trailing_dot
+        size += len(trailing_dot)
+        return (result, size)
 
 class StreamWriter(Codec,codecs.StreamWriter):
     pass