diff options
Diffstat (limited to 'Lib/encodings/punycode.py')
-rw-r--r-- | Lib/encodings/punycode.py | 57 |
1 files changed, 29 insertions, 28 deletions
diff --git a/Lib/encodings/punycode.py b/Lib/encodings/punycode.py
index 66c5101..d97200f 100644
--- a/Lib/encodings/punycode.py
+++ b/Lib/encodings/punycode.py
@@ -1,6 +1,7 @@
+# -*- coding: iso-8859-1 -*-
 """ Codec for the Punicode encoding, as specified in RFC 3492
 
-Written by Martin v. Löwis.
+Written by Martin v. Löwis.
 """
 
 import codecs
@@ -9,15 +10,16 @@ import codecs
 
 def segregate(str):
     """3.1 Basic code point segregation"""
-    base = bytearray()
-    extended = set()
+    base = []
+    extended = {}
     for c in str:
         if ord(c) < 128:
-            base.append(ord(c))
+            base.append(c)
         else:
-            extended.add(c)
-    extended = sorted(extended)
-    return bytes(base), extended
+            extended[c] = 1
+    extended = extended.keys()
+    extended.sort()
+    return "".join(base).encode("ascii"),extended
 
 def selective_len(str, max):
     """Return the length of str, considering only characters below max."""
@@ -74,16 +76,16 @@ def T(j, bias):
     if res > 26: return 26
     return res
 
-digits = b"abcdefghijklmnopqrstuvwxyz0123456789"
+digits = "abcdefghijklmnopqrstuvwxyz0123456789"
 def generate_generalized_integer(N, bias):
     """3.3 Generalized variable-length integers"""
-    result = bytearray()
+    result = []
     j = 0
     while 1:
         t = T(j, bias)
         if N < t:
             result.append(digits[N])
-            return bytes(result)
+            return result
         result.append(digits[t + ((N - t) % (36 - t))])
         N = (N - t) // (36 - t)
         j += 1
@@ -106,20 +108,21 @@ def adapt(delta, first, numchars):
 def generate_integers(baselen, deltas):
     """3.4 Bias adaptation"""
     # Punycode parameters: initial bias = 72, damp = 700, skew = 38
-    result = bytearray()
+    result = []
     bias = 72
     for points, delta in enumerate(deltas):
         s = generate_generalized_integer(delta, bias)
         result.extend(s)
         bias = adapt(delta, points==0, baselen+points+1)
-    return bytes(result)
+    return "".join(result)
 
 def punycode_encode(text):
     base, extended = segregate(text)
+    base = base.encode("ascii")
     deltas = insertion_unsort(text, extended)
     extended = generate_integers(len(base), deltas)
     if base:
-        return base + b"-" + extended
+        return base + "-" + extended
     return extended
 
 ##################### Decoding #####################################
@@ -134,7 +137,7 @@ def decode_generalized_number(extended, extpos, bias, errors):
             char = ord(extended[extpos])
         except IndexError:
             if errors == "strict":
-                raise UnicodeError("incomplete punicode string")
+                raise UnicodeError, "incomplete punicode string"
             return extpos + 1, None
         extpos += 1
         if 0x41 <= char <= 0x5A: # A-Z
@@ -171,39 +174,37 @@ def insertion_sort(base, extended, errors):
         char += pos // (len(base) + 1)
         if char > 0x10FFFF:
             if errors == "strict":
-                raise UnicodeError("Invalid character U+%x" % char)
+                raise UnicodeError, ("Invalid character U+%x" % char)
             char = ord('?')
         pos = pos % (len(base) + 1)
-        base = base[:pos] + chr(char) + base[pos:]
+        base = base[:pos] + unichr(char) + base[pos:]
         bias = adapt(delta, (extpos == 0), len(base))
         extpos = newpos
     return base
 
 def punycode_decode(text, errors):
-    if isinstance(text, str):
-        text = text.encode("ascii")
-    if isinstance(text, memoryview):
-        text = bytes(text)
-    pos = text.rfind(b"-")
+    pos = text.rfind("-")
     if pos == -1:
         base = ""
-        extended = str(text, "ascii").upper()
+        extended = text
     else:
-        base = str(text[:pos], "ascii", errors)
-        extended = str(text[pos+1:], "ascii").upper()
+        base = text[:pos]
+        extended = text[pos+1:]
+    base = unicode(base, "ascii", errors)
+    extended = extended.upper()
     return insertion_sort(base, extended, errors)
 
 ### Codec APIs
 
 class Codec(codecs.Codec):
 
-    def encode(self, input, errors='strict'):
+    def encode(self,input,errors='strict'):
         res = punycode_encode(input)
         return res, len(input)
 
-    def decode(self, input, errors='strict'):
+    def decode(self,input,errors='strict'):
         if errors not in ('strict', 'replace', 'ignore'):
-            raise UnicodeError("Unsupported error handling "+errors)
+            raise UnicodeError, "Unsupported error handling "+errors
         res = punycode_decode(input, errors)
         return res, len(input)
 
@@ -214,7 +215,7 @@ class IncrementalEncoder(codecs.IncrementalEncoder):
 class IncrementalDecoder(codecs.IncrementalDecoder):
     def decode(self, input, final=False):
         if self.errors not in ('strict', 'replace', 'ignore'):
-            raise UnicodeError("Unsupported error handling "+self.errors)
+            raise UnicodeError, "Unsupported error handling "+self.errors
         return punycode_decode(input, self.errors)
 
 class StreamWriter(Codec,codecs.StreamWriter):