diff options
author | Walter Dörwald <walter@livinglogic.de> | 2007-05-10 12:36:25 (GMT) |
---|---|---|
committer | Walter Dörwald <walter@livinglogic.de> | 2007-05-10 12:36:25 (GMT) |
commit | a4c612845aceed4a9f1ef25328b0cfa39d5038ca (patch) | |
tree | d9d33f02109ec7f1197f2c6c1767ec37915c45b5 /Lib/encodings | |
parent | 5c4501af57b22bb2f651b49a4bc49ccc5045c686 (diff) | |
download | cpython-a4c612845aceed4a9f1ef25328b0cfa39d5038ca.zip cpython-a4c612845aceed4a9f1ef25328b0cfa39d5038ca.tar.gz cpython-a4c612845aceed4a9f1ef25328b0cfa39d5038ca.tar.bz2 |
Fix punycode codec and tests.
Diffstat (limited to 'Lib/encodings')
-rw-r--r-- | Lib/encodings/punycode.py | 33 |
1 file changed, 15 insertions, 18 deletions
```diff
diff --git a/Lib/encodings/punycode.py b/Lib/encodings/punycode.py
index 89906ae..9d7df10 100644
--- a/Lib/encodings/punycode.py
+++ b/Lib/encodings/punycode.py
@@ -10,15 +10,15 @@ import codecs
 
 def segregate(str):
     """3.1 Basic code point segregation"""
-    base = []
-    extended = {}
+    base = b""
+    extended = set()
     for c in str:
         if ord(c) < 128:
-            base.append(c)
+            base.append(ord(c))
         else:
-            extended[c] = 1
-    extended = sorted(extended.keys())
-    return "".join(base).encode("ascii"),extended
+            extended.add(c)
+    extended = sorted(extended)
+    return (base, extended)
 
 def selective_len(str, max):
     """Return the length of str, considering only characters below max."""
@@ -75,10 +75,10 @@ def T(j, bias):
     if res > 26: return 26
     return res
 
-digits = "abcdefghijklmnopqrstuvwxyz0123456789"
+digits = b"abcdefghijklmnopqrstuvwxyz0123456789"
 def generate_generalized_integer(N, bias):
     """3.3 Generalized variable-length integers"""
-    result = []
+    result = b""
     j = 0
     while 1:
         t = T(j, bias)
@@ -107,21 +107,20 @@ def adapt(delta, first, numchars):
 def generate_integers(baselen, deltas):
     """3.4 Bias adaptation"""
     # Punycode parameters: initial bias = 72, damp = 700, skew = 38
-    result = []
+    result = b""
     bias = 72
     for points, delta in enumerate(deltas):
         s = generate_generalized_integer(delta, bias)
         result.extend(s)
         bias = adapt(delta, points==0, baselen+points+1)
-    return "".join(result)
+    return result
 
 def punycode_encode(text):
     base, extended = segregate(text)
-    base = base.encode("ascii")
     deltas = insertion_unsort(text, extended)
     extended = generate_integers(len(base), deltas)
     if base:
-        return base + "-" + extended
+        return base + b"-" + extended
     return extended
 
 ##################### Decoding #####################################
@@ -182,15 +181,13 @@ def insertion_sort(base, extended, errors):
     return base
 
 def punycode_decode(text, errors):
-    pos = text.rfind("-")
+    pos = text.rfind(b"-")
     if pos == -1:
         base = ""
-        extended = text
+        extended = str(text, "ascii").upper()
     else:
-        base = text[:pos]
-        extended = text[pos+1:]
-    base = str(base, "ascii", errors)
-    extended = extended.upper()
+        base = str(text[:pos], "ascii", errors)
+        extended = str(text[pos+1:], "ascii").upper()
     return insertion_sort(base, extended, errors)
 
 ### Codec APIs
```