summary | refs | log | tree | commit | diff | stats
path: root/Lib/encodings
diff options
context:
space:
mode:
author: Walter Dörwald <walter@livinglogic.de> 2007-05-10 12:36:25 (GMT)
committer: Walter Dörwald <walter@livinglogic.de> 2007-05-10 12:36:25 (GMT)
commit: a4c612845aceed4a9f1ef25328b0cfa39d5038ca (patch)
tree: d9d33f02109ec7f1197f2c6c1767ec37915c45b5 /Lib/encodings
parent: 5c4501af57b22bb2f651b49a4bc49ccc5045c686 (diff)
download: cpython-a4c612845aceed4a9f1ef25328b0cfa39d5038ca.zip
cpython-a4c612845aceed4a9f1ef25328b0cfa39d5038ca.tar.gz
cpython-a4c612845aceed4a9f1ef25328b0cfa39d5038ca.tar.bz2
Fix punycode codec and tests.
Diffstat (limited to 'Lib/encodings')
-rw-r--r-- Lib/encodings/punycode.py 33
1 files changed, 15 insertions, 18 deletions
diff --git a/Lib/encodings/punycode.py b/Lib/encodings/punycode.py
index 89906ae..9d7df10 100644
--- a/Lib/encodings/punycode.py
+++ b/Lib/encodings/punycode.py
@@ -10,15 +10,15 @@ import codecs
def segregate(str):
"""3.1 Basic code point segregation"""
- base = []
- extended = {}
+ base = b""
+ extended = set()
for c in str:
if ord(c) < 128:
- base.append(c)
+ base.append(ord(c))
else:
- extended[c] = 1
- extended = sorted(extended.keys())
- return "".join(base).encode("ascii"),extended
+ extended.add(c)
+ extended = sorted(extended)
+ return (base, extended)
def selective_len(str, max):
"""Return the length of str, considering only characters below max."""
@@ -75,10 +75,10 @@ def T(j, bias):
if res > 26: return 26
return res
-digits = "abcdefghijklmnopqrstuvwxyz0123456789"
+digits = b"abcdefghijklmnopqrstuvwxyz0123456789"
def generate_generalized_integer(N, bias):
"""3.3 Generalized variable-length integers"""
- result = []
+ result = b""
j = 0
while 1:
t = T(j, bias)
@@ -107,21 +107,20 @@ def adapt(delta, first, numchars):
def generate_integers(baselen, deltas):
"""3.4 Bias adaptation"""
# Punycode parameters: initial bias = 72, damp = 700, skew = 38
- result = []
+ result = b""
bias = 72
for points, delta in enumerate(deltas):
s = generate_generalized_integer(delta, bias)
result.extend(s)
bias = adapt(delta, points==0, baselen+points+1)
- return "".join(result)
+ return result
def punycode_encode(text):
base, extended = segregate(text)
- base = base.encode("ascii")
deltas = insertion_unsort(text, extended)
extended = generate_integers(len(base), deltas)
if base:
- return base + "-" + extended
+ return base + b"-" + extended
return extended
##################### Decoding #####################################
@@ -182,15 +181,13 @@ def insertion_sort(base, extended, errors):
return base
def punycode_decode(text, errors):
- pos = text.rfind("-")
+ pos = text.rfind(b"-")
if pos == -1:
base = ""
- extended = text
+ extended = str(text, "ascii").upper()
else:
- base = text[:pos]
- extended = text[pos+1:]
- base = str(base, "ascii", errors)
- extended = extended.upper()
+ base = str(text[:pos], "ascii", errors)
+ extended = str(text[pos+1:], "ascii").upper()
return insertion_sort(base, extended, errors)
### Codec APIs