summary | refs | log | tree | commit | diff | stats
path: root/Lib/encodings/punycode.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/encodings/punycode.py')
-rw-r--r-- Lib/encodings/punycode.py 57
1 files changed, 29 insertions, 28 deletions
diff --git a/Lib/encodings/punycode.py b/Lib/encodings/punycode.py
index 66c5101..d97200f 100644
--- a/Lib/encodings/punycode.py
+++ b/Lib/encodings/punycode.py
@@ -1,6 +1,7 @@
+# -*- coding: iso-8859-1 -*-
""" Codec for the Punicode encoding, as specified in RFC 3492
-Written by Martin v. Löwis.
+Written by Martin v. Löwis.
"""
import codecs
@@ -9,15 +10,16 @@ import codecs
def segregate(str):
"""3.1 Basic code point segregation"""
- base = bytearray()
- extended = set()
+ base = []
+ extended = {}
for c in str:
if ord(c) < 128:
- base.append(ord(c))
+ base.append(c)
else:
- extended.add(c)
- extended = sorted(extended)
- return bytes(base), extended
+ extended[c] = 1
+ extended = extended.keys()
+ extended.sort()
+ return "".join(base).encode("ascii"),extended
def selective_len(str, max):
"""Return the length of str, considering only characters below max."""
@@ -74,16 +76,16 @@ def T(j, bias):
if res > 26: return 26
return res
-digits = b"abcdefghijklmnopqrstuvwxyz0123456789"
+digits = "abcdefghijklmnopqrstuvwxyz0123456789"
def generate_generalized_integer(N, bias):
"""3.3 Generalized variable-length integers"""
- result = bytearray()
+ result = []
j = 0
while 1:
t = T(j, bias)
if N < t:
result.append(digits[N])
- return bytes(result)
+ return result
result.append(digits[t + ((N - t) % (36 - t))])
N = (N - t) // (36 - t)
j += 1
@@ -106,20 +108,21 @@ def adapt(delta, first, numchars):
def generate_integers(baselen, deltas):
"""3.4 Bias adaptation"""
# Punycode parameters: initial bias = 72, damp = 700, skew = 38
- result = bytearray()
+ result = []
bias = 72
for points, delta in enumerate(deltas):
s = generate_generalized_integer(delta, bias)
result.extend(s)
bias = adapt(delta, points==0, baselen+points+1)
- return bytes(result)
+ return "".join(result)
def punycode_encode(text):
base, extended = segregate(text)
+ base = base.encode("ascii")
deltas = insertion_unsort(text, extended)
extended = generate_integers(len(base), deltas)
if base:
- return base + b"-" + extended
+ return base + "-" + extended
return extended
##################### Decoding #####################################
@@ -134,7 +137,7 @@ def decode_generalized_number(extended, extpos, bias, errors):
char = ord(extended[extpos])
except IndexError:
if errors == "strict":
- raise UnicodeError("incomplete punicode string")
+ raise UnicodeError, "incomplete punicode string"
return extpos + 1, None
extpos += 1
if 0x41 <= char <= 0x5A: # A-Z
@@ -171,39 +174,37 @@ def insertion_sort(base, extended, errors):
char += pos // (len(base) + 1)
if char > 0x10FFFF:
if errors == "strict":
- raise UnicodeError("Invalid character U+%x" % char)
+ raise UnicodeError, ("Invalid character U+%x" % char)
char = ord('?')
pos = pos % (len(base) + 1)
- base = base[:pos] + chr(char) + base[pos:]
+ base = base[:pos] + unichr(char) + base[pos:]
bias = adapt(delta, (extpos == 0), len(base))
extpos = newpos
return base
def punycode_decode(text, errors):
- if isinstance(text, str):
- text = text.encode("ascii")
- if isinstance(text, memoryview):
- text = bytes(text)
- pos = text.rfind(b"-")
+ pos = text.rfind("-")
if pos == -1:
base = ""
- extended = str(text, "ascii").upper()
+ extended = text
else:
- base = str(text[:pos], "ascii", errors)
- extended = str(text[pos+1:], "ascii").upper()
+ base = text[:pos]
+ extended = text[pos+1:]
+ base = unicode(base, "ascii", errors)
+ extended = extended.upper()
return insertion_sort(base, extended, errors)
### Codec APIs
class Codec(codecs.Codec):
- def encode(self, input, errors='strict'):
+ def encode(self,input,errors='strict'):
res = punycode_encode(input)
return res, len(input)
- def decode(self, input, errors='strict'):
+ def decode(self,input,errors='strict'):
if errors not in ('strict', 'replace', 'ignore'):
- raise UnicodeError("Unsupported error handling "+errors)
+ raise UnicodeError, "Unsupported error handling "+errors
res = punycode_decode(input, errors)
return res, len(input)
@@ -214,7 +215,7 @@ class IncrementalEncoder(codecs.IncrementalEncoder):
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
if self.errors not in ('strict', 'replace', 'ignore'):
- raise UnicodeError("Unsupported error handling "+self.errors)
+ raise UnicodeError, "Unsupported error handling "+self.errors
return punycode_decode(input, self.errors)
class StreamWriter(Codec,codecs.StreamWriter):