summary | refs | log | tree | commit | diff | stats
path: root/Lib/encodings
diff options
context:
space:
mode:
author Walter Dörwald <walter@livinglogic.de> 2007-05-11 10:32:57 (GMT)
committer Walter Dörwald <walter@livinglogic.de> 2007-05-11 10:32:57 (GMT)
commit 0ac30f82fe1beb4e0255d06c693ccfba56e45a9f (patch)
tree 1795d671685687ef172c7f4d57290292cdf06879 /Lib/encodings
parent 1f05a3b7fb754d6b30300e1e50aeb92aabe6afd6 (diff)
download cpython-0ac30f82fe1beb4e0255d06c693ccfba56e45a9f.zip
cpython-0ac30f82fe1beb4e0255d06c693ccfba56e45a9f.tar.gz
cpython-0ac30f82fe1beb4e0255d06c693ccfba56e45a9f.tar.bz2
Enhance the punycode decoder so that it can decode
unicode objects. Fix the idna codec and the tests.
Diffstat (limited to 'Lib/encodings')
-rw-r--r-- Lib/encodings/idna.py 43
-rw-r--r-- Lib/encodings/punycode.py 6
2 files changed, 27 insertions, 22 deletions
diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py
index 5c3d056..55e1643 100644
--- a/Lib/encodings/idna.py
+++ b/Lib/encodings/idna.py
@@ -7,7 +7,8 @@ from unicodedata import ucd_3_2_0 as unicodedata
dots = re.compile("[\u002E\u3002\uFF0E\uFF61]")
# IDNA section 5
-ace_prefix = "xn--"
+ace_prefix = b"xn--"
+sace_prefix = "xn--"
# This assumes query strings, so AllowUnassigned is true
def nameprep(label):
@@ -87,7 +88,7 @@ def ToASCII(label):
raise UnicodeError("label empty or too long")
# Step 5: Check ACE prefix
- if label.startswith(ace_prefix):
+ if label.startswith(sace_prefix):
raise UnicodeError("Label starts with ACE prefix")
# Step 6: Encode with PUNYCODE
@@ -134,7 +135,7 @@ def ToUnicode(label):
# Step 7: Compare the result of step 6 with the one of step 3
# label2 will already be in lower case.
- if label.lower() != label2:
+ if str(label, "ascii").lower() != str(label2, "ascii"):
raise UnicodeError("IDNA does not round-trip", label, label2)
# Step 8: return the result of step 5
@@ -143,7 +144,7 @@ def ToUnicode(label):
### Codec APIs
class Codec(codecs.Codec):
- def encode(self,input,errors='strict'):
+ def encode(self, input, errors='strict'):
if errors != 'strict':
# IDNA is quite clear that implementations must be strict
@@ -152,19 +153,21 @@ class Codec(codecs.Codec):
if not input:
return b"", 0
- result = []
+ result = b""
labels = dots.split(input)
- if labels and len(labels[-1])==0:
+ if labels and not labels[-1]:
trailing_dot = b'.'
del labels[-1]
else:
trailing_dot = b''
for label in labels:
- result.append(ToASCII(label))
- # Join with U+002E
- return b".".join(result)+trailing_dot, len(input)
+ if result:
+ # Join with U+002E
+ result.extend(b'.')
+ result.extend(ToASCII(label))
+ return result+trailing_dot, len(input)
- def decode(self,input,errors='strict'):
+ def decode(self, input, errors='strict'):
if errors != 'strict':
raise UnicodeError("Unsupported error handling "+errors)
@@ -199,30 +202,31 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
raise UnicodeError("unsupported error handling "+errors)
if not input:
- return ("", 0)
+ return (b'', 0)
labels = dots.split(input)
- trailing_dot = ''
+ trailing_dot = b''
if labels:
if not labels[-1]:
- trailing_dot = '.'
+ trailing_dot = b'.'
del labels[-1]
elif not final:
# Keep potentially unfinished label until the next call
del labels[-1]
if labels:
- trailing_dot = '.'
+ trailing_dot = b'.'
- result = []
+ result = b""
size = 0
for label in labels:
- result.append(ToASCII(label))
if size:
+ # Join with U+002E
+ result.extend(b'.')
size += 1
+ result.extend(ToASCII(label))
size += len(label)
- # Join with U+002E
- result = ".".join(result) + trailing_dot
+ result += trailing_dot
size += len(trailing_dot)
return (result, size)
@@ -239,8 +243,7 @@ class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
labels = dots.split(input)
else:
# Must be ASCII string
- input = str(input)
- str(input, "ascii")
+ input = str(input, "ascii")
labels = input.split(".")
trailing_dot = ''
diff --git a/Lib/encodings/punycode.py b/Lib/encodings/punycode.py
index 9d7df10..4c22fe5 100644
--- a/Lib/encodings/punycode.py
+++ b/Lib/encodings/punycode.py
@@ -181,6 +181,8 @@ def insertion_sort(base, extended, errors):
return base
def punycode_decode(text, errors):
+ if isinstance(text, str):
+ text = text.encode("ascii")
pos = text.rfind(b"-")
if pos == -1:
base = ""
@@ -194,11 +196,11 @@ def punycode_decode(text, errors):
class Codec(codecs.Codec):
- def encode(self,input,errors='strict'):
+ def encode(self, input, errors='strict'):
res = punycode_encode(input)
return res, len(input)
- def decode(self,input,errors='strict'):
+ def decode(self, input, errors='strict'):
if errors not in ('strict', 'replace', 'ignore'):
raise UnicodeError, "Unsupported error handling "+errors
res = punycode_decode(input, errors)