summary | refs | log | tree | commit | diff | stats
path: root/Lib/encodings/idna.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/encodings/idna.py')
-rw-r--r-- Lib/encodings/idna.py 117
1 files changed, 48 insertions, 69 deletions
diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py
index ea40585..ea90d67 100644
--- a/Lib/encodings/idna.py
+++ b/Lib/encodings/idna.py
@@ -4,11 +4,11 @@ import stringprep, re, codecs
from unicodedata import ucd_3_2_0 as unicodedata
# IDNA section 3.1
-dots = re.compile("[\u002E\u3002\uFF0E\uFF61]")
+dots = re.compile(u"[\u002E\u3002\uFF0E\uFF61]")
# IDNA section 5
-ace_prefix = b"xn--"
-sace_prefix = "xn--"
+ace_prefix = "xn--"
+uace_prefix = unicode(ace_prefix, "ascii")
# This assumes query strings, so AllowUnassigned is true
def nameprep(label):
@@ -19,7 +19,7 @@ def nameprep(label):
# Map to nothing
continue
newlabel.append(stringprep.map_table_b2(c))
- label = "".join(newlabel)
+ label = u"".join(newlabel)
# Normalize
label = unicodedata.normalize("NFKC", label)
@@ -38,7 +38,7 @@ def nameprep(label):
raise UnicodeError("Invalid character %r" % c)
# Check bidi
- RandAL = [stringprep.in_table_d1(x) for x in label]
+ RandAL = map(stringprep.in_table_d1, label)
for c in RandAL:
if c:
# There is a RandAL char in the string. Must perform further
@@ -47,7 +47,7 @@ def nameprep(label):
# This is table C.8, which was already checked
# 2) If a string contains any RandALCat character, the string
# MUST NOT contain any LCat character.
- if any(stringprep.in_table_d2(x) for x in label):
+ if filter(stringprep.in_table_d2, label):
raise UnicodeError("Violation of BIDI requirement 2")
# 3) If a string contains any RandALCat character, a
@@ -88,7 +88,7 @@ def ToASCII(label):
raise UnicodeError("label empty or too long")
# Step 5: Check ACE prefix
- if label.startswith(sace_prefix):
+ if label.startswith(uace_prefix):
raise UnicodeError("Label starts with ACE prefix")
# Step 6: Encode with PUNYCODE
@@ -104,7 +104,7 @@ def ToASCII(label):
def ToUnicode(label):
# Step 1: Check for ASCII
- if isinstance(label, bytes):
+ if isinstance(label, str):
pure_ascii = True
else:
try:
@@ -122,7 +122,7 @@ def ToUnicode(label):
raise UnicodeError("Invalid character in IDN label")
# Step 3: Check for ACE prefix
if not label.startswith(ace_prefix):
- return str(label, "ascii")
+ return unicode(label, "ascii")
# Step 4: Remove ACE prefix
label1 = label[len(ace_prefix):]
@@ -135,7 +135,7 @@ def ToUnicode(label):
# Step 7: Compare the result of step 6 with the one of step 3
# label2 will already be in lower case.
- if str(label, "ascii").lower() != str(label2, "ascii"):
+ if label.lower() != label2:
raise UnicodeError("IDNA does not round-trip", label, label2)
# Step 8: return the result of step 5
@@ -144,76 +144,55 @@ def ToUnicode(label):
### Codec APIs
class Codec(codecs.Codec):
- def encode(self, input, errors='strict'):
+ def encode(self,input,errors='strict'):
if errors != 'strict':
# IDNA is quite clear that implementations must be strict
raise UnicodeError("unsupported error handling "+errors)
if not input:
- return b'', 0
+ return "", 0
- try:
- result = input.encode('ascii')
- except UnicodeEncodeError:
- pass
- else:
- # ASCII name: fast path
- labels = result.split(b'.')
- for label in labels[:-1]:
- if not (0 < len(label) < 64):
- raise UnicodeError("label empty or too long")
- if len(labels[-1]) >= 64:
- raise UnicodeError("label too long")
- return result, len(input)
-
- result = bytearray()
+ result = []
labels = dots.split(input)
- if labels and not labels[-1]:
- trailing_dot = b'.'
+ if labels and len(labels[-1])==0:
+ trailing_dot = '.'
del labels[-1]
else:
- trailing_dot = b''
+ trailing_dot = ''
for label in labels:
- if result:
- # Join with U+002E
- result.extend(b'.')
- result.extend(ToASCII(label))
- return bytes(result+trailing_dot), len(input)
+ result.append(ToASCII(label))
+ # Join with U+002E
+ return ".".join(result)+trailing_dot, len(input)
- def decode(self, input, errors='strict'):
+ def decode(self,input,errors='strict'):
if errors != 'strict':
raise UnicodeError("Unsupported error handling "+errors)
if not input:
- return "", 0
+ return u"", 0
# IDNA allows decoding to operate on Unicode strings, too.
- if not isinstance(input, bytes):
- # XXX obviously wrong, see #3232
- input = bytes(input)
-
- if ace_prefix not in input:
- # Fast path
- try:
- return input.decode('ascii'), len(input)
- except UnicodeDecodeError:
- pass
-
- labels = input.split(b".")
+ if isinstance(input, unicode):
+ labels = dots.split(input)
+ else:
+ # Must be ASCII string
+ input = str(input)
+ unicode(input, "ascii")
+ labels = input.split(".")
if labels and len(labels[-1]) == 0:
- trailing_dot = '.'
+ trailing_dot = u'.'
del labels[-1]
else:
- trailing_dot = ''
+ trailing_dot = u''
result = []
for label in labels:
result.append(ToUnicode(label))
- return ".".join(result)+trailing_dot, len(input)
+ return u".".join(result)+trailing_dot, len(input)
class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
def _buffer_encode(self, input, errors, final):
@@ -222,33 +201,32 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
raise UnicodeError("unsupported error handling "+errors)
if not input:
- return (b'', 0)
+ return ("", 0)
labels = dots.split(input)
- trailing_dot = b''
+ trailing_dot = u''
if labels:
if not labels[-1]:
- trailing_dot = b'.'
+ trailing_dot = '.'
del labels[-1]
elif not final:
# Keep potentially unfinished label until the next call
del labels[-1]
if labels:
- trailing_dot = b'.'
+ trailing_dot = '.'
- result = bytearray()
+ result = []
size = 0
for label in labels:
+ result.append(ToASCII(label))
if size:
- # Join with U+002E
- result.extend(b'.')
size += 1
- result.extend(ToASCII(label))
size += len(label)
- result += trailing_dot
+ # Join with U+002E
+ result = ".".join(result) + trailing_dot
size += len(trailing_dot)
- return (bytes(result), size)
+ return (result, size)
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
def _buffer_decode(self, input, errors, final):
@@ -256,26 +234,27 @@ class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
raise UnicodeError("Unsupported error handling "+errors)
if not input:
- return ("", 0)
+ return (u"", 0)
# IDNA allows decoding to operate on Unicode strings, too.
- if isinstance(input, str):
+ if isinstance(input, unicode):
labels = dots.split(input)
else:
# Must be ASCII string
- input = str(input, "ascii")
+ input = str(input)
+ unicode(input, "ascii")
labels = input.split(".")
- trailing_dot = ''
+ trailing_dot = u''
if labels:
if not labels[-1]:
- trailing_dot = '.'
+ trailing_dot = u'.'
del labels[-1]
elif not final:
# Keep potentially unfinished label until the next call
del labels[-1]
if labels:
- trailing_dot = '.'
+ trailing_dot = u'.'
result = []
size = 0
@@ -285,7 +264,7 @@ class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
size += 1
size += len(label)
- result = ".".join(result) + trailing_dot
+ result = u".".join(result) + trailing_dot
size += len(trailing_dot)
return (result, size)