diff options
author | Walter Dörwald <walter@livinglogic.de> | 2007-05-11 10:32:57 (GMT) |
---|---|---|
committer | Walter Dörwald <walter@livinglogic.de> | 2007-05-11 10:32:57 (GMT) |
commit | 0ac30f82fe1beb4e0255d06c693ccfba56e45a9f (patch) | |
tree | 1795d671685687ef172c7f4d57290292cdf06879 /Lib/encodings | |
parent | 1f05a3b7fb754d6b30300e1e50aeb92aabe6afd6 (diff) | |
download | cpython-0ac30f82fe1beb4e0255d06c693ccfba56e45a9f.zip cpython-0ac30f82fe1beb4e0255d06c693ccfba56e45a9f.tar.gz cpython-0ac30f82fe1beb4e0255d06c693ccfba56e45a9f.tar.bz2 |
Enhance the punycode decoder so that it can decode unicode (str) objects
in addition to bytes, by encoding them to ASCII first.
Fix the idna codec and the tests.
Diffstat (limited to 'Lib/encodings')
-rw-r--r-- | Lib/encodings/idna.py | 43 |
-rw-r--r-- | Lib/encodings/punycode.py | 6 |
2 files changed, 27 insertions, 22 deletions
diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py index 5c3d056..55e1643 100644 --- a/Lib/encodings/idna.py +++ b/Lib/encodings/idna.py @@ -7,7 +7,8 @@ from unicodedata import ucd_3_2_0 as unicodedata dots = re.compile("[\u002E\u3002\uFF0E\uFF61]") # IDNA section 5 -ace_prefix = "xn--" +ace_prefix = b"xn--" +sace_prefix = "xn--" # This assumes query strings, so AllowUnassigned is true def nameprep(label): @@ -87,7 +88,7 @@ def ToASCII(label): raise UnicodeError("label empty or too long") # Step 5: Check ACE prefix - if label.startswith(ace_prefix): + if label.startswith(sace_prefix): raise UnicodeError("Label starts with ACE prefix") # Step 6: Encode with PUNYCODE @@ -134,7 +135,7 @@ def ToUnicode(label): # Step 7: Compare the result of step 6 with the one of step 3 # label2 will already be in lower case. - if label.lower() != label2: + if str(label, "ascii").lower() != str(label2, "ascii"): raise UnicodeError("IDNA does not round-trip", label, label2) # Step 8: return the result of step 5 @@ -143,7 +144,7 @@ def ToUnicode(label): ### Codec APIs class Codec(codecs.Codec): - def encode(self,input,errors='strict'): + def encode(self, input, errors='strict'): if errors != 'strict': # IDNA is quite clear that implementations must be strict @@ -152,19 +153,21 @@ class Codec(codecs.Codec): if not input: return b"", 0 - result = [] + result = b"" labels = dots.split(input) - if labels and len(labels[-1])==0: + if labels and not labels[-1]: trailing_dot = b'.' 
del labels[-1] else: trailing_dot = b'' for label in labels: - result.append(ToASCII(label)) - # Join with U+002E - return b".".join(result)+trailing_dot, len(input) + if result: + # Join with U+002E + result.extend(b'.') + result.extend(ToASCII(label)) + return result+trailing_dot, len(input) - def decode(self,input,errors='strict'): + def decode(self, input, errors='strict'): if errors != 'strict': raise UnicodeError("Unsupported error handling "+errors) @@ -199,30 +202,31 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder): raise UnicodeError("unsupported error handling "+errors) if not input: - return ("", 0) + return (b'', 0) labels = dots.split(input) - trailing_dot = '' + trailing_dot = b'' if labels: if not labels[-1]: - trailing_dot = '.' + trailing_dot = b'.' del labels[-1] elif not final: # Keep potentially unfinished label until the next call del labels[-1] if labels: - trailing_dot = '.' + trailing_dot = b'.' - result = [] + result = b"" size = 0 for label in labels: - result.append(ToASCII(label)) if size: + # Join with U+002E + result.extend(b'.') size += 1 + result.extend(ToASCII(label)) size += len(label) - # Join with U+002E - result = ".".join(result) + trailing_dot + result += trailing_dot size += len(trailing_dot) return (result, size) @@ -239,8 +243,7 @@ class IncrementalDecoder(codecs.BufferedIncrementalDecoder): labels = dots.split(input) else: # Must be ASCII string - input = str(input) - str(input, "ascii") + input = str(input, "ascii") labels = input.split(".") trailing_dot = '' diff --git a/Lib/encodings/punycode.py b/Lib/encodings/punycode.py index 9d7df10..4c22fe5 100644 --- a/Lib/encodings/punycode.py +++ b/Lib/encodings/punycode.py @@ -181,6 +181,8 @@ def insertion_sort(base, extended, errors): return base def punycode_decode(text, errors): + if isinstance(text, str): + text = text.encode("ascii") pos = text.rfind(b"-") if pos == -1: base = "" @@ -194,11 +196,11 @@ def punycode_decode(text, errors): class 
Codec(codecs.Codec): - def encode(self,input,errors='strict'): + def encode(self, input, errors='strict'): res = punycode_encode(input) return res, len(input) - def decode(self,input,errors='strict'): + def decode(self, input, errors='strict'): if errors not in ('strict', 'replace', 'ignore'): raise UnicodeError, "Unsupported error handling "+errors res = punycode_decode(input, errors) |