Enhance the punycode decoder so that it can decode

unicode objects. Fix the idna codec and the tests.

Enhance the punycode decoder so that it can decode
unicode objects. Fix the idna codec and the tests.
0ac30f82 · Walter Dörwald · 1f05a3b7 · 0ac30f82 · 0ac30f82 · 0ac30f82
Kaydet (Commit) 0ac30f82 authored May 11, 2007 tarafından Walter Dörwald
Expand all Hide whitespace changes
Inline Side-by-side

Showing with 27 additions and 22 deletions

idna.py Lib/encodings/idna.py +23 -20

punycode.py Lib/encodings/punycode.py +4 -2

test_codecs.py Lib/test/test_codecs.py +0 -0

No files found.
--- a/Lib/encodings/idna.py
+++ b/Lib/encodings/idna.py
@@ -7,7 +7,8 @@ from unicodedata import ucd_3_2_0 as unicodedata
 dots = re.compile("[\u002E\u3002\uFF0E\uFF61]")
 # IDNA section 5
-ace_prefix = "xn--"
+ace_prefix = b"xn--"
+sace_prefix = "xn--"
 # This assumes query strings, so AllowUnassigned is true
 def nameprep(label):
@@ -87,7 +88,7 @@ def ToASCII(label):
        raise UnicodeError("label empty or too long")
    # Step 5: Check ACE prefix
-    if label.startswith(ace_prefix):
+    if label.startswith(sace_prefix):
        raise UnicodeError("Label starts with ACE prefix")
    # Step 6: Encode with PUNYCODE
@@ -134,7 +135,7 @@ def ToUnicode(label):
    # Step 7: Compare the result of step 6 with the one of step 3
    # label2 will already be in lower case.
-    if label.lower() != label2:
+    if str(label, "ascii").lower() != str(label2, "ascii"):
        raise UnicodeError("IDNA does not round-trip", label, label2)
    # Step 8: return the result of step 5
@@ -143,7 +144,7 @@ def ToUnicode(label):
 ### Codec APIs
 class Codec(codecs.Codec):
-    def encode(self,input,errors='strict'):
+    def encode(self, input, errors='strict'):
        if errors != 'strict':
            # IDNA is quite clear that implementations must be strict
@@ -152,19 +153,21 @@ class Codec(codecs.Codec):
        if not input:
            return b"", 0
-        result = []
+        result = b""
        labels = dots.split(input)
-        if labels and len(labels[-1])==0:
+        if labels and not labels[-1]:
            trailing_dot = b'.'
            del labels[-1]
        else:
            trailing_dot = b''
        for label in labels:
-            result.append(ToASCII(label))
+            if result:
-        # Join with U+002E
+                # Join with U+002E
-        return b".".join(result)+trailing_dot, len(input)
+                result.extend(b'.')
+            result.extend(ToASCII(label))
+        return result+trailing_dot, len(input)
-    def decode(self,input,errors='strict'):
+    def decode(self, input, errors='strict'):
        if errors != 'strict':
            raise UnicodeError("Unsupported error handling "+errors)
@@ -199,30 +202,31 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
            raise UnicodeError("unsupported error handling "+errors)
        if not input:
-            return ("", 0)
+            return (b'', 0)
        labels = dots.split(input)
-        trailing_dot = ''
+        trailing_dot = b''
        if labels:
            if not labels[-1]:
-                trailing_dot = '.'
+                trailing_dot = b'.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
-                    trailing_dot = '.'
+                    trailing_dot = b'.'
-        result = []
+        result = b""
        size = 0
        for label in labels:
-            result.append(ToASCII(label))
            if size:
+                # Join with U+002E
+                result.extend(b'.')
                size += 1
+            result.extend(ToASCII(label))
            size += len(label)
-        # Join with U+002E
+        result += trailing_dot
-        result = ".".join(result) + trailing_dot
        size += len(trailing_dot)
        return (result, size)
@@ -239,8 +243,7 @@ class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
            labels = dots.split(input)
        else:
            # Must be ASCII string
-            input = str(input)
+            input = str(input, "ascii")
-            str(input, "ascii")
            labels = input.split(".")
        trailing_dot = ''

--- a/Lib/encodings/punycode.py
+++ b/Lib/encodings/punycode.py
@@ -181,6 +181,8 @@ def insertion_sort(base, extended, errors):
    return base
 def punycode_decode(text, errors):
+    if isinstance(text, str):
+        text = text.encode("ascii")
    pos = text.rfind(b"-")
    if pos == -1:
        base = ""
@@ -194,11 +196,11 @@ def punycode_decode(text, errors):
 class Codec(codecs.Codec):
-    def encode(self,input,errors='strict'):
+    def encode(self, input, errors='strict'):
        res = punycode_encode(input)
        return res, len(input)
-    def decode(self,input,errors='strict'):
+    def decode(self, input, errors='strict'):
        if errors not in ('strict', 'replace', 'ignore'):
            raise UnicodeError, "Unsupported error handling "+errors
        res = punycode_decode(input, errors)

--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py