Commit 0ac30f82, authored by Walter Dörwald

Enhance the punycode decoder so that it can decode unicode objects.

Fix the idna codec and the tests.
Parent commit: 1f05a3b7
...@@ -7,7 +7,8 @@ from unicodedata import ucd_3_2_0 as unicodedata ...@@ -7,7 +7,8 @@ from unicodedata import ucd_3_2_0 as unicodedata
dots = re.compile("[\u002E\u3002\uFF0E\uFF61]") dots = re.compile("[\u002E\u3002\uFF0E\uFF61]")
# IDNA section 5 # IDNA section 5
ace_prefix = "xn--" ace_prefix = b"xn--"
sace_prefix = "xn--"
# This assumes query strings, so AllowUnassigned is true # This assumes query strings, so AllowUnassigned is true
def nameprep(label): def nameprep(label):
...@@ -87,7 +88,7 @@ def ToASCII(label): ...@@ -87,7 +88,7 @@ def ToASCII(label):
raise UnicodeError("label empty or too long") raise UnicodeError("label empty or too long")
# Step 5: Check ACE prefix # Step 5: Check ACE prefix
if label.startswith(ace_prefix): if label.startswith(sace_prefix):
raise UnicodeError("Label starts with ACE prefix") raise UnicodeError("Label starts with ACE prefix")
# Step 6: Encode with PUNYCODE # Step 6: Encode with PUNYCODE
...@@ -134,7 +135,7 @@ def ToUnicode(label): ...@@ -134,7 +135,7 @@ def ToUnicode(label):
# Step 7: Compare the result of step 6 with the one of step 3 # Step 7: Compare the result of step 6 with the one of step 3
# label2 will already be in lower case. # label2 will already be in lower case.
if label.lower() != label2: if str(label, "ascii").lower() != str(label2, "ascii"):
raise UnicodeError("IDNA does not round-trip", label, label2) raise UnicodeError("IDNA does not round-trip", label, label2)
# Step 8: return the result of step 5 # Step 8: return the result of step 5
...@@ -143,7 +144,7 @@ def ToUnicode(label): ...@@ -143,7 +144,7 @@ def ToUnicode(label):
### Codec APIs ### Codec APIs
class Codec(codecs.Codec): class Codec(codecs.Codec):
def encode(self,input,errors='strict'): def encode(self, input, errors='strict'):
if errors != 'strict': if errors != 'strict':
# IDNA is quite clear that implementations must be strict # IDNA is quite clear that implementations must be strict
...@@ -152,19 +153,21 @@ class Codec(codecs.Codec): ...@@ -152,19 +153,21 @@ class Codec(codecs.Codec):
if not input: if not input:
return b"", 0 return b"", 0
result = [] result = b""
labels = dots.split(input) labels = dots.split(input)
if labels and len(labels[-1])==0: if labels and not labels[-1]:
trailing_dot = b'.' trailing_dot = b'.'
del labels[-1] del labels[-1]
else: else:
trailing_dot = b'' trailing_dot = b''
for label in labels: for label in labels:
result.append(ToASCII(label)) if result:
# Join with U+002E # Join with U+002E
return b".".join(result)+trailing_dot, len(input) result.extend(b'.')
result.extend(ToASCII(label))
return result+trailing_dot, len(input)
def decode(self,input,errors='strict'): def decode(self, input, errors='strict'):
if errors != 'strict': if errors != 'strict':
raise UnicodeError("Unsupported error handling "+errors) raise UnicodeError("Unsupported error handling "+errors)
...@@ -199,30 +202,31 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder): ...@@ -199,30 +202,31 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
raise UnicodeError("unsupported error handling "+errors) raise UnicodeError("unsupported error handling "+errors)
if not input: if not input:
return ("", 0) return (b'', 0)
labels = dots.split(input) labels = dots.split(input)
trailing_dot = '' trailing_dot = b''
if labels: if labels:
if not labels[-1]: if not labels[-1]:
trailing_dot = '.' trailing_dot = b'.'
del labels[-1] del labels[-1]
elif not final: elif not final:
# Keep potentially unfinished label until the next call # Keep potentially unfinished label until the next call
del labels[-1] del labels[-1]
if labels: if labels:
trailing_dot = '.' trailing_dot = b'.'
result = [] result = b""
size = 0 size = 0
for label in labels: for label in labels:
result.append(ToASCII(label))
if size: if size:
# Join with U+002E
result.extend(b'.')
size += 1 size += 1
result.extend(ToASCII(label))
size += len(label) size += len(label)
# Join with U+002E result += trailing_dot
result = ".".join(result) + trailing_dot
size += len(trailing_dot) size += len(trailing_dot)
return (result, size) return (result, size)
...@@ -239,8 +243,7 @@ class IncrementalDecoder(codecs.BufferedIncrementalDecoder): ...@@ -239,8 +243,7 @@ class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
labels = dots.split(input) labels = dots.split(input)
else: else:
# Must be ASCII string # Must be ASCII string
input = str(input) input = str(input, "ascii")
str(input, "ascii")
labels = input.split(".") labels = input.split(".")
trailing_dot = '' trailing_dot = ''
......
...@@ -181,6 +181,8 @@ def insertion_sort(base, extended, errors): ...@@ -181,6 +181,8 @@ def insertion_sort(base, extended, errors):
return base return base
def punycode_decode(text, errors): def punycode_decode(text, errors):
if isinstance(text, str):
text = text.encode("ascii")
pos = text.rfind(b"-") pos = text.rfind(b"-")
if pos == -1: if pos == -1:
base = "" base = ""
...@@ -194,11 +196,11 @@ def punycode_decode(text, errors): ...@@ -194,11 +196,11 @@ def punycode_decode(text, errors):
class Codec(codecs.Codec): class Codec(codecs.Codec):
def encode(self,input,errors='strict'): def encode(self, input, errors='strict'):
res = punycode_encode(input) res = punycode_encode(input)
return res, len(input) return res, len(input)
def decode(self,input,errors='strict'): def decode(self, input, errors='strict'):
if errors not in ('strict', 'replace', 'ignore'): if errors not in ('strict', 'replace', 'ignore'):
raise UnicodeError, "Unsupported error handling "+errors raise UnicodeError, "Unsupported error handling "+errors
res = punycode_decode(input, errors) res = punycode_decode(input, errors)
......
...@@ -624,6 +624,7 @@ class PunycodeTest(unittest.TestCase): ...@@ -624,6 +624,7 @@ class PunycodeTest(unittest.TestCase):
def test_decode(self): def test_decode(self):
for uni, puny in punycode_testcases: for uni, puny in punycode_testcases:
self.assertEquals(uni, puny.decode("punycode")) self.assertEquals(uni, puny.decode("punycode"))
self.assertEquals(uni, puny.decode("ascii").decode("punycode"))
class UnicodeInternalTest(unittest.TestCase): class UnicodeInternalTest(unittest.TestCase):
def test_bug1251300(self): def test_bug1251300(self):
...@@ -676,154 +677,154 @@ class UnicodeInternalTest(unittest.TestCase): ...@@ -676,154 +677,154 @@ class UnicodeInternalTest(unittest.TestCase):
# From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html # From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
nameprep_tests = [ nameprep_tests = [
# 3.1 Map to nothing. # 3.1 Map to nothing.
('foo\xc2\xad\xcd\x8f\xe1\xa0\x86\xe1\xa0\x8bbar' (b'foo\xc2\xad\xcd\x8f\xe1\xa0\x86\xe1\xa0\x8bbar'
'\xe2\x80\x8b\xe2\x81\xa0baz\xef\xb8\x80\xef\xb8\x88\xef' b'\xe2\x80\x8b\xe2\x81\xa0baz\xef\xb8\x80\xef\xb8\x88\xef'
'\xb8\x8f\xef\xbb\xbf', b'\xb8\x8f\xef\xbb\xbf',
'foobarbaz'), b'foobarbaz'),
# 3.2 Case folding ASCII U+0043 U+0041 U+0046 U+0045. # 3.2 Case folding ASCII U+0043 U+0041 U+0046 U+0045.
('CAFE', (b'CAFE',
'cafe'), b'cafe'),
# 3.3 Case folding 8bit U+00DF (german sharp s). # 3.3 Case folding 8bit U+00DF (german sharp s).
# The original test case is bogus; it says \xc3\xdf # The original test case is bogus; it says \xc3\xdf
('\xc3\x9f', (b'\xc3\x9f',
'ss'), b'ss'),
# 3.4 Case folding U+0130 (turkish capital I with dot). # 3.4 Case folding U+0130 (turkish capital I with dot).
('\xc4\xb0', (b'\xc4\xb0',
'i\xcc\x87'), b'i\xcc\x87'),
# 3.5 Case folding multibyte U+0143 U+037A. # 3.5 Case folding multibyte U+0143 U+037A.
('\xc5\x83\xcd\xba', (b'\xc5\x83\xcd\xba',
'\xc5\x84 \xce\xb9'), b'\xc5\x84 \xce\xb9'),
# 3.6 Case folding U+2121 U+33C6 U+1D7BB. # 3.6 Case folding U+2121 U+33C6 U+1D7BB.
# XXX: skip this as it fails in UCS-2 mode # XXX: skip this as it fails in UCS-2 mode
#('\xe2\x84\xa1\xe3\x8f\x86\xf0\x9d\x9e\xbb', #('\xe2\x84\xa1\xe3\x8f\x86\xf0\x9d\x9e\xbb',
# 'telc\xe2\x88\x95kg\xcf\x83'), # 'telc\xe2\x88\x95kg\xcf\x83'),
(None, None), (None, None),
# 3.7 Normalization of U+006a U+030c U+00A0 U+00AA. # 3.7 Normalization of U+006a U+030c U+00A0 U+00AA.
('j\xcc\x8c\xc2\xa0\xc2\xaa', (b'j\xcc\x8c\xc2\xa0\xc2\xaa',
'\xc7\xb0 a'), b'\xc7\xb0 a'),
# 3.8 Case folding U+1FB7 and normalization. # 3.8 Case folding U+1FB7 and normalization.
('\xe1\xbe\xb7', (b'\xe1\xbe\xb7',
'\xe1\xbe\xb6\xce\xb9'), b'\xe1\xbe\xb6\xce\xb9'),
# 3.9 Self-reverting case folding U+01F0 and normalization. # 3.9 Self-reverting case folding U+01F0 and normalization.
# The original test case is bogus, it says `\xc7\xf0' # The original test case is bogus, it says `\xc7\xf0'
('\xc7\xb0', (b'\xc7\xb0',
'\xc7\xb0'), b'\xc7\xb0'),
# 3.10 Self-reverting case folding U+0390 and normalization. # 3.10 Self-reverting case folding U+0390 and normalization.
('\xce\x90', (b'\xce\x90',
'\xce\x90'), b'\xce\x90'),
# 3.11 Self-reverting case folding U+03B0 and normalization. # 3.11 Self-reverting case folding U+03B0 and normalization.
('\xce\xb0', (b'\xce\xb0',
'\xce\xb0'), b'\xce\xb0'),
# 3.12 Self-reverting case folding U+1E96 and normalization. # 3.12 Self-reverting case folding U+1E96 and normalization.
('\xe1\xba\x96', (b'\xe1\xba\x96',
'\xe1\xba\x96'), b'\xe1\xba\x96'),
# 3.13 Self-reverting case folding U+1F56 and normalization. # 3.13 Self-reverting case folding U+1F56 and normalization.
('\xe1\xbd\x96', (b'\xe1\xbd\x96',
'\xe1\xbd\x96'), b'\xe1\xbd\x96'),
# 3.14 ASCII space character U+0020. # 3.14 ASCII space character U+0020.
(' ', (b' ',
' '), b' '),
# 3.15 Non-ASCII 8bit space character U+00A0. # 3.15 Non-ASCII 8bit space character U+00A0.
('\xc2\xa0', (b'\xc2\xa0',
' '), b' '),
# 3.16 Non-ASCII multibyte space character U+1680. # 3.16 Non-ASCII multibyte space character U+1680.
('\xe1\x9a\x80', (b'\xe1\x9a\x80',
None), None),
# 3.17 Non-ASCII multibyte space character U+2000. # 3.17 Non-ASCII multibyte space character U+2000.
('\xe2\x80\x80', (b'\xe2\x80\x80',
' '), b' '),
# 3.18 Zero Width Space U+200b. # 3.18 Zero Width Space U+200b.
('\xe2\x80\x8b', (b'\xe2\x80\x8b',
''), b''),
# 3.19 Non-ASCII multibyte space character U+3000. # 3.19 Non-ASCII multibyte space character U+3000.
('\xe3\x80\x80', (b'\xe3\x80\x80',
' '), b' '),
# 3.20 ASCII control characters U+0010 U+007F. # 3.20 ASCII control characters U+0010 U+007F.
('\x10\x7f', (b'\x10\x7f',
'\x10\x7f'), b'\x10\x7f'),
# 3.21 Non-ASCII 8bit control character U+0085. # 3.21 Non-ASCII 8bit control character U+0085.
('\xc2\x85', (b'\xc2\x85',
None), None),
# 3.22 Non-ASCII multibyte control character U+180E. # 3.22 Non-ASCII multibyte control character U+180E.
('\xe1\xa0\x8e', (b'\xe1\xa0\x8e',
None), None),
# 3.23 Zero Width No-Break Space U+FEFF. # 3.23 Zero Width No-Break Space U+FEFF.
('\xef\xbb\xbf', (b'\xef\xbb\xbf',
''), b''),
# 3.24 Non-ASCII control character U+1D175. # 3.24 Non-ASCII control character U+1D175.
('\xf0\x9d\x85\xb5', (b'\xf0\x9d\x85\xb5',
None), None),
# 3.25 Plane 0 private use character U+F123. # 3.25 Plane 0 private use character U+F123.
('\xef\x84\xa3', (b'\xef\x84\xa3',
None), None),
# 3.26 Plane 15 private use character U+F1234. # 3.26 Plane 15 private use character U+F1234.
('\xf3\xb1\x88\xb4', (b'\xf3\xb1\x88\xb4',
None), None),
# 3.27 Plane 16 private use character U+10F234. # 3.27 Plane 16 private use character U+10F234.
('\xf4\x8f\x88\xb4', (b'\xf4\x8f\x88\xb4',
None), None),
# 3.28 Non-character code point U+8FFFE. # 3.28 Non-character code point U+8FFFE.
('\xf2\x8f\xbf\xbe', (b'\xf2\x8f\xbf\xbe',
None), None),
# 3.29 Non-character code point U+10FFFF. # 3.29 Non-character code point U+10FFFF.
('\xf4\x8f\xbf\xbf', (b'\xf4\x8f\xbf\xbf',
None), None),
# 3.30 Surrogate code U+DF42. # 3.30 Surrogate code U+DF42.
('\xed\xbd\x82', (b'\xed\xbd\x82',
None), None),
# 3.31 Non-plain text character U+FFFD. # 3.31 Non-plain text character U+FFFD.
('\xef\xbf\xbd', (b'\xef\xbf\xbd',
None), None),
# 3.32 Ideographic description character U+2FF5. # 3.32 Ideographic description character U+2FF5.
('\xe2\xbf\xb5', (b'\xe2\xbf\xb5',
None), None),
# 3.33 Display property character U+0341. # 3.33 Display property character U+0341.
('\xcd\x81', (b'\xcd\x81',
'\xcc\x81'), b'\xcc\x81'),
# 3.34 Left-to-right mark U+200E. # 3.34 Left-to-right mark U+200E.
('\xe2\x80\x8e', (b'\xe2\x80\x8e',
None), None),
# 3.35 Deprecated U+202A. # 3.35 Deprecated U+202A.
('\xe2\x80\xaa', (b'\xe2\x80\xaa',
None), None),
# 3.36 Language tagging character U+E0001. # 3.36 Language tagging character U+E0001.
('\xf3\xa0\x80\x81', (b'\xf3\xa0\x80\x81',
None), None),
# 3.37 Language tagging character U+E0042. # 3.37 Language tagging character U+E0042.
('\xf3\xa0\x81\x82', (b'\xf3\xa0\x81\x82',
None), None),
# 3.38 Bidi: RandALCat character U+05BE and LCat characters. # 3.38 Bidi: RandALCat character U+05BE and LCat characters.
('foo\xd6\xbebar', (b'foo\xd6\xbebar',
None), None),
# 3.39 Bidi: RandALCat character U+FD50 and LCat characters. # 3.39 Bidi: RandALCat character U+FD50 and LCat characters.
('foo\xef\xb5\x90bar', (b'foo\xef\xb5\x90bar',
None), None),
# 3.40 Bidi: RandALCat character U+FB38 and LCat characters. # 3.40 Bidi: RandALCat character U+FB38 and LCat characters.
('foo\xef\xb9\xb6bar', (b'foo\xef\xb9\xb6bar',
'foo \xd9\x8ebar'), b'foo \xd9\x8ebar'),
# 3.41 Bidi: RandALCat without trailing RandALCat U+0627 U+0031. # 3.41 Bidi: RandALCat without trailing RandALCat U+0627 U+0031.
('\xd8\xa71', (b'\xd8\xa71',
None), None),
# 3.42 Bidi: RandALCat character U+0627 U+0031 U+0628. # 3.42 Bidi: RandALCat character U+0627 U+0031 U+0628.
('\xd8\xa71\xd8\xa8', (b'\xd8\xa71\xd8\xa8',
'\xd8\xa71\xd8\xa8'), b'\xd8\xa71\xd8\xa8'),
# 3.43 Unassigned code point U+E0002. # 3.43 Unassigned code point U+E0002.
# Skip this test as we allow unassigned # Skip this test as we allow unassigned
#('\xf3\xa0\x80\x82', #(b'\xf3\xa0\x80\x82',
# None), # None),
(None, None), (None, None),
# 3.44 Larger test (shrinking). # 3.44 Larger test (shrinking).
# Original test case reads \xc3\xdf # Original test case reads \xc3\xdf
('X\xc2\xad\xc3\x9f\xc4\xb0\xe2\x84\xa1j\xcc\x8c\xc2\xa0\xc2' (b'X\xc2\xad\xc3\x9f\xc4\xb0\xe2\x84\xa1j\xcc\x8c\xc2\xa0\xc2'
'\xaa\xce\xb0\xe2\x80\x80', b'\xaa\xce\xb0\xe2\x80\x80',
'xssi\xcc\x87tel\xc7\xb0 a\xce\xb0 '), b'xssi\xcc\x87tel\xc7\xb0 a\xce\xb0 '),
# 3.45 Larger test (expanding). # 3.45 Larger test (expanding).
# Original test case reads \xc3\x9f # Original test case reads \xc3\x9f
('X\xc3\x9f\xe3\x8c\x96\xc4\xb0\xe2\x84\xa1\xe2\x92\x9f\xe3\x8c' (b'X\xc3\x9f\xe3\x8c\x96\xc4\xb0\xe2\x84\xa1\xe2\x92\x9f\xe3\x8c'
'\x80', b'\x80',
'xss\xe3\x82\xad\xe3\x83\xad\xe3\x83\xa1\xe3\x83\xbc\xe3' b'xss\xe3\x82\xad\xe3\x83\xad\xe3\x83\xa1\xe3\x83\xbc\xe3'
'\x83\x88\xe3\x83\xabi\xcc\x87tel\x28d\x29\xe3\x82' b'\x83\x88\xe3\x83\xabi\xcc\x87tel\x28d\x29\xe3\x82'
'\xa2\xe3\x83\x91\xe3\x83\xbc\xe3\x83\x88') b'\xa2\xe3\x83\x91\xe3\x83\xbc\xe3\x83\x88')
] ]
...@@ -848,16 +849,16 @@ class NameprepTest(unittest.TestCase): ...@@ -848,16 +849,16 @@ class NameprepTest(unittest.TestCase):
class IDNACodecTest(unittest.TestCase): class IDNACodecTest(unittest.TestCase):
def test_builtin_decode(self): def test_builtin_decode(self):
self.assertEquals(str("python.org", "idna"), "python.org") self.assertEquals(str(b"python.org", "idna"), "python.org")
self.assertEquals(str("python.org.", "idna"), "python.org.") self.assertEquals(str(b"python.org.", "idna"), "python.org.")
self.assertEquals(str("xn--pythn-mua.org", "idna"), "pyth\xf6n.org") self.assertEquals(str(b"xn--pythn-mua.org", "idna"), "pyth\xf6n.org")
self.assertEquals(str("xn--pythn-mua.org.", "idna"), "pyth\xf6n.org.") self.assertEquals(str(b"xn--pythn-mua.org.", "idna"), "pyth\xf6n.org.")
def test_builtin_encode(self): def test_builtin_encode(self):
self.assertEquals("python.org".encode("idna"), "python.org") self.assertEquals("python.org".encode("idna"), b"python.org")
self.assertEquals("python.org.".encode("idna"), "python.org.") self.assertEquals("python.org.".encode("idna"), b"python.org.")
self.assertEquals("pyth\xf6n.org".encode("idna"), "xn--pythn-mua.org") self.assertEquals("pyth\xf6n.org".encode("idna"), b"xn--pythn-mua.org")
self.assertEquals("pyth\xf6n.org.".encode("idna"), "xn--pythn-mua.org.") self.assertEquals("pyth\xf6n.org.".encode("idna"), b"xn--pythn-mua.org.")
def test_stream(self): def test_stream(self):
r = codecs.getreader("idna")(io.BytesIO(b"abc")) r = codecs.getreader("idna")(io.BytesIO(b"abc"))
...@@ -866,61 +867,61 @@ class IDNACodecTest(unittest.TestCase): ...@@ -866,61 +867,61 @@ class IDNACodecTest(unittest.TestCase):
def test_incremental_decode(self): def test_incremental_decode(self):
self.assertEquals( self.assertEquals(
"".join(codecs.iterdecode("python.org", "idna")), "".join(codecs.iterdecode((bytes(chr(c)) for c in b"python.org"), "idna")),
"python.org" "python.org"
) )
self.assertEquals( self.assertEquals(
"".join(codecs.iterdecode("python.org.", "idna")), "".join(codecs.iterdecode((bytes(chr(c)) for c in b"python.org."), "idna")),
"python.org." "python.org."
) )
self.assertEquals( self.assertEquals(
"".join(codecs.iterdecode("xn--pythn-mua.org.", "idna")), "".join(codecs.iterdecode((bytes(chr(c)) for c in b"xn--pythn-mua.org."), "idna")),
"pyth\xf6n.org." "pyth\xf6n.org."
) )
self.assertEquals( self.assertEquals(
"".join(codecs.iterdecode("xn--pythn-mua.org.", "idna")), "".join(codecs.iterdecode((bytes(chr(c)) for c in b"xn--pythn-mua.org."), "idna")),
"pyth\xf6n.org." "pyth\xf6n.org."
) )
decoder = codecs.getincrementaldecoder("idna")() decoder = codecs.getincrementaldecoder("idna")()
self.assertEquals(decoder.decode("xn--xam", ), "") self.assertEquals(decoder.decode(b"xn--xam", ), "")
self.assertEquals(decoder.decode("ple-9ta.o", ), "\xe4xample.") self.assertEquals(decoder.decode(b"ple-9ta.o", ), "\xe4xample.")
self.assertEquals(decoder.decode("rg"), "") self.assertEquals(decoder.decode(b"rg"), "")
self.assertEquals(decoder.decode("", True), "org") self.assertEquals(decoder.decode(b"", True), "org")
decoder.reset() decoder.reset()
self.assertEquals(decoder.decode("xn--xam", ), "") self.assertEquals(decoder.decode(b"xn--xam", ), "")
self.assertEquals(decoder.decode("ple-9ta.o", ), "\xe4xample.") self.assertEquals(decoder.decode(b"ple-9ta.o", ), "\xe4xample.")
self.assertEquals(decoder.decode("rg."), "org.") self.assertEquals(decoder.decode(b"rg."), "org.")
self.assertEquals(decoder.decode("", True), "") self.assertEquals(decoder.decode(b"", True), "")
def test_incremental_encode(self): def test_incremental_encode(self):
self.assertEquals( self.assertEquals(
"".join(codecs.iterencode("python.org", "idna")), b"".join(codecs.iterencode("python.org", "idna")),
"python.org" b"python.org"
) )
self.assertEquals( self.assertEquals(
"".join(codecs.iterencode("python.org.", "idna")), b"".join(codecs.iterencode("python.org.", "idna")),
"python.org." b"python.org."
) )
self.assertEquals( self.assertEquals(
"".join(codecs.iterencode("pyth\xf6n.org.", "idna")), b"".join(codecs.iterencode("pyth\xf6n.org.", "idna")),
"xn--pythn-mua.org." b"xn--pythn-mua.org."
) )
self.assertEquals( self.assertEquals(
"".join(codecs.iterencode("pyth\xf6n.org.", "idna")), b"".join(codecs.iterencode("pyth\xf6n.org.", "idna")),
"xn--pythn-mua.org." b"xn--pythn-mua.org."
) )
encoder = codecs.getincrementalencoder("idna")() encoder = codecs.getincrementalencoder("idna")()
self.assertEquals(encoder.encode("\xe4x"), "") self.assertEquals(encoder.encode("\xe4x"), b"")
self.assertEquals(encoder.encode("ample.org"), "xn--xample-9ta.") self.assertEquals(encoder.encode("ample.org"), b"xn--xample-9ta.")
self.assertEquals(encoder.encode("", True), "org") self.assertEquals(encoder.encode("", True), b"org")
encoder.reset() encoder.reset()
self.assertEquals(encoder.encode("\xe4x"), "") self.assertEquals(encoder.encode("\xe4x"), b"")
self.assertEquals(encoder.encode("ample.org."), "xn--xample-9ta.org.") self.assertEquals(encoder.encode("ample.org."), b"xn--xample-9ta.org.")
self.assertEquals(encoder.encode("", True), "") self.assertEquals(encoder.encode("", True), b"")
class CodecsModuleTest(unittest.TestCase): class CodecsModuleTest(unittest.TestCase):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.