#
# test_codecencodings_cn.py
#   Codec encoding tests for PRC encodings.
#

from test import support
from test import multibytecodec_support
import unittest

class Test_GB2312(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``gb2312`` encoding.

    This class defines no test methods of its own; it only supplies the
    class attributes below, so whatever runs is inherited from the bases.
    """

    # Name of the codec under test.
    encoding = 'gb2312'
    # Sample text loaded by the shared helper for this encoding.
    tstring = multibytecodec_support.load_teststring('gb2312')
    # Each entry is (input, error-handler name, expected result).
    # NOTE(review): an expected value of None presumably means the codec
    # must raise for that input -- confirm against TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x81\x81\xc1\xc4", "strict",  None),
        (b"abc\xc8", "strict",  None),
        (b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
        (b"abc\x81\x81\xc1\xc4", "ignore",  "abc\u804a"),
        (b"\xc1\x64", "strict", None),
    )

class Test_GBK(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``gbk`` encoding.

    This class defines no test methods of its own; it only supplies the
    class attributes below, so whatever runs is inherited from the bases.
    """

    # Name of the codec under test.
    encoding = 'gbk'
    # Sample text loaded by the shared helper for this encoding.
    tstring = multibytecodec_support.load_teststring('gbk')
    # Each entry is (input, error-handler name, expected result).
    # NOTE(review): an expected value of None presumably means the codec
    # must raise for that input -- confirm against TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict",  None),
        (b"abc\xc8", "strict",  None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\u804a"),
        (b"\x83\x34\x83\x31", "strict", None),
        # The only str (not bytes) input in this table.
        # NOTE(review): presumably exercised in the encode direction --
        # confirm against TestBase.
        ("\u30fb", "strict", None),
    )

class Test_GB18030(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``gb18030`` encoding.

    This class defines no test methods of its own; it only supplies the
    class attributes below, so whatever runs is inherited from the bases.
    """

    # Name of the codec under test.
    encoding = 'gb18030'
    # Sample text loaded by the shared helper for this encoding.
    tstring = multibytecodec_support.load_teststring('gb18030')
    # Each entry is (input, error-handler name, expected result).
    # NOTE(review): an expected value of None presumably means the codec
    # must raise for that input -- confirm against TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict",  None),
        (b"abc\xc8", "strict",  None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\u804a"),
        (b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd9\ufffd9\u804a"),
        # str input paired with a bytes result (b"\x819\xa79" is the four
        # bytes 0x81 0x39 0xa7 0x39).
        # NOTE(review): presumably exercised in the encode direction --
        # confirm against TestBase.
        ("\u30fb", "strict", b"\x819\xa79"),
        (b"abc\x84\x32\x80\x80def", "replace", 'abc\ufffd2\ufffd\ufffddef'),
        (b"abc\x81\x30\x81\x30def", "strict", 'abc\x80def'),
        (b"abc\x86\x30\x81\x30def", "replace", 'abc\ufffd0\ufffd0def'),
    )
    # NOTE(review): flag presumably read by TestBase; its exact effect is
    # not visible in this file.
    has_iso10646 = True

class Test_HZ(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``hz`` encoding.

    This class defines no test methods of its own; it only supplies the
    class attributes below, so whatever runs is inherited from the bases.
    """

    # Name of the codec under test.
    encoding = 'hz'
    # Sample text loaded by the shared helper for this encoding.
    tstring = multibytecodec_support.load_teststring('hz')
    # Each entry is (input, error-handler name, expected result).  In the
    # multi-line cases the adjacent literals concatenate into one input
    # and one expected string.
    codectests = (
        # test '~\n' (3 lines)
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~{<:Ky2;S{#,~}~\n'
         b'~{NpJ)l6HK!#~}Bye.\n',
         'strict',
         'This sentence is in ASCII.\n'
         'The next sentence is in GB.'
         '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         'Bye.\n'),
        # test '~\n' (4 lines)
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~\n'
         b'~{<:Ky2;S{#,NpJ)l6HK!#~}~\n'
         b'Bye.\n',
         'strict',
         'This sentence is in ASCII.\n'
         'The next sentence is in GB.'
         '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         'Bye.\n'),
        # invalid bytes
        (b'ab~cd', 'replace', 'ab\uFFFDcd'),
        (b'ab\xffcd', 'replace', 'ab\uFFFDcd'),
        (b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'),
        (b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'),
        (b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"),
    )

def test_main():
    """Pass this module's name to ``support.run_unittest``."""
    support.run_unittest(__name__)

# Allow the file to be executed directly as a script.
if __name__ == "__main__":
    test_main()