Commit 4fe72f9b authored by Guido van Rossum

Patch 1420 by Ron Adam.

This adds support for bytes literals (b'...') to tokenize.py, and
removes support for unicode literals (u'...').
parent 1607278c
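
As a quick illustration (a minimal sketch using today's tokenize module API, not the 2007-era interface this diff touches), a bytes literal lexes as a single STRING token after this change:

import io
import tokenize

# After this patch b'abc' (and its B/r/R prefix variants) lex as one
# STRING token each. By my reading of the patch, a u'abc' no longer
# matches the STRING pattern, so tokenize.py would emit NAME 'u'
# followed by STRING "'abc'" and leave rejecting it to the compiler.
# (u'...' only returned to the language in Python 3.3 via PEP 414.)
src = "x = b'abc' + B'ABC'\n"
for tok in tokenize.generate_tokens(io.StringIO(src).readline):
    print(tok.start, tok.end, tokenize.tok_name[tok.type], tok.string)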
@@ -342,59 +342,59 @@ test_tokenize
 112,3-112,4: NEWLINE '\n'
 113,0-113,1: NAME 'x'
 113,2-113,3: OP '='
-113,4-113,10: STRING "u'abc'"
+113,4-113,10: STRING "b'abc'"
 113,11-113,12: OP '+'
-113,13-113,19: STRING "U'ABC'"
+113,13-113,19: STRING "B'ABC'"
 113,19-113,20: NEWLINE '\n'
 114,0-114,1: NAME 'y'
 114,2-114,3: OP '='
-114,4-114,10: STRING 'u"abc"'
+114,4-114,10: STRING 'b"abc"'
 114,11-114,12: OP '+'
-114,13-114,19: STRING 'U"ABC"'
+114,13-114,19: STRING 'B"ABC"'
 114,19-114,20: NEWLINE '\n'
 115,0-115,1: NAME 'x'
 115,2-115,3: OP '='
-115,4-115,11: STRING "ur'abc'"
+115,4-115,11: STRING "br'abc'"
 115,12-115,13: OP '+'
-115,14-115,21: STRING "Ur'ABC'"
+115,14-115,21: STRING "Br'ABC'"
 115,22-115,23: OP '+'
-115,24-115,31: STRING "uR'ABC'"
+115,24-115,31: STRING "bR'ABC'"
 115,32-115,33: OP '+'
-115,34-115,41: STRING "UR'ABC'"
+115,34-115,41: STRING "BR'ABC'"
 115,41-115,42: NEWLINE '\n'
 116,0-116,1: NAME 'y'
 116,2-116,3: OP '='
-116,4-116,11: STRING 'ur"abc"'
+116,4-116,11: STRING 'br"abc"'
 116,12-116,13: OP '+'
-116,14-116,21: STRING 'Ur"ABC"'
+116,14-116,21: STRING 'Br"ABC"'
 116,22-116,23: OP '+'
-116,24-116,31: STRING 'uR"ABC"'
+116,24-116,31: STRING 'bR"ABC"'
 116,32-116,33: OP '+'
-116,34-116,41: STRING 'UR"ABC"'
+116,34-116,41: STRING 'BR"ABC"'
 116,41-116,42: NEWLINE '\n'
 117,0-117,1: NAME 'x'
 117,2-117,3: OP '='
-117,4-117,10: STRING "ur'\\\\'"
+117,4-117,10: STRING "br'\\\\'"
 117,11-117,12: OP '+'
-117,13-117,19: STRING "UR'\\\\'"
+117,13-117,19: STRING "BR'\\\\'"
 117,19-117,20: NEWLINE '\n'
 118,0-118,1: NAME 'x'
 118,2-118,3: OP '='
-118,4-118,10: STRING "ur'\\''"
+118,4-118,10: STRING "br'\\''"
 118,11-118,12: OP '+'
 118,13-118,15: STRING "''"
 118,15-118,16: NEWLINE '\n'
 119,0-119,1: NAME 'y'
 119,2-119,3: OP '='
-119,4-121,6: STRING "ur'''\nfoo bar \\\\\nbaz'''"
+119,4-121,6: STRING "br'''\nfoo bar \\\\\nbaz'''"
 121,7-121,8: OP '+'
-121,9-122,6: STRING "UR'''\nfoo'''"
+121,9-122,6: STRING "BR'''\nfoo'''"
 122,6-122,7: NEWLINE '\n'
 123,0-123,1: NAME 'y'
 123,2-123,3: OP '='
-123,4-125,3: STRING 'Ur"""foo\nbar \\\\ baz\n"""'
+123,4-125,3: STRING 'Br"""foo\nbar \\\\ baz\n"""'
 125,4-125,5: OP '+'
-125,6-126,3: STRING "uR'''spam\n'''"
+125,6-126,3: STRING "bR'''spam\n'''"
 126,3-126,4: NEWLINE '\n'
 127,0-127,1: NL '\n'
 128,0-128,13: COMMENT '# Indentation'
...
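
Each line of this expected-output file pairs a `startrow,startcol-endrow,endcol:` span with the token type and its text; regrtest.py compares the test's stdout against it. A hedged sketch of producing that layout with the modern API (the dump helper is illustrative, not the test's own code):

import io
import tokenize

def dump(src):
    # Render tokens in the 'srow,scol-erow,ecol: TYPE repr' layout
    # used by the expected-output file above.
    for tok in tokenize.generate_tokens(io.StringIO(src).readline):
        (srow, scol), (erow, ecol) = tok.start, tok.end
        print('%d,%d-%d,%d:' % (srow, scol, erow, ecol),
              tokenize.tok_name[tok.type], repr(tok.string))

dump("x = b'abc' + B'ABC'\n")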
@@ -183,6 +183,13 @@ def test_main():
     next_time = time.time() + _PRINT_WORKING_MSG_INTERVAL
 
+    # Validate the tokenize_tests.txt file.
+    # This makes sure it compiles, and displays any errors in it.
+    f = open(findfile('tokenize_tests.txt'))
+    sf = f.read()
+    f.close()
+    cf = compile(sf, 'tokenize_tests.txt', 'exec')
+
     # This displays the tokenization of tokenize_tests.py to stdout, and
     # regrtest.py checks that this equals the expected output (in the
     # test/output/ directory).
@@ -190,10 +197,12 @@ def test_main():
     tokenize(f.readline)
     f.close()
 
-    # Now run test_roundtrip() over tokenize_test.py too, and over all
+    # Now run test_roundtrip() over test_tokenize.py too, and over all
     # (if the "compiler" resource is enabled) or a small random sample (if
     # "compiler" is not enabled) of the test*.py files.
-    f = findfile('tokenize_tests.txt')
+    f = findfile('test_tokenize.py')
+    if verbose:
+        print(' round trip: ', f, file=sys.__stdout__)
     test_roundtrip(f)
     testdir = os.path.dirname(f) or os.curdir
...
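
The block added above sanity-checks the data file by compiling it: compile() parses the text and raises SyntaxError (with file name and line number) if it is not valid Python, without executing anything. A minimal sketch of the same pattern:

# File name taken from the test above; compile() only parses the
# source, it never runs it, so runtime errors in the file are fine.
with open('tokenize_tests.txt') as f:
    source = f.read()
code = compile(source, 'tokenize_tests.txt', 'exec')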
@@ -110,19 +110,19 @@ y = r"""foo
 bar \\ baz
 """ + R'''spam
 '''
-x = u'abc' + U'ABC'
-y = u"abc" + U"ABC"
-x = ur'abc' + Ur'ABC' + uR'ABC' + UR'ABC'
-y = ur"abc" + Ur"ABC" + uR"ABC" + UR"ABC"
-x = ur'\\' + UR'\\'
-x = ur'\'' + ''
-y = ur'''
+x = b'abc' + B'ABC'
+y = b"abc" + B"ABC"
+x = br'abc' + Br'ABC' + bR'ABC' + BR'ABC'
+y = br"abc" + Br"ABC" + bR"ABC" + BR"ABC"
+x = br'\\' + BR'\\'
+x = br'\'' + ''
+y = br'''
 foo bar \\
-baz''' + UR'''
+baz''' + BR'''
 foo'''
-y = Ur"""foo
+y = Br"""foo
 bar \\ baz
-""" + uR'''spam
+""" + bR'''spam
 '''
 # Indentation
...
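
As evaluated by Python 3, the literals exercised above denote the following values (a small sanity sketch; the asserts are mine, not part of the test file):

assert b'abc' + B'ABC' == b'abcABC'   # prefix case is irrelevant
assert br'\n' == b'\\n'               # raw: backslash and 'n' stay two bytes
assert len(br'\\') == 2               # raw '\\' is two backslash bytes

Note that this file only has to tokenize and compile: a line such as x = br'\'' + '' would raise TypeError (bytes + str) if executed, which is fine for this test.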
@@ -69,10 +69,10 @@ Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
 Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
 # Tail end of """ string.
 Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
-Triple = group("[uU]?[rR]?'''", '[uU]?[rR]?"""')
+Triple = group("[bB]?[rR]?'''", '[bB]?[rR]?"""')
 # Single-line ' or " string.
-String = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
-               r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
+String = group(r"[bB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
+               r'[bB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
 # Because of leftmost-then-longest match semantics, be sure to put the
 # longest operators first (e.g., if = came before ==, == would get
@@ -90,9 +90,9 @@ PlainToken = group(Number, Funny, String, Name)
 Token = Ignore + PlainToken
 # First (or only) line of ' or " string.
-ContStr = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
+ContStr = group(r"[bB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                 group("'", r'\\\r?\n'),
-                r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
+                r'[bB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                 group('"', r'\\\r?\n'))
 PseudoExtras = group(r'\\\r?\n', Comment, Triple)
 PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
@@ -102,28 +102,28 @@ tokenprog, pseudoprog, single3prog, double3prog = map(
 endprogs = {"'": re.compile(Single), '"': re.compile(Double),
             "'''": single3prog, '"""': double3prog,
             "r'''": single3prog, 'r"""': double3prog,
-            "u'''": single3prog, 'u"""': double3prog,
-            "ur'''": single3prog, 'ur"""': double3prog,
+            "b'''": single3prog, 'b"""': double3prog,
+            "br'''": single3prog, 'br"""': double3prog,
             "R'''": single3prog, 'R"""': double3prog,
-            "U'''": single3prog, 'U"""': double3prog,
-            "uR'''": single3prog, 'uR"""': double3prog,
-            "Ur'''": single3prog, 'Ur"""': double3prog,
-            "UR'''": single3prog, 'UR"""': double3prog,
-            'r': None, 'R': None, 'u': None, 'U': None}
+            "B'''": single3prog, 'B"""': double3prog,
+            "bR'''": single3prog, 'bR"""': double3prog,
+            "Br'''": single3prog, 'Br"""': double3prog,
+            "BR'''": single3prog, 'BR"""': double3prog,
+            'r': None, 'R': None, 'b': None, 'B': None}
 
 triple_quoted = {}
 for t in ("'''", '"""',
           "r'''", 'r"""', "R'''", 'R"""',
-          "u'''", 'u"""', "U'''", 'U"""',
-          "ur'''", 'ur"""', "Ur'''", 'Ur"""',
-          "uR'''", 'uR"""', "UR'''", 'UR"""'):
+          "b'''", 'b"""', "B'''", 'B"""',
+          "br'''", 'br"""', "Br'''", 'Br"""',
+          "bR'''", 'bR"""', "BR'''", 'BR"""'):
     triple_quoted[t] = t
 single_quoted = {}
 for t in ("'", '"',
           "r'", 'r"', "R'", 'R"',
-          "u'", 'u"', "U'", 'U"',
-          "ur'", 'ur"', "Ur'", 'Ur"',
-          "uR'", 'uR"', "UR'", 'UR"' ):
+          "b'", 'b"', "B'", 'B"',
+          "br'", 'br"', "Br'", 'Br"',
+          "bR'", 'bR"', "BR'", 'BR"' ):
     single_quoted[t] = t
 
 tabsize = 8
...
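
Since the prefix grammar lives in these regular expressions, a short sketch (my own recreation of the pattern, not tokenize.py's compiled object) shows what `[bB]?[rR]?` accepts and rejects:

import re

# Optional b/B, then optional r/R, then the opening triple quote.
# A 'u' prefix and the reversed 'rb' spelling do not match, which is
# why endprogs/triple_quoted enumerate exactly the b/r combinations.
triple = re.compile("[bB]?[rR]?('''|\"\"\")")
for s in ("'''", "b'''", "bR'''", 'BR"""', "u'''", "rb'''"):
    print('%-7s -> %s' % (s, bool(triple.match(s))))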