Commit 4fe72f9b authored by Guido van Rossum

Patch 1420 by Ron Adam.

This adds support for bytes literals (b'...') to tokenize.py, and
removes support for unicode literals (u'...').
parent 1607278c
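For a quick picture of what the patch changes, here is a minimal sketch written against the modern tokenize API (which differs in small ways from the 2007 py3k module this commit touches): after this change, a b-prefixed literal is scanned as a single STRING token, printed below in the same "srow,scol-erow,ecol: TYPE text" layout used by the expected-output file.

    import io
    import tokenize

    def dump(source):
        # Print each token roughly the way the expected-output file records it.
        for tok in tokenize.generate_tokens(io.StringIO(source).readline):
            (srow, scol), (erow, ecol) = tok.start, tok.end
            print('%d,%d-%d,%d: %s %r' %
                  (srow, scol, erow, ecol, tokenize.tok_name[tok.type], tok.string))

    dump("x = b'abc' + BR'\\\\'\n")
    # b'abc' and BR'\\' each come back as one STRING token.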
@@ -342,59 +342,59 @@ test_tokenize
 112,3-112,4: NEWLINE '\n'
 113,0-113,1: NAME 'x'
 113,2-113,3: OP '='
-113,4-113,10: STRING "u'abc'"
+113,4-113,10: STRING "b'abc'"
 113,11-113,12: OP '+'
-113,13-113,19: STRING "U'ABC'"
+113,13-113,19: STRING "B'ABC'"
 113,19-113,20: NEWLINE '\n'
 114,0-114,1: NAME 'y'
 114,2-114,3: OP '='
-114,4-114,10: STRING 'u"abc"'
+114,4-114,10: STRING 'b"abc"'
 114,11-114,12: OP '+'
-114,13-114,19: STRING 'U"ABC"'
+114,13-114,19: STRING 'B"ABC"'
 114,19-114,20: NEWLINE '\n'
 115,0-115,1: NAME 'x'
 115,2-115,3: OP '='
-115,4-115,11: STRING "ur'abc'"
+115,4-115,11: STRING "br'abc'"
 115,12-115,13: OP '+'
-115,14-115,21: STRING "Ur'ABC'"
+115,14-115,21: STRING "Br'ABC'"
 115,22-115,23: OP '+'
-115,24-115,31: STRING "uR'ABC'"
+115,24-115,31: STRING "bR'ABC'"
 115,32-115,33: OP '+'
-115,34-115,41: STRING "UR'ABC'"
+115,34-115,41: STRING "BR'ABC'"
 115,41-115,42: NEWLINE '\n'
 116,0-116,1: NAME 'y'
 116,2-116,3: OP '='
-116,4-116,11: STRING 'ur"abc"'
+116,4-116,11: STRING 'br"abc"'
 116,12-116,13: OP '+'
-116,14-116,21: STRING 'Ur"ABC"'
+116,14-116,21: STRING 'Br"ABC"'
 116,22-116,23: OP '+'
-116,24-116,31: STRING 'uR"ABC"'
+116,24-116,31: STRING 'bR"ABC"'
 116,32-116,33: OP '+'
-116,34-116,41: STRING 'UR"ABC"'
+116,34-116,41: STRING 'BR"ABC"'
 116,41-116,42: NEWLINE '\n'
 117,0-117,1: NAME 'x'
 117,2-117,3: OP '='
-117,4-117,10: STRING "ur'\\\\'"
+117,4-117,10: STRING "br'\\\\'"
 117,11-117,12: OP '+'
-117,13-117,19: STRING "UR'\\\\'"
+117,13-117,19: STRING "BR'\\\\'"
 117,19-117,20: NEWLINE '\n'
 118,0-118,1: NAME 'x'
 118,2-118,3: OP '='
-118,4-118,10: STRING "ur'\\''"
+118,4-118,10: STRING "br'\\''"
 118,11-118,12: OP '+'
 118,13-118,15: STRING "''"
 118,15-118,16: NEWLINE '\n'
 119,0-119,1: NAME 'y'
 119,2-119,3: OP '='
-119,4-121,6: STRING "ur'''\nfoo bar \\\\\nbaz'''"
+119,4-121,6: STRING "br'''\nfoo bar \\\\\nbaz'''"
 121,7-121,8: OP '+'
-121,9-122,6: STRING "UR'''\nfoo'''"
+121,9-122,6: STRING "BR'''\nfoo'''"
 122,6-122,7: NEWLINE '\n'
 123,0-123,1: NAME 'y'
 123,2-123,3: OP '='
-123,4-125,3: STRING 'Ur"""foo\nbar \\\\ baz\n"""'
+123,4-125,3: STRING 'Br"""foo\nbar \\\\ baz\n"""'
 125,4-125,5: OP '+'
-125,6-126,3: STRING "uR'''spam\n'''"
+125,6-126,3: STRING "bR'''spam\n'''"
 126,3-126,4: NEWLINE '\n'
 127,0-127,1: NL '\n'
 128,0-128,13: COMMENT '# Indentation'
@@ -183,6 +183,13 @@ def test_main():
     next_time = time.time() + _PRINT_WORKING_MSG_INTERVAL
+    # Validate the tokenize_tests.txt file.
+    # This makes sure it compiles, and displays any errors in it.
+    f = open(findfile('tokenize_tests.txt'))
+    sf = f.read()
+    f.close()
+    cf = compile(sf, 'tokenize_tests.txt', 'exec')
     # This displays the tokenization of tokenize_tests.py to stdout, and
     # regrtest.py checks that this equals the expected output (in the
     # test/output/ directory).
@@ -190,10 +197,12 @@ def test_main():
     tokenize(f.readline)
     f.close()
-    # Now run test_roundtrip() over tokenize_test.py too, and over all
+    # Now run test_roundtrip() over test_tokenize.py too, and over all
     # (if the "compiler" resource is enabled) or a small random sample (if
     # "compiler" is not enabled) of the test*.py files.
-    f = findfile('tokenize_tests.txt')
+    f = findfile('test_tokenize.py')
+    if verbose:
+        print(' round trip: ', f, file=sys.__stdout__)
     test_roundtrip(f)
     testdir = os.path.dirname(f) or os.curdir
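test_roundtrip() checks that a file can be tokenized and reassembled without drifting. A self-contained sketch of the same idea using today's tokenize.untokenize (illustrative, not the historical test code itself):

    import io
    import tokenize

    def roundtrip(source):
        # Tokenize, rebuild the source with untokenize, tokenize again,
        # and require the two token streams to agree.
        toks = list(tokenize.generate_tokens(io.StringIO(source).readline))
        rebuilt = tokenize.untokenize(toks)
        toks2 = list(tokenize.generate_tokens(io.StringIO(rebuilt).readline))
        return [(t.type, t.string) for t in toks] == [(t.type, t.string) for t in toks2]

    assert roundtrip("x = br'abc' + BR'ABC'\n")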
@@ -110,19 +110,19 @@ y = r"""foo
 bar \\ baz
 """ + R'''spam
 '''
-x = u'abc' + U'ABC'
-y = u"abc" + U"ABC"
-x = ur'abc' + Ur'ABC' + uR'ABC' + UR'ABC'
-y = ur"abc" + Ur"ABC" + uR"ABC" + UR"ABC"
-x = ur'\\' + UR'\\'
-x = ur'\'' + ''
-y = ur'''
+x = b'abc' + B'ABC'
+y = b"abc" + B"ABC"
+x = br'abc' + Br'ABC' + bR'ABC' + BR'ABC'
+y = br"abc" + Br"ABC" + bR"ABC" + BR"ABC"
+x = br'\\' + BR'\\'
+x = br'\'' + ''
+y = br'''
 foo bar \\
-baz''' + UR'''
+baz''' + BR'''
 foo'''
-y = Ur"""foo
+y = Br"""foo
 bar \\ baz
-""" + uR'''spam
+""" + bR'''spam
 '''
 
 # Indentation
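The test data above exercises every spelling of the new prefix. A one-line sanity check in the spirit of the compile() validation added to test_main() (illustrative, not part of the patch): each prefix form should compile cleanly.

    for prefix in ('b', 'B', 'br', 'Br', 'bR', 'BR'):
        compile(prefix + "'abc'", '<literal test>', 'eval')  # SyntaxError if a prefix were invalid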
@@ -69,10 +69,10 @@ Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
 Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
 # Tail end of """ string.
 Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
-Triple = group("[uU]?[rR]?'''", '[uU]?[rR]?"""')
+Triple = group("[bB]?[rR]?'''", '[bB]?[rR]?"""')
 # Single-line ' or " string.
-String = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
-               r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
+String = group(r"[bB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
+               r'[bB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
 # Because of leftmost-then-longest match semantics, be sure to put the
 # longest operators first (e.g., if = came before ==, == would get
 # recognized as two instances of =).
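These patterns are assembled with tokenize.py's small group() helper, which joins alternatives into one non-capturing choice. A sketch of what the new Triple pattern accepts (group() reproduced from tokenize.py; the asserts are illustrative):

    import re

    def group(*choices):
        # Reproduced from tokenize.py: join alternatives into one regex group.
        return '(' + '|'.join(choices) + ')'

    Triple = group("[bB]?[rR]?'''", '[bB]?[rR]?"""')
    assert re.match(Triple, "bR'''")     # any mix of case in the b/r prefix
    assert re.match(Triple, '"""')       # the prefix is optional
    assert not re.match(Triple, "u'''")  # u-prefixed strings no longer match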
@@ -90,9 +90,9 @@ PlainToken = group(Number, Funny, String, Name)
 Token = Ignore + PlainToken
 
 # First (or only) line of ' or " string.
-ContStr = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
+ContStr = group(r"[bB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                 group("'", r'\\\r?\n'),
-                r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
+                r'[bB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                 group('"', r'\\\r?\n'))
 PseudoExtras = group(r'\\\r?\n', Comment, Triple)
 PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
@@ -102,28 +102,28 @@ tokenprog, pseudoprog, single3prog, double3prog = map(
 endprogs = {"'": re.compile(Single), '"': re.compile(Double),
             "'''": single3prog, '"""': double3prog,
             "r'''": single3prog, 'r"""': double3prog,
-            "u'''": single3prog, 'u"""': double3prog,
-            "ur'''": single3prog, 'ur"""': double3prog,
+            "b'''": single3prog, 'b"""': double3prog,
+            "br'''": single3prog, 'br"""': double3prog,
             "R'''": single3prog, 'R"""': double3prog,
-            "U'''": single3prog, 'U"""': double3prog,
-            "uR'''": single3prog, 'uR"""': double3prog,
-            "Ur'''": single3prog, 'Ur"""': double3prog,
-            "UR'''": single3prog, 'UR"""': double3prog,
-            'r': None, 'R': None, 'u': None, 'U': None}
+            "B'''": single3prog, 'B"""': double3prog,
+            "bR'''": single3prog, 'bR"""': double3prog,
+            "Br'''": single3prog, 'Br"""': double3prog,
+            "BR'''": single3prog, 'BR"""': double3prog,
+            'r': None, 'R': None, 'b': None, 'B': None}
 
 triple_quoted = {}
 for t in ("'''", '"""',
           "r'''", 'r"""', "R'''", 'R"""',
-          "u'''", 'u"""', "U'''", 'U"""',
-          "ur'''", 'ur"""', "Ur'''", 'Ur"""',
-          "uR'''", 'uR"""', "UR'''", 'UR"""'):
+          "b'''", 'b"""', "B'''", 'B"""',
+          "br'''", 'br"""', "Br'''", 'Br"""',
+          "bR'''", 'bR"""', "BR'''", 'BR"""'):
     triple_quoted[t] = t
 single_quoted = {}
 for t in ("'", '"',
           "r'", 'r"', "R'", 'R"',
-          "u'", 'u"', "U'", 'U"',
-          "ur'", 'ur"', "Ur'", 'Ur"',
-          "uR'", 'uR"', "UR'", 'UR"' ):
+          "b'", 'b"', "B'", 'B"',
+          "br'", 'br"', "Br'", 'Br"',
+          "bR'", 'bR"', "BR'", 'BR"' ):
     single_quoted[t] = t
 
 tabsize = 8
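At tokenization time these tables drive the string-handling branch: when a token's opening quote form appears in triple_quoted, the matching endprogs entry is the compiled pattern used to scan for the closing quotes. A self-contained miniature under that assumption (b-variants only; the real loop in tokenize.py is more involved):

    import re

    # Tail end of ''' string, as in tokenize.py.
    single3prog = re.compile(r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''")
    endprogs = {"'''": single3prog, "b'''": single3prog, "br'''": single3prog}
    triple_quoted = {t: t for t in ("'''", "b'''", "br'''")}

    token = "br'''"                      # opening of a raw bytes triple-quoted string
    if token in triple_quoted:
        endprog = endprogs[token]        # pattern that consumes up to the closing '''
        rest = "payload'''"
        print(endprog.match(rest).group())   # -> payload'''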