Commit 4fe72f9b authored by Guido van Rossum

Patch 1420 by Ron Adam.

This adds support for bytes literals (b'...') to tokenize.py, and
removes support for unicode literals (u'...').
parent 1607278c
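For a quick picture of what the patch changes, here is a minimal sketch written against the modern tokenize API (which differs in small ways from the 2007 py3k module this commit touches): after this change, a b-prefixed literal is scanned as a single STRING token, printed below in the same "srow,scol-erow,ecol: TYPE text" layout used by the expected-output file.

    import io
    import tokenize

    def dump(source):
        # Print each token roughly the way the expected-output file records it.
        for tok in tokenize.generate_tokens(io.StringIO(source).readline):
            (srow, scol), (erow, ecol) = tok.start, tok.end
            print('%d,%d-%d,%d: %s %r' %
                  (srow, scol, erow, ecol, tokenize.tok_name[tok.type], tok.string))

    dump("x = b'abc' + BR'\\\\'\n")
    # b'abc' and BR'\\' each come back as one STRING token.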
@@ -342,59 +342,59 @@ test_tokenize
 112,3-112,4: NEWLINE '\n'
 113,0-113,1: NAME 'x'
 113,2-113,3: OP '='
-113,4-113,10: STRING "u'abc'"
+113,4-113,10: STRING "b'abc'"
 113,11-113,12: OP '+'
-113,13-113,19: STRING "U'ABC'"
+113,13-113,19: STRING "B'ABC'"
 113,19-113,20: NEWLINE '\n'
 114,0-114,1: NAME 'y'
 114,2-114,3: OP '='
-114,4-114,10: STRING 'u"abc"'
+114,4-114,10: STRING 'b"abc"'
 114,11-114,12: OP '+'
-114,13-114,19: STRING 'U"ABC"'
+114,13-114,19: STRING 'B"ABC"'
 114,19-114,20: NEWLINE '\n'
 115,0-115,1: NAME 'x'
 115,2-115,3: OP '='
-115,4-115,11: STRING "ur'abc'"
+115,4-115,11: STRING "br'abc'"
 115,12-115,13: OP '+'
-115,14-115,21: STRING "Ur'ABC'"
+115,14-115,21: STRING "Br'ABC'"
 115,22-115,23: OP '+'
-115,24-115,31: STRING "uR'ABC'"
+115,24-115,31: STRING "bR'ABC'"
 115,32-115,33: OP '+'
-115,34-115,41: STRING "UR'ABC'"
+115,34-115,41: STRING "BR'ABC'"
 115,41-115,42: NEWLINE '\n'
 116,0-116,1: NAME 'y'
 116,2-116,3: OP '='
-116,4-116,11: STRING 'ur"abc"'
+116,4-116,11: STRING 'br"abc"'
 116,12-116,13: OP '+'
-116,14-116,21: STRING 'Ur"ABC"'
+116,14-116,21: STRING 'Br"ABC"'
 116,22-116,23: OP '+'
-116,24-116,31: STRING 'uR"ABC"'
+116,24-116,31: STRING 'bR"ABC"'
 116,32-116,33: OP '+'
-116,34-116,41: STRING 'UR"ABC"'
+116,34-116,41: STRING 'BR"ABC"'
 116,41-116,42: NEWLINE '\n'
 117,0-117,1: NAME 'x'
 117,2-117,3: OP '='
-117,4-117,10: STRING "ur'\\\\'"
+117,4-117,10: STRING "br'\\\\'"
 117,11-117,12: OP '+'
-117,13-117,19: STRING "UR'\\\\'"
+117,13-117,19: STRING "BR'\\\\'"
 117,19-117,20: NEWLINE '\n'
 118,0-118,1: NAME 'x'
 118,2-118,3: OP '='
-118,4-118,10: STRING "ur'\\''"
+118,4-118,10: STRING "br'\\''"
 118,11-118,12: OP '+'
 118,13-118,15: STRING "''"
 118,15-118,16: NEWLINE '\n'
 119,0-119,1: NAME 'y'
 119,2-119,3: OP '='
-119,4-121,6: STRING "ur'''\nfoo bar \\\\\nbaz'''"
+119,4-121,6: STRING "br'''\nfoo bar \\\\\nbaz'''"
 121,7-121,8: OP '+'
-121,9-122,6: STRING "UR'''\nfoo'''"
+121,9-122,6: STRING "BR'''\nfoo'''"
 122,6-122,7: NEWLINE '\n'
 123,0-123,1: NAME 'y'
 123,2-123,3: OP '='
-123,4-125,3: STRING 'Ur"""foo\nbar \\\\ baz\n"""'
+123,4-125,3: STRING 'Br"""foo\nbar \\\\ baz\n"""'
 125,4-125,5: OP '+'
-125,6-126,3: STRING "uR'''spam\n'''"
+125,6-126,3: STRING "bR'''spam\n'''"
 126,3-126,4: NEWLINE '\n'
 127,0-127,1: NL '\n'
 128,0-128,13: COMMENT '# Indentation'
@@ -183,6 +183,13 @@ def test_main():
     next_time = time.time() + _PRINT_WORKING_MSG_INTERVAL
+    # Validate the tokenize_tests.txt file.
+    # This makes sure it compiles, and displays any errors in it.
+    f = open(findfile('tokenize_tests.txt'))
+    sf = f.read()
+    f.close()
+    cf = compile(sf, 'tokenize_tests.txt', 'exec')
     # This displays the tokenization of tokenize_tests.py to stdout, and
     # regrtest.py checks that this equals the expected output (in the
     # test/output/ directory).
@@ -190,10 +197,12 @@ def test_main():
     tokenize(f.readline)
     f.close()
-    # Now run test_roundtrip() over tokenize_test.py too, and over all
+    # Now run test_roundtrip() over test_tokenize.py too, and over all
     # (if the "compiler" resource is enabled) or a small random sample (if
     # "compiler" is not enabled) of the test*.py files.
-    f = findfile('tokenize_tests.txt')
+    f = findfile('test_tokenize.py')
+    if verbose:
+        print(' round trip: ', f, file=sys.__stdout__)
     test_roundtrip(f)
     testdir = os.path.dirname(f) or os.curdir
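test_roundtrip() checks that a file can be tokenized and reassembled without drifting. A self-contained sketch of the same idea using today's tokenize.untokenize (illustrative, not the historical test code itself):

    import io
    import tokenize

    def roundtrip(source):
        # Tokenize, rebuild the source with untokenize, tokenize again,
        # and require the two token streams to agree.
        toks = list(tokenize.generate_tokens(io.StringIO(source).readline))
        rebuilt = tokenize.untokenize(toks)
        toks2 = list(tokenize.generate_tokens(io.StringIO(rebuilt).readline))
        return [(t.type, t.string) for t in toks] == [(t.type, t.string) for t in toks2]

    assert roundtrip("x = br'abc' + BR'ABC'\n")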
@@ -110,19 +110,19 @@ y = r"""foo
 bar \\ baz
 """ + R'''spam
 '''
-x = u'abc' + U'ABC'
-y = u"abc" + U"ABC"
-x = ur'abc' + Ur'ABC' + uR'ABC' + UR'ABC'
-y = ur"abc" + Ur"ABC" + uR"ABC" + UR"ABC"
-x = ur'\\' + UR'\\'
-x = ur'\'' + ''
-y = ur'''
+x = b'abc' + B'ABC'
+y = b"abc" + B"ABC"
+x = br'abc' + Br'ABC' + bR'ABC' + BR'ABC'
+y = br"abc" + Br"ABC" + bR"ABC" + BR"ABC"
+x = br'\\' + BR'\\'
+x = br'\'' + ''
+y = br'''
 foo bar \\
-baz''' + UR'''
+baz''' + BR'''
 foo'''
-y = Ur"""foo
+y = Br"""foo
 bar \\ baz
-""" + uR'''spam
+""" + bR'''spam
 '''
 
 # Indentation
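The test data above exercises every spelling of the new prefix. A one-line sanity check in the spirit of the compile() validation added to test_main() (illustrative, not part of the patch): each prefix form should compile cleanly.

    for prefix in ('b', 'B', 'br', 'Br', 'bR', 'BR'):
        compile(prefix + "'abc'", '<literal test>', 'eval')  # SyntaxError if a prefix were invalid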
@@ -69,10 +69,10 @@ Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
 Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
 # Tail end of """ string.
 Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
-Triple = group("[uU]?[rR]?'''", '[uU]?[rR]?"""')
+Triple = group("[bB]?[rR]?'''", '[bB]?[rR]?"""')
 # Single-line ' or " string.
-String = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
-               r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
+String = group(r"[bB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
+               r'[bB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
 # Because of leftmost-then-longest match semantics, be sure to put the
 # longest operators first (e.g., if = came before ==, == would get
 # recognized as two instances of =).
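These patterns are assembled with tokenize.py's small group() helper, which joins alternatives into one non-capturing choice. A sketch of what the new Triple pattern accepts (group() reproduced from tokenize.py; the asserts are illustrative):

    import re

    def group(*choices):
        # Reproduced from tokenize.py: join alternatives into one regex group.
        return '(' + '|'.join(choices) + ')'

    Triple = group("[bB]?[rR]?'''", '[bB]?[rR]?"""')
    assert re.match(Triple, "bR'''")     # any mix of case in the b/r prefix
    assert re.match(Triple, '"""')       # the prefix is optional
    assert not re.match(Triple, "u'''")  # u-prefixed strings no longer match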
@@ -90,9 +90,9 @@ PlainToken = group(Number, Funny, String, Name)
 Token = Ignore + PlainToken
 
 # First (or only) line of ' or " string.
-ContStr = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
+ContStr = group(r"[bB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                 group("'", r'\\\r?\n'),
-                r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
+                r'[bB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                 group('"', r'\\\r?\n'))
 PseudoExtras = group(r'\\\r?\n', Comment, Triple)
 PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
@@ -102,28 +102,28 @@ tokenprog, pseudoprog, single3prog, double3prog = map(
 endprogs = {"'": re.compile(Single), '"': re.compile(Double),
             "'''": single3prog, '"""': double3prog,
             "r'''": single3prog, 'r"""': double3prog,
-            "u'''": single3prog, 'u"""': double3prog,
-            "ur'''": single3prog, 'ur"""': double3prog,
+            "b'''": single3prog, 'b"""': double3prog,
+            "br'''": single3prog, 'br"""': double3prog,
             "R'''": single3prog, 'R"""': double3prog,
-            "U'''": single3prog, 'U"""': double3prog,
-            "uR'''": single3prog, 'uR"""': double3prog,
-            "Ur'''": single3prog, 'Ur"""': double3prog,
-            "UR'''": single3prog, 'UR"""': double3prog,
-            'r': None, 'R': None, 'u': None, 'U': None}
+            "B'''": single3prog, 'B"""': double3prog,
+            "bR'''": single3prog, 'bR"""': double3prog,
+            "Br'''": single3prog, 'Br"""': double3prog,
+            "BR'''": single3prog, 'BR"""': double3prog,
+            'r': None, 'R': None, 'b': None, 'B': None}
 
 triple_quoted = {}
 for t in ("'''", '"""',
           "r'''", 'r"""', "R'''", 'R"""',
-          "u'''", 'u"""', "U'''", 'U"""',
-          "ur'''", 'ur"""', "Ur'''", 'Ur"""',
-          "uR'''", 'uR"""', "UR'''", 'UR"""'):
+          "b'''", 'b"""', "B'''", 'B"""',
+          "br'''", 'br"""', "Br'''", 'Br"""',
+          "bR'''", 'bR"""', "BR'''", 'BR"""'):
     triple_quoted[t] = t
 single_quoted = {}
 for t in ("'", '"',
           "r'", 'r"', "R'", 'R"',
-          "u'", 'u"', "U'", 'U"',
-          "ur'", 'ur"', "Ur'", 'Ur"',
-          "uR'", 'uR"', "UR'", 'UR"' ):
+          "b'", 'b"', "B'", 'B"',
+          "br'", 'br"', "Br'", 'Br"',
+          "bR'", 'bR"', "BR'", 'BR"' ):
     single_quoted[t] = t
 
 tabsize = 8
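At tokenization time these tables drive the string-handling branch: when a token's opening quote form appears in triple_quoted, the matching endprogs entry is the compiled pattern used to scan for the closing quotes. A self-contained miniature under that assumption (b-variants only; the real loop in tokenize.py is more involved):

    import re

    # Tail end of ''' string, as in tokenize.py.
    single3prog = re.compile(r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''")
    endprogs = {"'''": single3prog, "b'''": single3prog, "br'''": single3prog}
    triple_quoted = {t: t for t in ("'''", "b'''", "br'''")}

    token = "br'''"                      # opening of a raw bytes triple-quoted string
    if token in triple_quoted:
        endprog = endprogs[token]        # pattern that consumes up to the closing '''
        rest = "payload'''"
        print(endprog.match(rest).group())   # -> payload'''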