Commit 66428b2e authored by Benjamin Peterson

Merged revisions 84364 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r84364 | benjamin.peterson | 2010-08-30 09:41:20 -0500 (Mon, 30 Aug 2010) | 1 line

  handle names starting with non-ascii characters correctly #9712
........
üst f8a08d9d
...@@ -533,6 +533,7 @@ pass the '-ucompiler' option to process the full directory. ...@@ -533,6 +533,7 @@ pass the '-ucompiler' option to process the full directory.
True True
Evil tabs Evil tabs
>>> dump_tokens("def f():\\n\\tif x\\n \\tpass") >>> dump_tokens("def f():\\n\\tif x\\n \\tpass")
ENCODING 'utf-8' (0, 0) (0, 0) ENCODING 'utf-8' (0, 0) (0, 0)
NAME 'def' (1, 0) (1, 3) NAME 'def' (1, 0) (1, 3)
...@@ -549,6 +550,18 @@ Evil tabs ...@@ -549,6 +550,18 @@ Evil tabs
NAME 'pass' (3, 9) (3, 13) NAME 'pass' (3, 9) (3, 13)
DEDENT '' (4, 0) (4, 0) DEDENT '' (4, 0) (4, 0)
DEDENT '' (4, 0) (4, 0) DEDENT '' (4, 0) (4, 0)
Non-ascii identifiers
>>> dump_tokens("Örter = 'places'\\ngrün = 'green'")
ENCODING 'utf-8' (0, 0) (0, 0)
NAME 'Örter' (1, 0) (1, 5)
OP '=' (1, 6) (1, 7)
STRING "'places'" (1, 8) (1, 16)
NEWLINE '\\n' (1, 16) (1, 17)
NAME 'grün' (2, 0) (2, 4)
OP '=' (2, 5) (2, 6)
STRING "'green'" (2, 7) (2, 14)
""" """
from test import support from test import support
......
...@@ -92,7 +92,7 @@ def maybe(*choices): return group(*choices) + '?' ...@@ -92,7 +92,7 @@ def maybe(*choices): return group(*choices) + '?'
Whitespace = r'[ \f\t]*' Whitespace = r'[ \f\t]*'
Comment = r'#[^\r\n]*' Comment = r'#[^\r\n]*'
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment) Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
Name = r'[a-zA-Z_]\w*' Name = r'\w+'
Hexnumber = r'0[xX][0-9a-fA-F]+' Hexnumber = r'0[xX][0-9a-fA-F]+'
Binnumber = r'0[bB][01]+' Binnumber = r'0[bB][01]+'
...@@ -142,9 +142,12 @@ ContStr = group(r"[bB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" + ...@@ -142,9 +142,12 @@ ContStr = group(r"[bB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
PseudoExtras = group(r'\\\r?\n', Comment, Triple) PseudoExtras = group(r'\\\r?\n', Comment, Triple)
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name) PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
def _compile(expr):
return re.compile(expr, re.UNICODE)
tokenprog, pseudoprog, single3prog, double3prog = map( tokenprog, pseudoprog, single3prog, double3prog = map(
re.compile, (Token, PseudoToken, Single3, Double3)) _compile, (Token, PseudoToken, Single3, Double3))
endprogs = {"'": re.compile(Single), '"': re.compile(Double), endprogs = {"'": _compile(Single), '"': _compile(Double),
"'''": single3prog, '"""': double3prog, "'''": single3prog, '"""': double3prog,
"r'''": single3prog, 'r"""': double3prog, "r'''": single3prog, 'r"""': double3prog,
"b'''": single3prog, 'b"""': double3prog, "b'''": single3prog, 'b"""': double3prog,
...@@ -171,6 +174,8 @@ for t in ("'", '"', ...@@ -171,6 +174,8 @@ for t in ("'", '"',
"bR'", 'bR"', "BR'", 'BR"' ): "bR'", 'bR"', "BR'", 'BR"' ):
single_quoted[t] = t single_quoted[t] = t
del _compile
tabsize = 8 tabsize = 8
class TokenError(Exception): pass class TokenError(Exception): pass
...@@ -392,7 +397,7 @@ def tokenize(readline): ...@@ -392,7 +397,7 @@ def tokenize(readline):
def _tokenize(readline, encoding): def _tokenize(readline, encoding):
lnum = parenlev = continued = 0 lnum = parenlev = continued = 0
namechars, numchars = string.ascii_letters + '_', '0123456789' numchars = '0123456789'
contstr, needcont = '', 0 contstr, needcont = '', 0
contline = None contline = None
indents = [0] indents = [0]
...@@ -516,7 +521,7 @@ def _tokenize(readline, encoding): ...@@ -516,7 +521,7 @@ def _tokenize(readline, encoding):
break break
else: # ordinary string else: # ordinary string
yield TokenInfo(STRING, token, spos, epos, line) yield TokenInfo(STRING, token, spos, epos, line)
elif initial in namechars: # ordinary name elif initial.isidentifier(): # ordinary name
yield TokenInfo(NAME, token, spos, epos, line) yield TokenInfo(NAME, token, spos, epos, line)
elif initial == '\\': # continued stmt elif initial == '\\': # continued stmt
continued = 1 continued = 1
......
...@@ -14,6 +14,8 @@ Core and Builtins ...@@ -14,6 +14,8 @@ Core and Builtins
- Restore GIL in nis_cat in case of error. - Restore GIL in nis_cat in case of error.
- Issue #9712: Fix tokenize on identifiers that start with non-ASCII characters.
- Issue #9688: __basicsize__ and __itemsize__ must be accessed as Py_ssize_t. - Issue #9688: __basicsize__ and __itemsize__ must be accessed as Py_ssize_t.
- Issue #5319: Print an error if flushing stdout fails at interpreter - Issue #5319: Print an error if flushing stdout fails at interpreter
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment