Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
00c7f852
Kaydet (Commit)
00c7f852
authored
Ock 19, 2012
tarafından
Meador Inge
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
Issue #2134: Add support for tokenize.TokenInfo.exact_type.
üst
3f67ec1a
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
187 additions
and
3 deletions
+187
-3
tokenize.rst
Doc/library/tokenize.rst
+52
-1
test_tokenize.py
Lib/test/test_tokenize.py
+74
-1
tokenize.py
Lib/tokenize.py
+58
-1
NEWS
Misc/NEWS
+3
-0
No files found.
Doc/library/tokenize.rst
Dosyayı görüntüle @
00c7f852
...
...
@@ -15,6 +15,11 @@ implemented in Python. The scanner in this module returns comments as tokens
as well, making it useful for implementing "pretty-printers," including
colorizers for on-screen displays.
To simplify token stream handling, all :ref:`operators` and :ref:`delimiters`
tokens are returned using the generic :data:`token.OP` token type. The exact
type can be determined by checking the ``exact_type`` property on the
:term:`named tuple` returned from :func:`tokenize.tokenize`.
Tokenizing Input
----------------
...
...
@@ -36,9 +41,17 @@ The primary entry point is a :term:`generator`:
returned as a :term:`named tuple` with the field names:
``type string start end line``.
The returned :term:`named tuple` has an additional property named
``exact_type`` that contains the exact operator type for
:data:`token.OP` tokens. For all other token types ``exact_type``
equals the named tuple ``type`` field.
.. versionchanged:: 3.1
Added support for named tuples.
.. versionchanged:: 3.3
Added support for ``exact_type``.
:func:`tokenize` determines the source encoding of the file by looking for a
UTF-8 BOM or encoding cookie, according to :pep:`263`.
...
...
@@ -131,7 +144,19 @@ It is as simple as:
.. code-block:: sh
python -m tokenize [filename.py]
python -m tokenize [-e] [filename.py]
The following options are accepted:
.. program:: tokenize
.. cmdoption:: -h, --help
show this help message and exit
.. cmdoption:: -e, --exact
display token names using the exact type
If :file:`filename.py` is specified its contents are tokenized to stdout.
Otherwise, tokenization is performed on stdin.
...
...
@@ -215,3 +240,29 @@ the name of the token, and the final column is the value of the token (if any)
4,10-4,11: OP ')'
4,11-4,12: NEWLINE '\n'
5,0-5,0: ENDMARKER ''
The exact token type names can be displayed using the ``-e`` option:
.. code-block:: sh
$ python -m tokenize -e hello.py
0,0-0,0: ENCODING 'utf-8'
1,0-1,3: NAME 'def'
1,4-1,13: NAME 'say_hello'
1,13-1,14: LPAR '('
1,14-1,15: RPAR ')'
1,15-1,16: COLON ':'
1,16-1,17: NEWLINE '\n'
2,0-2,4: INDENT '    '
2,4-2,9: NAME 'print'
2,9-2,10: LPAR '('
2,10-2,25: STRING '"Hello, World!"'
2,25-2,26: RPAR ')'
2,26-2,27: NEWLINE '\n'
3,0-3,1: NL '\n'
4,0-4,0: DEDENT ''
4,0-4,9: NAME 'say_hello'
4,9-4,10: LPAR '('
4,10-4,11: RPAR ')'
4,11-4,12: NEWLINE '\n'
5,0-5,0: ENDMARKER ''
Lib/test/test_tokenize.py
Dosyayı görüntüle @
00c7f852
...
...
@@ -567,11 +567,12 @@ Non-ascii identifiers
from
test
import
support
from
tokenize
import
(
tokenize
,
_tokenize
,
untokenize
,
NUMBER
,
NAME
,
OP
,
STRING
,
ENDMARKER
,
tok_name
,
detect_encoding
,
STRING
,
ENDMARKER
,
ENCODING
,
tok_name
,
detect_encoding
,
open
as
tokenize_open
)
from
io
import
BytesIO
from
unittest
import
TestCase
import
os
,
sys
,
glob
import
token
def
dump_tokens
(
s
):
"""Print out the tokens in s in a table format.
...
...
@@ -922,6 +923,78 @@ class TestTokenize(TestCase):
self
.
assertTrue
(
encoding_used
,
encoding
)
def assertExactTypeEqual(self, opstr, *optypes):
    """Assert that tokenizing *opstr* yields exactly the given exact types.

    *opstr* is encoded as UTF-8 and tokenized.  The resulting stream must
    consist of an ENCODING token, then one token per entry of *optypes*
    (compared via ``exact_type``), then an ENDMARKER token.  Types are
    compared by their ``token.tok_name`` names so that failures are
    readable.
    """
    readline = BytesIO(opstr.encode('utf-8')).readline
    produced = list(tokenize(readline))

    # The stream is always bracketed by ENCODING and ENDMARKER.
    expected = (ENCODING,) + optypes + (token.ENDMARKER,)
    self.assertEqual(len(produced), len(expected))

    for tok, wanted in zip(produced, expected):
        self.assertEqual(token.tok_name[tok.exact_type],
                         token.tok_name[wanted])
def test_exact_type(self):
    """Check ``exact_type`` for each operator/delimiter and a few expressions.

    Every case is a pair ``(source, expected_types)``; the source is
    tokenized and the exact types of the tokens between ENCODING and
    ENDMARKER are compared against ``expected_types`` in order.
    """
    cases = [
        # Single operators and delimiters.
        ('()', (token.LPAR, token.RPAR)),
        ('[]', (token.LSQB, token.RSQB)),
        (':', (token.COLON,)),
        (',', (token.COMMA,)),
        (';', (token.SEMI,)),
        ('+', (token.PLUS,)),
        ('-', (token.MINUS,)),
        ('*', (token.STAR,)),
        ('/', (token.SLASH,)),
        ('|', (token.VBAR,)),
        ('&', (token.AMPER,)),
        ('<', (token.LESS,)),
        ('>', (token.GREATER,)),
        ('=', (token.EQUAL,)),
        ('.', (token.DOT,)),
        ('%', (token.PERCENT,)),
        ('{}', (token.LBRACE, token.RBRACE)),
        ('==', (token.EQEQUAL,)),
        ('!=', (token.NOTEQUAL,)),
        ('<=', (token.LESSEQUAL,)),
        ('>=', (token.GREATEREQUAL,)),
        ('~', (token.TILDE,)),
        ('^', (token.CIRCUMFLEX,)),
        ('<<', (token.LEFTSHIFT,)),
        ('>>', (token.RIGHTSHIFT,)),
        ('**', (token.DOUBLESTAR,)),
        ('+=', (token.PLUSEQUAL,)),
        ('-=', (token.MINEQUAL,)),
        ('*=', (token.STAREQUAL,)),
        ('/=', (token.SLASHEQUAL,)),
        ('%=', (token.PERCENTEQUAL,)),
        ('&=', (token.AMPEREQUAL,)),
        ('|=', (token.VBAREQUAL,)),
        # '^=' used to be asserted twice; once is enough.
        ('^=', (token.CIRCUMFLEXEQUAL,)),
        ('<<=', (token.LEFTSHIFTEQUAL,)),
        ('>>=', (token.RIGHTSHIFTEQUAL,)),
        ('**=', (token.DOUBLESTAREQUAL,)),
        ('//', (token.DOUBLESLASH,)),
        ('//=', (token.DOUBLESLASHEQUAL,)),
        ('@', (token.AT,)),
        # Operators mixed with names and numbers.
        ('a**2+b**2==c**2',
         (NAME, token.DOUBLESTAR, NUMBER,
          token.PLUS,
          NAME, token.DOUBLESTAR, NUMBER,
          token.EQEQUAL,
          NAME, token.DOUBLESTAR, NUMBER)),
        ('{1, 2, 3}',
         (token.LBRACE,
          token.NUMBER, token.COMMA,
          token.NUMBER, token.COMMA,
          token.NUMBER,
          token.RBRACE)),
        ('^(x & 0x1)',
         (token.CIRCUMFLEX,
          token.LPAR,
          token.NAME, token.AMPER, token.NUMBER,
          token.RPAR)),
    ]
    for source, expected_types in cases:
        self.assertExactTypeEqual(source, *expected_types)
__test__
=
{
"doctests"
:
doctests
,
'decistmt'
:
decistmt
}
...
...
Lib/tokenize.py
Dosyayı görüntüle @
00c7f852
...
...
@@ -45,6 +45,51 @@ tok_name[NL] = 'NL'
ENCODING
=
N_TOKENS
+
2
tok_name
[
ENCODING
]
=
'ENCODING'
N_TOKENS
+=
3
# Maps the source text of an operator or delimiter to its exact token type.
# TokenInfo.exact_type consults this table for tokens whose generic type
# is OP; strings that are missing here fall back to the generic type.
EXACT_TOKEN_TYPES = {
    '(':   LPAR,
    ')':   RPAR,
    '[':   LSQB,
    ']':   RSQB,
    ':':   COLON,
    ',':   COMMA,
    ';':   SEMI,
    '+':   PLUS,
    '-':   MINUS,
    '*':   STAR,
    '/':   SLASH,
    '|':   VBAR,
    '&':   AMPER,
    '<':   LESS,
    '>':   GREATER,
    '=':   EQUAL,
    '.':   DOT,
    '%':   PERCENT,
    '{':   LBRACE,
    '}':   RBRACE,
    '==':  EQEQUAL,
    '!=':  NOTEQUAL,
    '<=':  LESSEQUAL,
    '>=':  GREATEREQUAL,
    '~':   TILDE,
    '^':   CIRCUMFLEX,
    '<<':  LEFTSHIFT,
    '>>':  RIGHTSHIFT,
    '**':  DOUBLESTAR,
    '+=':  PLUSEQUAL,
    '-=':  MINEQUAL,
    '*=':  STAREQUAL,
    '/=':  SLASHEQUAL,
    '%=':  PERCENTEQUAL,
    '&=':  AMPEREQUAL,
    '|=':  VBAREQUAL,
    '^=':  CIRCUMFLEXEQUAL,
    '<<=': LEFTSHIFTEQUAL,
    '>>=': RIGHTSHIFTEQUAL,
    '**=': DOUBLESTAREQUAL,
    '//':  DOUBLESLASH,
    '//=': DOUBLESLASHEQUAL,
    '@':   AT,
    # NOTE(review): '->' and '...' are presumably also tokenized as OP and
    # were previously missing here; confirm that RARROW and ELLIPSIS are in
    # scope in this module (e.g. via the token star-import).
    '->':  RARROW,
    '...': ELLIPSIS,
}
class
TokenInfo
(
collections
.
namedtuple
(
'TokenInfo'
,
'type string start end line'
)):
def
__repr__
(
self
):
...
...
@@ -52,6 +97,13 @@ class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line'
return
(
'TokenInfo(type=
%
s, string=
%
r, start=
%
r, end=
%
r, line=
%
r)'
%
self
.
_replace
(
type
=
annotated_type
))
@property
def exact_type(self):
    """Return the exact type of this token.

    For a token whose ``type`` is OP and whose ``string`` appears in
    EXACT_TOKEN_TYPES, this is the type recorded in that table.  For
    every other token it is simply ``self.type``.
    """
    generic_type = self.type
    if generic_type != OP:
        return generic_type
    # Unknown operator strings fall back to the generic OP type.
    return EXACT_TOKEN_TYPES.get(self.string, generic_type)
def group(*choices):
    """Return *choices* joined with '|' and wrapped in parentheses."""
    alternatives = '|'.join(choices)
    return '(%s)' % alternatives
def any(*choices):
    """Return ``group(*choices)`` followed by '*'."""
    grouped = group(*choices)
    return grouped + '*'
def maybe(*choices):
    """Return ``group(*choices)`` followed by '?'."""
    grouped = group(*choices)
    return grouped + '?'
...
...
@@ -549,6 +601,8 @@ def main():
parser
.
add_argument
(
dest
=
'filename'
,
nargs
=
'?'
,
metavar
=
'filename.py'
,
help
=
'the file to tokenize; defaults to stdin'
)
parser
.
add_argument
(
'-e'
,
'--exact'
,
dest
=
'exact'
,
action
=
'store_true'
,
help
=
'display token names using the exact type'
)
args
=
parser
.
parse_args
()
try
:
...
...
@@ -563,9 +617,12 @@ def main():
# Output the tokenization
for
token
in
tokens
:
token_type
=
token
.
type
if
args
.
exact
:
token_type
=
token
.
exact_type
token_range
=
"
%
d,
%
d-
%
d,
%
d:"
%
(
token
.
start
+
token
.
end
)
print
(
"
%-20
s
%-15
s
%-15
r"
%
(
token_range
,
tok_name
[
token
.
type
],
token
.
string
))
(
token_range
,
tok_name
[
token
_
type
],
token
.
string
))
except
IndentationError
as
err
:
line
,
column
=
err
.
args
[
1
][
1
:
3
]
error
(
err
.
args
[
0
],
filename
,
(
line
,
column
))
...
...
Misc/NEWS
Dosyayı görüntüle @
00c7f852
...
...
@@ -450,6 +450,9 @@ Core and Builtins
Library
-------
-
Issue
#
2134
:
A
new
attribute
that
specifies
the
exact
type
of
token
.
OP
tokens
has
been
added
to
tokenize
.
TokenInfo
.
-
Issue
#
13722
:
Avoid
silencing
ImportErrors
when
initializing
the
codecs
registry
.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment