Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
c7f7d389
Kaydet (Commit)
c7f7d389
authored
Kas 09, 2014
tarafından
Serhiy Storchaka
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
Issue #22434: Constants in sre_constants are now named constants (enum-like).
üst
bf764a19
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
130 additions
and
189 deletions
+130
-189
sre_compile.py
Lib/sre_compile.py
+37
-38
sre_constants.py
Lib/sre_constants.py
+73
-130
sre_parse.py
Lib/sre_parse.py
+4
-5
test_re.py
Lib/test/test_re.py
+16
-16
No files found.
Lib/sre_compile.py
Dosyayı görüntüle @
c7f7d389
...
...
@@ -13,7 +13,6 @@
import
_sre
import
sre_parse
from
sre_constants
import
*
from
_sre
import
MAXREPEAT
assert
_sre
.
MAGIC
==
MAGIC
,
"SRE module mismatch"
...
...
@@ -38,65 +37,65 @@ def _compile(code, pattern, flags):
for
op
,
av
in
pattern
:
if
op
in
LITERAL_CODES
:
if
flags
&
SRE_FLAG_IGNORECASE
:
emit
(
OP
CODES
[
OP_IGNORE
[
op
]
])
emit
(
OP
_IGNORE
[
op
])
emit
(
_sre
.
getlower
(
av
,
flags
))
else
:
emit
(
OPCODES
[
op
]
)
emit
(
op
)
emit
(
av
)
elif
op
is
IN
:
if
flags
&
SRE_FLAG_IGNORECASE
:
emit
(
OP
CODES
[
OP_IGNORE
[
op
]
])
emit
(
OP
_IGNORE
[
op
])
def
fixup
(
literal
,
flags
=
flags
):
return
_sre
.
getlower
(
literal
,
flags
)
else
:
emit
(
OPCODES
[
op
]
)
emit
(
op
)
fixup
=
None
skip
=
_len
(
code
);
emit
(
0
)
_compile_charset
(
av
,
flags
,
code
,
fixup
)
code
[
skip
]
=
_len
(
code
)
-
skip
elif
op
is
ANY
:
if
flags
&
SRE_FLAG_DOTALL
:
emit
(
OPCODES
[
ANY_ALL
]
)
emit
(
ANY_ALL
)
else
:
emit
(
OPCODES
[
ANY
]
)
emit
(
ANY
)
elif
op
in
REPEATING_CODES
:
if
flags
&
SRE_FLAG_TEMPLATE
:
raise
error
(
"internal: unsupported template operator"
)
elif
_simple
(
av
)
and
op
is
not
REPEAT
:
if
op
is
MAX_REPEAT
:
emit
(
OPCODES
[
REPEAT_ONE
]
)
emit
(
REPEAT_ONE
)
else
:
emit
(
OPCODES
[
MIN_REPEAT_ONE
]
)
emit
(
MIN_REPEAT_ONE
)
skip
=
_len
(
code
);
emit
(
0
)
emit
(
av
[
0
])
emit
(
av
[
1
])
_compile
(
code
,
av
[
2
],
flags
)
emit
(
OPCODES
[
SUCCESS
]
)
emit
(
SUCCESS
)
code
[
skip
]
=
_len
(
code
)
-
skip
else
:
emit
(
OPCODES
[
REPEAT
]
)
emit
(
REPEAT
)
skip
=
_len
(
code
);
emit
(
0
)
emit
(
av
[
0
])
emit
(
av
[
1
])
_compile
(
code
,
av
[
2
],
flags
)
code
[
skip
]
=
_len
(
code
)
-
skip
if
op
is
MAX_REPEAT
:
emit
(
OPCODES
[
MAX_UNTIL
]
)
emit
(
MAX_UNTIL
)
else
:
emit
(
OPCODES
[
MIN_UNTIL
]
)
emit
(
MIN_UNTIL
)
elif
op
is
SUBPATTERN
:
if
av
[
0
]:
emit
(
OPCODES
[
MARK
]
)
emit
(
MARK
)
emit
((
av
[
0
]
-
1
)
*
2
)
# _compile_info(code, av[1], flags)
_compile
(
code
,
av
[
1
],
flags
)
if
av
[
0
]:
emit
(
OPCODES
[
MARK
]
)
emit
(
MARK
)
emit
((
av
[
0
]
-
1
)
*
2
+
1
)
elif
op
in
SUCCESS_CODES
:
emit
(
OPCODES
[
op
]
)
emit
(
op
)
elif
op
in
ASSERT_CODES
:
emit
(
OPCODES
[
op
]
)
emit
(
op
)
skip
=
_len
(
code
);
emit
(
0
)
if
av
[
0
]
>=
0
:
emit
(
0
)
# look ahead
...
...
@@ -106,57 +105,57 @@ def _compile(code, pattern, flags):
raise
error
(
"look-behind requires fixed-width pattern"
)
emit
(
lo
)
# look behind
_compile
(
code
,
av
[
1
],
flags
)
emit
(
OPCODES
[
SUCCESS
]
)
emit
(
SUCCESS
)
code
[
skip
]
=
_len
(
code
)
-
skip
elif
op
is
CALL
:
emit
(
OPCODES
[
op
]
)
emit
(
op
)
skip
=
_len
(
code
);
emit
(
0
)
_compile
(
code
,
av
,
flags
)
emit
(
OPCODES
[
SUCCESS
]
)
emit
(
SUCCESS
)
code
[
skip
]
=
_len
(
code
)
-
skip
elif
op
is
AT
:
emit
(
OPCODES
[
op
]
)
emit
(
op
)
if
flags
&
SRE_FLAG_MULTILINE
:
av
=
AT_MULTILINE
.
get
(
av
,
av
)
if
flags
&
SRE_FLAG_LOCALE
:
av
=
AT_LOCALE
.
get
(
av
,
av
)
elif
flags
&
SRE_FLAG_UNICODE
:
av
=
AT_UNICODE
.
get
(
av
,
av
)
emit
(
ATCODES
[
av
]
)
emit
(
av
)
elif
op
is
BRANCH
:
emit
(
OPCODES
[
op
]
)
emit
(
op
)
tail
=
[]
tailappend
=
tail
.
append
for
av
in
av
[
1
]:
skip
=
_len
(
code
);
emit
(
0
)
# _compile_info(code, av, flags)
_compile
(
code
,
av
,
flags
)
emit
(
OPCODES
[
JUMP
]
)
emit
(
JUMP
)
tailappend
(
_len
(
code
));
emit
(
0
)
code
[
skip
]
=
_len
(
code
)
-
skip
emit
(
0
)
# end of branch
for
tail
in
tail
:
code
[
tail
]
=
_len
(
code
)
-
tail
elif
op
is
CATEGORY
:
emit
(
OPCODES
[
op
]
)
emit
(
op
)
if
flags
&
SRE_FLAG_LOCALE
:
av
=
CH_LOCALE
[
av
]
elif
flags
&
SRE_FLAG_UNICODE
:
av
=
CH_UNICODE
[
av
]
emit
(
CHCODES
[
av
]
)
emit
(
av
)
elif
op
is
GROUPREF
:
if
flags
&
SRE_FLAG_IGNORECASE
:
emit
(
OP
CODES
[
OP_IGNORE
[
op
]
])
emit
(
OP
_IGNORE
[
op
])
else
:
emit
(
OPCODES
[
op
]
)
emit
(
op
)
emit
(
av
-
1
)
elif
op
is
GROUPREF_EXISTS
:
emit
(
OPCODES
[
op
]
)
emit
(
op
)
emit
(
av
[
0
]
-
1
)
skipyes
=
_len
(
code
);
emit
(
0
)
_compile
(
code
,
av
[
1
],
flags
)
if
av
[
2
]:
emit
(
OPCODES
[
JUMP
]
)
emit
(
JUMP
)
skipno
=
_len
(
code
);
emit
(
0
)
code
[
skipyes
]
=
_len
(
code
)
-
skipyes
+
1
_compile
(
code
,
av
[
2
],
flags
)
...
...
@@ -170,7 +169,7 @@ def _compile_charset(charset, flags, code, fixup=None):
# compile charset subprogram
emit
=
code
.
append
for
op
,
av
in
_optimize_charset
(
charset
,
fixup
):
emit
(
OPCODES
[
op
]
)
emit
(
op
)
if
op
is
NEGATE
:
pass
elif
op
is
LITERAL
:
...
...
@@ -184,14 +183,14 @@ def _compile_charset(charset, flags, code, fixup=None):
code
.
extend
(
av
)
elif
op
is
CATEGORY
:
if
flags
&
SRE_FLAG_LOCALE
:
emit
(
CH
CODES
[
CH_LOCALE
[
av
]
])
emit
(
CH
_LOCALE
[
av
])
elif
flags
&
SRE_FLAG_UNICODE
:
emit
(
CH
CODES
[
CH_UNICODE
[
av
]
])
emit
(
CH
_UNICODE
[
av
])
else
:
emit
(
CHCODES
[
av
]
)
emit
(
av
)
else
:
raise
error
(
"internal: unsupported set operator"
)
emit
(
OPCODES
[
FAILURE
]
)
emit
(
FAILURE
)
def
_optimize_charset
(
charset
,
fixup
):
# internal: optimize character set
...
...
@@ -414,7 +413,7 @@ def _compile_info(code, pattern, flags):
## print "*** CHARSET", charset
# add an info block
emit
=
code
.
append
emit
(
OPCODES
[
INFO
]
)
emit
(
INFO
)
skip
=
len
(
code
);
emit
(
0
)
# literal flag
mask
=
0
...
...
@@ -460,7 +459,7 @@ def _code(p, flags):
# compile the pattern
_compile
(
code
,
p
.
data
,
flags
)
code
.
append
(
OPCODES
[
SUCCESS
]
)
code
.
append
(
SUCCESS
)
return
code
...
...
@@ -475,7 +474,7 @@ def compile(p, flags=0):
code
=
_code
(
p
,
flags
)
# print
code
# print
(code)
# map in either direction
groupindex
=
p
.
pattern
.
groupdict
...
...
Lib/sre_constants.py
Dosyayı görüntüle @
c7f7d389
...
...
@@ -23,138 +23,81 @@ from _sre import MAXREPEAT, MAXGROUPS
class
error
(
Exception
):
pass
# operators
FAILURE
=
"failure"
SUCCESS
=
"success"
ANY
=
"any"
ANY_ALL
=
"any_all"
ASSERT
=
"assert"
ASSERT_NOT
=
"assert_not"
AT
=
"at"
BIGCHARSET
=
"bigcharset"
BRANCH
=
"branch"
CALL
=
"call"
CATEGORY
=
"category"
CHARSET
=
"charset"
GROUPREF
=
"groupref"
GROUPREF_IGNORE
=
"groupref_ignore"
GROUPREF_EXISTS
=
"groupref_exists"
IN
=
"in"
IN_IGNORE
=
"in_ignore"
INFO
=
"info"
JUMP
=
"jump"
LITERAL
=
"literal"
LITERAL_IGNORE
=
"literal_ignore"
MARK
=
"mark"
MAX_REPEAT
=
"max_repeat"
MAX_UNTIL
=
"max_until"
MIN_REPEAT
=
"min_repeat"
MIN_UNTIL
=
"min_until"
NEGATE
=
"negate"
NOT_LITERAL
=
"not_literal"
NOT_LITERAL_IGNORE
=
"not_literal_ignore"
RANGE
=
"range"
RANGE_IGNORE
=
"range_ignore"
REPEAT
=
"repeat"
REPEAT_ONE
=
"repeat_one"
SUBPATTERN
=
"subpattern"
MIN_REPEAT_ONE
=
"min_repeat_one"
class _NamedIntConstant(int):
    """An ``int`` that also carries a symbolic name.

    The instance compares and computes like the plain integer *value*,
    while ``str()`` and ``repr()`` both return the *name* it was created
    with.
    """

    def __new__(cls, value, name):
        # int is immutable, so the numeric value has to be fixed in
        # __new__; the name is attached to the fresh instance afterwards.
        obj = super().__new__(cls, value)
        obj.name = name
        return obj

    def __str__(self):
        """Return the symbolic name instead of the digits."""
        return self.name

    # repr() shows the same symbolic name as str().
    __repr__ = __str__
MAXREPEAT
=
_NamedIntConstant
(
MAXREPEAT
,
'MAXREPEAT'
)
def _makecodes(names):
    """Create named integer constants from a whitespace-separated string.

    Each word in *names* becomes a ``_NamedIntConstant`` whose value is the
    word's zero-based position in the string.  Every constant is also
    published in this module's globals under its own name.

    Returns the list of constants in positional order.
    """
    items = []
    for index, label in enumerate(names.strip().split()):
        items.append(_NamedIntConstant(index, label))
    # Publish only after every constant has been built.
    module_namespace = globals()
    for constant in items:
        module_namespace[constant.name] = constant
    return items
# operators
# failure=0 success=1 (just because it looks better that way :-)
OPCODES
=
_makecodes
(
"""
FAILURE SUCCESS
ANY ANY_ALL
ASSERT ASSERT_NOT
AT
BRANCH
CALL
CATEGORY
CHARSET BIGCHARSET
GROUPREF GROUPREF_EXISTS GROUPREF_IGNORE
IN IN_IGNORE
INFO
JUMP
LITERAL LITERAL_IGNORE
MARK
MAX_UNTIL
MIN_UNTIL
NOT_LITERAL NOT_LITERAL_IGNORE
NEGATE
RANGE
REPEAT
REPEAT_ONE
SUBPATTERN
MIN_REPEAT_ONE
RANGE_IGNORE
MIN_REPEAT MAX_REPEAT
"""
)
del
OPCODES
[
-
2
:]
# remove MIN_REPEAT and MAX_REPEAT
# positions
AT_BEGINNING
=
"at_beginning"
AT_BEGINNING_LINE
=
"at_beginning_line"
AT_BEGINNING_STRING
=
"at_beginning_string"
AT_BOUNDARY
=
"at_boundary"
AT_NON_BOUNDARY
=
"at_non_boundary"
AT_END
=
"at_end"
AT_END_LINE
=
"at_end_line"
AT_END_STRING
=
"at_end_string"
AT_LOC_BOUNDARY
=
"at_loc_boundary"
AT_LOC_NON_BOUNDARY
=
"at_loc_non_boundary"
AT_UNI_BOUNDARY
=
"at_uni_boundary"
AT_UNI_NON_BOUNDARY
=
"at_uni_non_boundary"
ATCODES
=
_makecodes
(
"""
AT_BEGINNING AT_BEGINNING_LINE AT_BEGINNING_STRING
AT_BOUNDARY AT_NON_BOUNDARY
AT_END AT_END_LINE AT_END_STRING
AT_LOC_BOUNDARY AT_LOC_NON_BOUNDARY
AT_UNI_BOUNDARY AT_UNI_NON_BOUNDARY
"""
)
# categories
CATEGORY_DIGIT
=
"category_digit"
CATEGORY_NOT_DIGIT
=
"category_not_digit"
CATEGORY_SPACE
=
"category_space"
CATEGORY_NOT_SPACE
=
"category_not_space"
CATEGORY_WORD
=
"category_word"
CATEGORY_NOT_WORD
=
"category_not_word"
CATEGORY_LINEBREAK
=
"category_linebreak"
CATEGORY_NOT_LINEBREAK
=
"category_not_linebreak"
CATEGORY_LOC_WORD
=
"category_loc_word"
CATEGORY_LOC_NOT_WORD
=
"category_loc_not_word"
CATEGORY_UNI_DIGIT
=
"category_uni_digit"
CATEGORY_UNI_NOT_DIGIT
=
"category_uni_not_digit"
CATEGORY_UNI_SPACE
=
"category_uni_space"
CATEGORY_UNI_NOT_SPACE
=
"category_uni_not_space"
CATEGORY_UNI_WORD
=
"category_uni_word"
CATEGORY_UNI_NOT_WORD
=
"category_uni_not_word"
CATEGORY_UNI_LINEBREAK
=
"category_uni_linebreak"
CATEGORY_UNI_NOT_LINEBREAK
=
"category_uni_not_linebreak"
OPCODES
=
[
# failure=0 success=1 (just because it looks better that way :-)
FAILURE
,
SUCCESS
,
ANY
,
ANY_ALL
,
ASSERT
,
ASSERT_NOT
,
AT
,
BRANCH
,
CALL
,
CATEGORY
,
CHARSET
,
BIGCHARSET
,
GROUPREF
,
GROUPREF_EXISTS
,
GROUPREF_IGNORE
,
IN
,
IN_IGNORE
,
INFO
,
JUMP
,
LITERAL
,
LITERAL_IGNORE
,
MARK
,
MAX_UNTIL
,
MIN_UNTIL
,
NOT_LITERAL
,
NOT_LITERAL_IGNORE
,
NEGATE
,
RANGE
,
REPEAT
,
REPEAT_ONE
,
SUBPATTERN
,
MIN_REPEAT_ONE
,
RANGE_IGNORE
,
]
ATCODES
=
[
AT_BEGINNING
,
AT_BEGINNING_LINE
,
AT_BEGINNING_STRING
,
AT_BOUNDARY
,
AT_NON_BOUNDARY
,
AT_END
,
AT_END_LINE
,
AT_END_STRING
,
AT_LOC_BOUNDARY
,
AT_LOC_NON_BOUNDARY
,
AT_UNI_BOUNDARY
,
AT_UNI_NON_BOUNDARY
]
CHCODES
=
[
CATEGORY_DIGIT
,
CATEGORY_NOT_DIGIT
,
CATEGORY_SPACE
,
CATEGORY_NOT_SPACE
,
CATEGORY_WORD
,
CATEGORY_NOT_WORD
,
CATEGORY_LINEBREAK
,
CATEGORY_NOT_LINEBREAK
,
CATEGORY_LOC_WORD
,
CATEGORY_LOC_NOT_WORD
,
CATEGORY_UNI_DIGIT
,
CATEGORY_UNI_NOT_DIGIT
,
CATEGORY_UNI_SPACE
,
CATEGORY_UNI_NOT_SPACE
,
CATEGORY_UNI_WORD
,
CATEGORY_UNI_NOT_WORD
,
CATEGORY_UNI_LINEBREAK
,
CATEGORY_UNI_NOT_LINEBREAK
]
def makedict(list):
    """Return a dict mapping each item of *list* to its zero-based index.

    If an item occurs more than once, the index of its last occurrence
    is the one that is kept.
    """
    # The parameter name shadows the builtin; it is kept unchanged because
    # it is part of the function's signature.
    return {entry: position for position, entry in enumerate(list)}
OPCODES
=
makedict
(
OPCODES
)
ATCODES
=
makedict
(
ATCODES
)
CHCODES
=
makedict
(
CHCODES
)
CHCODES
=
_makecodes
(
"""
CATEGORY_DIGIT CATEGORY_NOT_DIGIT
CATEGORY_SPACE CATEGORY_NOT_SPACE
CATEGORY_WORD CATEGORY_NOT_WORD
CATEGORY_LINEBREAK CATEGORY_NOT_LINEBREAK
CATEGORY_LOC_WORD CATEGORY_LOC_NOT_WORD
CATEGORY_UNI_DIGIT CATEGORY_UNI_NOT_DIGIT
CATEGORY_UNI_SPACE CATEGORY_UNI_NOT_SPACE
CATEGORY_UNI_WORD CATEGORY_UNI_NOT_WORD
CATEGORY_UNI_LINEBREAK CATEGORY_UNI_NOT_LINEBREAK
"""
)
# replacement operations for "ignore case" mode
OP_IGNORE
=
{
...
...
@@ -220,9 +163,9 @@ SRE_INFO_CHARSET = 4 # pattern starts with character from given set
if
__name__
==
"__main__"
:
def
dump
(
f
,
d
,
prefix
):
items
=
sorted
(
d
.
items
(),
key
=
lambda
a
:
a
[
1
]
)
for
k
,
v
in
items
:
f
.
write
(
"#define
%
s_
%
s
%
s
\n
"
%
(
prefix
,
k
.
upper
(),
v
))
items
=
sorted
(
d
)
for
item
in
items
:
f
.
write
(
"#define
%
s_
%
s
%
d
\n
"
%
(
prefix
,
item
,
item
))
f
=
open
(
"sre_constants.h"
,
"w"
)
f
.
write
(
"""
\
/*
...
...
Lib/sre_parse.py
Dosyayı görüntüle @
c7f7d389
...
...
@@ -13,7 +13,6 @@
# XXX: show string offset and offending character for all errors
from
sre_constants
import
*
from
_sre
import
MAXREPEAT
SPECIAL_CHARS
=
".
\\
[{()*+?^$|"
REPEAT_CHARS
=
"*+?{"
...
...
@@ -103,24 +102,24 @@ class SubPattern:
nl
=
True
seqtypes
=
(
tuple
,
list
)
for
op
,
av
in
self
.
data
:
print
(
level
*
" "
+
op
,
end
=
''
)
print
(
level
*
" "
+
str
(
op
)
,
end
=
''
)
if
op
==
IN
:
# member sublanguage
print
()
for
op
,
a
in
av
:
print
((
level
+
1
)
*
" "
+
op
,
a
)
print
((
level
+
1
)
*
" "
+
str
(
op
)
,
a
)
elif
op
==
BRANCH
:
print
()
for
i
,
a
in
enumerate
(
av
[
1
]):
if
i
:
print
(
level
*
" "
+
"
or
"
)
print
(
level
*
" "
+
"
OR
"
)
a
.
dump
(
level
+
1
)
elif
op
==
GROUPREF_EXISTS
:
condgroup
,
item_yes
,
item_no
=
av
print
(
''
,
condgroup
)
item_yes
.
dump
(
level
+
1
)
if
item_no
:
print
(
level
*
" "
+
"
else
"
)
print
(
level
*
" "
+
"
ELSE
"
)
item_no
.
dump
(
level
+
1
)
elif
isinstance
(
av
,
seqtypes
):
nl
=
False
...
...
Lib/test/test_re.py
Dosyayı görüntüle @
c7f7d389
...
...
@@ -1285,22 +1285,22 @@ class ReTests(unittest.TestCase):
with
captured_stdout
()
as
out
:
re
.
compile
(
pat
,
re
.
DEBUG
)
dump
=
'''
\
subpattern
1
literal
46
subpattern
None
branch
in
literal
99
literal
104
or
literal
112
literal
121
subpattern
None
groupref_exists
1
at at_end
else
literal
58
literal
32
SUBPATTERN
1
LITERAL
46
SUBPATTERN
None
BRANCH
IN
LITERAL
99
LITERAL
104
OR
LITERAL
112
LITERAL
121
SUBPATTERN
None
GROUPREF_EXISTS
1
AT AT_END
ELSE
LITERAL
58
LITERAL
32
'''
self
.
assertEqual
(
out
.
getvalue
(),
dump
)
# Debug output is output again even a second time (bypassing
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment