Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
90a07913
Kaydet (Commit)
90a07913
authored
Haz 30, 2000
tarafından
Fredrik Lundh
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
- pedantic: make sure "python -t" doesn't complain...
üst
df02d0b3
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
587 additions
and
587 deletions
+587
-587
sre.py
Lib/sre.py
+11
-11
sre_compile.py
Lib/sre_compile.py
+153
-153
sre_parse.py
Lib/sre_parse.py
+418
-418
test_sre.py
Lib/test/test_sre.py
+5
-5
No files found.
Lib/sre.py
Dosyayı görüntüle @
90a07913
...
@@ -98,7 +98,7 @@ def _subn(pattern, template, string, count=0):
...
@@ -98,7 +98,7 @@ def _subn(pattern, template, string, count=0):
if
callable
(
template
):
if
callable
(
template
):
filter
=
template
filter
=
template
else
:
else
:
template
=
sre_parse
.
parse_template
(
template
,
pattern
)
template
=
sre_parse
.
parse_template
(
template
,
pattern
)
def
filter
(
match
,
template
=
template
):
def
filter
(
match
,
template
=
template
):
return
sre_parse
.
expand_template
(
template
,
match
)
return
sre_parse
.
expand_template
(
template
,
match
)
n
=
i
=
0
n
=
i
=
0
...
@@ -109,11 +109,11 @@ def _subn(pattern, template, string, count=0):
...
@@ -109,11 +109,11 @@ def _subn(pattern, template, string, count=0):
m
=
c
.
search
()
m
=
c
.
search
()
if
not
m
:
if
not
m
:
break
break
b
,
e
=
m
.
span
()
b
,
e
=
m
.
span
()
if
i
<
b
:
if
i
<
b
:
append
(
string
[
i
:
b
])
append
(
string
[
i
:
b
])
append
(
filter
(
m
))
append
(
filter
(
m
))
i
=
e
i
=
e
n
=
n
+
1
n
=
n
+
1
append
(
string
[
i
:])
append
(
string
[
i
:])
return
string
[:
0
]
.
join
(
s
),
n
return
string
[:
0
]
.
join
(
s
),
n
...
@@ -130,15 +130,15 @@ def _split(pattern, string, maxsplit=0):
...
@@ -130,15 +130,15 @@ def _split(pattern, string, maxsplit=0):
m
=
c
.
search
()
m
=
c
.
search
()
if
not
m
:
if
not
m
:
break
break
b
,
e
=
m
.
span
()
b
,
e
=
m
.
span
()
if
b
==
e
:
if
b
==
e
:
if
i
>=
len
(
string
):
if
i
>=
len
(
string
):
break
break
continue
continue
append
(
string
[
i
:
b
])
append
(
string
[
i
:
b
])
if
g
and
b
!=
e
:
if
g
and
b
!=
e
:
extend
(
m
.
groups
())
extend
(
m
.
groups
())
i
=
e
i
=
e
n
=
n
+
1
n
=
n
+
1
append
(
string
[
i
:])
append
(
string
[
i
:])
return
s
return
s
Lib/sre_compile.py
Dosyayı görüntüle @
90a07913
...
@@ -18,7 +18,7 @@ from sre_constants import *
...
@@ -18,7 +18,7 @@ from sre_constants import *
# find an array type code that matches the engine's code size
# find an array type code that matches the engine's code size
for
WORDSIZE
in
"BHil"
:
for
WORDSIZE
in
"BHil"
:
if
len
(
array
.
array
(
WORDSIZE
,
[
0
])
.
tostring
())
==
_sre
.
getcodesize
():
if
len
(
array
.
array
(
WORDSIZE
,
[
0
])
.
tostring
())
==
_sre
.
getcodesize
():
break
break
else
:
else
:
raise
RuntimeError
,
"cannot find a useable array type"
raise
RuntimeError
,
"cannot find a useable array type"
...
@@ -26,132 +26,132 @@ def _compile(code, pattern, flags):
...
@@ -26,132 +26,132 @@ def _compile(code, pattern, flags):
# internal: compile a (sub)pattern
# internal: compile a (sub)pattern
emit
=
code
.
append
emit
=
code
.
append
for
op
,
av
in
pattern
:
for
op
,
av
in
pattern
:
if
op
is
ANY
:
if
op
is
ANY
:
if
flags
&
SRE_FLAG_DOTALL
:
if
flags
&
SRE_FLAG_DOTALL
:
emit
(
OPCODES
[
op
])
emit
(
OPCODES
[
op
])
else
:
else
:
emit
(
OPCODES
[
CATEGORY
])
emit
(
OPCODES
[
CATEGORY
])
emit
(
CHCODES
[
CATEGORY_NOT_LINEBREAK
])
emit
(
CHCODES
[
CATEGORY_NOT_LINEBREAK
])
elif
op
in
(
SUCCESS
,
FAILURE
):
elif
op
in
(
SUCCESS
,
FAILURE
):
emit
(
OPCODES
[
op
])
emit
(
OPCODES
[
op
])
elif
op
is
AT
:
elif
op
is
AT
:
emit
(
OPCODES
[
op
])
emit
(
OPCODES
[
op
])
if
flags
&
SRE_FLAG_MULTILINE
:
if
flags
&
SRE_FLAG_MULTILINE
:
emit
(
ATCODES
[
AT_MULTILINE
[
av
]])
emit
(
ATCODES
[
AT_MULTILINE
[
av
]])
else
:
else
:
emit
(
ATCODES
[
av
])
emit
(
ATCODES
[
av
])
elif
op
is
BRANCH
:
elif
op
is
BRANCH
:
emit
(
OPCODES
[
op
])
emit
(
OPCODES
[
op
])
tail
=
[]
tail
=
[]
for
av
in
av
[
1
]:
for
av
in
av
[
1
]:
skip
=
len
(
code
);
emit
(
0
)
skip
=
len
(
code
);
emit
(
0
)
_compile
(
code
,
av
,
flags
)
_compile
(
code
,
av
,
flags
)
emit
(
OPCODES
[
JUMP
])
emit
(
OPCODES
[
JUMP
])
tail
.
append
(
len
(
code
));
emit
(
0
)
tail
.
append
(
len
(
code
));
emit
(
0
)
code
[
skip
]
=
len
(
code
)
-
skip
code
[
skip
]
=
len
(
code
)
-
skip
emit
(
0
)
# end of branch
emit
(
0
)
# end of branch
for
tail
in
tail
:
for
tail
in
tail
:
code
[
tail
]
=
len
(
code
)
-
tail
code
[
tail
]
=
len
(
code
)
-
tail
elif
op
is
CALL
:
elif
op
is
CALL
:
emit
(
OPCODES
[
op
])
emit
(
OPCODES
[
op
])
skip
=
len
(
code
);
emit
(
0
)
skip
=
len
(
code
);
emit
(
0
)
_compile
(
code
,
av
,
flags
)
_compile
(
code
,
av
,
flags
)
emit
(
OPCODES
[
SUCCESS
])
emit
(
OPCODES
[
SUCCESS
])
code
[
skip
]
=
len
(
code
)
-
skip
code
[
skip
]
=
len
(
code
)
-
skip
elif
op
is
CATEGORY
:
elif
op
is
CATEGORY
:
emit
(
OPCODES
[
op
])
emit
(
OPCODES
[
op
])
if
flags
&
SRE_FLAG_LOCALE
:
if
flags
&
SRE_FLAG_LOCALE
:
emit
(
CHCODES
[
CH_LOCALE
[
av
]])
emit
(
CHCODES
[
CH_LOCALE
[
av
]])
elif
flags
&
SRE_FLAG_UNICODE
:
elif
flags
&
SRE_FLAG_UNICODE
:
emit
(
CHCODES
[
CH_UNICODE
[
av
]])
emit
(
CHCODES
[
CH_UNICODE
[
av
]])
else
:
else
:
emit
(
CHCODES
[
av
])
emit
(
CHCODES
[
av
])
elif
op
is
GROUP
:
elif
op
is
GROUP
:
if
flags
&
SRE_FLAG_IGNORECASE
:
if
flags
&
SRE_FLAG_IGNORECASE
:
emit
(
OPCODES
[
OP_IGNORE
[
op
]])
emit
(
OPCODES
[
OP_IGNORE
[
op
]])
else
:
else
:
emit
(
OPCODES
[
op
])
emit
(
OPCODES
[
op
])
emit
(
av
-
1
)
emit
(
av
-
1
)
elif
op
is
IN
:
elif
op
is
IN
:
if
flags
&
SRE_FLAG_IGNORECASE
:
if
flags
&
SRE_FLAG_IGNORECASE
:
emit
(
OPCODES
[
OP_IGNORE
[
op
]])
emit
(
OPCODES
[
OP_IGNORE
[
op
]])
def
fixup
(
literal
,
flags
=
flags
):
def
fixup
(
literal
,
flags
=
flags
):
return
_sre
.
getlower
(
ord
(
literal
),
flags
)
return
_sre
.
getlower
(
ord
(
literal
),
flags
)
else
:
else
:
emit
(
OPCODES
[
op
])
emit
(
OPCODES
[
op
])
fixup
=
ord
fixup
=
ord
skip
=
len
(
code
);
emit
(
0
)
skip
=
len
(
code
);
emit
(
0
)
for
op
,
av
in
av
:
for
op
,
av
in
av
:
emit
(
OPCODES
[
op
])
emit
(
OPCODES
[
op
])
if
op
is
NEGATE
:
if
op
is
NEGATE
:
pass
pass
elif
op
is
LITERAL
:
elif
op
is
LITERAL
:
emit
(
fixup
(
av
))
emit
(
fixup
(
av
))
elif
op
is
RANGE
:
elif
op
is
RANGE
:
emit
(
fixup
(
av
[
0
]))
emit
(
fixup
(
av
[
0
]))
emit
(
fixup
(
av
[
1
]))
emit
(
fixup
(
av
[
1
]))
elif
op
is
CATEGORY
:
elif
op
is
CATEGORY
:
if
flags
&
SRE_FLAG_LOCALE
:
if
flags
&
SRE_FLAG_LOCALE
:
emit
(
CHCODES
[
CH_LOCALE
[
av
]])
emit
(
CHCODES
[
CH_LOCALE
[
av
]])
elif
flags
&
SRE_FLAG_UNICODE
:
elif
flags
&
SRE_FLAG_UNICODE
:
emit
(
CHCODES
[
CH_UNICODE
[
av
]])
emit
(
CHCODES
[
CH_UNICODE
[
av
]])
else
:
else
:
emit
(
CHCODES
[
av
])
emit
(
CHCODES
[
av
])
else
:
else
:
raise
error
,
"internal: unsupported set operator"
raise
error
,
"internal: unsupported set operator"
emit
(
OPCODES
[
FAILURE
])
emit
(
OPCODES
[
FAILURE
])
code
[
skip
]
=
len
(
code
)
-
skip
code
[
skip
]
=
len
(
code
)
-
skip
elif
op
in
(
LITERAL
,
NOT_LITERAL
):
elif
op
in
(
LITERAL
,
NOT_LITERAL
):
if
flags
&
SRE_FLAG_IGNORECASE
:
if
flags
&
SRE_FLAG_IGNORECASE
:
emit
(
OPCODES
[
OP_IGNORE
[
op
]])
emit
(
OPCODES
[
OP_IGNORE
[
op
]])
else
:
else
:
emit
(
OPCODES
[
op
])
emit
(
OPCODES
[
op
])
emit
(
ord
(
av
))
emit
(
ord
(
av
))
elif
op
is
MARK
:
elif
op
is
MARK
:
emit
(
OPCODES
[
op
])
emit
(
OPCODES
[
op
])
emit
(
av
)
emit
(
av
)
elif
op
in
(
REPEAT
,
MIN_REPEAT
,
MAX_REPEAT
):
elif
op
in
(
REPEAT
,
MIN_REPEAT
,
MAX_REPEAT
):
if
flags
&
SRE_FLAG_TEMPLATE
:
if
flags
&
SRE_FLAG_TEMPLATE
:
emit
(
OPCODES
[
REPEAT
])
emit
(
OPCODES
[
REPEAT
])
skip
=
len
(
code
);
emit
(
0
)
skip
=
len
(
code
);
emit
(
0
)
emit
(
av
[
0
])
emit
(
av
[
0
])
emit
(
av
[
1
])
emit
(
av
[
1
])
_compile
(
code
,
av
[
2
],
flags
)
_compile
(
code
,
av
[
2
],
flags
)
emit
(
OPCODES
[
SUCCESS
])
emit
(
OPCODES
[
SUCCESS
])
code
[
skip
]
=
len
(
code
)
-
skip
code
[
skip
]
=
len
(
code
)
-
skip
else
:
else
:
lo
,
hi
=
av
[
2
]
.
getwidth
()
lo
,
hi
=
av
[
2
]
.
getwidth
()
if
lo
==
0
:
if
lo
==
0
:
raise
error
,
"nothing to repeat"
raise
error
,
"nothing to repeat"
if
0
and
lo
==
hi
==
1
and
op
is
MAX_REPEAT
:
if
0
and
lo
==
hi
==
1
and
op
is
MAX_REPEAT
:
# FIXME: <fl> need a better way to figure out when
# FIXME: <fl> need a better way to figure out when
# it's safe to use this one (in the parser, probably)
# it's safe to use this one (in the parser, probably)
emit
(
OPCODES
[
MAX_REPEAT_ONE
])
emit
(
OPCODES
[
MAX_REPEAT_ONE
])
skip
=
len
(
code
);
emit
(
0
)
skip
=
len
(
code
);
emit
(
0
)
emit
(
av
[
0
])
emit
(
av
[
0
])
emit
(
av
[
1
])
emit
(
av
[
1
])
_compile
(
code
,
av
[
2
],
flags
)
_compile
(
code
,
av
[
2
],
flags
)
emit
(
OPCODES
[
SUCCESS
])
emit
(
OPCODES
[
SUCCESS
])
code
[
skip
]
=
len
(
code
)
-
skip
code
[
skip
]
=
len
(
code
)
-
skip
else
:
else
:
emit
(
OPCODES
[
op
])
emit
(
OPCODES
[
op
])
skip
=
len
(
code
);
emit
(
0
)
skip
=
len
(
code
);
emit
(
0
)
emit
(
av
[
0
])
emit
(
av
[
0
])
emit
(
av
[
1
])
emit
(
av
[
1
])
_compile
(
code
,
av
[
2
],
flags
)
_compile
(
code
,
av
[
2
],
flags
)
emit
(
OPCODES
[
SUCCESS
])
emit
(
OPCODES
[
SUCCESS
])
code
[
skip
]
=
len
(
code
)
-
skip
code
[
skip
]
=
len
(
code
)
-
skip
elif
op
is
SUBPATTERN
:
elif
op
is
SUBPATTERN
:
group
=
av
[
0
]
group
=
av
[
0
]
if
group
:
if
group
:
emit
(
OPCODES
[
MARK
])
emit
(
OPCODES
[
MARK
])
emit
((
group
-
1
)
*
2
)
emit
((
group
-
1
)
*
2
)
_compile
(
code
,
av
[
1
],
flags
)
_compile
(
code
,
av
[
1
],
flags
)
if
group
:
if
group
:
emit
(
OPCODES
[
MARK
])
emit
(
OPCODES
[
MARK
])
emit
((
group
-
1
)
*
2
+
1
)
emit
((
group
-
1
)
*
2
+
1
)
else
:
else
:
raise
ValueError
,
(
"unsupported operand type"
,
op
)
raise
ValueError
,
(
"unsupported operand type"
,
op
)
def
_compile_info
(
code
,
pattern
,
flags
):
def
_compile_info
(
code
,
pattern
,
flags
):
# internal: compile an info block. in the current version,
# internal: compile an info block. in the current version,
...
@@ -159,15 +159,15 @@ def _compile_info(code, pattern, flags):
...
@@ -159,15 +159,15 @@ def _compile_info(code, pattern, flags):
# if any
# if any
lo
,
hi
=
pattern
.
getwidth
()
lo
,
hi
=
pattern
.
getwidth
()
if
lo
==
0
:
if
lo
==
0
:
return
# not worth it
return
# not worth it
# look for a literal prefix
# look for a literal prefix
prefix
=
[]
prefix
=
[]
if
not
(
flags
&
SRE_FLAG_IGNORECASE
):
if
not
(
flags
&
SRE_FLAG_IGNORECASE
):
for
op
,
av
in
pattern
.
data
:
for
op
,
av
in
pattern
.
data
:
if
op
is
LITERAL
:
if
op
is
LITERAL
:
prefix
.
append
(
ord
(
av
))
prefix
.
append
(
ord
(
av
))
else
:
else
:
break
break
# add an info block
# add an info block
emit
=
code
.
append
emit
=
code
.
append
emit
(
OPCODES
[
INFO
])
emit
(
OPCODES
[
INFO
])
...
@@ -175,25 +175,25 @@ def _compile_info(code, pattern, flags):
...
@@ -175,25 +175,25 @@ def _compile_info(code, pattern, flags):
# literal flag
# literal flag
mask
=
0
mask
=
0
if
len
(
prefix
)
==
len
(
pattern
.
data
):
if
len
(
prefix
)
==
len
(
pattern
.
data
):
mask
=
1
mask
=
1
emit
(
mask
)
emit
(
mask
)
# pattern length
# pattern length
emit
(
lo
)
emit
(
lo
)
if
hi
<
32768
:
if
hi
<
32768
:
emit
(
hi
)
emit
(
hi
)
else
:
else
:
emit
(
0
)
emit
(
0
)
# add literal prefix
# add literal prefix
emit
(
len
(
prefix
))
emit
(
len
(
prefix
))
if
prefix
:
if
prefix
:
code
.
extend
(
prefix
)
code
.
extend
(
prefix
)
# generate overlap table
# generate overlap table
table
=
[
-
1
]
+
([
0
]
*
len
(
prefix
))
table
=
[
-
1
]
+
([
0
]
*
len
(
prefix
))
for
i
in
range
(
len
(
prefix
)):
for
i
in
range
(
len
(
prefix
)):
table
[
i
+
1
]
=
table
[
i
]
+
1
table
[
i
+
1
]
=
table
[
i
]
+
1
while
table
[
i
+
1
]
>
0
and
prefix
[
i
]
!=
prefix
[
table
[
i
+
1
]
-
1
]:
while
table
[
i
+
1
]
>
0
and
prefix
[
i
]
!=
prefix
[
table
[
i
+
1
]
-
1
]:
table
[
i
+
1
]
=
table
[
table
[
i
+
1
]
-
1
]
+
1
table
[
i
+
1
]
=
table
[
table
[
i
+
1
]
-
1
]
+
1
code
.
extend
(
table
[
1
:])
# don't store first entry
code
.
extend
(
table
[
1
:])
# don't store first entry
code
[
skip
]
=
len
(
code
)
-
skip
code
[
skip
]
=
len
(
code
)
-
skip
def
compile
(
p
,
flags
=
0
):
def
compile
(
p
,
flags
=
0
):
...
@@ -201,11 +201,11 @@ def compile(p, flags=0):
...
@@ -201,11 +201,11 @@ def compile(p, flags=0):
# compile, as necessary
# compile, as necessary
if
type
(
p
)
in
(
type
(
""
),
type
(
u""
)):
if
type
(
p
)
in
(
type
(
""
),
type
(
u""
)):
import
sre_parse
import
sre_parse
pattern
=
p
pattern
=
p
p
=
sre_parse
.
parse
(
p
)
p
=
sre_parse
.
parse
(
p
)
else
:
else
:
pattern
=
None
pattern
=
None
flags
=
p
.
pattern
.
flags
|
flags
flags
=
p
.
pattern
.
flags
|
flags
code
=
[]
code
=
[]
...
@@ -220,10 +220,10 @@ def compile(p, flags=0):
...
@@ -220,10 +220,10 @@ def compile(p, flags=0):
# FIXME: <fl> get rid of this limitation!
# FIXME: <fl> get rid of this limitation!
assert
p
.
pattern
.
groups
<=
100
,
\
assert
p
.
pattern
.
groups
<=
100
,
\
"sorry, but this version only supports 100 named groups"
"sorry, but this version only supports 100 named groups"
return
_sre
.
compile
(
return
_sre
.
compile
(
pattern
,
flags
,
pattern
,
flags
,
array
.
array
(
WORDSIZE
,
code
)
.
tostring
(),
array
.
array
(
WORDSIZE
,
code
)
.
tostring
(),
p
.
pattern
.
groups
-
1
,
p
.
pattern
.
groupdict
p
.
pattern
.
groups
-
1
,
p
.
pattern
.
groupdict
)
)
Lib/sre_parse.py
Dosyayı görüntüle @
90a07913
...
@@ -67,106 +67,106 @@ FLAGS = {
...
@@ -67,106 +67,106 @@ FLAGS = {
class
State
:
class
State
:
def
__init__
(
self
):
def
__init__
(
self
):
self
.
flags
=
0
self
.
flags
=
0
self
.
groups
=
1
self
.
groups
=
1
self
.
groupdict
=
{}
self
.
groupdict
=
{}
def
getgroup
(
self
,
name
=
None
):
def
getgroup
(
self
,
name
=
None
):
gid
=
self
.
groups
gid
=
self
.
groups
self
.
groups
=
gid
+
1
self
.
groups
=
gid
+
1
if
name
:
if
name
:
self
.
groupdict
[
name
]
=
gid
self
.
groupdict
[
name
]
=
gid
return
gid
return
gid
class
SubPattern
:
class
SubPattern
:
# a subpattern, in intermediate form
# a subpattern, in intermediate form
def
__init__
(
self
,
pattern
,
data
=
None
):
def
__init__
(
self
,
pattern
,
data
=
None
):
self
.
pattern
=
pattern
self
.
pattern
=
pattern
if
not
data
:
if
not
data
:
data
=
[]
data
=
[]
self
.
data
=
data
self
.
data
=
data
self
.
width
=
None
self
.
width
=
None
def
__repr__
(
self
):
def
__repr__
(
self
):
return
repr
(
self
.
data
)
return
repr
(
self
.
data
)
def
__len__
(
self
):
def
__len__
(
self
):
return
len
(
self
.
data
)
return
len
(
self
.
data
)
def
__delitem__
(
self
,
index
):
def
__delitem__
(
self
,
index
):
del
self
.
data
[
index
]
del
self
.
data
[
index
]
def
__getitem__
(
self
,
index
):
def
__getitem__
(
self
,
index
):
return
self
.
data
[
index
]
return
self
.
data
[
index
]
def
__setitem__
(
self
,
index
,
code
):
def
__setitem__
(
self
,
index
,
code
):
self
.
data
[
index
]
=
code
self
.
data
[
index
]
=
code
def
__getslice__
(
self
,
start
,
stop
):
def
__getslice__
(
self
,
start
,
stop
):
return
SubPattern
(
self
.
pattern
,
self
.
data
[
start
:
stop
])
return
SubPattern
(
self
.
pattern
,
self
.
data
[
start
:
stop
])
def
insert
(
self
,
index
,
code
):
def
insert
(
self
,
index
,
code
):
self
.
data
.
insert
(
index
,
code
)
self
.
data
.
insert
(
index
,
code
)
def
append
(
self
,
code
):
def
append
(
self
,
code
):
self
.
data
.
append
(
code
)
self
.
data
.
append
(
code
)
def
getwidth
(
self
):
def
getwidth
(
self
):
# determine the width (min, max) for this subpattern
# determine the width (min, max) for this subpattern
if
self
.
width
:
if
self
.
width
:
return
self
.
width
return
self
.
width
lo
=
hi
=
0L
lo
=
hi
=
0L
for
op
,
av
in
self
.
data
:
for
op
,
av
in
self
.
data
:
if
op
is
BRANCH
:
if
op
is
BRANCH
:
l
=
sys
.
maxint
l
=
sys
.
maxint
h
=
0
h
=
0
for
av
in
av
[
1
]:
for
av
in
av
[
1
]:
i
,
j
=
av
.
getwidth
()
i
,
j
=
av
.
getwidth
()
l
=
min
(
l
,
i
)
l
=
min
(
l
,
i
)
h
=
min
(
h
,
j
)
h
=
min
(
h
,
j
)
lo
=
lo
+
i
lo
=
lo
+
i
hi
=
hi
+
j
hi
=
hi
+
j
elif
op
is
CALL
:
elif
op
is
CALL
:
i
,
j
=
av
.
getwidth
()
i
,
j
=
av
.
getwidth
()
lo
=
lo
+
i
lo
=
lo
+
i
hi
=
hi
+
j
hi
=
hi
+
j
elif
op
is
SUBPATTERN
:
elif
op
is
SUBPATTERN
:
i
,
j
=
av
[
1
]
.
getwidth
()
i
,
j
=
av
[
1
]
.
getwidth
()
lo
=
lo
+
i
lo
=
lo
+
i
hi
=
hi
+
j
hi
=
hi
+
j
elif
op
in
(
MIN_REPEAT
,
MAX_REPEAT
):
elif
op
in
(
MIN_REPEAT
,
MAX_REPEAT
):
i
,
j
=
av
[
2
]
.
getwidth
()
i
,
j
=
av
[
2
]
.
getwidth
()
lo
=
lo
+
long
(
i
)
*
av
[
0
]
lo
=
lo
+
long
(
i
)
*
av
[
0
]
hi
=
hi
+
long
(
j
)
*
av
[
1
]
hi
=
hi
+
long
(
j
)
*
av
[
1
]
elif
op
in
(
ANY
,
RANGE
,
IN
,
LITERAL
,
NOT_LITERAL
,
CATEGORY
):
elif
op
in
(
ANY
,
RANGE
,
IN
,
LITERAL
,
NOT_LITERAL
,
CATEGORY
):
lo
=
lo
+
1
lo
=
lo
+
1
hi
=
hi
+
1
hi
=
hi
+
1
elif
op
==
SUCCESS
:
elif
op
==
SUCCESS
:
break
break
self
.
width
=
int
(
min
(
lo
,
sys
.
maxint
)),
int
(
min
(
hi
,
sys
.
maxint
))
self
.
width
=
int
(
min
(
lo
,
sys
.
maxint
)),
int
(
min
(
hi
,
sys
.
maxint
))
return
self
.
width
return
self
.
width
class
Tokenizer
:
class
Tokenizer
:
def
__init__
(
self
,
string
):
def
__init__
(
self
,
string
):
self
.
index
=
0
self
.
index
=
0
self
.
string
=
string
self
.
string
=
string
self
.
next
=
self
.
__next
()
self
.
next
=
self
.
__next
()
def
__next
(
self
):
def
__next
(
self
):
if
self
.
index
>=
len
(
self
.
string
):
if
self
.
index
>=
len
(
self
.
string
):
return
None
return
None
char
=
self
.
string
[
self
.
index
]
char
=
self
.
string
[
self
.
index
]
if
char
[
0
]
==
"
\\
"
:
if
char
[
0
]
==
"
\\
"
:
try
:
try
:
c
=
self
.
string
[
self
.
index
+
1
]
c
=
self
.
string
[
self
.
index
+
1
]
except
IndexError
:
except
IndexError
:
raise
error
,
"bogus escape"
raise
error
,
"bogus escape"
char
=
char
+
c
char
=
char
+
c
self
.
index
=
self
.
index
+
len
(
char
)
self
.
index
=
self
.
index
+
len
(
char
)
return
char
return
char
def
match
(
self
,
char
):
def
match
(
self
,
char
):
if
char
==
self
.
next
:
if
char
==
self
.
next
:
self
.
next
=
self
.
__next
()
self
.
next
=
self
.
__next
()
return
1
return
1
return
0
return
0
def
match_set
(
self
,
set
):
def
match_set
(
self
,
set
):
if
self
.
next
and
self
.
next
in
set
:
if
self
.
next
and
self
.
next
in
set
:
self
.
next
=
self
.
__next
()
self
.
next
=
self
.
__next
()
return
1
return
1
return
0
return
0
def
get
(
self
):
def
get
(
self
):
this
=
self
.
next
this
=
self
.
next
self
.
next
=
self
.
__next
()
self
.
next
=
self
.
__next
()
return
this
return
this
def
isident
(
char
):
def
isident
(
char
):
return
"a"
<=
char
<=
"z"
or
"A"
<=
char
<=
"Z"
or
char
==
"_"
return
"a"
<=
char
<=
"z"
or
"A"
<=
char
<=
"Z"
or
char
==
"_"
...
@@ -180,83 +180,83 @@ def isname(name):
...
@@ -180,83 +180,83 @@ def isname(name):
# expression instead, but I seem to have certain bootstrapping
# expression instead, but I seem to have certain bootstrapping
# problems here ;-)
# problems here ;-)
if
not
isident
(
name
[
0
]):
if
not
isident
(
name
[
0
]):
return
0
return
0
for
char
in
name
:
for
char
in
name
:
if
not
isident
(
char
)
and
not
isdigit
(
char
):
if
not
isident
(
char
)
and
not
isdigit
(
char
):
return
0
return
0
return
1
return
1
def
_group
(
escape
,
groups
):
def
_group
(
escape
,
groups
):
# check if the escape string represents a valid group
# check if the escape string represents a valid group
try
:
try
:
group
=
int
(
escape
[
1
:])
group
=
int
(
escape
[
1
:])
if
group
and
group
<
groups
:
if
group
and
group
<
groups
:
return
group
return
group
except
ValueError
:
except
ValueError
:
pass
pass
return
None
# not a valid group
return
None
# not a valid group
def
_class_escape
(
source
,
escape
):
def
_class_escape
(
source
,
escape
):
# handle escape code inside character class
# handle escape code inside character class
code
=
ESCAPES
.
get
(
escape
)
code
=
ESCAPES
.
get
(
escape
)
if
code
:
if
code
:
return
code
return
code
code
=
CATEGORIES
.
get
(
escape
)
code
=
CATEGORIES
.
get
(
escape
)
if
code
:
if
code
:
return
code
return
code
try
:
try
:
if
escape
[
1
:
2
]
==
"x"
:
if
escape
[
1
:
2
]
==
"x"
:
while
source
.
next
in
HEXDIGITS
:
while
source
.
next
in
HEXDIGITS
:
escape
=
escape
+
source
.
get
()
escape
=
escape
+
source
.
get
()
escape
=
escape
[
2
:]
escape
=
escape
[
2
:]
# FIXME: support unicode characters!
# FIXME: support unicode characters!
return
LITERAL
,
chr
(
int
(
escape
[
-
4
:],
16
)
&
0xff
)
return
LITERAL
,
chr
(
int
(
escape
[
-
4
:],
16
)
&
0xff
)
elif
str
(
escape
[
1
:
2
])
in
OCTDIGITS
:
elif
str
(
escape
[
1
:
2
])
in
OCTDIGITS
:
while
source
.
next
in
OCTDIGITS
:
while
source
.
next
in
OCTDIGITS
:
escape
=
escape
+
source
.
get
()
escape
=
escape
+
source
.
get
()
escape
=
escape
[
1
:]
escape
=
escape
[
1
:]
# FIXME: support unicode characters!
# FIXME: support unicode characters!
return
LITERAL
,
chr
(
int
(
escape
[
-
6
:],
8
)
&
0xff
)
return
LITERAL
,
chr
(
int
(
escape
[
-
6
:],
8
)
&
0xff
)
if
len
(
escape
)
==
2
:
if
len
(
escape
)
==
2
:
return
LITERAL
,
escape
[
1
]
return
LITERAL
,
escape
[
1
]
except
ValueError
:
except
ValueError
:
pass
pass
raise
error
,
"bogus escape:
%
s"
%
repr
(
escape
)
raise
error
,
"bogus escape:
%
s"
%
repr
(
escape
)
def
_escape
(
source
,
escape
,
state
):
def
_escape
(
source
,
escape
,
state
):
# handle escape code in expression
# handle escape code in expression
code
=
CATEGORIES
.
get
(
escape
)
code
=
CATEGORIES
.
get
(
escape
)
if
code
:
if
code
:
return
code
return
code
code
=
ESCAPES
.
get
(
escape
)
code
=
ESCAPES
.
get
(
escape
)
if
code
:
if
code
:
return
code
return
code
try
:
try
:
if
escape
[
1
:
2
]
==
"x"
:
if
escape
[
1
:
2
]
==
"x"
:
while
source
.
next
in
HEXDIGITS
:
while
source
.
next
in
HEXDIGITS
:
escape
=
escape
+
source
.
get
()
escape
=
escape
+
source
.
get
()
escape
=
escape
[
2
:]
escape
=
escape
[
2
:]
# FIXME: support unicode characters!
# FIXME: support unicode characters!
return
LITERAL
,
chr
(
int
(
escape
[
-
4
:],
16
)
&
0xff
)
return
LITERAL
,
chr
(
int
(
escape
[
-
4
:],
16
)
&
0xff
)
elif
escape
[
1
:
2
]
in
DIGITS
:
elif
escape
[
1
:
2
]
in
DIGITS
:
while
1
:
while
1
:
group
=
_group
(
escape
,
state
.
groups
)
group
=
_group
(
escape
,
state
.
groups
)
if
group
:
if
group
:
if
(
not
source
.
next
or
if
(
not
source
.
next
or
not
_group
(
escape
+
source
.
next
,
state
.
groups
)):
not
_group
(
escape
+
source
.
next
,
state
.
groups
)):
return
GROUP
,
group
return
GROUP
,
group
escape
=
escape
+
source
.
get
()
escape
=
escape
+
source
.
get
()
elif
source
.
next
in
OCTDIGITS
:
elif
source
.
next
in
OCTDIGITS
:
escape
=
escape
+
source
.
get
()
escape
=
escape
+
source
.
get
()
else
:
else
:
break
break
escape
=
escape
[
1
:]
escape
=
escape
[
1
:]
# FIXME: support unicode characters!
# FIXME: support unicode characters!
return
LITERAL
,
chr
(
int
(
escape
[
-
6
:],
8
)
&
0xff
)
return
LITERAL
,
chr
(
int
(
escape
[
-
6
:],
8
)
&
0xff
)
if
len
(
escape
)
==
2
:
if
len
(
escape
)
==
2
:
return
LITERAL
,
escape
[
1
]
return
LITERAL
,
escape
[
1
]
except
ValueError
:
except
ValueError
:
pass
pass
raise
error
,
"bogus escape:
%
s"
%
repr
(
escape
)
raise
error
,
"bogus escape:
%
s"
%
repr
(
escape
)
...
@@ -268,35 +268,35 @@ def _branch(pattern, items):
...
@@ -268,35 +268,35 @@ def _branch(pattern, items):
# check if all items share a common prefix
# check if all items share a common prefix
while
1
:
while
1
:
prefix
=
None
prefix
=
None
for
item
in
items
:
for
item
in
items
:
if
not
item
:
if
not
item
:
break
break
if
prefix
is
None
:
if
prefix
is
None
:
prefix
=
item
[
0
]
prefix
=
item
[
0
]
elif
item
[
0
]
!=
prefix
:
elif
item
[
0
]
!=
prefix
:
break
break
else
:
else
:
# all subitems start with a common "prefix".
# all subitems start with a common "prefix".
# move it out of the branch
# move it out of the branch
for
item
in
items
:
for
item
in
items
:
del
item
[
0
]
del
item
[
0
]
subpattern
.
append
(
prefix
)
subpattern
.
append
(
prefix
)
continue
# check next one
continue
# check next one
break
break
# check if the branch can be replaced by a character set
# check if the branch can be replaced by a character set
for
item
in
items
:
for
item
in
items
:
if
len
(
item
)
!=
1
or
item
[
0
][
0
]
!=
LITERAL
:
if
len
(
item
)
!=
1
or
item
[
0
][
0
]
!=
LITERAL
:
break
break
else
:
else
:
# we can store this as a character set instead of a
# we can store this as a character set instead of a
# branch (FIXME: use a range if possible)
# branch (FIXME: use a range if possible)
set
=
[]
set
=
[]
for
item
in
items
:
for
item
in
items
:
set
.
append
(
item
[
0
])
set
.
append
(
item
[
0
])
subpattern
.
append
((
IN
,
set
))
subpattern
.
append
((
IN
,
set
))
return
subpattern
return
subpattern
subpattern
.
append
((
BRANCH
,
(
None
,
items
)))
subpattern
.
append
((
BRANCH
,
(
None
,
items
)))
return
subpattern
return
subpattern
...
@@ -309,197 +309,197 @@ def _parse(source, state, flags=0):
...
@@ -309,197 +309,197 @@ def _parse(source, state, flags=0):
while
1
:
while
1
:
if
source
.
next
in
(
"|"
,
")"
):
if
source
.
next
in
(
"|"
,
")"
):
break
# end of subpattern
break
# end of subpattern
this
=
source
.
get
()
this
=
source
.
get
()
if
this
is
None
:
if
this
is
None
:
break
# end of pattern
break
# end of pattern
if
state
.
flags
&
SRE_FLAG_VERBOSE
:
if
state
.
flags
&
SRE_FLAG_VERBOSE
:
# skip whitespace and comments
# skip whitespace and comments
if
this
in
WHITESPACE
:
if
this
in
WHITESPACE
:
continue
continue
if
this
==
"#"
:
if
this
==
"#"
:
while
1
:
while
1
:
this
=
source
.
get
()
this
=
source
.
get
()
if
this
in
(
None
,
"
\n
"
):
if
this
in
(
None
,
"
\n
"
):
break
break
continue
continue
if
this
and
this
[
0
]
not
in
SPECIAL_CHARS
:
if
this
and
this
[
0
]
not
in
SPECIAL_CHARS
:
subpattern
.
append
((
LITERAL
,
this
))
subpattern
.
append
((
LITERAL
,
this
))
elif
this
==
"["
:
elif
this
==
"["
:
# character set
# character set
set
=
[]
set
=
[]
##
if source.match(":"):
##
if source.match(":"):
##
pass # handle character classes
##
pass # handle character classes
if
source
.
match
(
"^"
):
if
source
.
match
(
"^"
):
set
.
append
((
NEGATE
,
None
))
set
.
append
((
NEGATE
,
None
))
# check remaining characters
# check remaining characters
start
=
set
[:]
start
=
set
[:]
while
1
:
while
1
:
this
=
source
.
get
()
this
=
source
.
get
()
if
this
==
"]"
and
set
!=
start
:
if
this
==
"]"
and
set
!=
start
:
break
break
elif
this
and
this
[
0
]
==
"
\\
"
:
elif
this
and
this
[
0
]
==
"
\\
"
:
code1
=
_class_escape
(
source
,
this
)
code1
=
_class_escape
(
source
,
this
)
elif
this
:
elif
this
:
code1
=
LITERAL
,
this
code1
=
LITERAL
,
this
else
:
else
:
raise
error
,
"unexpected end of regular expression"
raise
error
,
"unexpected end of regular expression"
if
source
.
match
(
"-"
):
if
source
.
match
(
"-"
):
# potential range
# potential range
this
=
source
.
get
()
this
=
source
.
get
()
if
this
==
"]"
:
if
this
==
"]"
:
set
.
append
(
code1
)
set
.
append
(
code1
)
set
.
append
((
LITERAL
,
"-"
))
set
.
append
((
LITERAL
,
"-"
))
break
break
else
:
else
:
if
this
[
0
]
==
"
\\
"
:
if
this
[
0
]
==
"
\\
"
:
code2
=
_class_escape
(
source
,
this
)
code2
=
_class_escape
(
source
,
this
)
else
:
else
:
code2
=
LITERAL
,
this
code2
=
LITERAL
,
this
if
code1
[
0
]
!=
LITERAL
or
code2
[
0
]
!=
LITERAL
:
if
code1
[
0
]
!=
LITERAL
or
code2
[
0
]
!=
LITERAL
:
raise
error
,
"illegal range"
raise
error
,
"illegal range"
if
len
(
code1
[
1
])
!=
1
or
len
(
code2
[
1
])
!=
1
:
if
len
(
code1
[
1
])
!=
1
or
len
(
code2
[
1
])
!=
1
:
raise
error
,
"illegal range"
raise
error
,
"illegal range"
set
.
append
((
RANGE
,
(
code1
[
1
],
code2
[
1
])))
set
.
append
((
RANGE
,
(
code1
[
1
],
code2
[
1
])))
else
:
else
:
if
code1
[
0
]
is
IN
:
if
code1
[
0
]
is
IN
:
code1
=
code1
[
1
][
0
]
code1
=
code1
[
1
][
0
]
set
.
append
(
code1
)
set
.
append
(
code1
)
# FIXME: <fl> move set optimization to compiler!
# FIXME: <fl> move set optimization to compiler!
if
len
(
set
)
==
1
and
set
[
0
][
0
]
is
LITERAL
:
if
len
(
set
)
==
1
and
set
[
0
][
0
]
is
LITERAL
:
subpattern
.
append
(
set
[
0
])
# optimization
subpattern
.
append
(
set
[
0
])
# optimization
elif
len
(
set
)
==
2
and
set
[
0
][
0
]
is
NEGATE
and
set
[
1
][
0
]
is
LITERAL
:
elif
len
(
set
)
==
2
and
set
[
0
][
0
]
is
NEGATE
and
set
[
1
][
0
]
is
LITERAL
:
subpattern
.
append
((
NOT_LITERAL
,
set
[
1
][
1
]))
# optimization
subpattern
.
append
((
NOT_LITERAL
,
set
[
1
][
1
]))
# optimization
else
:
else
:
# FIXME: <fl> add charmap optimization
# FIXME: <fl> add charmap optimization
subpattern
.
append
((
IN
,
set
))
subpattern
.
append
((
IN
,
set
))
elif
this
and
this
[
0
]
in
REPEAT_CHARS
:
elif
this
and
this
[
0
]
in
REPEAT_CHARS
:
# repeat previous item
# repeat previous item
if
this
==
"?"
:
if
this
==
"?"
:
min
,
max
=
0
,
1
min
,
max
=
0
,
1
elif
this
==
"*"
:
elif
this
==
"*"
:
min
,
max
=
0
,
MAXREPEAT
min
,
max
=
0
,
MAXREPEAT
elif
this
==
"+"
:
elif
this
==
"+"
:
min
,
max
=
1
,
MAXREPEAT
min
,
max
=
1
,
MAXREPEAT
elif
this
==
"{"
:
elif
this
==
"{"
:
min
,
max
=
0
,
MAXREPEAT
min
,
max
=
0
,
MAXREPEAT
lo
=
hi
=
""
lo
=
hi
=
""
while
source
.
next
in
DIGITS
:
while
source
.
next
in
DIGITS
:
lo
=
lo
+
source
.
get
()
lo
=
lo
+
source
.
get
()
if
source
.
match
(
","
):
if
source
.
match
(
","
):
while
source
.
next
in
DIGITS
:
while
source
.
next
in
DIGITS
:
hi
=
hi
+
source
.
get
()
hi
=
hi
+
source
.
get
()
else
:
else
:
hi
=
lo
hi
=
lo
if
not
source
.
match
(
"}"
):
if
not
source
.
match
(
"}"
):
raise
error
,
"bogus range"
raise
error
,
"bogus range"
if
lo
:
if
lo
:
min
=
int
(
lo
)
min
=
int
(
lo
)
if
hi
:
if
hi
:
max
=
int
(
hi
)
max
=
int
(
hi
)
# FIXME: <fl> check that hi >= lo!
# FIXME: <fl> check that hi >= lo!
else
:
else
:
raise
error
,
"not supported"
raise
error
,
"not supported"
# figure out which item to repeat
# figure out which item to repeat
if
subpattern
:
if
subpattern
:
item
=
subpattern
[
-
1
:]
item
=
subpattern
[
-
1
:]
else
:
else
:
raise
error
,
"nothing to repeat"
raise
error
,
"nothing to repeat"
if
source
.
match
(
"?"
):
if
source
.
match
(
"?"
):
subpattern
[
-
1
]
=
(
MIN_REPEAT
,
(
min
,
max
,
item
))
subpattern
[
-
1
]
=
(
MIN_REPEAT
,
(
min
,
max
,
item
))
else
:
else
:
subpattern
[
-
1
]
=
(
MAX_REPEAT
,
(
min
,
max
,
item
))
subpattern
[
-
1
]
=
(
MAX_REPEAT
,
(
min
,
max
,
item
))
elif
this
==
"."
:
elif
this
==
"."
:
subpattern
.
append
((
ANY
,
None
))
subpattern
.
append
((
ANY
,
None
))
elif
this
==
"("
:
elif
this
==
"("
:
group
=
1
group
=
1
name
=
None
name
=
None
if
source
.
match
(
"?"
):
if
source
.
match
(
"?"
):
group
=
0
group
=
0
# options
# options
if
source
.
match
(
"P"
):
if
source
.
match
(
"P"
):
# python extensions
# python extensions
if
source
.
match
(
"<"
):
if
source
.
match
(
"<"
):
# named group: skip forward to end of name
# named group: skip forward to end of name
name
=
""
name
=
""
while
1
:
while
1
:
char
=
source
.
get
()
char
=
source
.
get
()
if
char
is
None
:
if
char
is
None
:
raise
error
,
"unterminated name"
raise
error
,
"unterminated name"
if
char
==
">"
:
if
char
==
">"
:
break
break
name
=
name
+
char
name
=
name
+
char
group
=
1
group
=
1
if
not
isname
(
name
):
if
not
isname
(
name
):
raise
error
,
"illegal character in group name"
raise
error
,
"illegal character in group name"
elif
source
.
match
(
"="
):
elif
source
.
match
(
"="
):
# named backreference
# named backreference
raise
error
,
"not yet implemented"
raise
error
,
"not yet implemented"
else
:
else
:
char
=
source
.
get
()
char
=
source
.
get
()
if
char
is
None
:
if
char
is
None
:
raise
error
,
"unexpected end of pattern"
raise
error
,
"unexpected end of pattern"
raise
error
,
"unknown specifier: ?P
%
s"
%
char
raise
error
,
"unknown specifier: ?P
%
s"
%
char
elif
source
.
match
(
":"
):
elif
source
.
match
(
":"
):
# non-capturing group
# non-capturing group
group
=
2
group
=
2
elif
source
.
match
(
"#"
):
elif
source
.
match
(
"#"
):
# comment
# comment
while
1
:
while
1
:
if
source
.
next
is
None
or
source
.
next
==
")"
:
if
source
.
next
is
None
or
source
.
next
==
")"
:
break
break
source
.
get
()
source
.
get
()
else
:
else
:
# flags
# flags
while
FLAGS
.
has_key
(
source
.
next
):
while
FLAGS
.
has_key
(
source
.
next
):
state
.
flags
=
state
.
flags
|
FLAGS
[
source
.
get
()]
state
.
flags
=
state
.
flags
|
FLAGS
[
source
.
get
()]
if
group
:
if
group
:
# parse group contents
# parse group contents
b
=
[]
b
=
[]
if
group
==
2
:
if
group
==
2
:
# anonymous group
# anonymous group
group
=
None
group
=
None
else
:
else
:
group
=
state
.
getgroup
(
name
)
group
=
state
.
getgroup
(
name
)
while
1
:
while
1
:
p
=
_parse
(
source
,
state
,
flags
)
p
=
_parse
(
source
,
state
,
flags
)
if
source
.
match
(
")"
):
if
source
.
match
(
")"
):
if
b
:
if
b
:
b
.
append
(
p
)
b
.
append
(
p
)
p
=
_branch
(
state
,
b
)
p
=
_branch
(
state
,
b
)
subpattern
.
append
((
SUBPATTERN
,
(
group
,
p
)))
subpattern
.
append
((
SUBPATTERN
,
(
group
,
p
)))
break
break
elif
source
.
match
(
"|"
):
elif
source
.
match
(
"|"
):
b
.
append
(
p
)
b
.
append
(
p
)
else
:
else
:
raise
error
,
"group not properly closed"
raise
error
,
"group not properly closed"
else
:
else
:
while
1
:
while
1
:
char
=
source
.
get
()
char
=
source
.
get
()
if
char
is
None
or
char
==
")"
:
if
char
is
None
or
char
==
")"
:
break
break
raise
error
,
"unknown extension"
raise
error
,
"unknown extension"
elif
this
==
"^"
:
elif
this
==
"^"
:
subpattern
.
append
((
AT
,
AT_BEGINNING
))
subpattern
.
append
((
AT
,
AT_BEGINNING
))
elif
this
==
"$"
:
elif
this
==
"$"
:
subpattern
.
append
((
AT
,
AT_END
))
subpattern
.
append
((
AT
,
AT_END
))
elif
this
and
this
[
0
]
==
"
\\
"
:
elif
this
and
this
[
0
]
==
"
\\
"
:
code
=
_escape
(
source
,
this
,
state
)
code
=
_escape
(
source
,
this
,
state
)
subpattern
.
append
(
code
)
subpattern
.
append
(
code
)
else
:
else
:
raise
error
,
"parser error"
raise
error
,
"parser error"
return
subpattern
return
subpattern
...
@@ -509,19 +509,19 @@ def parse(pattern, flags=0):
...
@@ -509,19 +509,19 @@ def parse(pattern, flags=0):
state
=
State
()
state
=
State
()
b
=
[]
b
=
[]
while
1
:
while
1
:
p
=
_parse
(
source
,
state
,
flags
)
p
=
_parse
(
source
,
state
,
flags
)
tail
=
source
.
get
()
tail
=
source
.
get
()
if
tail
==
"|"
:
if
tail
==
"|"
:
b
.
append
(
p
)
b
.
append
(
p
)
elif
tail
==
")"
:
elif
tail
==
")"
:
raise
error
,
"unbalanced parenthesis"
raise
error
,
"unbalanced parenthesis"
elif
tail
is
None
:
elif
tail
is
None
:
if
b
:
if
b
:
b
.
append
(
p
)
b
.
append
(
p
)
p
=
_branch
(
state
,
b
)
p
=
_branch
(
state
,
b
)
break
break
else
:
else
:
raise
error
,
"bogus characters at end of regular expression"
raise
error
,
"bogus characters at end of regular expression"
return
p
return
p
def
parse_template
(
source
,
pattern
):
def
parse_template
(
source
,
pattern
):
...
@@ -531,59 +531,59 @@ def parse_template(source, pattern):
...
@@ -531,59 +531,59 @@ def parse_template(source, pattern):
p
=
[]
p
=
[]
a
=
p
.
append
a
=
p
.
append
while
1
:
while
1
:
this
=
s
.
get
()
this
=
s
.
get
()
if
this
is
None
:
if
this
is
None
:
break
# end of replacement string
break
# end of replacement string
if
this
and
this
[
0
]
==
"
\\
"
:
if
this
and
this
[
0
]
==
"
\\
"
:
# group
# group
if
this
==
"
\\
g"
:
if
this
==
"
\\
g"
:
name
=
""
name
=
""
if
s
.
match
(
"<"
):
if
s
.
match
(
"<"
):
while
1
:
while
1
:
char
=
s
.
get
()
char
=
s
.
get
()
if
char
is
None
:
if
char
is
None
:
raise
error
,
"unterminated group name"
raise
error
,
"unterminated group name"
if
char
==
">"
:
if
char
==
">"
:
break
break
name
=
name
+
char
name
=
name
+
char
if
not
name
:
if
not
name
:
raise
error
,
"bad group name"
raise
error
,
"bad group name"
try
:
try
:
index
=
int
(
name
)
index
=
int
(
name
)
except
ValueError
:
except
ValueError
:
if
not
isname
(
name
):
if
not
isname
(
name
):
raise
error
,
"illegal character in group name"
raise
error
,
"illegal character in group name"
try
:
try
:
index
=
pattern
.
groupindex
[
name
]
index
=
pattern
.
groupindex
[
name
]
except
KeyError
:
except
KeyError
:
raise
IndexError
,
"unknown group name"
raise
IndexError
,
"unknown group name"
a
((
MARK
,
index
))
a
((
MARK
,
index
))
elif
len
(
this
)
>
1
and
this
[
1
]
in
DIGITS
:
elif
len
(
this
)
>
1
and
this
[
1
]
in
DIGITS
:
code
=
None
code
=
None
while
1
:
while
1
:
group
=
_group
(
this
,
pattern
.
groups
+
1
)
group
=
_group
(
this
,
pattern
.
groups
+
1
)
if
group
:
if
group
:
if
(
not
s
.
next
or
if
(
not
s
.
next
or
not
_group
(
this
+
s
.
next
,
pattern
.
groups
+
1
)):
not
_group
(
this
+
s
.
next
,
pattern
.
groups
+
1
)):
code
=
MARK
,
int
(
group
)
code
=
MARK
,
int
(
group
)
break
break
elif
s
.
next
in
OCTDIGITS
:
elif
s
.
next
in
OCTDIGITS
:
this
=
this
+
s
.
get
()
this
=
this
+
s
.
get
()
else
:
else
:
break
break
if
not
code
:
if
not
code
:
this
=
this
[
1
:]
this
=
this
[
1
:]
# FIXME: support unicode characters!
# FIXME: support unicode characters!
code
=
LITERAL
,
chr
(
int
(
this
[
-
6
:],
8
)
&
0xff
)
code
=
LITERAL
,
chr
(
int
(
this
[
-
6
:],
8
)
&
0xff
)
a
(
code
)
a
(
code
)
else
:
else
:
try
:
try
:
a
(
ESCAPES
[
this
])
a
(
ESCAPES
[
this
])
except
KeyError
:
except
KeyError
:
for
c
in
this
:
for
c
in
this
:
a
((
LITERAL
,
c
))
a
((
LITERAL
,
c
))
else
:
else
:
a
((
LITERAL
,
this
))
a
((
LITERAL
,
this
))
return
p
return
p
def
expand_template
(
template
,
match
):
def
expand_template
(
template
,
match
):
...
@@ -592,11 +592,11 @@ def expand_template(template, match):
...
@@ -592,11 +592,11 @@ def expand_template(template, match):
p
=
[]
p
=
[]
a
=
p
.
append
a
=
p
.
append
for
c
,
s
in
template
:
for
c
,
s
in
template
:
if
c
is
LITERAL
:
if
c
is
LITERAL
:
a
(
s
)
a
(
s
)
elif
c
is
MARK
:
elif
c
is
MARK
:
s
=
match
.
group
(
s
)
s
=
match
.
group
(
s
)
if
s
is
None
:
if
s
is
None
:
raise
error
,
"empty group"
raise
error
,
"empty group"
a
(
s
)
a
(
s
)
return
match
.
string
[:
0
]
.
join
(
p
)
return
match
.
string
[:
0
]
.
join
(
p
)
Lib/test/test_sre.py
Dosyayı görüntüle @
90a07913
# FIXME: this is basically test_re.py, with a few
# FIXME: this is basically test_re.py, with a few
minor changes
import
sys
import
sys
sys
.
path
=
[
'.'
]
+
sys
.
path
sys
.
path
=
[
'.'
]
+
sys
.
path
...
@@ -337,7 +337,7 @@ for t in tests:
...
@@ -337,7 +337,7 @@ for t in tests:
print
repr
(
repl
)
+
' should be '
+
repr
(
expected
)
print
repr
(
repl
)
+
' should be '
+
repr
(
expected
)
else
:
else
:
print
'=== Failed incorrectly'
,
t
print
'=== Failed incorrectly'
,
t
continue
continue
# Try the match on a unicode string, and check that it
# Try the match on a unicode string, and check that it
# still succeeds.
# still succeeds.
...
@@ -359,9 +359,9 @@ for t in tests:
...
@@ -359,9 +359,9 @@ for t in tests:
if
pattern
[:
2
]
!=
'
\\
B'
and
pattern
[
-
2
:]
!=
'
\\
B'
:
if
pattern
[:
2
]
!=
'
\\
B'
and
pattern
[
-
2
:]
!=
'
\\
B'
:
obj
=
sre
.
compile
(
pattern
)
obj
=
sre
.
compile
(
pattern
)
result
=
obj
.
search
(
s
,
result
.
start
(
0
),
result
.
end
(
0
)
+
1
)
result
=
obj
.
search
(
s
,
result
.
start
(
0
),
result
.
end
(
0
)
+
1
)
if
result
==
None
:
if
result
==
None
:
print
'=== Failed on range-limited match'
,
t
print
'=== Failed on range-limited match'
,
t
# Try the match with IGNORECASE enabled, and check that it
# Try the match with IGNORECASE enabled, and check that it
# still succeeds.
# still succeeds.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment