Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
436c3d58
Kaydet (Commit)
436c3d58
authored
Haz 29, 2000
tarafından
Fredrik Lundh
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
towards 1.6b1
üst
102f3ad6
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
570 additions
and
338 deletions
+570
-338
sre.py
Lib/sre.py
+17
-6
sre_compile.py
Lib/sre_compile.py
+39
-27
sre_constants.py
Lib/sre_constants.py
+50
-22
sre_parse.py
Lib/sre_parse.py
+78
-35
_sre.c
Modules/_sre.c
+386
-248
No files found.
Lib/sre.py
Dosyayı görüntüle @
436c3d58
...
@@ -12,6 +12,7 @@
...
@@ -12,6 +12,7 @@
#
#
import
sre_compile
import
sre_compile
import
sre_parse
# flags
# flags
I
=
IGNORECASE
=
sre_compile
.
SRE_FLAG_IGNORECASE
I
=
IGNORECASE
=
sre_compile
.
SRE_FLAG_IGNORECASE
...
@@ -20,6 +21,13 @@ M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE
...
@@ -20,6 +21,13 @@ M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE
S
=
DOTALL
=
sre_compile
.
SRE_FLAG_DOTALL
S
=
DOTALL
=
sre_compile
.
SRE_FLAG_DOTALL
X
=
VERBOSE
=
sre_compile
.
SRE_FLAG_VERBOSE
X
=
VERBOSE
=
sre_compile
.
SRE_FLAG_VERBOSE
# sre extensions (may or may not be in 1.6 final)
T
=
TEMPLATE
=
sre_compile
.
SRE_FLAG_TEMPLATE
U
=
UNICODE
=
sre_compile
.
SRE_FLAG_UNICODE
# sre exception
error
=
sre_parse
.
error
# --------------------------------------------------------------------
# --------------------------------------------------------------------
# public interface
# public interface
...
@@ -46,6 +54,9 @@ def findall(pattern, string, maxsplit=0):
...
@@ -46,6 +54,9 @@ def findall(pattern, string, maxsplit=0):
def
compile
(
pattern
,
flags
=
0
):
def
compile
(
pattern
,
flags
=
0
):
return
_compile
(
pattern
,
flags
)
return
_compile
(
pattern
,
flags
)
def
template
(
pattern
,
flags
=
0
):
return
_compile
(
pattern
,
flags
|
T
)
def
escape
(
pattern
):
def
escape
(
pattern
):
s
=
list
(
pattern
)
s
=
list
(
pattern
)
for
i
in
range
(
len
(
pattern
)):
for
i
in
range
(
len
(
pattern
)):
...
@@ -83,18 +94,14 @@ def _sub(pattern, template, string, count=0):
...
@@ -83,18 +94,14 @@ def _sub(pattern, template, string, count=0):
# internal: pattern.sub implementation hook
# internal: pattern.sub implementation hook
return
_subn
(
pattern
,
template
,
string
,
count
)[
0
]
return
_subn
(
pattern
,
template
,
string
,
count
)[
0
]
def
_expand
(
match
,
template
):
# internal: expand template
return
template
# FIXME
def
_subn
(
pattern
,
template
,
string
,
count
=
0
):
def
_subn
(
pattern
,
template
,
string
,
count
=
0
):
# internal: pattern.subn implementation hook
# internal: pattern.subn implementation hook
if
callable
(
template
):
if
callable
(
template
):
filter
=
template
filter
=
template
else
:
else
:
# FIXME: prepare template
template
=
sre_parse
.
parse_template
(
template
,
pattern
)
def
filter
(
match
,
template
=
template
):
def
filter
(
match
,
template
=
template
):
return
_expand
(
match
,
template
)
return
sre_parse
.
expand_template
(
template
,
match
)
n
=
i
=
0
n
=
i
=
0
s
=
[]
s
=
[]
append
=
s
.
append
append
=
s
.
append
...
@@ -108,6 +115,8 @@ def _subn(pattern, template, string, count=0):
...
@@ -108,6 +115,8 @@ def _subn(pattern, template, string, count=0):
append
(
string
[
i
:
j
])
append
(
string
[
i
:
j
])
append
(
filter
(
m
))
append
(
filter
(
m
))
i
=
m
.
end
()
i
=
m
.
end
()
if
i
<=
j
:
break
n
=
n
+
1
n
=
n
+
1
if
i
<
len
(
string
):
if
i
<
len
(
string
):
append
(
string
[
i
:])
append
(
string
[
i
:])
...
@@ -126,6 +135,8 @@ def _split(pattern, string, maxsplit=0):
...
@@ -126,6 +135,8 @@ def _split(pattern, string, maxsplit=0):
j
=
m
.
start
()
j
=
m
.
start
()
append
(
string
[
i
:
j
])
append
(
string
[
i
:
j
])
i
=
m
.
end
()
i
=
m
.
end
()
if
i
<=
j
:
break
n
=
n
+
1
n
=
n
+
1
if
i
<
len
(
string
):
if
i
<
len
(
string
):
append
(
string
[
i
:])
append
(
string
[
i
:])
...
...
Lib/sre_compile.py
Dosyayı görüntüle @
436c3d58
...
@@ -48,7 +48,7 @@ class Code:
...
@@ -48,7 +48,7 @@ class Code:
print
self
.
data
print
self
.
data
raise
raise
def
_compile
(
code
,
pattern
,
flags
,
level
=
0
):
def
_compile
(
code
,
pattern
,
flags
):
append
=
code
.
append
append
=
code
.
append
for
op
,
av
in
pattern
:
for
op
,
av
in
pattern
:
if
op
is
ANY
:
if
op
is
ANY
:
...
@@ -70,23 +70,26 @@ def _compile(code, pattern, flags, level=0):
...
@@ -70,23 +70,26 @@ def _compile(code, pattern, flags, level=0):
tail
=
[]
tail
=
[]
for
av
in
av
[
1
]:
for
av
in
av
[
1
]:
skip
=
len
(
code
);
append
(
0
)
skip
=
len
(
code
);
append
(
0
)
_compile
(
code
,
av
,
flags
,
level
)
_compile
(
code
,
av
,
flags
)
append
(
OPCODES
[
JUMP
])
## append(OPCODES[SUCCESS])
tail
.
append
(
len
(
code
));
append
(
0
)
append
(
OPCODES
[
JUMP
])
tail
.
append
(
len
(
code
));
append
(
0
)
code
[
skip
]
=
len
(
code
)
-
skip
code
[
skip
]
=
len
(
code
)
-
skip
append
(
0
)
# end of branch
append
(
0
)
# end of branch
for
tail
in
tail
:
for
tail
in
tail
:
code
[
tail
]
=
len
(
code
)
-
tail
code
[
tail
]
=
len
(
code
)
-
tail
elif
op
is
CALL
:
elif
op
is
CALL
:
append
(
OPCODES
[
op
])
append
(
OPCODES
[
op
])
skip
=
len
(
code
);
append
(
0
)
skip
=
len
(
code
);
append
(
0
)
_compile
(
code
,
av
,
flags
,
level
+
1
)
_compile
(
code
,
av
,
flags
)
append
(
OPCODES
[
SUCCESS
])
append
(
OPCODES
[
SUCCESS
])
code
[
skip
]
=
len
(
code
)
-
skip
code
[
skip
]
=
len
(
code
)
-
skip
elif
op
is
CATEGORY
:
# not used by current parser
elif
op
is
CATEGORY
:
append
(
OPCODES
[
op
])
append
(
OPCODES
[
op
])
if
flags
&
SRE_FLAG_LOCALE
:
if
flags
&
SRE_FLAG_LOCALE
:
append
(
CH_LOCALE
[
CHCODES
[
av
]])
append
(
CH_LOCALE
[
CHCODES
[
av
]])
elif
flags
&
SRE_FLAG_UNICODE
:
append
(
CH_UNICODE
[
CHCODES
[
av
]])
else
:
else
:
append
(
CHCODES
[
av
])
append
(
CHCODES
[
av
])
elif
op
is
GROUP
:
elif
op
is
GROUP
:
...
@@ -98,8 +101,8 @@ def _compile(code, pattern, flags, level=0):
...
@@ -98,8 +101,8 @@ def _compile(code, pattern, flags, level=0):
elif
op
is
IN
:
elif
op
is
IN
:
if
flags
&
SRE_FLAG_IGNORECASE
:
if
flags
&
SRE_FLAG_IGNORECASE
:
append
(
OPCODES
[
OP_IGNORE
[
op
]])
append
(
OPCODES
[
OP_IGNORE
[
op
]])
def
fixup
(
literal
):
def
fixup
(
literal
,
flags
=
flags
):
return
ord
(
literal
.
lower
()
)
return
_sre
.
getlower
(
ord
(
literal
),
flags
)
else
:
else
:
append
(
OPCODES
[
op
])
append
(
OPCODES
[
op
])
fixup
=
ord
fixup
=
ord
...
@@ -116,6 +119,8 @@ def _compile(code, pattern, flags, level=0):
...
@@ -116,6 +119,8 @@ def _compile(code, pattern, flags, level=0):
elif
op
is
CATEGORY
:
elif
op
is
CATEGORY
:
if
flags
&
SRE_FLAG_LOCALE
:
if
flags
&
SRE_FLAG_LOCALE
:
append
(
CH_LOCALE
[
CHCODES
[
av
]])
append
(
CH_LOCALE
[
CHCODES
[
av
]])
elif
flags
&
SRE_FLAG_UNICODE
:
append
(
CH_UNICODE
[
CHCODES
[
av
]])
else
:
else
:
append
(
CHCODES
[
av
])
append
(
CHCODES
[
av
])
else
:
else
:
...
@@ -125,42 +130,49 @@ def _compile(code, pattern, flags, level=0):
...
@@ -125,42 +130,49 @@ def _compile(code, pattern, flags, level=0):
elif
op
in
(
LITERAL
,
NOT_LITERAL
):
elif
op
in
(
LITERAL
,
NOT_LITERAL
):
if
flags
&
SRE_FLAG_IGNORECASE
:
if
flags
&
SRE_FLAG_IGNORECASE
:
append
(
OPCODES
[
OP_IGNORE
[
op
]])
append
(
OPCODES
[
OP_IGNORE
[
op
]])
append
(
ord
(
av
.
lower
()))
else
:
else
:
append
(
OPCODES
[
op
])
append
(
OPCODES
[
op
])
append
(
ord
(
av
))
append
(
ord
(
av
))
elif
op
is
MARK
:
elif
op
is
MARK
:
append
(
OPCODES
[
op
])
append
(
OPCODES
[
op
])
append
(
av
)
append
(
av
)
elif
op
in
(
REPEAT
,
MIN_REPEAT
,
MAX_REPEAT
):
elif
op
in
(
REPEAT
,
MIN_REPEAT
,
MAX_REPEAT
):
lo
,
hi
=
av
[
2
]
.
getwidth
()
if
flags
&
SRE_FLAG_TEMPLATE
:
if
lo
==
0
:
append
(
OPCODES
[
REPEAT
])
raise
SyntaxError
,
"cannot repeat zero-width items"
if
lo
==
hi
==
1
and
op
is
MAX_REPEAT
:
append
(
OPCODES
[
MAX_REPEAT_ONE
])
skip
=
len
(
code
);
append
(
0
)
skip
=
len
(
code
);
append
(
0
)
append
(
av
[
0
])
append
(
av
[
0
])
append
(
av
[
1
])
append
(
av
[
1
])
_compile
(
code
,
av
[
2
],
flags
,
level
+
1
)
_compile
(
code
,
av
[
2
],
flags
)
append
(
OPCODES
[
SUCCESS
])
append
(
OPCODES
[
SUCCESS
])
code
[
skip
]
=
len
(
code
)
-
skip
code
[
skip
]
=
len
(
code
)
-
skip
else
:
else
:
append
(
OPCODES
[
op
])
lo
,
hi
=
av
[
2
]
.
getwidth
()
skip
=
len
(
code
);
append
(
0
)
if
lo
==
0
:
append
(
av
[
0
])
raise
error
,
"nothing to repeat"
append
(
av
[
1
])
if
0
and
lo
==
hi
==
1
and
op
is
MAX_REPEAT
:
_compile
(
code
,
av
[
2
],
flags
,
level
+
1
)
# FIXME: <fl> need a better way to figure out when
if
op
is
MIN_REPEAT
:
# it's safe to use this one (in the parser, probably)
append
(
OPCODES
[
MIN_UNTIL
])
append
(
OPCODES
[
MAX_REPEAT_ONE
])
skip
=
len
(
code
);
append
(
0
)
append
(
av
[
0
])
append
(
av
[
1
])
_compile
(
code
,
av
[
2
],
flags
)
append
(
OPCODES
[
SUCCESS
])
code
[
skip
]
=
len
(
code
)
-
skip
else
:
else
:
append
(
OPCODES
[
MAX_UNTIL
])
append
(
OPCODES
[
op
])
code
[
skip
]
=
len
(
code
)
-
skip
skip
=
len
(
code
);
append
(
0
)
append
(
av
[
0
])
append
(
av
[
1
])
_compile
(
code
,
av
[
2
],
flags
)
append
(
OPCODES
[
SUCCESS
])
code
[
skip
]
=
len
(
code
)
-
skip
elif
op
is
SUBPATTERN
:
elif
op
is
SUBPATTERN
:
group
=
av
[
0
]
group
=
av
[
0
]
if
group
:
if
group
:
append
(
OPCODES
[
MARK
])
append
(
OPCODES
[
MARK
])
append
((
group
-
1
)
*
2
)
append
((
group
-
1
)
*
2
)
_compile
(
code
,
av
[
1
],
flags
,
level
+
1
)
_compile
(
code
,
av
[
1
],
flags
)
if
group
:
if
group
:
append
(
OPCODES
[
MARK
])
append
(
OPCODES
[
MARK
])
append
((
group
-
1
)
*
2
+
1
)
append
((
group
-
1
)
*
2
+
1
)
...
...
Lib/sre_constants.py
Dosyayı görüntüle @
436c3d58
...
@@ -15,6 +15,11 @@
...
@@ -15,6 +15,11 @@
# other compatibility work.
# other compatibility work.
#
#
# should this really be here?
class
error
(
Exception
):
pass
# operators
# operators
FAILURE
=
"failure"
FAILURE
=
"failure"
...
@@ -30,20 +35,20 @@ GROUP = "group"
...
@@ -30,20 +35,20 @@ GROUP = "group"
GROUP_IGNORE
=
"group_ignore"
GROUP_IGNORE
=
"group_ignore"
IN
=
"in"
IN
=
"in"
IN_IGNORE
=
"in_ignore"
IN_IGNORE
=
"in_ignore"
INFO
=
"info"
JUMP
=
"jump"
JUMP
=
"jump"
LITERAL
=
"literal"
LITERAL
=
"literal"
LITERAL_IGNORE
=
"literal_ignore"
LITERAL_IGNORE
=
"literal_ignore"
MARK
=
"mark"
MARK
=
"mark"
MAX_REPEAT
=
"max_repeat"
MAX_REPEAT
=
"max_repeat"
MAX_REPEAT_ONE
=
"max_repeat_one"
MAX_REPEAT_ONE
=
"max_repeat_one"
MAX_UNTIL
=
"max_until"
MIN_REPEAT
=
"min_repeat"
MIN_REPEAT
=
"min_repeat"
MIN_UNTIL
=
"min_until"
NEGATE
=
"negate"
NEGATE
=
"negate"
NOT_LITERAL
=
"not_literal"
NOT_LITERAL
=
"not_literal"
NOT_LITERAL_IGNORE
=
"not_literal_ignore"
NOT_LITERAL_IGNORE
=
"not_literal_ignore"
RANGE
=
"range"
RANGE
=
"range"
REPEAT
=
"repeat"
REPEAT
=
"repeat"
REPEAT_ONE
=
"repeat_one"
SUBPATTERN
=
"subpattern"
SUBPATTERN
=
"subpattern"
# positions
# positions
...
@@ -63,14 +68,16 @@ CATEGORY_WORD = "category_word"
...
@@ -63,14 +68,16 @@ CATEGORY_WORD = "category_word"
CATEGORY_NOT_WORD
=
"category_not_word"
CATEGORY_NOT_WORD
=
"category_not_word"
CATEGORY_LINEBREAK
=
"category_linebreak"
CATEGORY_LINEBREAK
=
"category_linebreak"
CATEGORY_NOT_LINEBREAK
=
"category_not_linebreak"
CATEGORY_NOT_LINEBREAK
=
"category_not_linebreak"
CATEGORY_LOC_DIGIT
=
"category_loc_digit"
CATEGORY_LOC_NOT_DIGIT
=
"category_loc_not_digit"
CATEGORY_LOC_SPACE
=
"category_loc_space"
CATEGORY_LOC_NOT_SPACE
=
"category_loc_not_space"
CATEGORY_LOC_WORD
=
"category_loc_word"
CATEGORY_LOC_WORD
=
"category_loc_word"
CATEGORY_LOC_NOT_WORD
=
"category_loc_not_word"
CATEGORY_LOC_NOT_WORD
=
"category_loc_not_word"
CATEGORY_LOC_LINEBREAK
=
"category_loc_linebreak"
CATEGORY_UNI_DIGIT
=
"category_uni_digit"
CATEGORY_LOC_NOT_LINEBREAK
=
"category_loc_not_linebreak"
CATEGORY_UNI_NOT_DIGIT
=
"category_uni_not_digit"
CATEGORY_UNI_SPACE
=
"category_uni_space"
CATEGORY_UNI_NOT_SPACE
=
"category_uni_not_space"
CATEGORY_UNI_WORD
=
"category_uni_word"
CATEGORY_UNI_NOT_WORD
=
"category_uni_not_word"
CATEGORY_UNI_LINEBREAK
=
"category_uni_linebreak"
CATEGORY_UNI_NOT_LINEBREAK
=
"category_uni_not_linebreak"
OPCODES
=
[
OPCODES
=
[
...
@@ -85,12 +92,13 @@ OPCODES = [
...
@@ -85,12 +92,13 @@ OPCODES = [
CATEGORY
,
CATEGORY
,
GROUP
,
GROUP_IGNORE
,
GROUP
,
GROUP_IGNORE
,
IN
,
IN_IGNORE
,
IN
,
IN_IGNORE
,
INFO
,
JUMP
,
JUMP
,
LITERAL
,
LITERAL_IGNORE
,
LITERAL
,
LITERAL_IGNORE
,
MARK
,
MARK
,
MAX_REPEAT
,
MAX_UNTIL
,
MAX_REPEAT
,
MAX_REPEAT_ONE
,
MAX_REPEAT_ONE
,
MIN_REPEAT
,
MIN_UNTIL
,
MIN_REPEAT
,
NOT_LITERAL
,
NOT_LITERAL_IGNORE
,
NOT_LITERAL
,
NOT_LITERAL_IGNORE
,
NEGATE
,
NEGATE
,
RANGE
,
RANGE
,
...
@@ -106,10 +114,11 @@ ATCODES = [
...
@@ -106,10 +114,11 @@ ATCODES = [
CHCODES
=
[
CHCODES
=
[
CATEGORY_DIGIT
,
CATEGORY_NOT_DIGIT
,
CATEGORY_SPACE
,
CATEGORY_DIGIT
,
CATEGORY_NOT_DIGIT
,
CATEGORY_SPACE
,
CATEGORY_NOT_SPACE
,
CATEGORY_WORD
,
CATEGORY_NOT_WORD
,
CATEGORY_NOT_SPACE
,
CATEGORY_WORD
,
CATEGORY_NOT_WORD
,
CATEGORY_LINEBREAK
,
CATEGORY_NOT_LINEBREAK
,
CATEGORY_LOC_DIGIT
,
CATEGORY_LINEBREAK
,
CATEGORY_NOT_LINEBREAK
,
CATEGORY_LOC_WORD
,
CATEGORY_LOC_NOT_DIGIT
,
CATEGORY_LOC_SPACE
,
CATEGORY_LOC_NOT_WORD
,
CATEGORY_UNI_DIGIT
,
CATEGORY_UNI_NOT_DIGIT
,
CATEGORY_LOC_NOT_SPACE
,
CATEGORY_LOC_WORD
,
CATEGORY_LOC_NOT_WORD
,
CATEGORY_UNI_SPACE
,
CATEGORY_UNI_NOT_SPACE
,
CATEGORY_UNI_WORD
,
CATEGORY_LOC_LINEBREAK
,
CATEGORY_LOC_NOT_LINEBREAK
CATEGORY_UNI_NOT_WORD
,
CATEGORY_UNI_LINEBREAK
,
CATEGORY_UNI_NOT_LINEBREAK
]
]
def
makedict
(
list
):
def
makedict
(
list
):
...
@@ -138,23 +147,35 @@ AT_MULTILINE = {
...
@@ -138,23 +147,35 @@ AT_MULTILINE = {
}
}
CH_LOCALE
=
{
CH_LOCALE
=
{
CATEGORY_DIGIT
:
CATEGORY_
LOC_
DIGIT
,
CATEGORY_DIGIT
:
CATEGORY_DIGIT
,
CATEGORY_NOT_DIGIT
:
CATEGORY_
LOC_
NOT_DIGIT
,
CATEGORY_NOT_DIGIT
:
CATEGORY_NOT_DIGIT
,
CATEGORY_SPACE
:
CATEGORY_
LOC_
SPACE
,
CATEGORY_SPACE
:
CATEGORY_SPACE
,
CATEGORY_NOT_SPACE
:
CATEGORY_
LOC_
NOT_SPACE
,
CATEGORY_NOT_SPACE
:
CATEGORY_NOT_SPACE
,
CATEGORY_WORD
:
CATEGORY_LOC_WORD
,
CATEGORY_WORD
:
CATEGORY_LOC_WORD
,
CATEGORY_NOT_WORD
:
CATEGORY_LOC_NOT_WORD
,
CATEGORY_NOT_WORD
:
CATEGORY_LOC_NOT_WORD
,
CATEGORY_LINEBREAK
:
CATEGORY_LOC_LINEBREAK
,
CATEGORY_LINEBREAK
:
CATEGORY_LINEBREAK
,
CATEGORY_NOT_LINEBREAK
:
CATEGORY_LOC_NOT_LINEBREAK
CATEGORY_NOT_LINEBREAK
:
CATEGORY_NOT_LINEBREAK
}
CH_UNICODE
=
{
CATEGORY_DIGIT
:
CATEGORY_UNI_DIGIT
,
CATEGORY_NOT_DIGIT
:
CATEGORY_UNI_NOT_DIGIT
,
CATEGORY_SPACE
:
CATEGORY_UNI_SPACE
,
CATEGORY_NOT_SPACE
:
CATEGORY_UNI_NOT_SPACE
,
CATEGORY_WORD
:
CATEGORY_UNI_WORD
,
CATEGORY_NOT_WORD
:
CATEGORY_UNI_NOT_WORD
,
CATEGORY_LINEBREAK
:
CATEGORY_UNI_LINEBREAK
,
CATEGORY_NOT_LINEBREAK
:
CATEGORY_UNI_NOT_LINEBREAK
}
}
# flags
# flags
SRE_FLAG_TEMPLATE
=
1
# NYI
SRE_FLAG_TEMPLATE
=
1
SRE_FLAG_IGNORECASE
=
2
SRE_FLAG_IGNORECASE
=
2
SRE_FLAG_LOCALE
=
4
SRE_FLAG_LOCALE
=
4
SRE_FLAG_MULTILINE
=
8
SRE_FLAG_MULTILINE
=
8
SRE_FLAG_DOTALL
=
16
SRE_FLAG_DOTALL
=
16
SRE_FLAG_VERBOSE
=
32
SRE_FLAG_UNICODE
=
32
SRE_FLAG_VERBOSE
=
64
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
import
string
import
string
...
@@ -168,5 +189,12 @@ if __name__ == "__main__":
...
@@ -168,5 +189,12 @@ if __name__ == "__main__":
dump
(
f
,
OPCODES
,
"SRE_OP"
)
dump
(
f
,
OPCODES
,
"SRE_OP"
)
dump
(
f
,
ATCODES
,
"SRE"
)
dump
(
f
,
ATCODES
,
"SRE"
)
dump
(
f
,
CHCODES
,
"SRE"
)
dump
(
f
,
CHCODES
,
"SRE"
)
f
.
write
(
"#define SRE_FLAG_TEMPLATE
%
d
\n
"
%
SRE_FLAG_TEMPLATE
)
f
.
write
(
"#define SRE_FLAG_IGNORECASE
%
d
\n
"
%
SRE_FLAG_IGNORECASE
)
f
.
write
(
"#define SRE_FLAG_LOCALE
%
d
\n
"
%
SRE_FLAG_LOCALE
)
f
.
write
(
"#define SRE_FLAG_MULTILINE
%
d
\n
"
%
SRE_FLAG_MULTILINE
)
f
.
write
(
"#define SRE_FLAG_DOTALL
%
d
\n
"
%
SRE_FLAG_DOTALL
)
f
.
write
(
"#define SRE_FLAG_UNICODE
%
d
\n
"
%
SRE_FLAG_UNICODE
)
f
.
write
(
"#define SRE_FLAG_VERBOSE
%
d
\n
"
%
SRE_FLAG_VERBOSE
)
f
.
close
()
f
.
close
()
print
"done"
print
"done"
Lib/sre_parse.py
Dosyayı görüntüle @
436c3d58
...
@@ -20,14 +20,15 @@ import _sre
...
@@ -20,14 +20,15 @@ import _sre
from
sre_constants
import
*
from
sre_constants
import
*
# FIXME:
should be 65535, but the array module currently chokes on
# FIXME:
<fl> should be 65535, but the array module currently chokes
#
unsigned integers larger than 32767...
#
on unsigned integers larger than 32767 [fixed in 1.6b1?]
MAXREPEAT
=
int
(
2L
**
(
_sre
.
getcodesize
()
*
8
-
1
))
-
1
MAXREPEAT
=
int
(
2L
**
(
_sre
.
getcodesize
()
*
8
-
1
))
-
1
SPECIAL_CHARS
=
".
\\
[{()*+?^$|"
SPECIAL_CHARS
=
".
\\
[{()*+?^$|"
REPEAT_CHARS
=
"*+?{"
REPEAT_CHARS
=
"*+?{"
# FIXME: string in tuple tests may explode with if char is unicode :-(
# FIXME: <fl> string in tuple tests may explode with if char is
# unicode [fixed in 1.6b1?]
DIGITS
=
tuple
(
string
.
digits
)
DIGITS
=
tuple
(
string
.
digits
)
OCTDIGITS
=
tuple
(
"01234567"
)
OCTDIGITS
=
tuple
(
"01234567"
)
...
@@ -59,12 +60,15 @@ CATEGORIES = {
...
@@ -59,12 +60,15 @@ CATEGORIES = {
}
}
FLAGS
=
{
FLAGS
=
{
# standard flags
"i"
:
SRE_FLAG_IGNORECASE
,
"i"
:
SRE_FLAG_IGNORECASE
,
"L"
:
SRE_FLAG_LOCALE
,
"L"
:
SRE_FLAG_LOCALE
,
"m"
:
SRE_FLAG_MULTILINE
,
"m"
:
SRE_FLAG_MULTILINE
,
"s"
:
SRE_FLAG_DOTALL
,
"s"
:
SRE_FLAG_DOTALL
,
"t"
:
SRE_FLAG_TEMPLATE
,
"x"
:
SRE_FLAG_VERBOSE
,
"x"
:
SRE_FLAG_VERBOSE
,
# extensions
"t"
:
SRE_FLAG_TEMPLATE
,
"u"
:
SRE_FLAG_UNICODE
,
}
}
class
State
:
class
State
:
...
@@ -151,7 +155,7 @@ class Tokenizer:
...
@@ -151,7 +155,7 @@ class Tokenizer:
try
:
try
:
c
=
self
.
string
[
self
.
index
+
1
]
c
=
self
.
string
[
self
.
index
+
1
]
except
IndexError
:
except
IndexError
:
raise
SyntaxE
rror
,
"bogus escape"
raise
e
rror
,
"bogus escape"
char
=
char
+
c
char
=
char
+
c
self
.
index
=
self
.
index
+
len
(
char
)
self
.
index
=
self
.
index
+
len
(
char
)
return
char
return
char
...
@@ -205,7 +209,7 @@ def _class_escape(source, escape):
...
@@ -205,7 +209,7 @@ def _class_escape(source, escape):
return
LITERAL
,
escape
[
1
]
return
LITERAL
,
escape
[
1
]
except
ValueError
:
except
ValueError
:
pass
pass
raise
SyntaxE
rror
,
"bogus escape:
%
s"
%
repr
(
escape
)
raise
e
rror
,
"bogus escape:
%
s"
%
repr
(
escape
)
def
_escape
(
source
,
escape
,
state
):
def
_escape
(
source
,
escape
,
state
):
# handle escape code in expression
# handle escape code in expression
...
@@ -241,13 +245,12 @@ def _escape(source, escape, state):
...
@@ -241,13 +245,12 @@ def _escape(source, escape, state):
return
LITERAL
,
escape
[
1
]
return
LITERAL
,
escape
[
1
]
except
ValueError
:
except
ValueError
:
pass
pass
raise
SyntaxE
rror
,
"bogus escape:
%
s"
%
repr
(
escape
)
raise
e
rror
,
"bogus escape:
%
s"
%
repr
(
escape
)
def
_branch
(
pattern
,
items
):
def
_branch
(
pattern
,
items
):
# form a branch operator from a set of items (FIXME: move this
# form a branch operator from a set of items
# optimization to the compiler module!)
subpattern
=
SubPattern
(
pattern
)
subpattern
=
SubPattern
(
pattern
)
...
@@ -332,7 +335,7 @@ def _parse(source, state, flags=0):
...
@@ -332,7 +335,7 @@ def _parse(source, state, flags=0):
elif
this
:
elif
this
:
code1
=
LITERAL
,
this
code1
=
LITERAL
,
this
else
:
else
:
raise
SyntaxE
rror
,
"unexpected end of regular expression"
raise
e
rror
,
"unexpected end of regular expression"
if
source
.
match
(
"-"
):
if
source
.
match
(
"-"
):
# potential range
# potential range
this
=
source
.
get
()
this
=
source
.
get
()
...
@@ -346,9 +349,9 @@ def _parse(source, state, flags=0):
...
@@ -346,9 +349,9 @@ def _parse(source, state, flags=0):
else
:
else
:
code2
=
LITERAL
,
this
code2
=
LITERAL
,
this
if
code1
[
0
]
!=
LITERAL
or
code2
[
0
]
!=
LITERAL
:
if
code1
[
0
]
!=
LITERAL
or
code2
[
0
]
!=
LITERAL
:
raise
SyntaxE
rror
,
"illegal range"
raise
e
rror
,
"illegal range"
if
len
(
code1
[
1
])
!=
1
or
len
(
code2
[
1
])
!=
1
:
if
len
(
code1
[
1
])
!=
1
or
len
(
code2
[
1
])
!=
1
:
raise
SyntaxE
rror
,
"illegal range"
raise
e
rror
,
"illegal range"
set
.
append
((
RANGE
,
(
code1
[
1
],
code2
[
1
])))
set
.
append
((
RANGE
,
(
code1
[
1
],
code2
[
1
])))
else
:
else
:
if
code1
[
0
]
is
IN
:
if
code1
[
0
]
is
IN
:
...
@@ -383,19 +386,19 @@ def _parse(source, state, flags=0):
...
@@ -383,19 +386,19 @@ def _parse(source, state, flags=0):
else
:
else
:
hi
=
lo
hi
=
lo
if
not
source
.
match
(
"}"
):
if
not
source
.
match
(
"}"
):
raise
SyntaxE
rror
,
"bogus range"
raise
e
rror
,
"bogus range"
if
lo
:
if
lo
:
min
=
int
(
lo
)
min
=
int
(
lo
)
if
hi
:
if
hi
:
max
=
int
(
hi
)
max
=
int
(
hi
)
# FIXME: <fl> check that hi >= lo!
# FIXME: <fl> check that hi >= lo!
else
:
else
:
raise
SyntaxE
rror
,
"not supported"
raise
e
rror
,
"not supported"
# figure out which item to repeat
# figure out which item to repeat
if
subpattern
:
if
subpattern
:
item
=
subpattern
[
-
1
:]
item
=
subpattern
[
-
1
:]
else
:
else
:
raise
SyntaxE
rror
,
"nothing to repeat"
raise
e
rror
,
"nothing to repeat"
if
source
.
match
(
"?"
):
if
source
.
match
(
"?"
):
subpattern
[
-
1
]
=
(
MIN_REPEAT
,
(
min
,
max
,
item
))
subpattern
[
-
1
]
=
(
MIN_REPEAT
,
(
min
,
max
,
item
))
else
:
else
:
...
@@ -418,7 +421,7 @@ def _parse(source, state, flags=0):
...
@@ -418,7 +421,7 @@ def _parse(source, state, flags=0):
while
1
:
while
1
:
char
=
source
.
get
()
char
=
source
.
get
()
if
char
is
None
:
if
char
is
None
:
raise
SyntaxE
rror
,
"unterminated name"
raise
e
rror
,
"unterminated name"
if
char
==
">"
:
if
char
==
">"
:
break
break
# FIXME: check for valid character
# FIXME: check for valid character
...
@@ -426,22 +429,21 @@ def _parse(source, state, flags=0):
...
@@ -426,22 +429,21 @@ def _parse(source, state, flags=0):
group
=
1
group
=
1
elif
source
.
match
(
"="
):
elif
source
.
match
(
"="
):
# named backreference
# named backreference
raise
SyntaxError
,
"not yet implemented"
raise
error
,
"not yet implemented"
else
:
else
:
char
=
source
.
get
()
char
=
source
.
get
()
if
char
is
None
:
if
char
is
None
:
raise
SyntaxE
rror
,
"unexpected end of pattern"
raise
e
rror
,
"unexpected end of pattern"
raise
SyntaxE
rror
,
"unknown specifier: ?P
%
s"
%
char
raise
e
rror
,
"unknown specifier: ?P
%
s"
%
char
elif
source
.
match
(
":"
):
elif
source
.
match
(
":"
):
# non-capturing group
# non-capturing group
group
=
2
group
=
2
elif
source
.
match
(
"#"
):
elif
source
.
match
(
"#"
):
# comment
# comment
while
1
:
while
1
:
char
=
source
.
get
()
if
source
.
next
is
None
or
source
.
next
==
")"
:
if
char
is
None
or
char
==
")"
:
break
break
source
.
get
()
else
:
else
:
# flags
# flags
while
FLAGS
.
has_key
(
source
.
next
):
while
FLAGS
.
has_key
(
source
.
next
):
...
@@ -465,13 +467,13 @@ def _parse(source, state, flags=0):
...
@@ -465,13 +467,13 @@ def _parse(source, state, flags=0):
elif
source
.
match
(
"|"
):
elif
source
.
match
(
"|"
):
b
.
append
(
p
)
b
.
append
(
p
)
else
:
else
:
raise
SyntaxE
rror
,
"group not properly closed"
raise
e
rror
,
"group not properly closed"
else
:
else
:
while
1
:
while
1
:
char
=
source
.
get
()
char
=
source
.
get
()
if
char
is
None
or
char
==
")"
:
if
char
is
None
or
char
==
")"
:
break
break
# FIXME: skip characters?
raise
error
,
"unknown extension"
elif
this
==
"^"
:
elif
this
==
"^"
:
subpattern
.
append
((
AT
,
AT_BEGINNING
))
subpattern
.
append
((
AT
,
AT_BEGINNING
))
...
@@ -484,7 +486,7 @@ def _parse(source, state, flags=0):
...
@@ -484,7 +486,7 @@ def _parse(source, state, flags=0):
subpattern
.
append
(
code
)
subpattern
.
append
(
code
)
else
:
else
:
raise
SyntaxE
rror
,
"parser error"
raise
e
rror
,
"parser error"
return
subpattern
return
subpattern
...
@@ -499,17 +501,17 @@ def parse(pattern, flags=0):
...
@@ -499,17 +501,17 @@ def parse(pattern, flags=0):
if
tail
==
"|"
:
if
tail
==
"|"
:
b
.
append
(
p
)
b
.
append
(
p
)
elif
tail
==
")"
:
elif
tail
==
")"
:
raise
SyntaxE
rror
,
"unbalanced parenthesis"
raise
e
rror
,
"unbalanced parenthesis"
elif
tail
is
None
:
elif
tail
is
None
:
if
b
:
if
b
:
b
.
append
(
p
)
b
.
append
(
p
)
p
=
_branch
(
state
,
b
)
p
=
_branch
(
state
,
b
)
break
break
else
:
else
:
raise
SyntaxE
rror
,
"bogus characters at end of regular expression"
raise
e
rror
,
"bogus characters at end of regular expression"
return
p
return
p
def
parse_
replacement
(
source
,
pattern
):
def
parse_
template
(
source
,
pattern
):
# parse 're' replacement string into list of literals and
# parse 're' replacement string into list of literals and
# group references
# group references
s
=
Tokenizer
(
source
)
s
=
Tokenizer
(
source
)
...
@@ -520,15 +522,56 @@ def parse_replacement(source, pattern):
...
@@ -520,15 +522,56 @@ def parse_replacement(source, pattern):
if
this
is
None
:
if
this
is
None
:
break
# end of replacement string
break
# end of replacement string
if
this
and
this
[
0
]
==
"
\\
"
:
if
this
and
this
[
0
]
==
"
\\
"
:
try
:
if
this
==
"
\\
g"
:
a
(
LITERAL
,
ESCAPES
[
this
])
name
=
""
except
KeyError
:
if
s
.
match
(
"<"
):
for
char
in
this
:
while
1
:
a
(
LITERAL
,
char
)
char
=
s
.
get
()
if
char
is
None
:
raise
error
,
"unterminated index"
if
char
==
">"
:
break
# FIXME: check for valid character
name
=
name
+
char
if
not
name
:
raise
error
,
"bad index"
try
:
index
=
int
(
name
)
except
ValueError
:
try
:
index
=
pattern
.
groupindex
[
name
]
except
KeyError
:
raise
IndexError
,
"unknown index"
a
((
MARK
,
index
))
elif
len
(
this
)
>
1
and
this
[
1
]
in
DIGITS
:
while
s
.
next
in
DIGITS
:
this
=
this
+
s
.
get
()
a
((
MARK
,
int
(
this
[
1
:])))
else
:
try
:
a
(
ESCAPES
[
this
])
except
KeyError
:
for
char
in
this
:
a
((
LITERAL
,
char
))
else
:
else
:
a
(
LITERAL
,
this
)
a
(
(
LITERAL
,
this
)
)
return
p
return
p
def
expand_template
(
template
,
match
):
# FIXME: <fl> this is sooooo slow. drop in the slicelist
# code instead
p
=
[]
a
=
p
.
append
for
c
,
s
in
template
:
if
c
is
LITERAL
:
a
(
s
)
elif
c
is
MARK
:
s
=
match
.
group
(
s
)
if
s
is
None
:
raise
error
,
"empty group"
a
(
s
)
return
match
.
string
[:
0
]
.
join
(
p
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
from
pprint
import
pprint
from
pprint
import
pprint
from
testpatterns
import
PATTERNS
from
testpatterns
import
PATTERNS
...
@@ -548,7 +591,7 @@ if __name__ == "__main__":
...
@@ -548,7 +591,7 @@ if __name__ == "__main__":
except
:
except
:
pass
pass
a
=
a
+
1
a
=
a
+
1
except
SyntaxE
rror
,
v
:
except
e
rror
,
v
:
print
"**"
,
repr
(
pattern
),
v
print
"**"
,
repr
(
pattern
),
v
b
=
b
+
1
b
=
b
+
1
print
"-"
*
68
print
"-"
*
68
...
...
Modules/_sre.c
Dosyayı görüntüle @
436c3d58
...
@@ -3,19 +3,22 @@
...
@@ -3,19 +3,22 @@
* Secret Labs' Regular Expression Engine
* Secret Labs' Regular Expression Engine
* $Id$
* $Id$
*
*
* simple regular expression matching engine
n
* simple regular expression matching engine
*
*
* partial history:
* partial history:
* 99-10-24 fl created (based on
the template matcher
)
* 99-10-24 fl created (based on
existing template matcher code
)
* 99-11-13 fl added categories, branching, and more (0.2)
* 99-11-13 fl added categories, branching, and more (0.2)
* 99-11-16 fl some tweaks to compile on non-Windows platforms
* 99-11-16 fl some tweaks to compile on non-Windows platforms
* 99-12-18 fl non-literals, generic maximizing repeat (0.3)
* 99-12-18 fl non-literals, generic maximizing repeat (0.3)
* 99-02-28 fl tons of changes (not all to the better ;-) (0.4)
* 00-02-28 fl tons of changes (not all to the better ;-) (0.4)
* 99-03-06 fl first alpha, sort of (0.5)
* 00-03-06 fl first alpha, sort of (0.5)
* 99-03-14 fl removed most compatibility stuff (0.6)
* 00-03-14 fl removed most compatibility stuff (0.6)
* 99-05-10 fl towards third alpha (0.8.2)
* 00-05-10 fl towards third alpha (0.8.2)
* 99-05-13 fl added experimental cursor stuff (0.8.3)
* 00-05-13 fl added experimental cursor stuff (0.8.3)
* 99-05-27 fl final bug hunt (0.8.4)
* 00-05-27 fl final bug hunt (0.8.4)
* 00-06-21 fl less bugs, more taste (0.8.5)
* 00-06-25 fl major changes to better deal with nested repeats (0.9)
* 00-06-28 fl fixed findall (0.9.1)
*
*
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
*
*
...
@@ -27,16 +30,21 @@
...
@@ -27,16 +30,21 @@
* other compatibility work.
* other compatibility work.
*/
*/
/*
* FIXME: repeated groups don't work (they're usually come out empty)
* FIXME: rename to 're'
* FIXME: enable repeat_one optimization
*/
#ifndef SRE_RECURSIVE
#ifndef SRE_RECURSIVE
char
copyright
[]
=
" SRE 0.8.4 Copyright (c) 1997-2000 by Secret Labs AB "
;
static
char
copyright
[]
=
" SRE 0.9.1 Copyright (c) 1997-2000 by Secret Labs AB "
;
#include "Python.h"
#include "Python.h"
#include "sre.h"
#include "sre.h"
#include "unicodeobject.h"
#if defined(HAVE_LIMITS_H)
#if defined(HAVE_LIMITS_H)
#include <limits.h>
#include <limits.h>
#else
#else
...
@@ -45,10 +53,18 @@ char copyright[] = " SRE 0.8.4 Copyright (c) 1997-2000 by Secret Labs AB ";
...
@@ -45,10 +53,18 @@ char copyright[] = " SRE 0.8.4 Copyright (c) 1997-2000 by Secret Labs AB ";
#include <ctype.h>
#include <ctype.h>
/* name of this module, minus the leading underscore */
#define MODULE "sre"
/* defining this one enables tracing */
/* defining this one enables tracing */
#undef DEBUG
#undef DEBUG
#ifdef WIN32
/* FIXME: <fl> don't assume Windows == MSVC */
#if PY_VERSION_HEX >= 0x01060000
/* defining this enables unicode support (default under 1.6) */
#define HAVE_UNICODE
#endif
#if defined(WIN32)
/* FIXME: <fl> don't assume Windows == MSVC */
#pragma optimize("agtw", on)
/* doesn't seem to make much difference... */
#pragma optimize("agtw", on)
/* doesn't seem to make much difference... */
/* fastest possible local call under MSVC */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#define LOCAL(type) static __inline type __fastcall
...
@@ -60,39 +76,91 @@ char copyright[] = " SRE 0.8.4 Copyright (c) 1997-2000 by Secret Labs AB ";
...
@@ -60,39 +76,91 @@ char copyright[] = " SRE 0.8.4 Copyright (c) 1997-2000 by Secret Labs AB ";
#define SRE_ERROR_ILLEGAL -1
/* illegal opcode */
#define SRE_ERROR_ILLEGAL -1
/* illegal opcode */
#define SRE_ERROR_MEMORY -9
/* out of memory */
#define SRE_ERROR_MEMORY -9
/* out of memory */
#if
def DEBUG
#if
defined(DEBUG)
#define TRACE(v) printf v
#define TRACE(v) printf v
#else
#else
#define TRACE(v)
#define TRACE(v)
#endif
#endif
#define PTR(ptr) ((SRE_CHAR*) (ptr) - (SRE_CHAR*) state->beginning)
#define
SRE_CODE unsigned short
/* unsigned short or larger */
#define
PTR(ptr) ((SRE_CHAR*) (ptr) - (SRE_CHAR*) state->beginning)
/* -------------------------------------------------------------------- */
/* -------------------------------------------------------------------- */
/* search engine state */
/* search engine state */
/* unicode character predicates */
/* default character predicates (run sre_chars.py to regenerate tables) */
#define SRE_TO_LOWER(ch) Py_UNICODE_TOLOWER((Py_UNICODE)(ch))
#define SRE_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_DIGIT_MASK 1
#define SRE_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_SPACE_MASK 2
#define SRE_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LINEBREAK_MASK 4
/* #define SRE_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch)) */
#define SRE_ALNUM_MASK 8
#define SRE_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
#define SRE_WORD_MASK 16
#define SRE_IS_WORD(ch) (SRE_IS_ALNUM((ch)) || (ch) == '_')
/* Per-character class flags for the 7-bit ASCII range, indexed by
   character code.  Each entry is an OR of the SRE_*_MASK bits:
   1 = digit, 2 = space, 4 = linebreak, 8 = alnum, 16 = word.
   So 25 marks a digit (digit|alnum|word), 24 a letter (alnum|word),
   6 the newline (space|linebreak) and 16 the underscore (word only). */
static char sre_char_info[128] = {
    /*   0 -  15 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  6,  2,  2,  2,  0,  0,
    /*  16 -  31 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /*  32 -  47 */  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /*  48 -  63 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,  0,  0,  0,  0,  0,  0,
    /*  64 -  79 */  0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /*  80 -  95 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,  0,  0,  0,  0, 16,
    /*  96 - 111 */  0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 112 - 127 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,  0,  0,  0,  0,  0
};
/* Lower-case mapping for the 7-bit ASCII range, indexed by character
   code.  Codes 65..90 ('A'..'Z') map to 97..122 ('a'..'z'); every
   other entry maps to itself. */
static char sre_char_tolower[128] = {
    /*   0 -  15 */   0,   1,   2,   3,   4,   5,   6,   7,
                      8,   9,  10,  11,  12,  13,  14,  15,
    /*  16 -  31 */  16,  17,  18,  19,  20,  21,  22,  23,
                     24,  25,  26,  27,  28,  29,  30,  31,
    /*  32 -  47 */  32,  33,  34,  35,  36,  37,  38,  39,
                     40,  41,  42,  43,  44,  45,  46,  47,
    /*  48 -  63 */  48,  49,  50,  51,  52,  53,  54,  55,
                     56,  57,  58,  59,  60,  61,  62,  63,
    /*  64 -  79 */  64,  97,  98,  99, 100, 101, 102, 103,
                    104, 105, 106, 107, 108, 109, 110, 111,
    /*  80 -  95 */ 112, 113, 114, 115, 116, 117, 118, 119,
                    120, 121, 122,  91,  92,  93,  94,  95,
    /*  96 - 111 */  96,  97,  98,  99, 100, 101, 102, 103,
                    104, 105, 106, 107, 108, 109, 110, 111,
    /* 112 - 127 */ 112, 113, 114, 115, 116, 117, 118, 119,
                    120, 121, 122, 123, 124, 125, 126, 127
};
/* Default (locale-independent) lower-casing: codes below 128 are
   translated through sre_char_tolower, anything else is returned
   unchanged. */
static unsigned int sre_tolower(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return sre_char_tolower[ch];
}
/* Default character predicates.  Each one looks the character up in
   sre_char_info and tests a single SRE_*_MASK bit; character codes of
   128 and above never match.  The result is the masked bit itself
   (non-zero when set), not a normalised 0/1 value. */
#define SRE_IS_DIGIT(ch)\
((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
/* locale-specific character predicates */
/* locale-specific character predicates */
#define SRE_LOC_TO_LOWER(ch) ((ch) < 256 ? tolower((ch)) : ch)
/* Locale-aware lower-casing: codes below 256 go through the C library
   tolower(), anything else is returned unchanged. */
static unsigned int sre_tolower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int) tolower((int) ch);
}
#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
#define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
/* unicode-specific character predicates */
#if defined(HAVE_UNICODE)
static
unsigned
int
sre_tolower_unicode
(
unsigned
int
ch
)
{
return
(
unsigned
int
)
Py_UNICODE_TOLOWER
((
Py_UNICODE
)(
ch
));
}
/* Unicode-specific character predicates.  Lower-casing and the digit,
   space and linebreak tests are delegated to the Py_UNICODE_* macros.
   SRE_UNI_IS_ALNUM only recognises codes below 256, via isalnum().
   SRE_UNI_IS_WORD is built on SRE_UNI_IS_ALNUM (not on the ASCII-table
   SRE_IS_ALNUM), mirroring how SRE_LOC_IS_WORD is built on
   SRE_LOC_IS_ALNUM, so the unicode family is self-consistent. */
#define SRE_UNI_TO_LOWER(ch) Py_UNICODE_TOLOWER((Py_UNICODE)(ch))
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
#endif
LOCAL
(
int
)
LOCAL
(
int
)
sre_category
(
SRE_CODE
category
,
unsigned
int
ch
)
sre_category
(
SRE_CODE
category
,
unsigned
int
ch
)
{
{
switch
(
category
)
{
switch
(
category
)
{
case
SRE_CATEGORY_DIGIT
:
case
SRE_CATEGORY_DIGIT
:
return
SRE_IS_DIGIT
(
ch
);
return
SRE_IS_DIGIT
(
ch
);
case
SRE_CATEGORY_NOT_DIGIT
:
case
SRE_CATEGORY_NOT_DIGIT
:
...
@@ -109,22 +177,30 @@ sre_category(SRE_CODE category, unsigned int ch)
...
@@ -109,22 +177,30 @@ sre_category(SRE_CODE category, unsigned int ch)
return
SRE_IS_LINEBREAK
(
ch
);
return
SRE_IS_LINEBREAK
(
ch
);
case
SRE_CATEGORY_NOT_LINEBREAK
:
case
SRE_CATEGORY_NOT_LINEBREAK
:
return
!
SRE_IS_LINEBREAK
(
ch
);
return
!
SRE_IS_LINEBREAK
(
ch
);
case
SRE_CATEGORY_LOC_DIGIT
:
return
SRE_LOC_IS_DIGIT
(
ch
);
case
SRE_CATEGORY_LOC_NOT_DIGIT
:
return
!
SRE_LOC_IS_DIGIT
(
ch
);
case
SRE_CATEGORY_LOC_SPACE
:
return
SRE_LOC_IS_SPACE
(
ch
);
case
SRE_CATEGORY_LOC_NOT_SPACE
:
return
!
SRE_LOC_IS_SPACE
(
ch
);
case
SRE_CATEGORY_LOC_WORD
:
case
SRE_CATEGORY_LOC_WORD
:
return
SRE_LOC_IS_WORD
(
ch
);
return
SRE_LOC_IS_WORD
(
ch
);
case
SRE_CATEGORY_LOC_NOT_WORD
:
case
SRE_CATEGORY_LOC_NOT_WORD
:
return
!
SRE_LOC_IS_WORD
(
ch
);
return
!
SRE_LOC_IS_WORD
(
ch
);
case
SRE_CATEGORY_LOC_LINEBREAK
:
return
SRE_LOC_IS_LINEBREAK
(
ch
);
#if defined(HAVE_UNICODE)
case
SRE_CATEGORY_LOC_NOT_LINEBREAK
:
case
SRE_CATEGORY_UNI_DIGIT
:
return
!
SRE_LOC_IS_LINEBREAK
(
ch
);
return
SRE_UNI_IS_DIGIT
(
ch
);
case
SRE_CATEGORY_UNI_NOT_DIGIT
:
return
!
SRE_UNI_IS_DIGIT
(
ch
);
case
SRE_CATEGORY_UNI_SPACE
:
return
SRE_UNI_IS_SPACE
(
ch
);
case
SRE_CATEGORY_UNI_NOT_SPACE
:
return
!
SRE_UNI_IS_SPACE
(
ch
);
case
SRE_CATEGORY_UNI_WORD
:
return
SRE_UNI_IS_WORD
(
ch
);
case
SRE_CATEGORY_UNI_NOT_WORD
:
return
!
SRE_UNI_IS_WORD
(
ch
);
case
SRE_CATEGORY_UNI_LINEBREAK
:
return
SRE_UNI_IS_LINEBREAK
(
ch
);
case
SRE_CATEGORY_UNI_NOT_LINEBREAK
:
return
!
SRE_UNI_IS_LINEBREAK
(
ch
);
#endif
}
}
return
0
;
return
0
;
}
}
...
@@ -146,7 +222,7 @@ _stack_free(SRE_STATE* state)
...
@@ -146,7 +222,7 @@ _stack_free(SRE_STATE* state)
static
int
/* shouldn't be LOCAL */
static
int
/* shouldn't be LOCAL */
_stack_extend
(
SRE_STATE
*
state
,
int
lo
,
int
hi
)
_stack_extend
(
SRE_STATE
*
state
,
int
lo
,
int
hi
)
{
{
void
*
*
stack
;
SRE_STACK
*
stack
;
int
stacksize
;
int
stacksize
;
/* grow the stack to a suitable size; we need at least lo entries,
/* grow the stack to a suitable size; we need at least lo entries,
...
@@ -163,7 +239,7 @@ _stack_extend(SRE_STATE* state, int lo, int hi)
...
@@ -163,7 +239,7 @@ _stack_extend(SRE_STATE* state, int lo, int hi)
else
if
(
stacksize
>
hi
)
else
if
(
stacksize
>
hi
)
stacksize
=
hi
;
stacksize
=
hi
;
TRACE
((
"allocate stack %d
\n
"
,
stacksize
));
TRACE
((
"allocate stack %d
\n
"
,
stacksize
));
stack
=
malloc
(
sizeof
(
void
*
)
*
stacksize
);
stack
=
malloc
(
sizeof
(
SRE_STACK
)
*
stacksize
);
}
else
{
}
else
{
/* grow the stack (typically by a factor of two) */
/* grow the stack (typically by a factor of two) */
while
(
stacksize
<
lo
)
while
(
stacksize
<
lo
)
...
@@ -171,7 +247,7 @@ _stack_extend(SRE_STATE* state, int lo, int hi)
...
@@ -171,7 +247,7 @@ _stack_extend(SRE_STATE* state, int lo, int hi)
/* FIXME: <fl> could trim size if it's larger than lo, and
/* FIXME: <fl> could trim size if it's larger than lo, and
much larger than hi */
much larger than hi */
TRACE
((
"grow stack to %d
\n
"
,
stacksize
));
TRACE
((
"grow stack to %d
\n
"
,
stacksize
));
stack
=
realloc
(
state
->
stack
,
sizeof
(
void
*
)
*
stacksize
);
stack
=
realloc
(
state
->
stack
,
sizeof
(
SRE_STACK
)
*
stacksize
);
}
}
if
(
!
stack
)
{
if
(
!
stack
)
{
...
@@ -192,11 +268,13 @@ _stack_extend(SRE_STATE* state, int lo, int hi)
...
@@ -192,11 +268,13 @@ _stack_extend(SRE_STATE* state, int lo, int hi)
#define SRE_MEMBER sre_member
#define SRE_MEMBER sre_member
#define SRE_MATCH sre_match
#define SRE_MATCH sre_match
#define SRE_SEARCH sre_search
#define SRE_SEARCH sre_search
#define SRE_RECURSIVE
#i
nclude "_sre.c"
#i
f defined(HAVE_UNICODE)
#define SRE_RECURSIVE
#include "_sre.c"
#undef SRE_RECURSIVE
#undef SRE_RECURSIVE
#undef SRE_SEARCH
#undef SRE_SEARCH
#undef SRE_MATCH
#undef SRE_MATCH
#undef SRE_MEMBER
#undef SRE_MEMBER
...
@@ -210,6 +288,7 @@ _stack_extend(SRE_STATE* state, int lo, int hi)
...
@@ -210,6 +288,7 @@ _stack_extend(SRE_STATE* state, int lo, int hi)
#define SRE_MEMBER sre_umember
#define SRE_MEMBER sre_umember
#define SRE_MATCH sre_umatch
#define SRE_MATCH sre_umatch
#define SRE_SEARCH sre_usearch
#define SRE_SEARCH sre_usearch
#endif
#endif
/* SRE_RECURSIVE */
#endif
/* SRE_RECURSIVE */
...
@@ -308,13 +387,21 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -308,13 +387,21 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
SRE_CHAR
*
end
=
state
->
end
;
SRE_CHAR
*
end
=
state
->
end
;
SRE_CHAR
*
ptr
=
state
->
ptr
;
SRE_CHAR
*
ptr
=
state
->
ptr
;
int
stack
size
;
int
stack
;
int
stackbase
;
int
stackbase
;
int
lastmark
;
int
i
,
count
;
int
i
,
count
;
/* FIXME: this is one ugly hack */
/* FIXME: this is a hack! */
void
*
*
mark
=
NULL
;
void
*
mark_copy
[
64
];
void
*
mark_data
[
64
];
void
*
mark
=
NULL
;
TRACE
((
"%8d: enter
\n
"
,
PTR
(
ptr
)));
stackbase
=
stack
=
state
->
stackbase
;
lastmark
=
state
->
lastmark
;
retry
:
for
(;;)
{
for
(;;)
{
...
@@ -334,7 +421,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -334,7 +421,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
case
SRE_OP_AT
:
case
SRE_OP_AT
:
/* match at given position */
/* match at given position */
/* args: <at> */
/* args: <at> */
TRACE
((
"%8d:
match at
\\
%c
\n
"
,
PTR
(
ptr
),
*
pattern
));
TRACE
((
"%8d:
position %d
\n
"
,
PTR
(
ptr
),
*
pattern
));
if
(
!
SRE_AT
(
state
,
ptr
,
*
pattern
))
if
(
!
SRE_AT
(
state
,
ptr
,
*
pattern
))
goto
failure
;
goto
failure
;
pattern
++
;
pattern
++
;
...
@@ -343,18 +430,20 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -343,18 +430,20 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
case
SRE_OP_CATEGORY
:
case
SRE_OP_CATEGORY
:
/* match at given category */
/* match at given category */
/* args: <category> */
/* args: <category> */
TRACE
((
"%8d: category match at
\\
%c
\n
"
,
PTR
(
ptr
),
*
pattern
));
TRACE
((
"%8d: category %d [category %d]
\n
"
,
PTR
(
ptr
),
*
ptr
,
*
pattern
));
if
(
ptr
>=
end
||
!
sre_category
(
pattern
[
0
],
ptr
[
0
]))
if
(
ptr
>=
end
||
!
sre_category
(
pattern
[
0
],
ptr
[
0
]))
goto
failure
;
goto
failure
;
TRACE
((
"%8d: category ok
\n
"
,
PTR
(
ptr
)));
pattern
++
;
pattern
++
;
ptr
++
;
ptr
++
;
break
;
break
;
case
SRE_OP_LITERAL
:
case
SRE_OP_LITERAL
:
/* match literal
character
*/
/* match literal
string
*/
/* args: <code> */
/* args: <code> */
TRACE
((
"%8d: literal %c
\n
"
,
PTR
(
ptr
),
(
SRE_CHAR
)
*
pattern
));
TRACE
((
"%8d: literal %c
\n
"
,
PTR
(
ptr
),
(
SRE_CHAR
)
pattern
[
0
]
));
if
(
ptr
>=
end
||
*
ptr
!=
(
SRE_CHAR
)
*
pattern
)
if
(
ptr
>=
end
||
*
ptr
!=
(
SRE_CHAR
)
pattern
[
0
]
)
goto
failure
;
goto
failure
;
pattern
++
;
pattern
++
;
ptr
++
;
ptr
++
;
...
@@ -363,8 +452,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -363,8 +452,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
case
SRE_OP_NOT_LITERAL
:
case
SRE_OP_NOT_LITERAL
:
/* match anything that is not literal character */
/* match anything that is not literal character */
/* args: <code> */
/* args: <code> */
TRACE
((
"%8d: literal not %c
\n
"
,
PTR
(
ptr
),
(
SRE_CHAR
)
*
pattern
));
TRACE
((
"%8d: literal not %c
\n
"
,
PTR
(
ptr
),
(
SRE_CHAR
)
pattern
[
0
]
));
if
(
ptr
>=
end
||
*
ptr
==
(
SRE_CHAR
)
*
pattern
)
if
(
ptr
>=
end
||
*
ptr
==
(
SRE_CHAR
)
pattern
[
0
]
)
goto
failure
;
goto
failure
;
pattern
++
;
pattern
++
;
ptr
++
;
ptr
++
;
...
@@ -372,7 +461,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -372,7 +461,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
case
SRE_OP_ANY
:
case
SRE_OP_ANY
:
/* match anything */
/* match anything */
TRACE
((
"%8d: any
\n
"
,
PTR
(
ptr
)));
TRACE
((
"%8d: any
thing
\n
"
,
PTR
(
ptr
)));
if
(
ptr
>=
end
)
if
(
ptr
>=
end
)
goto
failure
;
goto
failure
;
ptr
++
;
ptr
++
;
...
@@ -393,14 +482,11 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -393,14 +482,11 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
TRACE
((
"%8d: group %d
\n
"
,
PTR
(
ptr
),
pattern
[
0
]));
TRACE
((
"%8d: group %d
\n
"
,
PTR
(
ptr
),
pattern
[
0
]));
i
=
pattern
[
0
];
i
=
pattern
[
0
];
{
{
/* FIXME: optimize! */
SRE_CHAR
*
p
=
(
SRE_CHAR
*
)
state
->
mark
[
i
+
i
];
SRE_CHAR
*
p
=
(
SRE_CHAR
*
)
state
->
mark
[
i
+
i
];
SRE_CHAR
*
e
=
(
SRE_CHAR
*
)
state
->
mark
[
i
+
i
+
1
];
SRE_CHAR
*
e
=
(
SRE_CHAR
*
)
state
->
mark
[
i
+
i
+
1
];
TRACE
((
"%8d: group %p %p
\n
"
,
PTR
(
ptr
),
p
,
e
));
if
(
!
p
||
!
e
||
e
<
p
)
if
(
!
p
||
!
e
||
e
<
p
)
goto
failure
;
goto
failure
;
while
(
p
<
e
)
{
while
(
p
<
e
)
{
TRACE
((
"%8d: group test %c %c
\n
"
,
PTR
(
ptr
),
*
ptr
,
*
p
));
if
(
ptr
>=
end
||
*
ptr
!=
*
p
)
if
(
ptr
>=
end
||
*
ptr
!=
*
p
)
goto
failure
;
goto
failure
;
p
++
;
ptr
++
;
p
++
;
ptr
++
;
...
@@ -414,15 +500,13 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -414,15 +500,13 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
TRACE
((
"%8d: group ignore %d
\n
"
,
PTR
(
ptr
),
pattern
[
0
]));
TRACE
((
"%8d: group ignore %d
\n
"
,
PTR
(
ptr
),
pattern
[
0
]));
i
=
pattern
[
0
];
i
=
pattern
[
0
];
{
{
/* FIXME: optimize! */
SRE_CHAR
*
p
=
(
SRE_CHAR
*
)
state
->
mark
[
i
+
i
];
SRE_CHAR
*
p
=
(
SRE_CHAR
*
)
state
->
mark
[
i
+
i
];
SRE_CHAR
*
e
=
(
SRE_CHAR
*
)
state
->
mark
[
i
+
i
+
1
];
SRE_CHAR
*
e
=
(
SRE_CHAR
*
)
state
->
mark
[
i
+
i
+
1
];
TRACE
((
"%8d: group %p %p
\n
"
,
PTR
(
ptr
),
p
,
e
));
if
(
!
p
||
!
e
||
e
<
p
)
if
(
!
p
||
!
e
||
e
<
p
)
goto
failure
;
goto
failure
;
while
(
p
<
e
)
{
while
(
p
<
e
)
{
TRACE
((
"%8d: group test %c %c
\n
"
,
PTR
(
ptr
),
*
ptr
,
*
p
));
if
(
ptr
>=
end
||
if
(
ptr
>=
end
||
SRE_TO_LOWER
(
*
ptr
)
!=
SRE_TO_LOWER
(
*
p
))
state
->
tolower
(
*
ptr
)
!=
state
->
tolower
(
*
p
))
goto
failure
;
goto
failure
;
p
++
;
ptr
++
;
p
++
;
ptr
++
;
}
}
...
@@ -432,7 +516,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -432,7 +516,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
case
SRE_OP_LITERAL_IGNORE
:
case
SRE_OP_LITERAL_IGNORE
:
TRACE
((
"%8d: literal lower(%c)
\n
"
,
PTR
(
ptr
),
(
SRE_CHAR
)
*
pattern
));
TRACE
((
"%8d: literal lower(%c)
\n
"
,
PTR
(
ptr
),
(
SRE_CHAR
)
*
pattern
));
if
(
ptr
>=
end
||
SRE_TO_LOWER
(
*
ptr
)
!=
(
SRE_CHAR
)
*
pattern
)
if
(
ptr
>=
end
||
state
->
tolower
(
*
ptr
)
!=
state
->
tolower
(
*
pattern
))
goto
failure
;
goto
failure
;
pattern
++
;
pattern
++
;
ptr
++
;
ptr
++
;
...
@@ -440,8 +525,9 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -440,8 +525,9 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
case
SRE_OP_NOT_LITERAL_IGNORE
:
case
SRE_OP_NOT_LITERAL_IGNORE
:
TRACE
((
"%8d: literal not lower(%c)
\n
"
,
PTR
(
ptr
),
TRACE
((
"%8d: literal not lower(%c)
\n
"
,
PTR
(
ptr
),
(
SRE_CHAR
)
*
pattern
));
(
SRE_CHAR
)
*
pattern
));
if
(
ptr
>=
end
||
SRE_TO_LOWER
(
*
ptr
)
==
(
SRE_CHAR
)
*
pattern
)
if
(
ptr
>=
end
||
state
->
tolower
(
*
ptr
)
==
state
->
tolower
(
*
pattern
))
goto
failure
;
goto
failure
;
pattern
++
;
pattern
++
;
ptr
++
;
ptr
++
;
...
@@ -450,7 +536,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -450,7 +536,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
case
SRE_OP_IN_IGNORE
:
case
SRE_OP_IN_IGNORE
:
TRACE
((
"%8d: set lower(%c)
\n
"
,
PTR
(
ptr
),
*
ptr
));
TRACE
((
"%8d: set lower(%c)
\n
"
,
PTR
(
ptr
),
*
ptr
));
if
(
ptr
>=
end
if
(
ptr
>=
end
||
!
SRE_MEMBER
(
pattern
+
1
,
(
SRE_CHAR
)
SRE_TO_LOWER
(
*
ptr
)))
||
!
SRE_MEMBER
(
pattern
+
1
,
(
SRE_CHAR
)
state
->
tolower
(
*
ptr
)))
goto
failure
;
goto
failure
;
pattern
+=
pattern
[
0
];
pattern
+=
pattern
[
0
];
ptr
++
;
ptr
++
;
...
@@ -459,39 +545,50 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -459,39 +545,50 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
case
SRE_OP_MARK
:
case
SRE_OP_MARK
:
/* set mark */
/* set mark */
/* args: <mark> */
/* args: <mark> */
TRACE
((
"%8d: set mark(%d)
\n
"
,
PTR
(
ptr
),
pattern
[
0
]));
TRACE
((
"%8d: set mark %d
\n
"
,
PTR
(
ptr
),
pattern
[
0
]));
if
(
state
->
lastmark
<
pattern
[
0
])
state
->
lastmark
=
pattern
[
0
];
if
(
!
mark
)
{
if
(
!
mark
)
{
mark
=
mark_
data
;
mark
=
mark_
copy
;
memcpy
(
mark
,
state
->
mark
,
s
izeof
(
state
->
mark
));
memcpy
(
mark
,
state
->
mark
,
s
tate
->
lastmark
*
sizeof
(
void
*
));
}
}
state
->
mark
[
pattern
[
0
]]
=
ptr
;
state
->
mark
[
pattern
[
0
]]
=
ptr
;
pattern
++
;
pattern
++
;
break
;
break
;
case
SRE_OP_JUMP
:
case
SRE_OP_JUMP
:
case
SRE_OP_INFO
:
/* jump forward */
/* jump forward */
/* args: <skip> */
/* args: <skip> */
TRACE
((
"%8d: jump +%d
\n
"
,
PTR
(
ptr
),
pattern
[
0
]));
TRACE
((
"%8d: jump +%d
\n
"
,
PTR
(
ptr
),
pattern
[
0
]));
pattern
+=
pattern
[
0
];
pattern
+=
pattern
[
0
];
break
;
break
;
#if 0
case SRE_OP_CALL:
case SRE_OP_CALL:
/* match subpattern, without backtracking */
/* match subpattern, without backtracking */
/* args: <skip> <pattern> */
/* args: <skip> <pattern> */
TRACE
((
"%8d:
match
subpattern
\n
"
,
PTR
(
ptr
)));
TRACE(("%8d: subpattern\n", PTR(ptr)));
state->ptr = ptr;
state->ptr = ptr;
if
(
!
SRE_MATCH
(
state
,
pattern
+
1
))
i = SRE_MATCH(state, pattern + 1);
if (i < 0)
return i;
if (!i)
goto failure;
goto failure;
pattern += pattern[0];
pattern += pattern[0];
ptr = state->ptr;
ptr = state->ptr;
break;
break;
#endif
#if 0
case SRE_OP_MAX_REPEAT_ONE:
case SRE_OP_MAX_REPEAT_ONE:
/* match repeated sequence (maximizing regexp) */
/* match repeated sequence (maximizing regexp) */
/* this variant only works if the repeated item is exactly
one character wide, and we're not already collecting
/* this operator only works if the repeated item is
backtracking points. for other cases, use the
exactly one character wide, and we're not already
MAX_REPEAT operator instead */
collecting backtracking points. for other cases,
use the MAX_REPEAT operator instead */
/* args: <skip> <min> <max> <step> */
/* args: <skip> <min> <max> <step> */
TRACE(("%8d: max repeat one {%d,%d}\n", PTR(ptr),
TRACE(("%8d: max repeat one {%d,%d}\n", PTR(ptr),
pattern[1], pattern[2]));
pattern[1], pattern[2]));
...
@@ -523,7 +620,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -523,7 +620,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
/* repeated literal */
/* repeated literal */
SRE_CHAR chr = (SRE_CHAR) pattern[4];
SRE_CHAR chr = (SRE_CHAR) pattern[4];
while (count < (int) pattern[2]) {
while (count < (int) pattern[2]) {
if
(
ptr
>=
end
||
(
SRE_CHAR
)
SRE_TO_LOWER
(
*
ptr
)
!=
chr
)
if (ptr >= end || (SRE_CHAR)
state->tolower
(*ptr) != chr)
break;
break;
ptr++;
ptr++;
count++;
count++;
...
@@ -543,7 +640,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -543,7 +640,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
/* repeated non-literal */
/* repeated non-literal */
SRE_CHAR chr = (SRE_CHAR) pattern[4];
SRE_CHAR chr = (SRE_CHAR) pattern[4];
while (count < (int) pattern[2]) {
while (count < (int) pattern[2]) {
if
(
ptr
>=
end
||
(
SRE_CHAR
)
SRE_TO_LOWER
(
*
ptr
)
==
chr
)
if (ptr >= end || (SRE_CHAR)
state->tolower
(*ptr) == chr)
break;
break;
ptr++;
ptr++;
count++;
count++;
...
@@ -564,8 +661,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -564,8 +661,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
while (count < (int) pattern[2]) {
while (count < (int) pattern[2]) {
i = SRE_MATCH(state, pattern + 3);
i = SRE_MATCH(state, pattern + 3);
if (i < 0)
if (i < 0)
goto
failure
;
return i
;
if
(
i
==
0
)
if (
!i
)
break;
break;
count++;
count++;
}
}
...
@@ -621,7 +718,9 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -621,7 +718,9 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
while (count >= (int) pattern[1]) {
while (count >= (int) pattern[1]) {
state->ptr = ptr;
state->ptr = ptr;
i = SRE_MATCH(state, pattern + pattern[0]);
i = SRE_MATCH(state, pattern + pattern[0]);
if
(
i
>
0
)
{
if (i < 0)
return i;
if (i) {
TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
goto success;
goto success;
}
}
...
@@ -631,108 +730,84 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -631,108 +730,84 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
}
}
}
}
goto failure;
goto failure;
#endif
case
SRE_OP_MAX_REPEAT
:
case
SRE_OP_MAX_REPEAT
:
/* match repeated sequence (maximizing regexp). repeated
/* match repeated sequence (maximizing regexp). repeated
group should end with a MAX_UNTIL code */
group should end with a MAX_UNTIL code */
TRACE
((
"%8d: max repeat %d %d
\n
"
,
PTR
(
ptr
),
/* args: <skip> <min> <max> <item> */
TRACE
((
"%8d: max repeat (%d %d)
\n
"
,
PTR
(
ptr
),
pattern
[
1
],
pattern
[
2
]));
pattern
[
1
],
pattern
[
2
]));
count
=
0
;
count
=
0
;
state
->
ptr
=
ptr
;
state
->
ptr
=
ptr
;
/* FIXME: <fl> umm. what about matching the minimum
/* match minimum number of items */
number of items before starting to collect backtracking
while
(
count
<
(
int
)
pattern
[
1
])
{
positions? */
i
=
SRE_MATCH
(
state
,
pattern
+
3
);
if
(
i
<
0
)
return
i
;
if
(
!
i
)
goto
failure
;
if
(
state
->
ptr
==
ptr
)
{
/* if the match was successful but empty, set the
count to max and terminate the scanning loop */
count
=
(
int
)
pattern
[
2
];
break
;
}
count
++
;
ptr
=
state
->
ptr
;
}
stackbase
=
state
->
stackbase
;
TRACE
((
"%8d: found %d leading items
\n
"
,
PTR
(
ptr
),
count
))
;
while
(
count
<
(
int
)
pattern
[
2
])
{
if
(
count
<
(
int
)
pattern
[
1
])
/* store current position on the stack */
goto
failure
;
TRACE
((
"%8d: push mark at index %d
\n
"
,
PTR
(
ptr
),
count
));
if
(
stackbase
+
count
>=
state
->
stacksize
)
{
/* match maximum number of items, pushing alternate end
i
=
_stack_extend
(
state
,
stackbase
+
count
+
1
,
points to the stack */
stackbase
+
pattern
[
2
]);
if
(
i
<
0
)
while
(
pattern
[
2
]
==
32767
||
count
<
(
int
)
pattern
[
2
])
{
goto
failure
;
state
->
stackbase
=
stack
;
}
state
->
stack
[
stackbase
+
count
]
=
ptr
;
/* check if we can match another item */
state
->
stackbase
+=
count
+
1
;
i
=
SRE_MATCH
(
state
,
pattern
+
3
);
i
=
SRE_MATCH
(
state
,
pattern
+
3
);
state
->
stackbase
=
stackbase
;
/* rewind */
state
->
stackbase
=
stackbase
;
/* rewind */
if
(
i
!=
2
)
if
(
i
<
0
)
return
i
;
if
(
!
i
)
break
;
break
;
if
(
state
->
ptr
==
ptr
)
{
if
(
state
->
ptr
==
ptr
)
{
/* if the match was successful but empty, set the
count to max and terminate the scanning loop */
stacksize
=
count
;
/* actual size of stack */
count
=
(
int
)
pattern
[
2
];
count
=
(
int
)
pattern
[
2
];
goto
check_tail
;
/* FIXME: <fl> eliminate goto */
break
;
}
}
count
++
;
/* this position was valid; add it to the retry
stack */
if
(
stack
>=
state
->
stacksize
)
{
i
=
_stack_extend
(
state
,
stack
+
1
,
stackbase
+
pattern
[
2
]);
if
(
i
<
0
)
return
i
;
/* out of memory */
}
TRACE
((
"%8d: stack[%d] = %d
\n
"
,
PTR
(
ptr
),
stack
,
PTR
(
ptr
)));
state
->
stack
[
stack
].
ptr
=
ptr
;
state
->
stack
[
stack
].
pattern
=
pattern
+
pattern
[
0
];
stack
++
;
/* move forward */
ptr
=
state
->
ptr
;
ptr
=
state
->
ptr
;
count
++
;
}
stacksize
=
count
;
/* actual number of entries on the stack */
check_tail
:
/* when we get here, count is the number of matches,
stacksize is the number of match points on the stack
(usually same as count, but it might be smaller) and
ptr points to the tail. */
if
(
count
<
(
int
)
pattern
[
1
])
goto
failure
;
/* make sure that rest of the expression matches. if it
doesn't, backtrack */
TRACE
((
"%8d: repeat %d found (stack size = %d)
\n
"
,
PTR
(
ptr
),
count
,
stacksize
+
1
));
TRACE
((
"%8d: tail is pattern
\n
"
,
PTR
(
ptr
)));
/* hope for the best */
state
->
ptr
=
ptr
;
state
->
stackbase
+=
stacksize
+
1
;
i
=
SRE_MATCH
(
state
,
pattern
+
pattern
[
0
]);
state
->
stackbase
=
stackbase
;
if
(
i
>
0
)
{
TRACE
((
"%8d: repeat %d picked
\n
"
,
PTR
(
ptr
),
count
));
goto
success
;
}
}
/* backtrack! */
/* when we get here, count is the number of successful
while
(
count
>=
(
int
)
pattern
[
1
])
{
matches, and ptr points to the tail. */
ptr
=
state
->
stack
[
stackbase
+
(
count
<
stacksize
?
count
:
stacksize
)];
state
->
ptr
=
ptr
;
count
--
;
TRACE
((
"%8d: BACKTRACK
\n
"
,
PTR
(
ptr
)));
state
->
stackbase
+=
stacksize
+
1
;
i
=
SRE_MATCH
(
state
,
pattern
+
pattern
[
0
]);
state
->
stackbase
=
stackbase
;
if
(
i
>
0
)
{
TRACE
((
"%8d: repeat %d picked
\n
"
,
PTR
(
ptr
),
count
));
goto
success
;
}
}
goto
failure
;
case
SRE_OP_MAX_UNTIL
:
TRACE
((
"%8d: skip +%d
\n
"
,
PTR
(
ptr
),
pattern
[
0
]));
/* match repeated sequence (maximizing regexp). repeated
group should end with a MAX_UNTIL code */
TRACE
((
"%8d: max until
\n
"
,
PTR
(
ptr
)));
pattern
+=
pattern
[
0
];
state
->
ptr
=
ptr
;
break
;
goto
success
;
/* always succeeds, for now... */
case
SRE_OP_MIN_REPEAT
:
case
SRE_OP_MIN_REPEAT
:
/* match repeated sequence (minimizing regexp) */
/* match repeated sequence (minimizing regexp) */
/* FIXME: HERE BE BUGS! */
TRACE
((
"%8d: min repeat %d %d
\n
"
,
PTR
(
ptr
),
TRACE
((
"%8d: min repeat %d %d
\n
"
,
PTR
(
ptr
),
pattern
[
1
],
pattern
[
2
]));
pattern
[
1
],
pattern
[
2
]));
count
=
0
;
count
=
0
;
...
@@ -740,7 +815,9 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -740,7 +815,9 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
/* match minimum number of items */
/* match minimum number of items */
while
(
count
<
(
int
)
pattern
[
1
])
{
while
(
count
<
(
int
)
pattern
[
1
])
{
i
=
SRE_MATCH
(
state
,
pattern
+
3
);
i
=
SRE_MATCH
(
state
,
pattern
+
3
);
if
(
i
<=
0
)
if
(
i
<
0
)
return
i
;
if
(
!
i
)
goto
failure
;
goto
failure
;
count
++
;
count
++
;
}
}
...
@@ -752,21 +829,16 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -752,21 +829,16 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
TRACE
((
"%8d: repeat %d picked
\n
"
,
PTR
(
ptr
),
count
));
TRACE
((
"%8d: repeat %d picked
\n
"
,
PTR
(
ptr
),
count
));
goto
success
;
goto
success
;
}
}
TRACE
((
"%8d: BACKTRACK
\n
"
,
PTR
(
ptr
)));
state
->
ptr
=
ptr
;
/* backtrack */
state
->
ptr
=
ptr
;
/* backtrack */
i
=
SRE_MATCH
(
state
,
pattern
+
3
);
i
=
SRE_MATCH
(
state
,
pattern
+
3
);
if
(
i
<=
0
)
if
(
i
<
0
)
return
i
;
if
(
!
i
)
goto
failure
;
goto
failure
;
count
++
;
count
++
;
}
}
goto
failure
;
goto
failure
;
case
SRE_OP_MIN_UNTIL
:
/* end of repeat group */
TRACE
((
"%8d: min until
\n
"
,
PTR
(
ptr
)));
state
->
ptr
=
ptr
;
goto
success
;
/* always succeeds, for now... */
case
SRE_OP_BRANCH
:
case
SRE_OP_BRANCH
:
/* match one of several subpatterns */
/* match one of several subpatterns */
/* format: <branch> <size> <head> ... <null> <tail> */
/* format: <branch> <size> <head> ... <null> <tail> */
...
@@ -777,7 +849,9 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -777,7 +849,9 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
TRACE
((
"%8d: branch check
\n
"
,
PTR
(
ptr
)));
TRACE
((
"%8d: branch check
\n
"
,
PTR
(
ptr
)));
state
->
ptr
=
ptr
;
state
->
ptr
=
ptr
;
i
=
SRE_MATCH
(
state
,
pattern
+
1
);
i
=
SRE_MATCH
(
state
,
pattern
+
1
);
if
(
i
>
0
)
{
if
(
i
<
0
)
return
i
;
if
(
i
)
{
TRACE
((
"%8d: branch succeeded
\n
"
,
PTR
(
ptr
)));
TRACE
((
"%8d: branch succeeded
\n
"
,
PTR
(
ptr
)));
goto
success
;
goto
success
;
}
}
...
@@ -789,14 +863,20 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -789,14 +863,20 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
case
SRE_OP_REPEAT
:
case
SRE_OP_REPEAT
:
/* TEMPLATE: match repeated sequence (no backtracking) */
/* TEMPLATE: match repeated sequence (no backtracking) */
/*
format: <repeat>
<skip> <min> <max> */
/*
args:
<skip> <min> <max> */
TRACE
((
"%8d: repeat %d %d
\n
"
,
PTR
(
ptr
),
pattern
[
1
],
pattern
[
2
]));
TRACE
((
"%8d: repeat %d %d
\n
"
,
PTR
(
ptr
),
pattern
[
1
],
pattern
[
2
]));
count
=
0
;
count
=
0
;
state
->
ptr
=
ptr
;
state
->
ptr
=
ptr
;
while
(
count
<
(
int
)
pattern
[
2
])
{
while
(
count
<
(
int
)
pattern
[
2
])
{
i
=
SRE_MATCH
(
state
,
pattern
+
3
);
i
=
SRE_MATCH
(
state
,
pattern
+
3
);
if
(
i
<=
0
)
if
(
i
<
0
)
return
i
;
if
(
!
i
)
break
;
break
;
if
(
state
->
ptr
==
ptr
)
{
count
=
(
int
)
pattern
[
2
];
break
;
}
count
++
;
count
++
;
}
}
if
(
count
<=
(
int
)
pattern
[
1
])
if
(
count
<=
(
int
)
pattern
[
1
])
...
@@ -807,16 +887,28 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -807,16 +887,28 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
break
;
break
;
default
:
default
:
TRACE
((
"%8d: unknown opcode %d
\n
"
,
PTR
(
ptr
),
pattern
[
-
1
]));
return
SRE_ERROR_ILLEGAL
;
return
SRE_ERROR_ILLEGAL
;
}
}
}
}
failure
:
failure
:
if
(
stack
--
>
stackbase
)
{
ptr
=
state
->
stack
[
stack
].
ptr
;
pattern
=
state
->
stack
[
stack
].
pattern
;
TRACE
((
"%8d: retry (%d)
\n
"
,
PTR
(
ptr
),
stack
));
goto
retry
;
}
TRACE
((
"%8d: leave (failure)
\n
"
,
PTR
(
ptr
)));
state
->
stackbase
=
stackbase
;
state
->
lastmark
=
lastmark
;
if
(
mark
)
if
(
mark
)
memcpy
(
state
->
mark
,
mark
,
s
izeof
(
state
->
mark
));
memcpy
(
state
->
mark
,
mark
,
s
tate
->
lastmark
*
sizeof
(
void
*
));
return
0
;
return
0
;
success
:
success
:
TRACE
((
"%8d: leave (success)
\n
"
,
PTR
(
ptr
)));
state
->
stackbase
=
stackbase
;
return
1
;
return
1
;
}
}
...
@@ -827,7 +919,12 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -827,7 +919,12 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
SRE_CHAR
*
end
=
state
->
end
;
SRE_CHAR
*
end
=
state
->
end
;
int
status
=
0
;
int
status
=
0
;
/* FIXME: <fl> add IGNORE cases (or implement full ASSERT support? */
if
(
pattern
[
0
]
==
SRE_OP_INFO
)
{
/* don't look too far */
end
-=
pattern
[
2
];
pattern
+=
pattern
[
1
];
/* FIXME: add support for fast scan */
}
if
(
pattern
[
0
]
==
SRE_OP_LITERAL
)
{
if
(
pattern
[
0
]
==
SRE_OP_LITERAL
)
{
/* pattern starts with a literal */
/* pattern starts with a literal */
...
@@ -837,7 +934,7 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -837,7 +934,7 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
ptr
++
;
ptr
++
;
if
(
ptr
==
end
)
if
(
ptr
==
end
)
return
0
;
return
0
;
TRACE
((
"%8d:
search found
literal
\n
"
,
PTR
(
ptr
)));
TRACE
((
"%8d:
=== SEARCH ===
literal
\n
"
,
PTR
(
ptr
)));
state
->
start
=
ptr
;
state
->
start
=
ptr
;
state
->
ptr
=
++
ptr
;
state
->
ptr
=
++
ptr
;
status
=
SRE_MATCH
(
state
,
pattern
+
2
);
status
=
SRE_MATCH
(
state
,
pattern
+
2
);
...
@@ -845,25 +942,9 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -845,25 +942,9 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
break
;
break
;
}
}
}
else
if
(
pattern
[
0
]
==
SRE_OP_IN
)
{
/* pattern starts with a set */
for
(;;)
{
/* format: <in> <skip> <data> */
while
(
ptr
<
end
&&
!
SRE_MEMBER
(
pattern
+
2
,
*
ptr
))
ptr
++
;
if
(
ptr
==
end
)
return
0
;
TRACE
((
"%8d: search found set
\n
"
,
PTR
(
ptr
)));
state
->
start
=
ptr
;
state
->
ptr
=
++
ptr
;
status
=
SRE_MATCH
(
state
,
pattern
+
pattern
[
1
]
+
1
);
if
(
status
!=
0
)
break
;
}
}
else
}
else
while
(
ptr
<=
end
)
{
while
(
ptr
<=
end
)
{
TRACE
((
"%8d:
search
\n
"
,
PTR
(
ptr
)));
TRACE
((
"%8d:
=== SEARCH ===
\n
"
,
PTR
(
ptr
)));
state
->
start
=
state
->
ptr
=
ptr
++
;
state
->
start
=
state
->
ptr
=
ptr
++
;
status
=
SRE_MATCH
(
state
,
pattern
);
status
=
SRE_MATCH
(
state
,
pattern
);
if
(
status
!=
0
)
if
(
status
!=
0
)
...
@@ -873,7 +954,7 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
...
@@ -873,7 +954,7 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
return
status
;
return
status
;
}
}
#if
ndef SRE_RECURSIVE
#if
!defined(SRE_RECURSIVE)
/* -------------------------------------------------------------------- */
/* -------------------------------------------------------------------- */
/* factories and destructors */
/* factories and destructors */
...
@@ -923,13 +1004,28 @@ _compile(PyObject* self_, PyObject* args)
...
@@ -923,13 +1004,28 @@ _compile(PyObject* self_, PyObject* args)
}
}
static
PyObject
*
static
PyObject
*
_getcodesize
(
PyObject
*
self_
,
PyObject
*
args
)
sre_codesize
(
PyObject
*
self
,
PyObject
*
args
)
{
{
return
Py_BuildValue
(
"i"
,
sizeof
(
SRE_CODE
));
return
Py_BuildValue
(
"i"
,
sizeof
(
SRE_CODE
));
}
}
/* Module-level helper: lower-case a single character code.
   args: (character, flags), both integers.
   SRE_FLAG_LOCALE selects the locale mapping; otherwise, when unicode
   support is compiled in, SRE_FLAG_UNICODE selects the unicode
   mapping; with neither flag the default ASCII mapping is used.
   Returns the lowered code as a Python integer, or NULL if the
   arguments could not be parsed. */
static PyObject*
sre_lower(PyObject* self, PyObject* args)
{
    int character, flags;
    unsigned int lowered;

    if (!PyArg_ParseTuple(args, "ii", &character, &flags))
        return NULL;

    if (flags & SRE_FLAG_LOCALE) {
        /* locale takes precedence over unicode */
        lowered = sre_tolower_locale(character);
    }
#if defined(HAVE_UNICODE)
    else if (flags & SRE_FLAG_UNICODE) {
        lowered = sre_tolower_unicode(character);
    }
#endif
    else {
        lowered = sre_tolower(character);
    }

    return Py_BuildValue("i", lowered);
}
LOCAL
(
PyObject
*
)
LOCAL
(
PyObject
*
)
_setup
(
SRE_STATE
*
state
,
PyObject
*
args
)
_setup
(
SRE_STATE
*
state
,
P
atternObject
*
pattern
,
P
yObject
*
args
)
{
{
/* prepare state object */
/* prepare state object */
...
@@ -960,7 +1056,11 @@ _setup(SRE_STATE* state, PyObject* args)
...
@@ -960,7 +1056,11 @@ _setup(SRE_STATE* state, PyObject* args)
}
}
/* determine character size */
/* determine character size */
#if defined(HAVE_UNICODE)
state
->
charsize
=
(
PyUnicode_Check
(
string
)
?
sizeof
(
Py_UNICODE
)
:
1
);
state
->
charsize
=
(
PyUnicode_Check
(
string
)
?
sizeof
(
Py_UNICODE
)
:
1
);
#else
state
->
charsize
=
1
;
#endif
count
/=
state
->
charsize
;
count
/=
state
->
charsize
;
...
@@ -980,6 +1080,8 @@ _setup(SRE_STATE* state, PyObject* args)
...
@@ -980,6 +1080,8 @@ _setup(SRE_STATE* state, PyObject* args)
state
->
start
=
(
void
*
)
((
char
*
)
ptr
+
start
*
state
->
charsize
);
state
->
start
=
(
void
*
)
((
char
*
)
ptr
+
start
*
state
->
charsize
);
state
->
end
=
(
void
*
)
((
char
*
)
ptr
+
end
*
state
->
charsize
);
state
->
end
=
(
void
*
)
((
char
*
)
ptr
+
end
*
state
->
charsize
);
state
->
lastmark
=
0
;
/* FIXME: dynamic! */
/* FIXME: dynamic! */
for
(
i
=
0
;
i
<
64
;
i
++
)
for
(
i
=
0
;
i
<
64
;
i
++
)
state
->
mark
[
i
]
=
NULL
;
state
->
mark
[
i
]
=
NULL
;
...
@@ -988,6 +1090,15 @@ _setup(SRE_STATE* state, PyObject* args)
...
@@ -988,6 +1090,15 @@ _setup(SRE_STATE* state, PyObject* args)
state
->
stackbase
=
0
;
state
->
stackbase
=
0
;
state
->
stacksize
=
0
;
state
->
stacksize
=
0
;
if
(
pattern
->
flags
&
SRE_FLAG_LOCALE
)
state
->
tolower
=
sre_tolower_locale
;
#if defined(HAVE_UNICODE)
else
if
(
pattern
->
flags
&
SRE_FLAG_UNICODE
)
state
->
tolower
=
sre_tolower_unicode
;
#endif
else
state
->
tolower
=
sre_tolower
;
return
string
;
return
string
;
}
}
...
@@ -999,8 +1110,8 @@ _pattern_new_match(PatternObject* pattern, SRE_STATE* state,
...
@@ -999,8 +1110,8 @@ _pattern_new_match(PatternObject* pattern, SRE_STATE* state,
MatchObject
*
match
;
MatchObject
*
match
;
int
i
,
j
;
int
i
,
j
;
char
*
base
;
TRACE
((
"status = %d
\n
"
,
status
))
;
int
n
;
if
(
status
>
0
)
{
if
(
status
>
0
)
{
...
@@ -1017,19 +1128,18 @@ _pattern_new_match(PatternObject* pattern, SRE_STATE* state,
...
@@ -1017,19 +1128,18 @@ _pattern_new_match(PatternObject* pattern, SRE_STATE* state,
match
->
groups
=
pattern
->
groups
+
1
;
match
->
groups
=
pattern
->
groups
+
1
;
base
=
(
char
*
)
state
->
beginning
;
n
=
state
->
charsize
;
/* group zero */
/* group zero */
match
->
mark
[
0
]
=
((
char
*
)
state
->
start
-
match
->
mark
[
0
]
=
((
char
*
)
state
->
start
-
base
)
/
n
;
(
char
*
)
state
->
beginning
)
/
state
->
charsize
;
match
->
mark
[
1
]
=
((
char
*
)
state
->
ptr
-
base
)
/
n
;
match
->
mark
[
1
]
=
((
char
*
)
state
->
ptr
-
(
char
*
)
state
->
beginning
)
/
state
->
charsize
;
/* fill in the rest of the groups */
/* fill in the rest of the groups */
for
(
i
=
j
=
0
;
i
<
pattern
->
groups
;
i
++
,
j
+=
2
)
for
(
i
=
j
=
0
;
i
<
pattern
->
groups
;
i
++
,
j
+=
2
)
if
(
state
->
mark
[
j
]
!=
NULL
&&
state
->
mark
[
j
+
1
]
!=
NULL
)
{
if
(
j
+
1
<=
state
->
lastmark
&&
state
->
mark
[
j
]
&&
state
->
mark
[
j
+
1
])
{
match
->
mark
[
j
+
2
]
=
((
char
*
)
state
->
mark
[
j
]
-
match
->
mark
[
j
+
2
]
=
((
char
*
)
state
->
mark
[
j
]
-
base
)
/
n
;
(
char
*
)
state
->
beginning
)
/
state
->
charsize
;
match
->
mark
[
j
+
3
]
=
((
char
*
)
state
->
mark
[
j
+
1
]
-
base
)
/
n
;
match
->
mark
[
j
+
3
]
=
((
char
*
)
state
->
mark
[
j
+
1
]
-
(
char
*
)
state
->
beginning
)
/
state
->
charsize
;
}
else
}
else
match
->
mark
[
j
+
2
]
=
match
->
mark
[
j
+
3
]
=
-
1
;
/* undefined */
match
->
mark
[
j
+
2
]
=
match
->
mark
[
j
+
3
]
=
-
1
;
/* undefined */
...
@@ -1050,7 +1160,7 @@ _pattern_new_match(PatternObject* pattern, SRE_STATE* state,
...
@@ -1050,7 +1160,7 @@ _pattern_new_match(PatternObject* pattern, SRE_STATE* state,
}
}
static
PyObject
*
static
PyObject
*
_pattern_cursor
(
P
y
Object
*
pattern
,
PyObject
*
args
)
_pattern_cursor
(
P
attern
Object
*
pattern
,
PyObject
*
args
)
{
{
/* create search state object */
/* create search state object */
...
@@ -1062,14 +1172,14 @@ _pattern_cursor(PyObject* pattern, PyObject* args)
...
@@ -1062,14 +1172,14 @@ _pattern_cursor(PyObject* pattern, PyObject* args)
if
(
self
==
NULL
)
if
(
self
==
NULL
)
return
NULL
;
return
NULL
;
string
=
_setup
(
&
self
->
state
,
args
);
string
=
_setup
(
&
self
->
state
,
pattern
,
args
);
if
(
!
string
)
{
if
(
!
string
)
{
/* FIXME: dealloc cursor object */
PyObject_DEL
(
self
);
return
NULL
;
return
NULL
;
}
}
Py_INCREF
(
pattern
);
Py_INCREF
(
pattern
);
self
->
pattern
=
pattern
;
self
->
pattern
=
(
PyObject
*
)
pattern
;
Py_INCREF
(
string
);
Py_INCREF
(
string
);
self
->
string
=
string
;
self
->
string
=
string
;
...
@@ -1093,7 +1203,7 @@ _pattern_match(PatternObject* self, PyObject* args)
...
@@ -1093,7 +1203,7 @@ _pattern_match(PatternObject* self, PyObject* args)
PyObject
*
string
;
PyObject
*
string
;
int
status
;
int
status
;
string
=
_setup
(
&
state
,
args
);
string
=
_setup
(
&
state
,
self
,
args
);
if
(
!
string
)
if
(
!
string
)
return
NULL
;
return
NULL
;
...
@@ -1102,7 +1212,9 @@ _pattern_match(PatternObject* self, PyObject* args)
...
@@ -1102,7 +1212,9 @@ _pattern_match(PatternObject* self, PyObject* args)
if
(
state
.
charsize
==
1
)
{
if
(
state
.
charsize
==
1
)
{
status
=
sre_match
(
&
state
,
PatternObject_GetCode
(
self
));
status
=
sre_match
(
&
state
,
PatternObject_GetCode
(
self
));
}
else
{
}
else
{
#if defined(HAVE_UNICODE)
status
=
sre_umatch
(
&
state
,
PatternObject_GetCode
(
self
));
status
=
sre_umatch
(
&
state
,
PatternObject_GetCode
(
self
));
#endif
}
}
_stack_free
(
&
state
);
_stack_free
(
&
state
);
...
@@ -1117,14 +1229,16 @@ _pattern_search(PatternObject* self, PyObject* args)
...
@@ -1117,14 +1229,16 @@ _pattern_search(PatternObject* self, PyObject* args)
PyObject
*
string
;
PyObject
*
string
;
int
status
;
int
status
;
string
=
_setup
(
&
state
,
args
);
string
=
_setup
(
&
state
,
self
,
args
);
if
(
!
string
)
if
(
!
string
)
return
NULL
;
return
NULL
;
if
(
state
.
charsize
==
1
)
{
if
(
state
.
charsize
==
1
)
{
status
=
sre_search
(
&
state
,
PatternObject_GetCode
(
self
));
status
=
sre_search
(
&
state
,
PatternObject_GetCode
(
self
));
}
else
{
}
else
{
#if defined(HAVE_UNICODE)
status
=
sre_usearch
(
&
state
,
PatternObject_GetCode
(
self
));
status
=
sre_usearch
(
&
state
,
PatternObject_GetCode
(
self
));
#endif
}
}
_stack_free
(
&
state
);
_stack_free
(
&
state
);
...
@@ -1140,7 +1254,7 @@ call(char* function, PyObject* args)
...
@@ -1140,7 +1254,7 @@ call(char* function, PyObject* args)
PyObject
*
func
;
PyObject
*
func
;
PyObject
*
result
;
PyObject
*
result
;
name
=
PyString_FromString
(
"sre"
);
name
=
PyString_FromString
(
MODULE
);
if
(
!
name
)
if
(
!
name
)
return
NULL
;
return
NULL
;
module
=
PyImport_Import
(
name
);
module
=
PyImport_Import
(
name
);
...
@@ -1203,46 +1317,47 @@ _pattern_findall(PatternObject* self, PyObject* args)
...
@@ -1203,46 +1317,47 @@ _pattern_findall(PatternObject* self, PyObject* args)
PyObject
*
list
;
PyObject
*
list
;
int
status
;
int
status
;
string
=
_setup
(
&
state
,
args
);
string
=
_setup
(
&
state
,
self
,
args
);
if
(
!
string
)
if
(
!
string
)
return
NULL
;
return
NULL
;
list
=
PyList_New
(
0
);
list
=
PyList_New
(
0
);
while
(
state
.
start
<
state
.
end
)
{
while
(
state
.
start
<
=
state
.
end
)
{
PyObject
*
item
;
PyObject
*
item
;
state
.
ptr
=
state
.
start
;
state
.
ptr
=
state
.
start
;
if
(
state
.
charsize
==
1
)
{
if
(
state
.
charsize
==
1
)
{
status
=
sre_
mat
ch
(
&
state
,
PatternObject_GetCode
(
self
));
status
=
sre_
sear
ch
(
&
state
,
PatternObject_GetCode
(
self
));
}
else
{
}
else
{
status
=
sre_umatch
(
&
state
,
PatternObject_GetCode
(
self
));
#if defined(HAVE_UNICODE)
status
=
sre_usearch
(
&
state
,
PatternObject_GetCode
(
self
));
#endif
}
}
if
(
status
>=
0
)
{
if
(
status
>
0
)
{
if
(
status
==
0
)
state
.
ptr
=
(
void
*
)
((
char
*
)
state
.
start
+
1
);
/* FIXME: if one group is defined, slice that group
instead. if multiple groups are defined, add tuple
containing all slices */
item
=
PySequence_GetSlice
(
item
=
PySequence_GetSlice
(
string
,
string
,
((
char
*
)
state
.
start
-
(
char
*
)
state
.
beginning
)
,
((
char
*
)
state
.
start
-
(
char
*
)
state
.
beginning
)
/
state
.
charsize
,
((
char
*
)
state
.
ptr
-
(
char
*
)
state
.
beginning
)
);
((
char
*
)
state
.
ptr
-
(
char
*
)
state
.
beginning
)
/
state
.
charsize
);
if
(
!
item
)
if
(
!
item
)
goto
error
;
goto
error
;
if
(
PyList_Append
(
list
,
item
)
<
0
)
if
(
PyList_Append
(
list
,
item
)
<
0
)
goto
error
;
goto
error
;
state
.
start
=
state
.
ptr
;
if
(
state
.
ptr
==
state
.
start
)
state
.
start
=
(
void
*
)
((
char
*
)
state
.
ptr
+
state
.
charsize
);
else
state
.
start
=
state
.
ptr
;
}
else
{
}
else
{
if
(
status
==
0
)
break
;
/* internal error */
/* internal error */
PyErr_SetString
(
PyErr_SetString
(
PyExc_RuntimeError
,
PyExc_RuntimeError
,
...
@@ -1347,20 +1462,26 @@ getslice_by_index(MatchObject* self, int index)
...
@@ -1347,20 +1462,26 @@ getslice_by_index(MatchObject* self, int index)
);
);
}
}
static
PyObject
*
static
int
get
slice
(
MatchObject
*
self
,
PyObject
*
index
)
get
index
(
MatchObject
*
self
,
PyObject
*
index
)
{
{
if
(
!
PyInt_Check
(
index
)
&&
self
->
pattern
->
groupindex
!=
NULL
)
{
if
(
!
PyInt_Check
(
index
)
&&
self
->
pattern
->
groupindex
!=
NULL
)
{
/* FIXME: resource leak? */
/* FIXME: resource leak? */
index
=
PyObject_GetItem
(
self
->
pattern
->
groupindex
,
index
);
index
=
PyObject_GetItem
(
self
->
pattern
->
groupindex
,
index
);
if
(
!
index
)
if
(
!
index
)
return
NULL
;
return
-
1
;
}
}
if
(
PyInt_Check
(
index
))
if
(
PyInt_Check
(
index
))
return
getslice_by_index
(
self
,
(
int
)
PyInt_AS_LONG
(
index
)
);
return
(
int
)
PyInt_AS_LONG
(
index
);
return
getslice_by_index
(
self
,
-
1
);
/* signal error */
return
-
1
;
}
static
PyObject
*
getslice
(
MatchObject
*
self
,
PyObject
*
index
)
{
return
getslice_by_index
(
self
,
getindex
(
self
,
index
));
}
}
static
PyObject
*
static
PyObject
*
...
@@ -1441,10 +1562,10 @@ _match_groupdict(MatchObject* self, PyObject* args)
...
@@ -1441,10 +1562,10 @@ _match_groupdict(MatchObject* self, PyObject* args)
if
(
!
keys
)
if
(
!
keys
)
return
NULL
;
return
NULL
;
for
(
index
=
0
;
index
<
Py
Sequence_Length
(
keys
);
index
++
)
{
for
(
index
=
0
;
index
<
Py
List_GET_SIZE
(
keys
);
index
++
)
{
PyObject
*
key
;
PyObject
*
key
;
PyObject
*
item
;
PyObject
*
item
;
key
=
Py
Sequence_GetItem
(
keys
,
index
);
key
=
Py
List_GET_ITEM
(
keys
,
index
);
if
(
!
key
)
{
if
(
!
key
)
{
Py_DECREF
(
keys
);
Py_DECREF
(
keys
);
Py_DECREF
(
result
);
Py_DECREF
(
result
);
...
@@ -1469,10 +1590,14 @@ _match_groupdict(MatchObject* self, PyObject* args)
...
@@ -1469,10 +1590,14 @@ _match_groupdict(MatchObject* self, PyObject* args)
static
PyObject
*
static
PyObject
*
_match_start
(
MatchObject
*
self
,
PyObject
*
args
)
_match_start
(
MatchObject
*
self
,
PyObject
*
args
)
{
{
int
index
=
0
;
int
index
;
if
(
!
PyArg_ParseTuple
(
args
,
"|i"
,
&
index
))
PyObject
*
index_
=
Py_False
;
if
(
!
PyArg_ParseTuple
(
args
,
"|O"
,
&
index_
))
return
NULL
;
return
NULL
;
index
=
getindex
(
self
,
index_
);
if
(
index
<
0
||
index
>=
self
->
groups
)
{
if
(
index
<
0
||
index
>=
self
->
groups
)
{
PyErr_SetString
(
PyErr_SetString
(
PyExc_IndexError
,
PyExc_IndexError
,
...
@@ -1492,10 +1617,14 @@ _match_start(MatchObject* self, PyObject* args)
...
@@ -1492,10 +1617,14 @@ _match_start(MatchObject* self, PyObject* args)
static
PyObject
*
static
PyObject
*
_match_end
(
MatchObject
*
self
,
PyObject
*
args
)
_match_end
(
MatchObject
*
self
,
PyObject
*
args
)
{
{
int
index
=
0
;
int
index
;
if
(
!
PyArg_ParseTuple
(
args
,
"|i"
,
&
index
))
PyObject
*
index_
=
Py_False
;
if
(
!
PyArg_ParseTuple
(
args
,
"|O"
,
&
index_
))
return
NULL
;
return
NULL
;
index
=
getindex
(
self
,
index_
);
if
(
index
<
0
||
index
>=
self
->
groups
)
{
if
(
index
<
0
||
index
>=
self
->
groups
)
{
PyErr_SetString
(
PyErr_SetString
(
PyExc_IndexError
,
PyExc_IndexError
,
...
@@ -1515,10 +1644,14 @@ _match_end(MatchObject* self, PyObject* args)
...
@@ -1515,10 +1644,14 @@ _match_end(MatchObject* self, PyObject* args)
static
PyObject
*
static
PyObject
*
_match_span
(
MatchObject
*
self
,
PyObject
*
args
)
_match_span
(
MatchObject
*
self
,
PyObject
*
args
)
{
{
int
index
=
0
;
int
index
;
if
(
!
PyArg_ParseTuple
(
args
,
"|i"
,
&
index
))
PyObject
*
index_
=
Py_False
;
if
(
!
PyArg_ParseTuple
(
args
,
"|O"
,
&
index_
))
return
NULL
;
return
NULL
;
index
=
getindex
(
self
,
index_
);
if
(
index
<
0
||
index
>=
self
->
groups
)
{
if
(
index
<
0
||
index
>=
self
->
groups
)
{
PyErr_SetString
(
PyErr_SetString
(
PyExc_IndexError
,
PyExc_IndexError
,
...
@@ -1615,16 +1748,18 @@ _cursor_match(CursorObject* self, PyObject* args)
...
@@ -1615,16 +1748,18 @@ _cursor_match(CursorObject* self, PyObject* args)
if
(
state
->
charsize
==
1
)
{
if
(
state
->
charsize
==
1
)
{
status
=
sre_match
(
state
,
PatternObject_GetCode
(
self
->
pattern
));
status
=
sre_match
(
state
,
PatternObject_GetCode
(
self
->
pattern
));
}
else
{
}
else
{
#if defined(HAVE_UNICODE)
status
=
sre_umatch
(
state
,
PatternObject_GetCode
(
self
->
pattern
));
status
=
sre_umatch
(
state
,
PatternObject_GetCode
(
self
->
pattern
));
#endif
}
}
match
=
_pattern_new_match
((
PatternObject
*
)
self
->
pattern
,
match
=
_pattern_new_match
((
PatternObject
*
)
self
->
pattern
,
state
,
self
->
string
,
status
);
state
,
self
->
string
,
status
);
if
(
status
>=
0
)
if
(
status
==
0
||
state
->
ptr
==
state
->
start
)
state
->
start
=
state
->
ptr
;
state
->
start
=
(
void
*
)
((
char
*
)
state
->
ptr
+
state
->
charsize
)
;
else
else
state
->
start
=
(
char
*
)
state
->
ptr
+
state
->
charsize
;
state
->
start
=
state
->
ptr
;
return
match
;
return
match
;
}
}
...
@@ -1642,7 +1777,9 @@ _cursor_search(CursorObject* self, PyObject* args)
...
@@ -1642,7 +1777,9 @@ _cursor_search(CursorObject* self, PyObject* args)
if
(
state
->
charsize
==
1
)
{
if
(
state
->
charsize
==
1
)
{
status
=
sre_search
(
state
,
PatternObject_GetCode
(
self
->
pattern
));
status
=
sre_search
(
state
,
PatternObject_GetCode
(
self
->
pattern
));
}
else
{
}
else
{
#if defined(HAVE_UNICODE)
status
=
sre_usearch
(
state
,
PatternObject_GetCode
(
self
->
pattern
));
status
=
sre_usearch
(
state
,
PatternObject_GetCode
(
self
->
pattern
));
#endif
}
}
match
=
_pattern_new_match
((
PatternObject
*
)
self
->
pattern
,
match
=
_pattern_new_match
((
PatternObject
*
)
self
->
pattern
,
...
@@ -1693,12 +1830,13 @@ statichere PyTypeObject Cursor_Type = {
...
@@ -1693,12 +1830,13 @@ statichere PyTypeObject Cursor_Type = {
static
PyMethodDef
_functions
[]
=
{
static
PyMethodDef
_functions
[]
=
{
{
"compile"
,
_compile
,
1
},
{
"compile"
,
_compile
,
1
},
{
"getcodesize"
,
_getcodesize
,
1
},
{
"getcodesize"
,
sre_codesize
,
1
},
{
"getlower"
,
sre_lower
,
1
},
{
NULL
,
NULL
}
{
NULL
,
NULL
}
};
};
void
void
#if
def WIN32
#if
defined(WIN32)
__declspec
(
dllexport
)
__declspec
(
dllexport
)
#endif
#endif
init_sre
()
init_sre
()
...
@@ -1707,7 +1845,7 @@ init_sre()
...
@@ -1707,7 +1845,7 @@ init_sre()
Pattern_Type
.
ob_type
=
Match_Type
.
ob_type
=
Pattern_Type
.
ob_type
=
Match_Type
.
ob_type
=
Cursor_Type
.
ob_type
=
&
PyType_Type
;
Cursor_Type
.
ob_type
=
&
PyType_Type
;
Py_InitModule
(
"_
sre"
,
_functions
);
Py_InitModule
(
"_
"
MODULE
,
_functions
);
}
}
#endif
#endif
/* !defined(SRE_RECURSIVE) */
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment