Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
436c3d58
Kaydet (Commit)
436c3d58
authored
Haz 29, 2000
tarafından
Fredrik Lundh
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
towards 1.6b1
üst
102f3ad6
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
184 additions
and
90 deletions
+184
-90
sre.py
Lib/sre.py
+17
-6
sre_compile.py
Lib/sre_compile.py
+39
-27
sre_constants.py
Lib/sre_constants.py
+50
-22
sre_parse.py
Lib/sre_parse.py
+78
-35
_sre.c
Modules/_sre.c
+0
-0
No files found.
Lib/sre.py
Dosyayı görüntüle @
436c3d58
...
...
@@ -12,6 +12,7 @@
#
import
sre_compile
import
sre_parse
# flags
I
=
IGNORECASE
=
sre_compile
.
SRE_FLAG_IGNORECASE
...
...
@@ -20,6 +21,13 @@ M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE
S
=
DOTALL
=
sre_compile
.
SRE_FLAG_DOTALL
X
=
VERBOSE
=
sre_compile
.
SRE_FLAG_VERBOSE
# sre extensions (may or may not be in 1.6 final)
T
=
TEMPLATE
=
sre_compile
.
SRE_FLAG_TEMPLATE
U
=
UNICODE
=
sre_compile
.
SRE_FLAG_UNICODE
# sre exception
error
=
sre_parse
.
error
# --------------------------------------------------------------------
# public interface
...
...
@@ -46,6 +54,9 @@ def findall(pattern, string, maxsplit=0):
def
compile
(
pattern
,
flags
=
0
):
return
_compile
(
pattern
,
flags
)
def
template
(
pattern
,
flags
=
0
):
return
_compile
(
pattern
,
flags
|
T
)
def
escape
(
pattern
):
s
=
list
(
pattern
)
for
i
in
range
(
len
(
pattern
)):
...
...
@@ -83,18 +94,14 @@ def _sub(pattern, template, string, count=0):
# internal: pattern.sub implementation hook
return
_subn
(
pattern
,
template
,
string
,
count
)[
0
]
def
_expand
(
match
,
template
):
# internal: expand template
return
template
# FIXME
def
_subn
(
pattern
,
template
,
string
,
count
=
0
):
# internal: pattern.subn implementation hook
if
callable
(
template
):
filter
=
template
else
:
# FIXME: prepare template
template
=
sre_parse
.
parse_template
(
template
,
pattern
)
def
filter
(
match
,
template
=
template
):
return
_expand
(
match
,
template
)
return
sre_parse
.
expand_template
(
template
,
match
)
n
=
i
=
0
s
=
[]
append
=
s
.
append
...
...
@@ -108,6 +115,8 @@ def _subn(pattern, template, string, count=0):
append
(
string
[
i
:
j
])
append
(
filter
(
m
))
i
=
m
.
end
()
if
i
<=
j
:
break
n
=
n
+
1
if
i
<
len
(
string
):
append
(
string
[
i
:])
...
...
@@ -126,6 +135,8 @@ def _split(pattern, string, maxsplit=0):
j
=
m
.
start
()
append
(
string
[
i
:
j
])
i
=
m
.
end
()
if
i
<=
j
:
break
n
=
n
+
1
if
i
<
len
(
string
):
append
(
string
[
i
:])
...
...
Lib/sre_compile.py
Dosyayı görüntüle @
436c3d58
...
...
@@ -48,7 +48,7 @@ class Code:
print
self
.
data
raise
def
_compile
(
code
,
pattern
,
flags
,
level
=
0
):
def
_compile
(
code
,
pattern
,
flags
):
append
=
code
.
append
for
op
,
av
in
pattern
:
if
op
is
ANY
:
...
...
@@ -70,23 +70,26 @@ def _compile(code, pattern, flags, level=0):
tail
=
[]
for
av
in
av
[
1
]:
skip
=
len
(
code
);
append
(
0
)
_compile
(
code
,
av
,
flags
,
level
)
append
(
OPCODES
[
JUMP
])
tail
.
append
(
len
(
code
));
append
(
0
)
_compile
(
code
,
av
,
flags
)
## append(OPCODES[SUCCESS])
append
(
OPCODES
[
JUMP
])
tail
.
append
(
len
(
code
));
append
(
0
)
code
[
skip
]
=
len
(
code
)
-
skip
append
(
0
)
# end of branch
for
tail
in
tail
:
for
tail
in
tail
:
code
[
tail
]
=
len
(
code
)
-
tail
elif
op
is
CALL
:
append
(
OPCODES
[
op
])
skip
=
len
(
code
);
append
(
0
)
_compile
(
code
,
av
,
flags
,
level
+
1
)
_compile
(
code
,
av
,
flags
)
append
(
OPCODES
[
SUCCESS
])
code
[
skip
]
=
len
(
code
)
-
skip
elif
op
is
CATEGORY
:
# not used by current parser
elif
op
is
CATEGORY
:
append
(
OPCODES
[
op
])
if
flags
&
SRE_FLAG_LOCALE
:
append
(
CH_LOCALE
[
CHCODES
[
av
]])
elif
flags
&
SRE_FLAG_UNICODE
:
append
(
CH_UNICODE
[
CHCODES
[
av
]])
else
:
append
(
CHCODES
[
av
])
elif
op
is
GROUP
:
...
...
@@ -98,8 +101,8 @@ def _compile(code, pattern, flags, level=0):
elif
op
is
IN
:
if
flags
&
SRE_FLAG_IGNORECASE
:
append
(
OPCODES
[
OP_IGNORE
[
op
]])
def
fixup
(
literal
):
return
ord
(
literal
.
lower
()
)
def
fixup
(
literal
,
flags
=
flags
):
return
_sre
.
getlower
(
ord
(
literal
),
flags
)
else
:
append
(
OPCODES
[
op
])
fixup
=
ord
...
...
@@ -116,6 +119,8 @@ def _compile(code, pattern, flags, level=0):
elif
op
is
CATEGORY
:
if
flags
&
SRE_FLAG_LOCALE
:
append
(
CH_LOCALE
[
CHCODES
[
av
]])
elif
flags
&
SRE_FLAG_UNICODE
:
append
(
CH_UNICODE
[
CHCODES
[
av
]])
else
:
append
(
CHCODES
[
av
])
else
:
...
...
@@ -125,42 +130,49 @@ def _compile(code, pattern, flags, level=0):
elif
op
in
(
LITERAL
,
NOT_LITERAL
):
if
flags
&
SRE_FLAG_IGNORECASE
:
append
(
OPCODES
[
OP_IGNORE
[
op
]])
append
(
ord
(
av
.
lower
()))
else
:
append
(
OPCODES
[
op
])
append
(
ord
(
av
))
append
(
ord
(
av
))
elif
op
is
MARK
:
append
(
OPCODES
[
op
])
append
(
av
)
elif
op
in
(
REPEAT
,
MIN_REPEAT
,
MAX_REPEAT
):
lo
,
hi
=
av
[
2
]
.
getwidth
()
if
lo
==
0
:
raise
SyntaxError
,
"cannot repeat zero-width items"
if
lo
==
hi
==
1
and
op
is
MAX_REPEAT
:
append
(
OPCODES
[
MAX_REPEAT_ONE
])
if
flags
&
SRE_FLAG_TEMPLATE
:
append
(
OPCODES
[
REPEAT
])
skip
=
len
(
code
);
append
(
0
)
append
(
av
[
0
])
append
(
av
[
1
])
_compile
(
code
,
av
[
2
],
flags
,
level
+
1
)
_compile
(
code
,
av
[
2
],
flags
)
append
(
OPCODES
[
SUCCESS
])
code
[
skip
]
=
len
(
code
)
-
skip
else
:
append
(
OPCODES
[
op
])
skip
=
len
(
code
);
append
(
0
)
append
(
av
[
0
])
append
(
av
[
1
])
_compile
(
code
,
av
[
2
],
flags
,
level
+
1
)
if
op
is
MIN_REPEAT
:
append
(
OPCODES
[
MIN_UNTIL
])
lo
,
hi
=
av
[
2
]
.
getwidth
()
if
lo
==
0
:
raise
error
,
"nothing to repeat"
if
0
and
lo
==
hi
==
1
and
op
is
MAX_REPEAT
:
# FIXME: <fl> need a better way to figure out when
# it's safe to use this one (in the parser, probably)
append
(
OPCODES
[
MAX_REPEAT_ONE
])
skip
=
len
(
code
);
append
(
0
)
append
(
av
[
0
])
append
(
av
[
1
])
_compile
(
code
,
av
[
2
],
flags
)
append
(
OPCODES
[
SUCCESS
])
code
[
skip
]
=
len
(
code
)
-
skip
else
:
append
(
OPCODES
[
MAX_UNTIL
])
code
[
skip
]
=
len
(
code
)
-
skip
append
(
OPCODES
[
op
])
skip
=
len
(
code
);
append
(
0
)
append
(
av
[
0
])
append
(
av
[
1
])
_compile
(
code
,
av
[
2
],
flags
)
append
(
OPCODES
[
SUCCESS
])
code
[
skip
]
=
len
(
code
)
-
skip
elif
op
is
SUBPATTERN
:
group
=
av
[
0
]
if
group
:
append
(
OPCODES
[
MARK
])
append
((
group
-
1
)
*
2
)
_compile
(
code
,
av
[
1
],
flags
,
level
+
1
)
_compile
(
code
,
av
[
1
],
flags
)
if
group
:
append
(
OPCODES
[
MARK
])
append
((
group
-
1
)
*
2
+
1
)
...
...
Lib/sre_constants.py
Dosyayı görüntüle @
436c3d58
...
...
@@ -15,6 +15,11 @@
# other compatibility work.
#
# should this really be here?
class
error
(
Exception
):
pass
# operators
FAILURE
=
"failure"
...
...
@@ -30,20 +35,20 @@ GROUP = "group"
GROUP_IGNORE
=
"group_ignore"
IN
=
"in"
IN_IGNORE
=
"in_ignore"
INFO
=
"info"
JUMP
=
"jump"
LITERAL
=
"literal"
LITERAL_IGNORE
=
"literal_ignore"
MARK
=
"mark"
MAX_REPEAT
=
"max_repeat"
MAX_REPEAT_ONE
=
"max_repeat_one"
MAX_UNTIL
=
"max_until"
MIN_REPEAT
=
"min_repeat"
MIN_UNTIL
=
"min_until"
NEGATE
=
"negate"
NOT_LITERAL
=
"not_literal"
NOT_LITERAL_IGNORE
=
"not_literal_ignore"
RANGE
=
"range"
REPEAT
=
"repeat"
REPEAT_ONE
=
"repeat_one"
SUBPATTERN
=
"subpattern"
# positions
...
...
@@ -63,14 +68,16 @@ CATEGORY_WORD = "category_word"
CATEGORY_NOT_WORD
=
"category_not_word"
CATEGORY_LINEBREAK
=
"category_linebreak"
CATEGORY_NOT_LINEBREAK
=
"category_not_linebreak"
CATEGORY_LOC_DIGIT
=
"category_loc_digit"
CATEGORY_LOC_NOT_DIGIT
=
"category_loc_not_digit"
CATEGORY_LOC_SPACE
=
"category_loc_space"
CATEGORY_LOC_NOT_SPACE
=
"category_loc_not_space"
CATEGORY_LOC_WORD
=
"category_loc_word"
CATEGORY_LOC_NOT_WORD
=
"category_loc_not_word"
CATEGORY_LOC_LINEBREAK
=
"category_loc_linebreak"
CATEGORY_LOC_NOT_LINEBREAK
=
"category_loc_not_linebreak"
CATEGORY_UNI_DIGIT
=
"category_uni_digit"
CATEGORY_UNI_NOT_DIGIT
=
"category_uni_not_digit"
CATEGORY_UNI_SPACE
=
"category_uni_space"
CATEGORY_UNI_NOT_SPACE
=
"category_uni_not_space"
CATEGORY_UNI_WORD
=
"category_uni_word"
CATEGORY_UNI_NOT_WORD
=
"category_uni_not_word"
CATEGORY_UNI_LINEBREAK
=
"category_uni_linebreak"
CATEGORY_UNI_NOT_LINEBREAK
=
"category_uni_not_linebreak"
OPCODES
=
[
...
...
@@ -85,12 +92,13 @@ OPCODES = [
CATEGORY
,
GROUP
,
GROUP_IGNORE
,
IN
,
IN_IGNORE
,
INFO
,
JUMP
,
LITERAL
,
LITERAL_IGNORE
,
MARK
,
MAX_REPEAT
,
MAX_UNTIL
,
MAX_REPEAT
,
MAX_REPEAT_ONE
,
MIN_REPEAT
,
MIN_UNTIL
,
MIN_REPEAT
,
NOT_LITERAL
,
NOT_LITERAL_IGNORE
,
NEGATE
,
RANGE
,
...
...
@@ -106,10 +114,11 @@ ATCODES = [
CHCODES
=
[
CATEGORY_DIGIT
,
CATEGORY_NOT_DIGIT
,
CATEGORY_SPACE
,
CATEGORY_NOT_SPACE
,
CATEGORY_WORD
,
CATEGORY_NOT_WORD
,
CATEGORY_LINEBREAK
,
CATEGORY_NOT_LINEBREAK
,
CATEGORY_LOC_DIGIT
,
CATEGORY_LOC_NOT_DIGIT
,
CATEGORY_LOC_SPACE
,
CATEGORY_LOC_NOT_SPACE
,
CATEGORY_LOC_WORD
,
CATEGORY_LOC_NOT_WORD
,
CATEGORY_LOC_LINEBREAK
,
CATEGORY_LOC_NOT_LINEBREAK
CATEGORY_LINEBREAK
,
CATEGORY_NOT_LINEBREAK
,
CATEGORY_LOC_WORD
,
CATEGORY_LOC_NOT_WORD
,
CATEGORY_UNI_DIGIT
,
CATEGORY_UNI_NOT_DIGIT
,
CATEGORY_UNI_SPACE
,
CATEGORY_UNI_NOT_SPACE
,
CATEGORY_UNI_WORD
,
CATEGORY_UNI_NOT_WORD
,
CATEGORY_UNI_LINEBREAK
,
CATEGORY_UNI_NOT_LINEBREAK
]
def
makedict
(
list
):
...
...
@@ -138,23 +147,35 @@ AT_MULTILINE = {
}
CH_LOCALE
=
{
CATEGORY_DIGIT
:
CATEGORY_
LOC_
DIGIT
,
CATEGORY_NOT_DIGIT
:
CATEGORY_
LOC_
NOT_DIGIT
,
CATEGORY_SPACE
:
CATEGORY_
LOC_
SPACE
,
CATEGORY_NOT_SPACE
:
CATEGORY_
LOC_
NOT_SPACE
,
CATEGORY_DIGIT
:
CATEGORY_DIGIT
,
CATEGORY_NOT_DIGIT
:
CATEGORY_NOT_DIGIT
,
CATEGORY_SPACE
:
CATEGORY_SPACE
,
CATEGORY_NOT_SPACE
:
CATEGORY_NOT_SPACE
,
CATEGORY_WORD
:
CATEGORY_LOC_WORD
,
CATEGORY_NOT_WORD
:
CATEGORY_LOC_NOT_WORD
,
CATEGORY_LINEBREAK
:
CATEGORY_LOC_LINEBREAK
,
CATEGORY_NOT_LINEBREAK
:
CATEGORY_LOC_NOT_LINEBREAK
CATEGORY_LINEBREAK
:
CATEGORY_LINEBREAK
,
CATEGORY_NOT_LINEBREAK
:
CATEGORY_NOT_LINEBREAK
}
CH_UNICODE
=
{
CATEGORY_DIGIT
:
CATEGORY_UNI_DIGIT
,
CATEGORY_NOT_DIGIT
:
CATEGORY_UNI_NOT_DIGIT
,
CATEGORY_SPACE
:
CATEGORY_UNI_SPACE
,
CATEGORY_NOT_SPACE
:
CATEGORY_UNI_NOT_SPACE
,
CATEGORY_WORD
:
CATEGORY_UNI_WORD
,
CATEGORY_NOT_WORD
:
CATEGORY_UNI_NOT_WORD
,
CATEGORY_LINEBREAK
:
CATEGORY_UNI_LINEBREAK
,
CATEGORY_NOT_LINEBREAK
:
CATEGORY_UNI_NOT_LINEBREAK
}
# flags
SRE_FLAG_TEMPLATE
=
1
# NYI
SRE_FLAG_TEMPLATE
=
1
SRE_FLAG_IGNORECASE
=
2
SRE_FLAG_LOCALE
=
4
SRE_FLAG_MULTILINE
=
8
SRE_FLAG_DOTALL
=
16
SRE_FLAG_VERBOSE
=
32
SRE_FLAG_UNICODE
=
32
SRE_FLAG_VERBOSE
=
64
if
__name__
==
"__main__"
:
import
string
...
...
@@ -168,5 +189,12 @@ if __name__ == "__main__":
dump
(
f
,
OPCODES
,
"SRE_OP"
)
dump
(
f
,
ATCODES
,
"SRE"
)
dump
(
f
,
CHCODES
,
"SRE"
)
f
.
write
(
"#define SRE_FLAG_TEMPLATE
%
d
\n
"
%
SRE_FLAG_TEMPLATE
)
f
.
write
(
"#define SRE_FLAG_IGNORECASE
%
d
\n
"
%
SRE_FLAG_IGNORECASE
)
f
.
write
(
"#define SRE_FLAG_LOCALE
%
d
\n
"
%
SRE_FLAG_LOCALE
)
f
.
write
(
"#define SRE_FLAG_MULTILINE
%
d
\n
"
%
SRE_FLAG_MULTILINE
)
f
.
write
(
"#define SRE_FLAG_DOTALL
%
d
\n
"
%
SRE_FLAG_DOTALL
)
f
.
write
(
"#define SRE_FLAG_UNICODE
%
d
\n
"
%
SRE_FLAG_UNICODE
)
f
.
write
(
"#define SRE_FLAG_VERBOSE
%
d
\n
"
%
SRE_FLAG_VERBOSE
)
f
.
close
()
print
"done"
Lib/sre_parse.py
Dosyayı görüntüle @
436c3d58
...
...
@@ -20,14 +20,15 @@ import _sre
from
sre_constants
import
*
# FIXME:
should be 65535, but the array module currently chokes on
#
unsigned integers larger than 32767...
# FIXME:
<fl> should be 65535, but the array module currently chokes
#
on unsigned integers larger than 32767 [fixed in 1.6b1?]
MAXREPEAT
=
int
(
2L
**
(
_sre
.
getcodesize
()
*
8
-
1
))
-
1
SPECIAL_CHARS
=
".
\\
[{()*+?^$|"
REPEAT_CHARS
=
"*+?{"
# FIXME: string in tuple tests may explode with if char is unicode :-(
# FIXME: <fl> string in tuple tests may explode with if char is
# unicode [fixed in 1.6b1?]
DIGITS
=
tuple
(
string
.
digits
)
OCTDIGITS
=
tuple
(
"01234567"
)
...
...
@@ -59,12 +60,15 @@ CATEGORIES = {
}
FLAGS
=
{
# standard flags
"i"
:
SRE_FLAG_IGNORECASE
,
"L"
:
SRE_FLAG_LOCALE
,
"m"
:
SRE_FLAG_MULTILINE
,
"s"
:
SRE_FLAG_DOTALL
,
"t"
:
SRE_FLAG_TEMPLATE
,
"x"
:
SRE_FLAG_VERBOSE
,
# extensions
"t"
:
SRE_FLAG_TEMPLATE
,
"u"
:
SRE_FLAG_UNICODE
,
}
class
State
:
...
...
@@ -151,7 +155,7 @@ class Tokenizer:
try
:
c
=
self
.
string
[
self
.
index
+
1
]
except
IndexError
:
raise
SyntaxE
rror
,
"bogus escape"
raise
e
rror
,
"bogus escape"
char
=
char
+
c
self
.
index
=
self
.
index
+
len
(
char
)
return
char
...
...
@@ -205,7 +209,7 @@ def _class_escape(source, escape):
return
LITERAL
,
escape
[
1
]
except
ValueError
:
pass
raise
SyntaxE
rror
,
"bogus escape:
%
s"
%
repr
(
escape
)
raise
e
rror
,
"bogus escape:
%
s"
%
repr
(
escape
)
def
_escape
(
source
,
escape
,
state
):
# handle escape code in expression
...
...
@@ -241,13 +245,12 @@ def _escape(source, escape, state):
return
LITERAL
,
escape
[
1
]
except
ValueError
:
pass
raise
SyntaxE
rror
,
"bogus escape:
%
s"
%
repr
(
escape
)
raise
e
rror
,
"bogus escape:
%
s"
%
repr
(
escape
)
def
_branch
(
pattern
,
items
):
# form a branch operator from a set of items (FIXME: move this
# optimization to the compiler module!)
# form a branch operator from a set of items
subpattern
=
SubPattern
(
pattern
)
...
...
@@ -332,7 +335,7 @@ def _parse(source, state, flags=0):
elif
this
:
code1
=
LITERAL
,
this
else
:
raise
SyntaxE
rror
,
"unexpected end of regular expression"
raise
e
rror
,
"unexpected end of regular expression"
if
source
.
match
(
"-"
):
# potential range
this
=
source
.
get
()
...
...
@@ -346,9 +349,9 @@ def _parse(source, state, flags=0):
else
:
code2
=
LITERAL
,
this
if
code1
[
0
]
!=
LITERAL
or
code2
[
0
]
!=
LITERAL
:
raise
SyntaxE
rror
,
"illegal range"
raise
e
rror
,
"illegal range"
if
len
(
code1
[
1
])
!=
1
or
len
(
code2
[
1
])
!=
1
:
raise
SyntaxE
rror
,
"illegal range"
raise
e
rror
,
"illegal range"
set
.
append
((
RANGE
,
(
code1
[
1
],
code2
[
1
])))
else
:
if
code1
[
0
]
is
IN
:
...
...
@@ -383,19 +386,19 @@ def _parse(source, state, flags=0):
else
:
hi
=
lo
if
not
source
.
match
(
"}"
):
raise
SyntaxE
rror
,
"bogus range"
raise
e
rror
,
"bogus range"
if
lo
:
min
=
int
(
lo
)
if
hi
:
max
=
int
(
hi
)
# FIXME: <fl> check that hi >= lo!
else
:
raise
SyntaxE
rror
,
"not supported"
raise
e
rror
,
"not supported"
# figure out which item to repeat
if
subpattern
:
item
=
subpattern
[
-
1
:]
else
:
raise
SyntaxE
rror
,
"nothing to repeat"
raise
e
rror
,
"nothing to repeat"
if
source
.
match
(
"?"
):
subpattern
[
-
1
]
=
(
MIN_REPEAT
,
(
min
,
max
,
item
))
else
:
...
...
@@ -418,7 +421,7 @@ def _parse(source, state, flags=0):
while
1
:
char
=
source
.
get
()
if
char
is
None
:
raise
SyntaxE
rror
,
"unterminated name"
raise
e
rror
,
"unterminated name"
if
char
==
">"
:
break
# FIXME: check for valid character
...
...
@@ -426,22 +429,21 @@ def _parse(source, state, flags=0):
group
=
1
elif
source
.
match
(
"="
):
# named backreference
raise
SyntaxError
,
"not yet implemented"
raise
error
,
"not yet implemented"
else
:
char
=
source
.
get
()
if
char
is
None
:
raise
SyntaxE
rror
,
"unexpected end of pattern"
raise
SyntaxE
rror
,
"unknown specifier: ?P
%
s"
%
char
raise
e
rror
,
"unexpected end of pattern"
raise
e
rror
,
"unknown specifier: ?P
%
s"
%
char
elif
source
.
match
(
":"
):
# non-capturing group
group
=
2
elif
source
.
match
(
"#"
):
# comment
while
1
:
char
=
source
.
get
()
if
char
is
None
or
char
==
")"
:
if
source
.
next
is
None
or
source
.
next
==
")"
:
break
source
.
get
()
else
:
# flags
while
FLAGS
.
has_key
(
source
.
next
):
...
...
@@ -465,13 +467,13 @@ def _parse(source, state, flags=0):
elif
source
.
match
(
"|"
):
b
.
append
(
p
)
else
:
raise
SyntaxE
rror
,
"group not properly closed"
raise
e
rror
,
"group not properly closed"
else
:
while
1
:
char
=
source
.
get
()
if
char
is
None
or
char
==
")"
:
break
# FIXME: skip characters?
raise
error
,
"unknown extension"
elif
this
==
"^"
:
subpattern
.
append
((
AT
,
AT_BEGINNING
))
...
...
@@ -484,7 +486,7 @@ def _parse(source, state, flags=0):
subpattern
.
append
(
code
)
else
:
raise
SyntaxE
rror
,
"parser error"
raise
e
rror
,
"parser error"
return
subpattern
...
...
@@ -499,17 +501,17 @@ def parse(pattern, flags=0):
if
tail
==
"|"
:
b
.
append
(
p
)
elif
tail
==
")"
:
raise
SyntaxE
rror
,
"unbalanced parenthesis"
raise
e
rror
,
"unbalanced parenthesis"
elif
tail
is
None
:
if
b
:
b
.
append
(
p
)
p
=
_branch
(
state
,
b
)
break
else
:
raise
SyntaxE
rror
,
"bogus characters at end of regular expression"
raise
e
rror
,
"bogus characters at end of regular expression"
return
p
def
parse_
replacement
(
source
,
pattern
):
def
parse_
template
(
source
,
pattern
):
# parse 're' replacement string into list of literals and
# group references
s
=
Tokenizer
(
source
)
...
...
@@ -520,15 +522,56 @@ def parse_replacement(source, pattern):
if
this
is
None
:
break
# end of replacement string
if
this
and
this
[
0
]
==
"
\\
"
:
try
:
a
(
LITERAL
,
ESCAPES
[
this
])
except
KeyError
:
for
char
in
this
:
a
(
LITERAL
,
char
)
if
this
==
"
\\
g"
:
name
=
""
if
s
.
match
(
"<"
):
while
1
:
char
=
s
.
get
()
if
char
is
None
:
raise
error
,
"unterminated index"
if
char
==
">"
:
break
# FIXME: check for valid character
name
=
name
+
char
if
not
name
:
raise
error
,
"bad index"
try
:
index
=
int
(
name
)
except
ValueError
:
try
:
index
=
pattern
.
groupindex
[
name
]
except
KeyError
:
raise
IndexError
,
"unknown index"
a
((
MARK
,
index
))
elif
len
(
this
)
>
1
and
this
[
1
]
in
DIGITS
:
while
s
.
next
in
DIGITS
:
this
=
this
+
s
.
get
()
a
((
MARK
,
int
(
this
[
1
:])))
else
:
try
:
a
(
ESCAPES
[
this
])
except
KeyError
:
for
char
in
this
:
a
((
LITERAL
,
char
))
else
:
a
(
LITERAL
,
this
)
a
(
(
LITERAL
,
this
)
)
return
p
def
expand_template
(
template
,
match
):
# FIXME: <fl> this is sooooo slow. drop in the slicelist
# code instead
p
=
[]
a
=
p
.
append
for
c
,
s
in
template
:
if
c
is
LITERAL
:
a
(
s
)
elif
c
is
MARK
:
s
=
match
.
group
(
s
)
if
s
is
None
:
raise
error
,
"empty group"
a
(
s
)
return
match
.
string
[:
0
]
.
join
(
p
)
if
__name__
==
"__main__"
:
from
pprint
import
pprint
from
testpatterns
import
PATTERNS
...
...
@@ -548,7 +591,7 @@ if __name__ == "__main__":
except
:
pass
a
=
a
+
1
except
SyntaxE
rror
,
v
:
except
e
rror
,
v
:
print
"**"
,
repr
(
pattern
),
v
b
=
b
+
1
print
"-"
*
68
...
...
Modules/_sre.c
Dosyayı görüntüle @
436c3d58
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment