Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
ad3fc44c
Kaydet (Commit)
ad3fc44c
authored
Eki 17, 2003
tarafından
Gustavo Niemeyer
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
Implemented non-recursive SRE matching.
üst
41e2809f
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
101 additions
and
33 deletions
+101
-33
libre.tex
Doc/lib/libre.tex
+9
-0
sre_compile.py
Lib/sre_compile.py
+13
-0
sre_constants.py
Lib/sre_constants.py
+3
-2
sre_parse.py
Lib/sre_parse.py
+39
-1
test_re.py
Lib/test/test_re.py
+7
-7
NEWS
Misc/NEWS
+4
-0
_sre.c
Modules/_sre.c
+0
-0
sre.h
Modules/sre.h
+6
-4
sre_constants.h
Modules/sre_constants.h
+20
-19
No files found.
Doc/lib/libre.tex
Dosyayı görüntüle @
ad3fc44c
...
...
@@ -297,6 +297,15 @@ assertions, the contained pattern must only match strings of some
fixed length. Patterns which start with negative lookbehind
assertions may match at the beginning of the string being searched.
\item[\code{(?(\var{id/name})yes-pattern|no-pattern)}] Will try to match
with \regexp{yes-pattern} if the group with given \var{id} or \var{name}
exists, and with \regexp{no-pattern} if it doesn't. \regexp{|no-pattern}
is optional and can be omitted. For example,
\regexp{(<)?(\e w+@\e w+(?:\e .\e w+)+)(?(1)>)} is a poor email matching
pattern, which will match with \code{'<user@host.com>'} as well as
\code{'user@host.com'}, but not with \code{'<user@host.com'}.
\versionadded{2.3}
\end{list}
The special sequences consist of
\character
{
\e
}
and a character from the
...
...
Lib/sre_compile.py
Dosyayı görüntüle @
ad3fc44c
...
...
@@ -145,6 +145,19 @@ def _compile(code, pattern, flags):
else
:
emit
(
OPCODES
[
op
])
emit
(
av
-
1
)
elif
op
is
GROUPREF_EXISTS
:
emit
(
OPCODES
[
op
])
emit
((
av
[
0
]
-
1
)
*
2
)
skipyes
=
len
(
code
);
emit
(
0
)
_compile
(
code
,
av
[
1
],
flags
)
if
av
[
2
]:
emit
(
OPCODES
[
JUMP
])
skipno
=
len
(
code
);
emit
(
0
)
code
[
skipyes
]
=
len
(
code
)
-
skipyes
+
1
_compile
(
code
,
av
[
2
],
flags
)
code
[
skipno
]
=
len
(
code
)
-
skipno
else
:
code
[
skipyes
]
=
len
(
code
)
-
skipyes
+
1
else
:
raise
ValueError
,
(
"unsupported operand type"
,
op
)
...
...
Lib/sre_constants.py
Dosyayı görüntüle @
ad3fc44c
...
...
@@ -13,7 +13,7 @@
# update when constants are added or removed
MAGIC
=
2003
0419
MAGIC
=
2003
1017
# max code word in this release
...
...
@@ -42,6 +42,7 @@ CATEGORY = "category"
CHARSET
=
"charset"
GROUPREF
=
"groupref"
GROUPREF_IGNORE
=
"groupref_ignore"
GROUPREF_EXISTS
=
"groupref_exists"
IN
=
"in"
IN_IGNORE
=
"in_ignore"
INFO
=
"info"
...
...
@@ -108,7 +109,7 @@ OPCODES = [
CALL
,
CATEGORY
,
CHARSET
,
BIGCHARSET
,
GROUPREF
,
GROUPREF_IGNORE
,
GROUPREF
,
GROUPREF_
EXISTS
,
GROUPREF_
IGNORE
,
IN
,
IN_IGNORE
,
INFO
,
JUMP
,
...
...
Lib/sre_parse.py
Dosyayı görüntüle @
ad3fc44c
...
...
@@ -364,6 +364,20 @@ def _parse_sub(source, state, nested=1):
subpattern
.
append
((
BRANCH
,
(
None
,
items
)))
return
subpattern
def
_parse_sub_cond
(
source
,
state
,
condgroup
):
item_yes
=
_parse
(
source
,
state
)
if
source
.
match
(
"|"
):
item_no
=
_parse
(
source
,
state
)
if
source
.
match
(
"|"
):
raise
error
,
"conditional backref with more than two branches"
else
:
item_no
=
None
if
source
.
next
and
not
source
.
match
(
")"
,
0
):
raise
error
,
"pattern not properly closed"
subpattern
=
SubPattern
(
state
)
subpattern
.
append
((
GROUPREF_EXISTS
,
(
condgroup
,
item_yes
,
item_no
)))
return
subpattern
def
_parse
(
source
,
state
):
# parse a simple pattern
...
...
@@ -499,6 +513,7 @@ def _parse(source, state):
elif
this
==
"("
:
group
=
1
name
=
None
condgroup
=
None
if
source
.
match
(
"?"
):
group
=
0
# options
...
...
@@ -568,6 +583,26 @@ def _parse(source, state):
else
:
subpattern
.
append
((
ASSERT_NOT
,
(
dir
,
p
)))
continue
elif
source
.
match
(
"("
):
# conditional backreference group
condname
=
""
while
1
:
char
=
source
.
get
()
if
char
is
None
:
raise
error
,
"unterminated name"
if
char
==
")"
:
break
condname
=
condname
+
char
group
=
2
if
isname
(
condname
):
condgroup
=
state
.
groupdict
.
get
(
condname
)
if
condgroup
is
None
:
raise
error
,
"unknown group name"
else
:
try
:
condgroup
=
atoi
(
condname
)
except
ValueError
:
raise
error
,
"bad character in group name"
else
:
# flags
if
not
source
.
next
in
FLAGS
:
...
...
@@ -581,7 +616,10 @@ def _parse(source, state):
group
=
None
else
:
group
=
state
.
opengroup
(
name
)
p
=
_parse_sub
(
source
,
state
)
if
condgroup
:
p
=
_parse_sub_cond
(
source
,
state
,
condgroup
)
else
:
p
=
_parse_sub
(
source
,
state
)
if
not
source
.
match
(
")"
):
raise
error
,
"unbalanced parenthesis"
if
group
is
not
None
:
...
...
Lib/test/test_re.py
Dosyayı görüntüle @
ad3fc44c
...
...
@@ -169,7 +169,6 @@ class ReTests(unittest.TestCase):
self
.
assertEqual
(
pat
.
match
(
'ac'
)
.
group
(
1
,
'b2'
,
3
),
(
'a'
,
None
,
'c'
))
def
test_re_groupref_exists
(
self
):
return
# not yet
self
.
assertEqual
(
re
.
match
(
'^(
\
()?([^()]+)(?(1)
\
))$'
,
'(a)'
)
.
groups
(),
(
'('
,
'a'
))
self
.
assertEqual
(
re
.
match
(
'^(
\
()?([^()]+)(?(1)
\
))$'
,
'a'
)
.
groups
(),
...
...
@@ -405,19 +404,20 @@ class ReTests(unittest.TestCase):
self
.
assertEqual
(
re
.
match
(
'.*?cd'
,
5000
*
'ab'
+
'c'
+
5000
*
'ab'
+
'cde'
)
.
end
(
0
),
20003
)
self
.
assertEqual
(
re
.
match
(
'.*?cd'
,
20000
*
'abc'
+
'de'
)
.
end
(
0
),
60001
)
# non-simple '*?' still recurses and hits the recursion limit
self
.
assertRaises
(
RuntimeError
,
re
.
search
,
'(a|b)*?c'
,
10000
*
'ab'
+
'cd'
)
# non-simple '*?' still used to hit the recursion limit, before the
# non-recursive scheme was implemented.
self
.
assertEqual
(
re
.
search
(
'(a|b)*?c'
,
10000
*
'ab'
+
'cd'
)
.
end
(
0
),
20001
)
def
test_bug_612074
(
self
):
pat
=
u"["
+
re
.
escape
(
u"
\u2039
"
)
+
u"]"
self
.
assertEqual
(
re
.
compile
(
pat
)
and
1
,
1
)
def
test_stack_overflow
(
self
):
# nasty case
that overflows
the straightforward recursive
# nasty case
s that used to overflow
the straightforward recursive
# implementation of repeated groups.
self
.
assert
Raises
(
RuntimeError
,
re
.
match
,
'(x)*'
,
50000
*
'x'
)
self
.
assert
Raises
(
RuntimeError
,
re
.
match
,
'(x)*y'
,
50000
*
'x'
+
'y
'
)
self
.
assert
Raises
(
RuntimeError
,
re
.
match
,
'(x)*?y'
,
50000
*
'x'
+
'y
'
)
self
.
assert
Equal
(
re
.
match
(
'(x)*'
,
50000
*
'x'
)
.
group
(
1
),
'x'
)
self
.
assert
Equal
(
re
.
match
(
'(x)*y'
,
50000
*
'x'
+
'y'
)
.
group
(
1
),
'x
'
)
self
.
assert
Equal
(
re
.
match
(
'(x)*?y'
,
50000
*
'x'
+
'y'
)
.
group
(
1
),
'x
'
)
def
test_scanner
(
self
):
def
s_ident
(
scanner
,
token
):
return
token
...
...
Misc/NEWS
Dosyayı görüntüle @
ad3fc44c
...
...
@@ -61,6 +61,10 @@ Extension modules
- Bug #814613: INET_ADDRSTRLEN fix needed for all compilers on SGI

- Implemented non-recursive SRE matching scheme (#757624).

- Implemented (?(id/name)yes|no) support in SRE (#572936).
Library
-------
...
...
Modules/_sre.c
Dosyayı görüntüle @
ad3fc44c
This diff is collapsed.
Click to expand it.
Modules/sre.h
Dosyayı görüntüle @
ad3fc44c
...
...
@@ -55,6 +55,7 @@ typedef unsigned int (*SRE_TOLOWER_HOOK)(unsigned int ch);
typedef
struct
SRE_REPEAT_T
{
int
count
;
SRE_CODE
*
pattern
;
/* points to REPEAT operator arguments */
void
*
last_ptr
;
/* helper to check for infinite loops */
struct
SRE_REPEAT_T
*
prev
;
/* points to previous repeat context */
}
SRE_REPEAT
;
...
...
@@ -74,10 +75,11 @@ typedef struct {
int
lastmark
;
void
*
mark
[
SRE_MARK_SIZE
];
/* dynamically allocated stuff */
void
**
mark_stack
;
int
mark_stack_size
;
int
mark_stack_base
;
SRE_REPEAT
*
repeat
;
/* current repeat context */
char
*
data_stack
;
int
data_stack_size
;
int
data_stack_base
;
/* current repeat context */
SRE_REPEAT
*
repeat
;
/* hooks */
SRE_TOLOWER_HOOK
lower
;
}
SRE_STATE
;
...
...
Modules/sre_constants.h
Dosyayı görüntüle @
ad3fc44c
...
...
@@ -11,7 +11,7 @@
* See the _sre.c file for information on usage and redistribution.
*/
#define SRE_MAGIC 2003
0419
#define SRE_MAGIC 2003
1017
#define SRE_OP_FAILURE 0
#define SRE_OP_SUCCESS 1
#define SRE_OP_ANY 2
...
...
@@ -25,24 +25,25 @@
#define SRE_OP_CHARSET 10
#define SRE_OP_BIGCHARSET 11
#define SRE_OP_GROUPREF 12
#define SRE_OP_GROUPREF_IGNORE 13
#define SRE_OP_IN 14
#define SRE_OP_IN_IGNORE 15
#define SRE_OP_INFO 16
#define SRE_OP_JUMP 17
#define SRE_OP_LITERAL 18
#define SRE_OP_LITERAL_IGNORE 19
#define SRE_OP_MARK 20
#define SRE_OP_MAX_UNTIL 21
#define SRE_OP_MIN_UNTIL 22
#define SRE_OP_NOT_LITERAL 23
#define SRE_OP_NOT_LITERAL_IGNORE 24
#define SRE_OP_NEGATE 25
#define SRE_OP_RANGE 26
#define SRE_OP_REPEAT 27
#define SRE_OP_REPEAT_ONE 28
#define SRE_OP_SUBPATTERN 29
#define SRE_OP_MIN_REPEAT_ONE 30
#define SRE_OP_GROUPREF_EXISTS 13
#define SRE_OP_GROUPREF_IGNORE 14
#define SRE_OP_IN 15
#define SRE_OP_IN_IGNORE 16
#define SRE_OP_INFO 17
#define SRE_OP_JUMP 18
#define SRE_OP_LITERAL 19
#define SRE_OP_LITERAL_IGNORE 20
#define SRE_OP_MARK 21
#define SRE_OP_MAX_UNTIL 22
#define SRE_OP_MIN_UNTIL 23
#define SRE_OP_NOT_LITERAL 24
#define SRE_OP_NOT_LITERAL_IGNORE 25
#define SRE_OP_NEGATE 26
#define SRE_OP_RANGE 27
#define SRE_OP_REPEAT 28
#define SRE_OP_REPEAT_ONE 29
#define SRE_OP_SUBPATTERN 30
#define SRE_OP_MIN_REPEAT_ONE 31
#define SRE_AT_BEGINNING 0
#define SRE_AT_BEGINNING_LINE 1
#define SRE_AT_BEGINNING_STRING 2
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment