Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
a01a2ee9
Kaydet (Commit)
a01a2ee9
authored
Eyl 03, 2004
tarafından
Gustavo Niemeyer
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
Applying modified version of patch #1018386, which fixes
some escaping bugs in SRE.
üst
ab9351bf
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
91 additions
and
43 deletions
+91
-43
libre.tex
Doc/lib/libre.tex
+2
-1
sre_parse.py
Lib/sre_parse.py
+36
-42
test_re.py
Lib/test/test_re.py
+53
-0
No files found.
Doc/lib/libre.tex
Dosyayı görüntüle @
a01a2ee9
...
...
@@ -387,7 +387,8 @@ also accepted by the regular expression parser:
Octal escapes are included in a limited form: If the first digit is a
0, or if there are three octal digits, it is considered an octal
escape. Otherwise, it is a group reference.
escape. Otherwise, it is a group reference. As for string literals,
octal escapes are always at most three digits in length.
% Note the lack of a period in the section title; it causes problems
...
...
Lib/sre_parse.py
Dosyayı görüntüle @
a01a2ee9
...
...
@@ -217,21 +217,11 @@ def isname(name):
# check that group name is a valid string
if
not
isident
(
name
[
0
]):
return
False
for
char
in
name
:
for
char
in
name
[
1
:]
:
if
not
isident
(
char
)
and
not
isdigit
(
char
):
return
False
return
True
def
_group
(
escape
,
groups
):
# check if the escape string represents a valid group
try
:
gid
=
int
(
escape
[
1
:])
if
gid
and
gid
<
groups
:
return
gid
except
ValueError
:
pass
return
None
# not a valid group
def
_class_escape
(
source
,
escape
):
# handle escape code inside character class
code
=
ESCAPES
.
get
(
escape
)
...
...
@@ -241,7 +231,8 @@ def _class_escape(source, escape):
if
code
:
return
code
try
:
if
escape
[
1
:
2
]
==
"x"
:
c
=
escape
[
1
:
2
]
if
c
==
"x"
:
# hexadecimal escape (exactly two digits)
while
source
.
next
in
HEXDIGITS
and
len
(
escape
)
<
4
:
escape
=
escape
+
source
.
get
()
...
...
@@ -249,12 +240,14 @@ def _class_escape(source, escape):
if
len
(
escape
)
!=
2
:
raise
error
,
"bogus escape:
%
s"
%
repr
(
"
\\
"
+
escape
)
return
LITERAL
,
int
(
escape
,
16
)
&
0xff
elif
escape
[
1
:
2
]
in
OCTDIGITS
:
elif
c
in
OCTDIGITS
:
# octal escape (up to three digits)
while
source
.
next
in
OCTDIGITS
and
len
(
escape
)
<
5
:
while
source
.
next
in
OCTDIGITS
and
len
(
escape
)
<
4
:
escape
=
escape
+
source
.
get
()
escape
=
escape
[
1
:]
return
LITERAL
,
int
(
escape
,
8
)
&
0xff
elif
c
in
DIGITS
:
raise
error
,
"bogus escape:
%
s"
%
repr
(
escape
)
if
len
(
escape
)
==
2
:
return
LITERAL
,
ord
(
escape
[
1
])
except
ValueError
:
...
...
@@ -270,19 +263,20 @@ def _escape(source, escape, state):
if
code
:
return
code
try
:
if
escape
[
1
:
2
]
==
"x"
:
c
=
escape
[
1
:
2
]
if
c
==
"x"
:
# hexadecimal escape
while
source
.
next
in
HEXDIGITS
and
len
(
escape
)
<
4
:
escape
=
escape
+
source
.
get
()
if
len
(
escape
)
!=
4
:
raise
ValueError
return
LITERAL
,
int
(
escape
[
2
:],
16
)
&
0xff
elif
escape
[
1
:
2
]
==
"0"
:
elif
c
==
"0"
:
# octal escape
while
source
.
next
in
OCTDIGITS
and
len
(
escape
)
<
4
:
escape
=
escape
+
source
.
get
()
return
LITERAL
,
int
(
escape
[
1
:],
8
)
&
0xff
elif
escape
[
1
:
2
]
in
DIGITS
:
elif
c
in
DIGITS
:
# octal escape *or* decimal group reference (sigh)
if
source
.
next
in
DIGITS
:
escape
=
escape
+
source
.
get
()
...
...
@@ -291,9 +285,9 @@ def _escape(source, escape, state):
# got three octal digits; this is an octal escape
escape
=
escape
+
source
.
get
()
return
LITERAL
,
int
(
escape
[
1
:],
8
)
&
0xff
#
got at least one decimal digit;
this is a group reference
group
=
_group
(
escape
,
state
.
groups
)
if
group
:
#
not an octal escape, so
this is a group reference
group
=
int
(
escape
[
1
:]
)
if
group
<
state
.
groups
:
if
not
state
.
checkgroup
(
group
):
raise
error
,
"cannot refer to open group"
return
GROUPREF
,
group
...
...
@@ -709,7 +703,8 @@ def parse_template(source, pattern):
break
# end of replacement string
if
this
and
this
[
0
]
==
"
\\
"
:
# group
if
this
==
"
\\
g"
:
c
=
this
[
1
:
2
]
if
c
==
"g"
:
name
=
""
if
s
.
match
(
"<"
):
while
1
:
...
...
@@ -723,6 +718,8 @@ def parse_template(source, pattern):
raise
error
,
"bad group name"
try
:
index
=
int
(
name
)
if
index
<
0
:
raise
error
,
"negative group number"
except
ValueError
:
if
not
isname
(
name
):
raise
error
,
"bad character in group name"
...
...
@@ -731,26 +728,23 @@ def parse_template(source, pattern):
except
KeyError
:
raise
IndexError
,
"unknown group name"
a
((
MARK
,
index
))
elif
len
(
this
)
>
1
and
this
[
1
]
in
DIGITS
:
code
=
None
while
1
:
group
=
_group
(
this
,
pattern
.
groups
+
1
)
if
group
:
if
(
s
.
next
not
in
DIGITS
or
not
_group
(
this
+
s
.
next
,
pattern
.
groups
+
1
)):
code
=
MARK
,
group
break
elif
s
.
next
in
OCTDIGITS
:
elif
c
==
"0"
:
if
s
.
next
in
OCTDIGITS
:
this
=
this
+
sget
()
else
:
break
if
not
code
:
this
=
this
[
1
:]
code
=
LITERAL
,
makechar
(
int
(
this
[
-
6
:],
8
)
&
0xff
)
if
code
[
0
]
is
LITERAL
:
literal
(
code
[
1
])
else
:
a
(
code
)
if
s
.
next
in
OCTDIGITS
:
this
=
this
+
sget
()
literal
(
makechar
(
int
(
this
[
1
:],
8
)
&
0xff
))
elif
c
in
DIGITS
:
isoctal
=
False
if
s
.
next
in
DIGITS
:
this
=
this
+
sget
()
if
(
c
in
OCTDIGITS
and
s
.
next
in
OCTDIGITS
and
this
[
2
]
in
OCTDIGITS
):
this
=
this
+
sget
()
isoctal
=
True
literal
(
makechar
(
int
(
this
[
1
:],
8
)
&
0xff
))
if
not
isoctal
:
a
((
MARK
,
int
(
this
[
1
:])))
else
:
try
:
this
=
makechar
(
ESCAPES
[
this
][
1
])
...
...
@@ -782,7 +776,7 @@ def expand_template(template, match):
for
index
,
group
in
groups
:
literals
[
index
]
=
s
=
g
(
group
)
if
s
is
None
:
raise
IndexError
raise
error
,
"unmatched group"
except
IndexError
:
raise
error
,
"
empty group
"
raise
error
,
"
invalid group reference
"
return
sep
.
join
(
literals
)
Lib/test/test_re.py
Dosyayı görüntüle @
a01a2ee9
...
...
@@ -83,6 +83,48 @@ class ReTests(unittest.TestCase):
self
.
assertEqual
(
re
.
sub
(
'
\r\n
'
,
'
\n
'
,
'abc
\r\n
def
\r\n
'
),
'abc
\n
def
\n
'
)
def
test_sub_template_numeric_escape
(
self
):
# bug 776311 and friends
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\0'
,
'x'
),
'
\0
'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\000'
,
'x'
),
'
\000
'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\001'
,
'x'
),
'
\001
'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\008'
,
'x'
),
'
\0
'
+
'8'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\009'
,
'x'
),
'
\0
'
+
'9'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\111'
,
'x'
),
'
\111
'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\117'
,
'x'
),
'
\117
'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\1111'
,
'x'
),
'
\111
1'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\1111'
,
'x'
),
'
\111
'
+
'1'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\00'
,
'x'
),
'
\x00
'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\07'
,
'x'
),
'
\x07
'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\08'
,
'x'
),
'
\0
'
+
'8'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\09'
,
'x'
),
'
\0
'
+
'9'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\0a'
,
'x'
),
'
\0
'
+
'a'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\400'
,
'x'
),
'
\0
'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\777'
,
'x'
),
'
\377
'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\1'
,
'x'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\8'
,
'x'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\9'
,
'x'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\11'
,
'x'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\18'
,
'x'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\1a'
,
'x'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\90'
,
'x'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\99'
,
'x'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\118'
,
'x'
)
# r'\11' + '8'
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\11a'
,
'x'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\181'
,
'x'
)
# r'\18' + '1'
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\800'
,
'x'
)
# r'\80' + '0'
# in python2.3 (etc), these loop endlessly in sre_parser.py
self
.
assertEqual
(
re
.
sub
(
'(((((((((((x)))))))))))'
,
r'\11'
,
'x'
),
'x'
)
self
.
assertEqual
(
re
.
sub
(
'((((((((((y))))))))))(.)'
,
r'\118'
,
'xyz'
),
'xz8'
)
self
.
assertEqual
(
re
.
sub
(
'((((((((((y))))))))))(.)'
,
r'\11a'
,
'xyz'
),
'xza'
)
def
test_qualified_re_sub
(
self
):
self
.
assertEqual
(
re
.
sub
(
'a'
,
'b'
,
'aaaaa'
),
'bbbbb'
)
self
.
assertEqual
(
re
.
sub
(
'a'
,
'b'
,
'aaaaa'
,
1
),
'baaaa'
)
...
...
@@ -105,6 +147,7 @@ class ReTests(unittest.TestCase):
self
.
assertRaises
(
IndexError
,
re
.
sub
,
'(?P<a>x)'
,
'
\
g<ab>'
,
'xx'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'(?P<a>x)|(?P<b>y)'
,
'
\
g<b>'
,
'xx'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'(?P<a>x)|(?P<b>y)'
,
'
\\
2'
,
'xx'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'(?P<a>x)'
,
'
\
g<-1>'
,
'xx'
)
def
test_re_subn
(
self
):
self
.
assertEqual
(
re
.
subn
(
"(?i)b+"
,
"x"
,
"bbbb BBBB"
),
(
'x x'
,
2
))
...
...
@@ -386,6 +429,16 @@ class ReTests(unittest.TestCase):
self
.
assertNotEqual
(
re
.
match
(
r"\x
%02
xz"
%
i
,
chr
(
i
)
+
"z"
),
None
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
"
\
911"
,
""
)
def
test_sre_character_class_literals
(
self
):
for
i
in
[
0
,
8
,
16
,
32
,
64
,
127
,
128
,
255
]:
self
.
assertNotEqual
(
re
.
match
(
r"[\%03o]"
%
i
,
chr
(
i
)),
None
)
self
.
assertNotEqual
(
re
.
match
(
r"[\%03o0]"
%
i
,
chr
(
i
)),
None
)
self
.
assertNotEqual
(
re
.
match
(
r"[\%03o8]"
%
i
,
chr
(
i
)),
None
)
self
.
assertNotEqual
(
re
.
match
(
r"[\x
%02
x]"
%
i
,
chr
(
i
)),
None
)
self
.
assertNotEqual
(
re
.
match
(
r"[\x
%02
x0]"
%
i
,
chr
(
i
)),
None
)
self
.
assertNotEqual
(
re
.
match
(
r"[\x
%02
xz]"
%
i
,
chr
(
i
)),
None
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
"[
\
911]"
,
""
)
def
test_bug_113254
(
self
):
self
.
assertEqual
(
re
.
match
(
r'(a)|(b)'
,
'b'
)
.
start
(
1
),
-
1
)
self
.
assertEqual
(
re
.
match
(
r'(a)|(b)'
,
'b'
)
.
end
(
1
),
-
1
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment