Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
632a77e6
Kaydet (Commit)
632a77e6
authored
Mar 25, 2015
tarafından
Serhiy Storchaka
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
Issue #22364: Improved some re error messages using regex for hints.
üst
7c316a18
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
285 additions
and
181 deletions
+285
-181
re.py
Lib/re.py
+1
-1
sre_compile.py
Lib/sre_compile.py
+3
-3
sre_parse.py
Lib/sre_parse.py
+97
-100
test_re.py
Lib/test/test_re.py
+179
-74
NEWS
Misc/NEWS
+2
-0
_sre.c
Modules/_sre.c
+3
-3
No files found.
Lib/re.py
Dosyayı görüntüle @
632a77e6
...
@@ -286,7 +286,7 @@ def _compile(pattern, flags):
...
@@ -286,7 +286,7 @@ def _compile(pattern, flags):
if
isinstance
(
pattern
,
_pattern_type
):
if
isinstance
(
pattern
,
_pattern_type
):
if
flags
:
if
flags
:
raise
ValueError
(
raise
ValueError
(
"
C
annot process flags argument with a compiled pattern"
)
"
c
annot process flags argument with a compiled pattern"
)
return
pattern
return
pattern
if
not
sre_compile
.
isstring
(
pattern
):
if
not
sre_compile
.
isstring
(
pattern
):
raise
TypeError
(
"first argument must be string or compiled pattern"
)
raise
TypeError
(
"first argument must be string or compiled pattern"
)
...
...
Lib/sre_compile.py
Dosyayı görüntüle @
632a77e6
...
@@ -113,7 +113,7 @@ def _compile(code, pattern, flags):
...
@@ -113,7 +113,7 @@ def _compile(code, pattern, flags):
emit
(
ANY
)
emit
(
ANY
)
elif
op
in
REPEATING_CODES
:
elif
op
in
REPEATING_CODES
:
if
flags
&
SRE_FLAG_TEMPLATE
:
if
flags
&
SRE_FLAG_TEMPLATE
:
raise
error
(
"internal: unsupported template operator
"
)
raise
error
(
"internal: unsupported template operator
%
r"
%
(
op
,)
)
elif
_simple
(
av
)
and
op
is
not
REPEAT
:
elif
_simple
(
av
)
and
op
is
not
REPEAT
:
if
op
is
MAX_REPEAT
:
if
op
is
MAX_REPEAT
:
emit
(
REPEAT_ONE
)
emit
(
REPEAT_ONE
)
...
@@ -216,7 +216,7 @@ def _compile(code, pattern, flags):
...
@@ -216,7 +216,7 @@ def _compile(code, pattern, flags):
else
:
else
:
code
[
skipyes
]
=
_len
(
code
)
-
skipyes
+
1
code
[
skipyes
]
=
_len
(
code
)
-
skipyes
+
1
else
:
else
:
raise
ValueError
(
"unsupported operand type"
,
op
)
raise
error
(
"internal: unsupported operand type
%
r"
%
(
op
,)
)
def
_compile_charset
(
charset
,
flags
,
code
,
fixup
=
None
,
fixes
=
None
):
def
_compile_charset
(
charset
,
flags
,
code
,
fixup
=
None
,
fixes
=
None
):
# compile charset subprogram
# compile charset subprogram
...
@@ -242,7 +242,7 @@ def _compile_charset(charset, flags, code, fixup=None, fixes=None):
...
@@ -242,7 +242,7 @@ def _compile_charset(charset, flags, code, fixup=None, fixes=None):
else
:
else
:
emit
(
av
)
emit
(
av
)
else
:
else
:
raise
error
(
"internal: unsupported set operator
"
)
raise
error
(
"internal: unsupported set operator
%
r"
%
(
op
,)
)
emit
(
FAILURE
)
emit
(
FAILURE
)
def
_optimize_charset
(
charset
,
fixup
,
fixes
):
def
_optimize_charset
(
charset
,
fixup
,
fixes
):
...
...
Lib/sre_parse.py
Dosyayı görüntüle @
632a77e6
...
@@ -79,7 +79,7 @@ class Pattern:
...
@@ -79,7 +79,7 @@ class Pattern:
gid
=
self
.
groups
gid
=
self
.
groups
self
.
subpatterns
.
append
(
None
)
self
.
subpatterns
.
append
(
None
)
if
self
.
groups
>
MAXGROUPS
:
if
self
.
groups
>
MAXGROUPS
:
raise
error
(
"
groups number is too large
"
)
raise
error
(
"
too many groups
"
)
if
name
is
not
None
:
if
name
is
not
None
:
ogid
=
self
.
groupdict
.
get
(
name
,
None
)
ogid
=
self
.
groupdict
.
get
(
name
,
None
)
if
ogid
is
not
None
:
if
ogid
is
not
None
:
...
@@ -235,7 +235,7 @@ class Tokenizer:
...
@@ -235,7 +235,7 @@ class Tokenizer:
try
:
try
:
char
+=
self
.
decoded_string
[
index
]
char
+=
self
.
decoded_string
[
index
]
except
IndexError
:
except
IndexError
:
raise
error
(
"b
ogus escape (end of line
)"
,
raise
error
(
"b
ad escape (end of pattern
)"
,
self
.
string
,
len
(
self
.
string
)
-
1
)
from
None
self
.
string
,
len
(
self
.
string
)
-
1
)
from
None
self
.
index
=
index
+
1
self
.
index
=
index
+
1
self
.
next
=
char
self
.
next
=
char
...
@@ -263,8 +263,13 @@ class Tokenizer:
...
@@ -263,8 +263,13 @@ class Tokenizer:
c
=
self
.
next
c
=
self
.
next
self
.
__next
()
self
.
__next
()
if
c
is
None
:
if
c
is
None
:
raise
self
.
error
(
"unterminated name"
)
if
not
result
:
raise
self
.
error
(
"missing group name"
)
raise
self
.
error
(
"missing
%
s, unterminated name"
%
terminator
,
len
(
result
))
if
c
==
terminator
:
if
c
==
terminator
:
if
not
result
:
raise
self
.
error
(
"missing group name"
,
1
)
break
break
result
+=
c
result
+=
c
return
result
return
result
...
@@ -318,19 +323,19 @@ def _class_escape(source, escape):
...
@@ -318,19 +323,19 @@ def _class_escape(source, escape):
# hexadecimal escape (exactly two digits)
# hexadecimal escape (exactly two digits)
escape
+=
source
.
getwhile
(
2
,
HEXDIGITS
)
escape
+=
source
.
getwhile
(
2
,
HEXDIGITS
)
if
len
(
escape
)
!=
4
:
if
len
(
escape
)
!=
4
:
raise
ValueError
raise
source
.
error
(
"incomplete escape
%
s"
%
escape
,
len
(
escape
))
return
LITERAL
,
int
(
escape
[
2
:],
16
)
return
LITERAL
,
int
(
escape
[
2
:],
16
)
elif
c
==
"u"
and
source
.
istext
:
elif
c
==
"u"
and
source
.
istext
:
# unicode escape (exactly four digits)
# unicode escape (exactly four digits)
escape
+=
source
.
getwhile
(
4
,
HEXDIGITS
)
escape
+=
source
.
getwhile
(
4
,
HEXDIGITS
)
if
len
(
escape
)
!=
6
:
if
len
(
escape
)
!=
6
:
raise
ValueError
raise
source
.
error
(
"incomplete escape
%
s"
%
escape
,
len
(
escape
))
return
LITERAL
,
int
(
escape
[
2
:],
16
)
return
LITERAL
,
int
(
escape
[
2
:],
16
)
elif
c
==
"U"
and
source
.
istext
:
elif
c
==
"U"
and
source
.
istext
:
# unicode escape (exactly eight digits)
# unicode escape (exactly eight digits)
escape
+=
source
.
getwhile
(
8
,
HEXDIGITS
)
escape
+=
source
.
getwhile
(
8
,
HEXDIGITS
)
if
len
(
escape
)
!=
10
:
if
len
(
escape
)
!=
10
:
raise
ValueError
raise
source
.
error
(
"incomplete escape
%
s"
%
escape
,
len
(
escape
))
c
=
int
(
escape
[
2
:],
16
)
c
=
int
(
escape
[
2
:],
16
)
chr
(
c
)
# raise ValueError for invalid code
chr
(
c
)
# raise ValueError for invalid code
return
LITERAL
,
c
return
LITERAL
,
c
...
@@ -339,7 +344,7 @@ def _class_escape(source, escape):
...
@@ -339,7 +344,7 @@ def _class_escape(source, escape):
escape
+=
source
.
getwhile
(
2
,
OCTDIGITS
)
escape
+=
source
.
getwhile
(
2
,
OCTDIGITS
)
c
=
int
(
escape
[
1
:],
8
)
c
=
int
(
escape
[
1
:],
8
)
if
c
>
0
o377
:
if
c
>
0
o377
:
raise
source
.
error
(
'octal escape value
%
r
outside of '
raise
source
.
error
(
'octal escape value
%
s
outside of '
'range 0-0o377'
%
escape
,
len
(
escape
))
'range 0-0o377'
%
escape
,
len
(
escape
))
return
LITERAL
,
c
return
LITERAL
,
c
elif
c
in
DIGITS
:
elif
c
in
DIGITS
:
...
@@ -352,7 +357,7 @@ def _class_escape(source, escape):
...
@@ -352,7 +357,7 @@ def _class_escape(source, escape):
return
LITERAL
,
ord
(
escape
[
1
])
return
LITERAL
,
ord
(
escape
[
1
])
except
ValueError
:
except
ValueError
:
pass
pass
raise
source
.
error
(
"b
ogus escape:
%
r
"
%
escape
,
len
(
escape
))
raise
source
.
error
(
"b
ad escape
%
s
"
%
escape
,
len
(
escape
))
def
_escape
(
source
,
escape
,
state
):
def
_escape
(
source
,
escape
,
state
):
# handle escape code in expression
# handle escape code in expression
...
@@ -368,19 +373,19 @@ def _escape(source, escape, state):
...
@@ -368,19 +373,19 @@ def _escape(source, escape, state):
# hexadecimal escape
# hexadecimal escape
escape
+=
source
.
getwhile
(
2
,
HEXDIGITS
)
escape
+=
source
.
getwhile
(
2
,
HEXDIGITS
)
if
len
(
escape
)
!=
4
:
if
len
(
escape
)
!=
4
:
raise
ValueError
raise
source
.
error
(
"incomplete escape
%
s"
%
escape
,
len
(
escape
))
return
LITERAL
,
int
(
escape
[
2
:],
16
)
return
LITERAL
,
int
(
escape
[
2
:],
16
)
elif
c
==
"u"
and
source
.
istext
:
elif
c
==
"u"
and
source
.
istext
:
# unicode escape (exactly four digits)
# unicode escape (exactly four digits)
escape
+=
source
.
getwhile
(
4
,
HEXDIGITS
)
escape
+=
source
.
getwhile
(
4
,
HEXDIGITS
)
if
len
(
escape
)
!=
6
:
if
len
(
escape
)
!=
6
:
raise
ValueError
raise
source
.
error
(
"incomplete escape
%
s"
%
escape
,
len
(
escape
))
return
LITERAL
,
int
(
escape
[
2
:],
16
)
return
LITERAL
,
int
(
escape
[
2
:],
16
)
elif
c
==
"U"
and
source
.
istext
:
elif
c
==
"U"
and
source
.
istext
:
# unicode escape (exactly eight digits)
# unicode escape (exactly eight digits)
escape
+=
source
.
getwhile
(
8
,
HEXDIGITS
)
escape
+=
source
.
getwhile
(
8
,
HEXDIGITS
)
if
len
(
escape
)
!=
10
:
if
len
(
escape
)
!=
10
:
raise
ValueError
raise
source
.
error
(
"incomplete escape
%
s"
%
escape
,
len
(
escape
))
c
=
int
(
escape
[
2
:],
16
)
c
=
int
(
escape
[
2
:],
16
)
chr
(
c
)
# raise ValueError for invalid code
chr
(
c
)
# raise ValueError for invalid code
return
LITERAL
,
c
return
LITERAL
,
c
...
@@ -398,7 +403,7 @@ def _escape(source, escape, state):
...
@@ -398,7 +403,7 @@ def _escape(source, escape, state):
escape
+=
source
.
get
()
escape
+=
source
.
get
()
c
=
int
(
escape
[
1
:],
8
)
c
=
int
(
escape
[
1
:],
8
)
if
c
>
0
o377
:
if
c
>
0
o377
:
raise
source
.
error
(
'octal escape value
%
r
outside of '
raise
source
.
error
(
'octal escape value
%
s
outside of '
'range 0-0o377'
%
escape
,
'range 0-0o377'
%
escape
,
len
(
escape
))
len
(
escape
))
return
LITERAL
,
c
return
LITERAL
,
c
...
@@ -406,11 +411,11 @@ def _escape(source, escape, state):
...
@@ -406,11 +411,11 @@ def _escape(source, escape, state):
group
=
int
(
escape
[
1
:])
group
=
int
(
escape
[
1
:])
if
group
<
state
.
groups
:
if
group
<
state
.
groups
:
if
not
state
.
checkgroup
(
group
):
if
not
state
.
checkgroup
(
group
):
raise
source
.
error
(
"cannot refer to open group"
,
raise
source
.
error
(
"cannot refer to
an
open group"
,
len
(
escape
))
len
(
escape
))
state
.
checklookbehindgroup
(
group
,
source
)
state
.
checklookbehindgroup
(
group
,
source
)
return
GROUPREF
,
group
return
GROUPREF
,
group
raise
ValueError
raise
source
.
error
(
"invalid group reference"
,
len
(
escape
))
if
len
(
escape
)
==
2
:
if
len
(
escape
)
==
2
:
if
c
in
ASCIILETTERS
:
if
c
in
ASCIILETTERS
:
import
warnings
import
warnings
...
@@ -419,7 +424,7 @@ def _escape(source, escape, state):
...
@@ -419,7 +424,7 @@ def _escape(source, escape, state):
return
LITERAL
,
ord
(
escape
[
1
])
return
LITERAL
,
ord
(
escape
[
1
])
except
ValueError
:
except
ValueError
:
pass
pass
raise
source
.
error
(
"b
ogus escape:
%
r
"
%
escape
,
len
(
escape
))
raise
source
.
error
(
"b
ad escape
%
s
"
%
escape
,
len
(
escape
))
def
_parse_sub
(
source
,
state
,
nested
=
True
):
def
_parse_sub
(
source
,
state
,
nested
=
True
):
# parse an alternation: a|b|c
# parse an alternation: a|b|c
...
@@ -427,12 +432,11 @@ def _parse_sub(source, state, nested=True):
...
@@ -427,12 +432,11 @@ def _parse_sub(source, state, nested=True):
items
=
[]
items
=
[]
itemsappend
=
items
.
append
itemsappend
=
items
.
append
sourcematch
=
source
.
match
sourcematch
=
source
.
match
start
=
source
.
tell
()
while
True
:
while
True
:
itemsappend
(
_parse
(
source
,
state
))
itemsappend
(
_parse
(
source
,
state
))
if
not
sourcematch
(
"|"
):
if
not
sourcematch
(
"|"
):
break
break
if
nested
and
source
.
next
is
not
None
and
source
.
next
!=
")"
:
raise
source
.
error
(
"pattern not properly closed"
)
if
len
(
items
)
==
1
:
if
len
(
items
)
==
1
:
return
items
[
0
]
return
items
[
0
]
...
@@ -480,8 +484,6 @@ def _parse_sub_cond(source, state, condgroup):
...
@@ -480,8 +484,6 @@ def _parse_sub_cond(source, state, condgroup):
raise
source
.
error
(
"conditional backref with more than two branches"
)
raise
source
.
error
(
"conditional backref with more than two branches"
)
else
:
else
:
item_no
=
None
item_no
=
None
if
source
.
next
is
not
None
and
source
.
next
!=
")"
:
raise
source
.
error
(
"pattern not properly closed"
)
subpattern
=
SubPattern
(
state
)
subpattern
=
SubPattern
(
state
)
subpattern
.
append
((
GROUPREF_EXISTS
,
(
condgroup
,
item_yes
,
item_no
)))
subpattern
.
append
((
GROUPREF_EXISTS
,
(
condgroup
,
item_yes
,
item_no
)))
return
subpattern
return
subpattern
...
@@ -526,6 +528,7 @@ def _parse(source, state):
...
@@ -526,6 +528,7 @@ def _parse(source, state):
subpatternappend
((
LITERAL
,
_ord
(
this
)))
subpatternappend
((
LITERAL
,
_ord
(
this
)))
elif
this
==
"["
:
elif
this
==
"["
:
here
=
source
.
tell
()
-
1
# character set
# character set
set
=
[]
set
=
[]
setappend
=
set
.
append
setappend
=
set
.
append
...
@@ -538,7 +541,8 @@ def _parse(source, state):
...
@@ -538,7 +541,8 @@ def _parse(source, state):
while
True
:
while
True
:
this
=
sourceget
()
this
=
sourceget
()
if
this
is
None
:
if
this
is
None
:
raise
source
.
error
(
"unexpected end of regular expression"
)
raise
source
.
error
(
"unterminated character set"
,
source
.
tell
()
-
here
)
if
this
==
"]"
and
set
!=
start
:
if
this
==
"]"
and
set
!=
start
:
break
break
elif
this
[
0
]
==
"
\\
"
:
elif
this
[
0
]
==
"
\\
"
:
...
@@ -547,25 +551,28 @@ def _parse(source, state):
...
@@ -547,25 +551,28 @@ def _parse(source, state):
code1
=
LITERAL
,
_ord
(
this
)
code1
=
LITERAL
,
_ord
(
this
)
if
sourcematch
(
"-"
):
if
sourcematch
(
"-"
):
# potential range
# potential range
this
=
sourceget
()
that
=
sourceget
()
if
this
is
None
:
if
that
is
None
:
raise
source
.
error
(
"unexpected end of regular expression"
)
raise
source
.
error
(
"unterminated character set"
,
if
this
==
"]"
:
source
.
tell
()
-
here
)
if
that
==
"]"
:
if
code1
[
0
]
is
IN
:
if
code1
[
0
]
is
IN
:
code1
=
code1
[
1
][
0
]
code1
=
code1
[
1
][
0
]
setappend
(
code1
)
setappend
(
code1
)
setappend
((
LITERAL
,
_ord
(
"-"
)))
setappend
((
LITERAL
,
_ord
(
"-"
)))
break
break
if
th
is
[
0
]
==
"
\\
"
:
if
th
at
[
0
]
==
"
\\
"
:
code2
=
_class_escape
(
source
,
th
is
)
code2
=
_class_escape
(
source
,
th
at
)
else
:
else
:
code2
=
LITERAL
,
_ord
(
th
is
)
code2
=
LITERAL
,
_ord
(
th
at
)
if
code1
[
0
]
!=
LITERAL
or
code2
[
0
]
!=
LITERAL
:
if
code1
[
0
]
!=
LITERAL
or
code2
[
0
]
!=
LITERAL
:
raise
source
.
error
(
"bad character range"
,
len
(
this
))
msg
=
"bad character range
%
s-
%
s"
%
(
this
,
that
)
raise
source
.
error
(
msg
,
len
(
this
)
+
1
+
len
(
that
))
lo
=
code1
[
1
]
lo
=
code1
[
1
]
hi
=
code2
[
1
]
hi
=
code2
[
1
]
if
hi
<
lo
:
if
hi
<
lo
:
raise
source
.
error
(
"bad character range"
,
len
(
this
))
msg
=
"bad character range
%
s-
%
s"
%
(
this
,
that
)
raise
source
.
error
(
msg
,
len
(
this
)
+
1
+
len
(
that
))
setappend
((
RANGE
,
(
lo
,
hi
)))
setappend
((
RANGE
,
(
lo
,
hi
)))
else
:
else
:
if
code1
[
0
]
is
IN
:
if
code1
[
0
]
is
IN
:
...
@@ -617,10 +624,10 @@ def _parse(source, state):
...
@@ -617,10 +624,10 @@ def _parse(source, state):
if
max
>=
MAXREPEAT
:
if
max
>=
MAXREPEAT
:
raise
OverflowError
(
"the repetition number is too large"
)
raise
OverflowError
(
"the repetition number is too large"
)
if
max
<
min
:
if
max
<
min
:
raise
source
.
error
(
"
bad repeat interval
"
,
raise
source
.
error
(
"
min repeat greater than max repeat
"
,
source
.
tell
()
-
here
)
source
.
tell
()
-
here
)
else
:
else
:
raise
source
.
error
(
"not supported"
,
len
(
this
))
raise
AssertionError
(
"unsupported quantifier
%
r"
%
(
char
,
))
# figure out which item to repeat
# figure out which item to repeat
if
subpattern
:
if
subpattern
:
item
=
subpattern
[
-
1
:]
item
=
subpattern
[
-
1
:]
...
@@ -641,39 +648,32 @@ def _parse(source, state):
...
@@ -641,39 +648,32 @@ def _parse(source, state):
subpatternappend
((
ANY
,
None
))
subpatternappend
((
ANY
,
None
))
elif
this
==
"("
:
elif
this
==
"("
:
group
=
1
start
=
source
.
tell
()
-
1
group
=
True
name
=
None
name
=
None
condgroup
=
None
condgroup
=
None
if
sourcematch
(
"?"
):
if
sourcematch
(
"?"
):
group
=
0
# options
# options
char
=
sourceget
()
char
=
sourceget
()
if
char
is
None
:
if
char
is
None
:
raise
s
elf
.
error
(
"unexpected end of pattern"
)
raise
s
ource
.
error
(
"unexpected end of pattern"
)
if
char
==
"P"
:
if
char
==
"P"
:
# python extensions
# python extensions
if
sourcematch
(
"<"
):
if
sourcematch
(
"<"
):
# named group: skip forward to end of name
# named group: skip forward to end of name
name
=
source
.
getuntil
(
">"
)
name
=
source
.
getuntil
(
">"
)
group
=
1
if
not
name
:
raise
source
.
error
(
"missing group name"
,
1
)
if
not
name
.
isidentifier
():
if
not
name
.
isidentifier
():
raise
source
.
error
(
"bad character in group name "
msg
=
"bad character in group name
%
r"
%
name
"
%
r"
%
name
,
raise
source
.
error
(
msg
,
len
(
name
)
+
1
)
len
(
name
)
+
1
)
elif
sourcematch
(
"="
):
elif
sourcematch
(
"="
):
# named backreference
# named backreference
name
=
source
.
getuntil
(
")"
)
name
=
source
.
getuntil
(
")"
)
if
not
name
:
raise
source
.
error
(
"missing group name"
,
1
)
if
not
name
.
isidentifier
():
if
not
name
.
isidentifier
():
raise
source
.
error
(
"bad character in backref "
msg
=
"bad character in group name
%
r"
%
name
"group name
%
r"
%
name
,
raise
source
.
error
(
msg
,
len
(
name
)
+
1
)
len
(
name
)
+
1
)
gid
=
state
.
groupdict
.
get
(
name
)
gid
=
state
.
groupdict
.
get
(
name
)
if
gid
is
None
:
if
gid
is
None
:
msg
=
"unknown group name
: {0!r}"
.
format
(
name
)
msg
=
"unknown group name
%
r"
%
name
raise
source
.
error
(
msg
,
len
(
name
)
+
1
)
raise
source
.
error
(
msg
,
len
(
name
)
+
1
)
state
.
checklookbehindgroup
(
gid
,
source
)
state
.
checklookbehindgroup
(
gid
,
source
)
subpatternappend
((
GROUPREF
,
gid
))
subpatternappend
((
GROUPREF
,
gid
))
...
@@ -682,16 +682,17 @@ def _parse(source, state):
...
@@ -682,16 +682,17 @@ def _parse(source, state):
char
=
sourceget
()
char
=
sourceget
()
if
char
is
None
:
if
char
is
None
:
raise
source
.
error
(
"unexpected end of pattern"
)
raise
source
.
error
(
"unexpected end of pattern"
)
raise
source
.
error
(
"unknown
specifier: ?P
%
s"
%
char
,
raise
source
.
error
(
"unknown
extension ?P"
+
char
,
len
(
char
))
len
(
char
)
+
2
)
elif
char
==
":"
:
elif
char
==
":"
:
# non-capturing group
# non-capturing group
group
=
2
group
=
None
elif
char
==
"#"
:
elif
char
==
"#"
:
# comment
# comment
while
True
:
while
True
:
if
source
.
next
is
None
:
if
source
.
next
is
None
:
raise
source
.
error
(
"unbalanced parenthesis"
)
raise
source
.
error
(
"missing ), unterminated comment"
,
source
.
tell
()
-
start
)
if
sourceget
()
==
")"
:
if
sourceget
()
==
")"
:
break
break
continue
continue
...
@@ -700,8 +701,11 @@ def _parse(source, state):
...
@@ -700,8 +701,11 @@ def _parse(source, state):
dir
=
1
dir
=
1
if
char
==
"<"
:
if
char
==
"<"
:
char
=
sourceget
()
char
=
sourceget
()
if
char
is
None
or
char
not
in
"=!"
:
if
char
is
None
:
raise
source
.
error
(
"syntax error"
)
raise
source
.
error
(
"unexpected end of pattern"
)
if
char
not
in
"=!"
:
raise
source
.
error
(
"unknown extension ?<"
+
char
,
len
(
char
)
+
2
)
dir
=
-
1
# lookbehind
dir
=
-
1
# lookbehind
lookbehindgroups
=
state
.
lookbehindgroups
lookbehindgroups
=
state
.
lookbehindgroups
if
lookbehindgroups
is
None
:
if
lookbehindgroups
is
None
:
...
@@ -711,7 +715,8 @@ def _parse(source, state):
...
@@ -711,7 +715,8 @@ def _parse(source, state):
if
lookbehindgroups
is
None
:
if
lookbehindgroups
is
None
:
state
.
lookbehindgroups
=
None
state
.
lookbehindgroups
=
None
if
not
sourcematch
(
")"
):
if
not
sourcematch
(
")"
):
raise
source
.
error
(
"unbalanced parenthesis"
)
raise
source
.
error
(
"missing ), unterminated subpattern"
,
source
.
tell
()
-
start
)
if
char
==
"="
:
if
char
==
"="
:
subpatternappend
((
ASSERT
,
(
dir
,
p
)))
subpatternappend
((
ASSERT
,
(
dir
,
p
)))
else
:
else
:
...
@@ -720,13 +725,11 @@ def _parse(source, state):
...
@@ -720,13 +725,11 @@ def _parse(source, state):
elif
char
==
"("
:
elif
char
==
"("
:
# conditional backreference group
# conditional backreference group
condname
=
source
.
getuntil
(
")"
)
condname
=
source
.
getuntil
(
")"
)
group
=
2
group
=
None
if
not
condname
:
raise
source
.
error
(
"missing group name"
,
1
)
if
condname
.
isidentifier
():
if
condname
.
isidentifier
():
condgroup
=
state
.
groupdict
.
get
(
condname
)
condgroup
=
state
.
groupdict
.
get
(
condname
)
if
condgroup
is
None
:
if
condgroup
is
None
:
msg
=
"unknown group name
: {0!r}"
.
format
(
condname
)
msg
=
"unknown group name
%
r"
%
condname
raise
source
.
error
(
msg
,
len
(
condname
)
+
1
)
raise
source
.
error
(
msg
,
len
(
condname
)
+
1
)
else
:
else
:
try
:
try
:
...
@@ -734,50 +737,48 @@ def _parse(source, state):
...
@@ -734,50 +737,48 @@ def _parse(source, state):
if
condgroup
<
0
:
if
condgroup
<
0
:
raise
ValueError
raise
ValueError
except
ValueError
:
except
ValueError
:
raise
source
.
error
(
"bad character in group name"
,
msg
=
"bad character in group name
%
r"
%
condname
len
(
condname
)
+
1
)
raise
source
.
error
(
msg
,
len
(
condname
)
+
1
)
from
None
if
not
condgroup
:
if
not
condgroup
:
raise
source
.
error
(
"bad group number"
,
raise
source
.
error
(
"bad group number"
,
len
(
condname
)
+
1
)
len
(
condname
)
+
1
)
if
condgroup
>=
MAXGROUPS
:
if
condgroup
>=
MAXGROUPS
:
raise
source
.
error
(
"
the group number is too larg
e"
,
raise
source
.
error
(
"
invalid group referenc
e"
,
len
(
condname
)
+
1
)
len
(
condname
)
+
1
)
state
.
checklookbehindgroup
(
condgroup
,
source
)
state
.
checklookbehindgroup
(
condgroup
,
source
)
elif
char
in
FLAGS
:
elif
char
in
FLAGS
:
# flags
# flags
while
True
:
state
.
flags
|=
FLAGS
[
char
]
state
.
flags
|=
FLAGS
[
char
]
while
source
.
next
in
FLAGS
:
char
=
sourceget
()
state
.
flags
|=
FLAGS
[
sourceget
()]
if
char
is
None
:
raise
source
.
error
(
"missing )"
)
if
char
==
")"
:
break
if
char
not
in
FLAGS
:
raise
source
.
error
(
"unknown flag"
,
len
(
char
))
verbose
=
state
.
flags
&
SRE_FLAG_VERBOSE
verbose
=
state
.
flags
&
SRE_FLAG_VERBOSE
continue
else
:
else
:
raise
source
.
error
(
"unexpected end of pattern"
)
raise
source
.
error
(
"unknown extension ?"
+
char
,
if
group
:
len
(
char
)
+
1
)
# parse group contents
# parse group contents
if
group
==
2
:
if
group
is
not
None
:
# anonymous group
group
=
None
else
:
try
:
try
:
group
=
state
.
opengroup
(
name
)
group
=
state
.
opengroup
(
name
)
except
error
as
err
:
except
error
as
err
:
raise
source
.
error
(
err
.
msg
,
len
(
name
)
+
1
)
raise
source
.
error
(
err
.
msg
,
len
(
name
)
+
1
)
from
None
if
condgroup
:
if
condgroup
:
p
=
_parse_sub_cond
(
source
,
state
,
condgroup
)
p
=
_parse_sub_cond
(
source
,
state
,
condgroup
)
else
:
else
:
p
=
_parse_sub
(
source
,
state
)
p
=
_parse_sub
(
source
,
state
)
if
not
sourcematch
(
")"
):
if
not
source
.
match
(
")"
):
raise
source
.
error
(
"unbalanced parenthesis"
)
raise
source
.
error
(
"missing ), unterminated subpattern"
,
source
.
tell
()
-
start
)
if
group
is
not
None
:
if
group
is
not
None
:
state
.
closegroup
(
group
,
p
)
state
.
closegroup
(
group
,
p
)
subpatternappend
((
SUBPATTERN
,
(
group
,
p
)))
subpatternappend
((
SUBPATTERN
,
(
group
,
p
)))
else
:
while
True
:
char
=
sourceget
()
if
char
is
None
:
raise
source
.
error
(
"unexpected end of pattern"
)
if
char
==
")"
:
break
raise
source
.
error
(
"unknown extension"
,
len
(
char
))
elif
this
==
"^"
:
elif
this
==
"^"
:
subpatternappend
((
AT
,
AT_BEGINNING
))
subpatternappend
((
AT
,
AT_BEGINNING
))
...
@@ -786,7 +787,7 @@ def _parse(source, state):
...
@@ -786,7 +787,7 @@ def _parse(source, state):
subpattern
.
append
((
AT
,
AT_END
))
subpattern
.
append
((
AT
,
AT_END
))
else
:
else
:
raise
source
.
error
(
"parser error"
,
len
(
this
))
raise
AssertionError
(
"unsupported special character
%
r"
%
(
char
,
))
return
subpattern
return
subpattern
...
@@ -804,7 +805,7 @@ def fix_flags(src, flags):
...
@@ -804,7 +805,7 @@ def fix_flags(src, flags):
raise
ValueError
(
"ASCII and UNICODE flags are incompatible"
)
raise
ValueError
(
"ASCII and UNICODE flags are incompatible"
)
else
:
else
:
if
flags
&
SRE_FLAG_UNICODE
:
if
flags
&
SRE_FLAG_UNICODE
:
raise
ValueError
(
"can
'
t use UNICODE flag with a bytes pattern"
)
raise
ValueError
(
"can
no
t use UNICODE flag with a bytes pattern"
)
if
flags
&
SRE_FLAG_LOCALE
and
flags
&
SRE_FLAG_ASCII
:
if
flags
&
SRE_FLAG_LOCALE
and
flags
&
SRE_FLAG_ASCII
:
import
warnings
import
warnings
warnings
.
warn
(
"ASCII and LOCALE flags are incompatible. "
warnings
.
warn
(
"ASCII and LOCALE flags are incompatible. "
...
@@ -826,11 +827,8 @@ def parse(str, flags=0, pattern=None):
...
@@ -826,11 +827,8 @@ def parse(str, flags=0, pattern=None):
p
.
pattern
.
flags
=
fix_flags
(
str
,
p
.
pattern
.
flags
)
p
.
pattern
.
flags
=
fix_flags
(
str
,
p
.
pattern
.
flags
)
if
source
.
next
is
not
None
:
if
source
.
next
is
not
None
:
if
source
.
next
==
")"
:
assert
source
.
next
==
")"
raise
source
.
error
(
"unbalanced parenthesis"
)
raise
source
.
error
(
"unbalanced parenthesis"
)
else
:
raise
source
.
error
(
"bogus characters at end of regular expression"
,
len
(
tail
))
if
flags
&
SRE_FLAG_DEBUG
:
if
flags
&
SRE_FLAG_DEBUG
:
p
.
dump
()
p
.
dump
()
...
@@ -866,26 +864,25 @@ def parse_template(source, pattern):
...
@@ -866,26 +864,25 @@ def parse_template(source, pattern):
c
=
this
[
1
]
c
=
this
[
1
]
if
c
==
"g"
:
if
c
==
"g"
:
name
=
""
name
=
""
if
s
.
match
(
"<"
):
if
not
s
.
match
(
"<"
):
raise
s
.
error
(
"missing <"
)
name
=
s
.
getuntil
(
">"
)
name
=
s
.
getuntil
(
">"
)
if
not
name
:
if
name
.
isidentifier
():
raise
s
.
error
(
"missing group name"
,
1
)
try
:
index
=
pattern
.
groupindex
[
name
]
except
KeyError
:
raise
IndexError
(
"unknown group name
%
r"
%
name
)
else
:
try
:
try
:
index
=
int
(
name
)
index
=
int
(
name
)
if
index
<
0
:
if
index
<
0
:
raise
s
.
error
(
"negative group number"
,
len
(
name
)
+
1
)
raise
ValueError
if
index
>=
MAXGROUPS
:
raise
s
.
error
(
"the group number is too large"
,
len
(
name
)
+
1
)
except
ValueError
:
except
ValueError
:
if
not
name
.
isidentifier
():
raise
s
.
error
(
"bad character in group name
%
r"
%
name
,
raise
s
.
error
(
"bad character in group name"
,
len
(
name
)
+
1
)
from
None
if
index
>=
MAXGROUPS
:
raise
s
.
error
(
"invalid group reference"
,
len
(
name
)
+
1
)
len
(
name
)
+
1
)
try
:
index
=
pattern
.
groupindex
[
name
]
except
KeyError
:
msg
=
"unknown group name: {0!r}"
.
format
(
name
)
raise
IndexError
(
msg
)
addgroup
(
index
)
addgroup
(
index
)
elif
c
==
"0"
:
elif
c
==
"0"
:
if
s
.
next
in
OCTDIGITS
:
if
s
.
next
in
OCTDIGITS
:
...
@@ -903,7 +900,7 @@ def parse_template(source, pattern):
...
@@ -903,7 +900,7 @@ def parse_template(source, pattern):
isoctal
=
True
isoctal
=
True
c
=
int
(
this
[
1
:],
8
)
c
=
int
(
this
[
1
:],
8
)
if
c
>
0
o377
:
if
c
>
0
o377
:
raise
s
.
error
(
'octal escape value
%
r
outside of '
raise
s
.
error
(
'octal escape value
%
s
outside of '
'range 0-0o377'
%
this
,
len
(
this
))
'range 0-0o377'
%
this
,
len
(
this
))
lappend
(
chr
(
c
))
lappend
(
chr
(
c
))
if
not
isoctal
:
if
not
isoctal
:
...
...
Lib/test/test_re.py
Dosyayı görüntüle @
632a77e6
...
@@ -38,6 +38,24 @@ class ReTests(unittest.TestCase):
...
@@ -38,6 +38,24 @@ class ReTests(unittest.TestCase):
self
.
assertIs
(
type
(
actual
),
type
(
expect
),
msg
)
self
.
assertIs
(
type
(
actual
),
type
(
expect
),
msg
)
recurse
(
actual
,
expect
)
recurse
(
actual
,
expect
)
def
checkPatternError
(
self
,
pattern
,
errmsg
,
pos
=
None
):
with
self
.
assertRaises
(
re
.
error
)
as
cm
:
re
.
compile
(
pattern
)
with
self
.
subTest
(
pattern
=
pattern
):
err
=
cm
.
exception
self
.
assertEqual
(
err
.
msg
,
errmsg
)
if
pos
is
not
None
:
self
.
assertEqual
(
err
.
pos
,
pos
)
def
checkTemplateError
(
self
,
pattern
,
repl
,
string
,
errmsg
,
pos
=
None
):
with
self
.
assertRaises
(
re
.
error
)
as
cm
:
re
.
sub
(
pattern
,
repl
,
string
)
with
self
.
subTest
(
pattern
=
pattern
,
repl
=
repl
):
err
=
cm
.
exception
self
.
assertEqual
(
err
.
msg
,
errmsg
)
if
pos
is
not
None
:
self
.
assertEqual
(
err
.
pos
,
pos
)
def
test_keep_buffer
(
self
):
def
test_keep_buffer
(
self
):
# See bug 14212
# See bug 14212
b
=
bytearray
(
b
'x'
)
b
=
bytearray
(
b
'x'
)
...
@@ -148,6 +166,7 @@ class ReTests(unittest.TestCase):
...
@@ -148,6 +166,7 @@ class ReTests(unittest.TestCase):
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\009'
,
'x'
),
'
\0
'
+
'9'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\009'
,
'x'
),
'
\0
'
+
'9'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\111'
,
'x'
),
'
\111
'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\111'
,
'x'
),
'
\111
'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\117'
,
'x'
),
'
\117
'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\117'
,
'x'
),
'
\117
'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\377'
,
'x'
),
'
\377
'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\1111'
,
'x'
),
'
\111
1'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\1111'
,
'x'
),
'
\111
1'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\1111'
,
'x'
),
'
\111
'
+
'1'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\1111'
,
'x'
),
'
\111
'
+
'1'
)
...
@@ -158,21 +177,25 @@ class ReTests(unittest.TestCase):
...
@@ -158,21 +177,25 @@ class ReTests(unittest.TestCase):
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\09'
,
'x'
),
'
\0
'
+
'9'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\09'
,
'x'
),
'
\0
'
+
'9'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\0a'
,
'x'
),
'
\0
'
+
'a'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\0a'
,
'x'
),
'
\0
'
+
'a'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\400'
,
'x'
)
self
.
checkTemplateError
(
'x'
,
r'\400'
,
'x'
,
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\777'
,
'x'
)
r'octal escape value \400 outside of '
r'range 0-0o377'
,
0
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\1'
,
'x'
)
self
.
checkTemplateError
(
'x'
,
r'\777'
,
'x'
,
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\8'
,
'x'
)
r'octal escape value \777 outside of '
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\9'
,
'x'
)
r'range 0-0o377'
,
0
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\11'
,
'x'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\18'
,
'x'
)
self
.
checkTemplateError
(
'x'
,
r'\1'
,
'x'
,
'invalid group reference'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\1a'
,
'x'
)
self
.
checkTemplateError
(
'x'
,
r'\8'
,
'x'
,
'invalid group reference'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\90'
,
'x'
)
self
.
checkTemplateError
(
'x'
,
r'\9'
,
'x'
,
'invalid group reference'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\99'
,
'x'
)
self
.
checkTemplateError
(
'x'
,
r'\11'
,
'x'
,
'invalid group reference'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\118'
,
'x'
)
# r'\11' + '8'
self
.
checkTemplateError
(
'x'
,
r'\18'
,
'x'
,
'invalid group reference'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\11a'
,
'x'
)
self
.
checkTemplateError
(
'x'
,
r'\1a'
,
'x'
,
'invalid group reference'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\181'
,
'x'
)
# r'\18' + '1'
self
.
checkTemplateError
(
'x'
,
r'\90'
,
'x'
,
'invalid group reference'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\800'
,
'x'
)
# r'\80' + '0'
self
.
checkTemplateError
(
'x'
,
r'\99'
,
'x'
,
'invalid group reference'
)
self
.
checkTemplateError
(
'x'
,
r'\118'
,
'x'
,
'invalid group reference'
)
# r'\11' + '8'
self
.
checkTemplateError
(
'x'
,
r'\11a'
,
'x'
,
'invalid group reference'
)
self
.
checkTemplateError
(
'x'
,
r'\181'
,
'x'
,
'invalid group reference'
)
# r'\18' + '1'
self
.
checkTemplateError
(
'x'
,
r'\800'
,
'x'
,
'invalid group reference'
)
# r'\80' + '0'
# in python2.3 (etc), these loop endlessly in sre_parser.py
# in python2.3 (etc), these loop endlessly in sre_parser.py
self
.
assertEqual
(
re
.
sub
(
'(((((((((((x)))))))))))'
,
r'\11'
,
'x'
),
'x'
)
self
.
assertEqual
(
re
.
sub
(
'(((((((((((x)))))))))))'
,
r'\11'
,
'x'
),
'x'
)
...
@@ -198,47 +221,65 @@ class ReTests(unittest.TestCase):
...
@@ -198,47 +221,65 @@ class ReTests(unittest.TestCase):
re
.
compile
(
'(?P<a>x)(?P=a)(?(a)y)'
)
re
.
compile
(
'(?P<a>x)(?P=a)(?(a)y)'
)
re
.
compile
(
'(?P<a1>x)(?P=a1)(?(a1)y)'
)
re
.
compile
(
'(?P<a1>x)(?P=a1)(?(a1)y)'
)
re
.
compile
(
'(?P<a1>x)
\1
(?(1)y)'
)
re
.
compile
(
'(?P<a1>x)
\1
(?(1)y)'
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?P<a>)(?P<a>)'
)
self
.
checkPatternError
(
'(?P<a>)(?P<a>)'
,
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?Px)'
)
"redefinition of group name 'a' as group 2; "
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?P=)'
)
"was group 1"
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?P=1)'
)
self
.
checkPatternError
(
'(?Pxy)'
,
'unknown extension ?Px'
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?P=a)'
)
self
.
checkPatternError
(
'(?P<a>)(?P=a'
,
'missing ), unterminated name'
,
11
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?P=a1)'
)
self
.
checkPatternError
(
'(?P='
,
'missing group name'
,
4
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?P=a.)'
)
self
.
checkPatternError
(
'(?P=)'
,
'missing group name'
,
4
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?P<)'
)
self
.
checkPatternError
(
'(?P=1)'
,
"bad character in group name '1'"
,
4
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?P<>)'
)
self
.
checkPatternError
(
'(?P=a)'
,
"unknown group name 'a'"
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?P<1>)'
)
self
.
checkPatternError
(
'(?P=a1)'
,
"unknown group name 'a1'"
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?P<a.>)'
)
self
.
checkPatternError
(
'(?P=a.)'
,
"bad character in group name 'a.'"
,
4
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?())'
)
self
.
checkPatternError
(
'(?P<)'
,
'missing >, unterminated name'
,
4
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?(a))'
)
self
.
checkPatternError
(
'(?P<a'
,
'missing >, unterminated name'
,
4
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?(1a))'
)
self
.
checkPatternError
(
'(?P<'
,
'missing group name'
,
4
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?(a.))'
)
self
.
checkPatternError
(
'(?P<>)'
,
'missing group name'
,
4
)
self
.
checkPatternError
(
r'(?P<1>)'
,
"bad character in group name '1'"
,
4
)
self
.
checkPatternError
(
r'(?P<a.>)'
,
"bad character in group name 'a.'"
,
4
)
self
.
checkPatternError
(
r'(?('
,
'missing group name'
,
3
)
self
.
checkPatternError
(
r'(?())'
,
'missing group name'
,
3
)
self
.
checkPatternError
(
r'(?(a))'
,
"unknown group name 'a'"
,
3
)
self
.
checkPatternError
(
r'(?(-1))'
,
"bad character in group name '-1'"
,
3
)
self
.
checkPatternError
(
r'(?(1a))'
,
"bad character in group name '1a'"
,
3
)
self
.
checkPatternError
(
r'(?(a.))'
,
"bad character in group name 'a.'"
,
3
)
# New valid/invalid identifiers in Python 3
# New valid/invalid identifiers in Python 3
re
.
compile
(
'(?P<µ>x)(?P=µ)(?(µ)y)'
)
re
.
compile
(
'(?P<µ>x)(?P=µ)(?(µ)y)'
)
re
.
compile
(
'(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)'
)
re
.
compile
(
'(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)'
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?P<©>x)'
)
self
.
checkPatternError
(
'(?P<©>x)'
,
"bad character in group name '©'"
,
4
)
# Support > 100 groups.
# Support > 100 groups.
pat
=
'|'
.
join
(
'x(?P<a
%
d>
%
x)y'
%
(
i
,
i
)
for
i
in
range
(
1
,
200
+
1
))
pat
=
'|'
.
join
(
'x(?P<a
%
d>
%
x)y'
%
(
i
,
i
)
for
i
in
range
(
1
,
200
+
1
))
pat
=
'(?:
%
s)(?(200)z|t)'
%
pat
pat
=
'(?:
%
s)(?(200)z|t)'
%
pat
self
.
assertEqual
(
re
.
match
(
pat
,
'xc8yz'
)
.
span
(),
(
0
,
5
))
self
.
assertEqual
(
re
.
match
(
pat
,
'xc8yz'
)
.
span
(),
(
0
,
5
))
def
test_symbolic_refs
(
self
):
def
test_symbolic_refs
(
self
):
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'(?P<a>x)'
,
'
\
g<a'
,
'xx'
)
self
.
checkTemplateError
(
'(?P<a>x)'
,
'
\
g<a'
,
'xx'
,
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'(?P<a>x)'
,
'
\
g<'
,
'xx'
)
'missing >, unterminated name'
,
3
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'(?P<a>x)'
,
'
\
g'
,
'xx'
)
self
.
checkTemplateError
(
'(?P<a>x)'
,
'
\
g<'
,
'xx'
,
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'(?P<a>x)'
,
'
\
g<a a>'
,
'xx'
)
'missing group name'
,
3
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'(?P<a>x)'
,
'
\
g<>'
,
'xx'
)
self
.
checkTemplateError
(
'(?P<a>x)'
,
'
\
g'
,
'xx'
,
'missing <'
,
2
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'(?P<a>x)'
,
'
\
g<1a1>'
,
'xx'
)
self
.
checkTemplateError
(
'(?P<a>x)'
,
'
\
g<a a>'
,
'xx'
,
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'(?P<a>x)'
,
r'\g<2>'
,
'xx'
)
"bad character in group name 'a a'"
,
3
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'(?P<a>x)'
,
r'\2'
,
'xx'
)
self
.
checkTemplateError
(
'(?P<a>x)'
,
'
\
g<>'
,
'xx'
,
self
.
assertRaises
(
IndexError
,
re
.
sub
,
'(?P<a>x)'
,
'
\
g<ab>'
,
'xx'
)
'missing group name'
,
3
)
self
.
checkTemplateError
(
'(?P<a>x)'
,
'
\
g<1a1>'
,
'xx'
,
"bad character in group name '1a1'"
,
3
)
self
.
checkTemplateError
(
'(?P<a>x)'
,
r'\g<2>'
,
'xx'
,
'invalid group reference'
)
self
.
checkTemplateError
(
'(?P<a>x)'
,
r'\2'
,
'xx'
,
'invalid group reference'
)
with
self
.
assertRaisesRegex
(
IndexError
,
"unknown group name 'ab'"
):
re
.
sub
(
'(?P<a>x)'
,
'
\
g<ab>'
,
'xx'
)
self
.
assertEqual
(
re
.
sub
(
'(?P<a>x)|(?P<b>y)'
,
r'\g<b>'
,
'xx'
),
''
)
self
.
assertEqual
(
re
.
sub
(
'(?P<a>x)|(?P<b>y)'
,
r'\g<b>'
,
'xx'
),
''
)
self
.
assertEqual
(
re
.
sub
(
'(?P<a>x)|(?P<b>y)'
,
r'\2'
,
'xx'
),
''
)
self
.
assertEqual
(
re
.
sub
(
'(?P<a>x)|(?P<b>y)'
,
r'\2'
,
'xx'
),
''
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'(?P<a>x)'
,
'
\
g<-1>'
,
'xx'
)
self
.
checkTemplateError
(
'(?P<a>x)'
,
'
\
g<-1>'
,
'xx'
,
"bad character in group name '-1'"
,
3
)
# New valid/invalid identifiers in Python 3
# New valid/invalid identifiers in Python 3
self
.
assertEqual
(
re
.
sub
(
'(?P<µ>x)'
,
r'\g<µ>'
,
'xx'
),
'xx'
)
self
.
assertEqual
(
re
.
sub
(
'(?P<µ>x)'
,
r'\g<µ>'
,
'xx'
),
'xx'
)
self
.
assertEqual
(
re
.
sub
(
'(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)'
,
r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>'
,
'xx'
),
'xx'
)
self
.
assertEqual
(
re
.
sub
(
'(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)'
,
r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>'
,
'xx'
),
'xx'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'(?P<a>x)'
,
r'\g<©>'
,
'xx'
)
self
.
checkTemplateError
(
'(?P<a>x)'
,
'
\
g<©>'
,
'xx'
,
"bad character in group name '©'"
,
3
)
# Support > 100 groups.
# Support > 100 groups.
pat
=
'|'
.
join
(
'x(?P<a
%
d>
%
x)y'
%
(
i
,
i
)
for
i
in
range
(
1
,
200
+
1
))
pat
=
'|'
.
join
(
'x(?P<a
%
d>
%
x)y'
%
(
i
,
i
)
for
i
in
range
(
1
,
200
+
1
))
self
.
assertEqual
(
re
.
sub
(
pat
,
'
\
g<200>'
,
'xc8yzxc8y'
),
'c8zc8'
)
self
.
assertEqual
(
re
.
sub
(
pat
,
'
\
g<200>'
,
'xc8yzxc8y'
),
'c8zc8'
)
...
@@ -444,6 +485,19 @@ class ReTests(unittest.TestCase):
...
@@ -444,6 +485,19 @@ class ReTests(unittest.TestCase):
pat
=
'(?:
%
s)(?(200)z)'
%
pat
pat
=
'(?:
%
s)(?(200)z)'
%
pat
self
.
assertEqual
(
re
.
match
(
pat
,
'xc8yz'
)
.
span
(),
(
0
,
5
))
self
.
assertEqual
(
re
.
match
(
pat
,
'xc8yz'
)
.
span
(),
(
0
,
5
))
self
.
checkPatternError
(
r'(?P<a>)(?(0))'
,
'bad group number'
,
10
)
self
.
checkPatternError
(
r'()(?(1)a|b'
,
'missing ), unterminated subpattern'
,
2
)
self
.
checkPatternError
(
r'()(?(1)a|b|c)'
,
'conditional backref with more than '
'two branches'
,
10
)
def
test_re_groupref_overflow
(
self
):
self
.
checkTemplateError
(
'()'
,
'
\
g<
%
s>'
%
sre_constants
.
MAXGROUPS
,
'xx'
,
'invalid group reference'
,
3
)
self
.
checkPatternError
(
r'(?P<a>)(?(
%
d))'
%
sre_constants
.
MAXGROUPS
,
'invalid group reference'
,
10
)
def
test_re_groupref
(
self
):
def
test_re_groupref
(
self
):
self
.
assertEqual
(
re
.
match
(
r'^(\|)?([^()]+)\1$'
,
'|a|'
)
.
groups
(),
self
.
assertEqual
(
re
.
match
(
r'^(\|)?([^()]+)\1$'
,
'|a|'
)
.
groups
(),
(
'|'
,
'a'
))
(
'|'
,
'a'
))
...
@@ -456,6 +510,8 @@ class ReTests(unittest.TestCase):
...
@@ -456,6 +510,8 @@ class ReTests(unittest.TestCase):
self
.
assertEqual
(
re
.
match
(
r'^(?:(a)|c)(\1)?$'
,
'c'
)
.
groups
(),
self
.
assertEqual
(
re
.
match
(
r'^(?:(a)|c)(\1)?$'
,
'c'
)
.
groups
(),
(
None
,
None
))
(
None
,
None
))
self
.
checkPatternError
(
r'(abc\1)'
,
'cannot refer to an open group'
,
4
)
def
test_groupdict
(
self
):
def
test_groupdict
(
self
):
self
.
assertEqual
(
re
.
match
(
'(?P<first>first) (?P<second>second)'
,
self
.
assertEqual
(
re
.
match
(
'(?P<first>first) (?P<second>second)'
,
'first second'
)
.
groupdict
(),
'first second'
)
.
groupdict
(),
...
@@ -493,6 +549,7 @@ class ReTests(unittest.TestCase):
...
@@ -493,6 +549,7 @@ class ReTests(unittest.TestCase):
self
.
assertTrue
(
re
.
match
(
"^x{3}$"
,
"xxx"
))
self
.
assertTrue
(
re
.
match
(
"^x{3}$"
,
"xxx"
))
self
.
assertTrue
(
re
.
match
(
"^x{1,3}$"
,
"xxx"
))
self
.
assertTrue
(
re
.
match
(
"^x{1,3}$"
,
"xxx"
))
self
.
assertTrue
(
re
.
match
(
"^x{3,3}$"
,
"xxx"
))
self
.
assertTrue
(
re
.
match
(
"^x{1,4}$"
,
"xxx"
))
self
.
assertTrue
(
re
.
match
(
"^x{1,4}$"
,
"xxx"
))
self
.
assertTrue
(
re
.
match
(
"^x{3,4}?$"
,
"xxx"
))
self
.
assertTrue
(
re
.
match
(
"^x{3,4}?$"
,
"xxx"
))
self
.
assertTrue
(
re
.
match
(
"^x{3}?$"
,
"xxx"
))
self
.
assertTrue
(
re
.
match
(
"^x{3}?$"
,
"xxx"
))
...
@@ -503,6 +560,9 @@ class ReTests(unittest.TestCase):
...
@@ -503,6 +560,9 @@ class ReTests(unittest.TestCase):
self
.
assertIsNone
(
re
.
match
(
"^x{}$"
,
"xxx"
))
self
.
assertIsNone
(
re
.
match
(
"^x{}$"
,
"xxx"
))
self
.
assertTrue
(
re
.
match
(
"^x{}$"
,
"x{}"
))
self
.
assertTrue
(
re
.
match
(
"^x{}$"
,
"x{}"
))
self
.
checkPatternError
(
r'x{2,1}'
,
'min repeat greater than max repeat'
,
2
)
def
test_getattr
(
self
):
def
test_getattr
(
self
):
self
.
assertEqual
(
re
.
compile
(
"(?i)(a)(b)"
)
.
pattern
,
"(?i)(a)(b)"
)
self
.
assertEqual
(
re
.
compile
(
"(?i)(a)(b)"
)
.
pattern
,
"(?i)(a)(b)"
)
self
.
assertEqual
(
re
.
compile
(
"(?i)(a)(b)"
)
.
flags
,
re
.
I
|
re
.
U
)
self
.
assertEqual
(
re
.
compile
(
"(?i)(a)(b)"
)
.
flags
,
re
.
I
|
re
.
U
)
...
@@ -550,7 +610,7 @@ class ReTests(unittest.TestCase):
...
@@ -550,7 +610,7 @@ class ReTests(unittest.TestCase):
b
"1aa! a"
,
re
.
LOCALE
)
.
group
(
0
),
b
"1aa! a"
)
b
"1aa! a"
,
re
.
LOCALE
)
.
group
(
0
),
b
"1aa! a"
)
def
test_other_escapes
(
self
):
def
test_other_escapes
(
self
):
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
"
\\
"
)
self
.
checkPatternError
(
"
\\
"
,
'bad escape (end of pattern)'
,
0
)
self
.
assertEqual
(
re
.
match
(
r"\("
,
'('
)
.
group
(),
'('
)
self
.
assertEqual
(
re
.
match
(
r"\("
,
'('
)
.
group
(),
'('
)
self
.
assertIsNone
(
re
.
match
(
r"\("
,
')'
))
self
.
assertIsNone
(
re
.
match
(
r"\("
,
')'
))
self
.
assertEqual
(
re
.
match
(
r"\\"
,
'
\\
'
)
.
group
(),
'
\\
'
)
self
.
assertEqual
(
re
.
match
(
r"\\"
,
'
\\
'
)
.
group
(),
'
\\
'
)
...
@@ -875,15 +935,17 @@ class ReTests(unittest.TestCase):
...
@@ -875,15 +935,17 @@ class ReTests(unittest.TestCase):
self
.
assertTrue
(
re
.
match
(
r"\08"
,
"
\000
8"
))
self
.
assertTrue
(
re
.
match
(
r"\08"
,
"
\000
8"
))
self
.
assertTrue
(
re
.
match
(
r"\01"
,
"
\001
"
))
self
.
assertTrue
(
re
.
match
(
r"\01"
,
"
\001
"
))
self
.
assertTrue
(
re
.
match
(
r"\018"
,
"
\001
8"
))
self
.
assertTrue
(
re
.
match
(
r"\018"
,
"
\001
8"
))
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"\567"
,
""
)
self
.
checkPatternError
(
r"\567"
,
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"\911"
,
""
)
r'octal escape value \567 outside of '
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"\x1"
,
""
)
r'range 0-0o377'
,
0
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"\x1z"
,
""
)
self
.
checkPatternError
(
r"\911"
,
'invalid group reference'
,
0
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"\u123"
,
""
)
self
.
checkPatternError
(
r"\x1"
,
r'incomplete escape \x1'
,
0
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"\u123z"
,
""
)
self
.
checkPatternError
(
r"\x1z"
,
r'incomplete escape \x1'
,
0
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"\U0001234"
,
""
)
self
.
checkPatternError
(
r"\u123"
,
r'incomplete escape \u123'
,
0
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"\U0001234z"
,
""
)
self
.
checkPatternError
(
r"\u123z"
,
r'incomplete escape \u123'
,
0
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"\U00110000"
,
""
)
self
.
checkPatternError
(
r"\U0001234"
,
r'incomplete escape \U0001234'
,
0
)
self
.
checkPatternError
(
r"\U0001234z"
,
r'incomplete escape \U0001234'
,
0
)
self
.
checkPatternError
(
r"\U00110000"
,
r'bad escape \U00110000'
,
0
)
def
test_sre_character_class_literals
(
self
):
def
test_sre_character_class_literals
(
self
):
for
i
in
[
0
,
8
,
16
,
32
,
64
,
127
,
128
,
255
,
256
,
0xFFFF
,
0x10000
,
0x10FFFF
]:
for
i
in
[
0
,
8
,
16
,
32
,
64
,
127
,
128
,
255
,
256
,
0xFFFF
,
0x10000
,
0x10FFFF
]:
...
@@ -903,12 +965,14 @@ class ReTests(unittest.TestCase):
...
@@ -903,12 +965,14 @@ class ReTests(unittest.TestCase):
self
.
assertTrue
(
re
.
match
(
r"[\U
%08
x]"
%
i
,
chr
(
i
)))
self
.
assertTrue
(
re
.
match
(
r"[\U
%08
x]"
%
i
,
chr
(
i
)))
self
.
assertTrue
(
re
.
match
(
r"[\U
%08
x0]"
%
i
,
chr
(
i
)
+
"0"
))
self
.
assertTrue
(
re
.
match
(
r"[\U
%08
x0]"
%
i
,
chr
(
i
)
+
"0"
))
self
.
assertTrue
(
re
.
match
(
r"[\U
%08
xz]"
%
i
,
chr
(
i
)
+
"z"
))
self
.
assertTrue
(
re
.
match
(
r"[\U
%08
xz]"
%
i
,
chr
(
i
)
+
"z"
))
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"[\567]"
,
""
)
self
.
checkPatternError
(
r"[\567]"
,
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"[\911]"
,
""
)
r'octal escape value \567 outside of '
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"[\x1z]"
,
""
)
r'range 0-0o377'
,
1
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"[\u123z]"
,
""
)
self
.
checkPatternError
(
r"[\911]"
,
r'bad escape \9'
,
1
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"[\U0001234z]"
,
""
)
self
.
checkPatternError
(
r"[\x1z]"
,
r'incomplete escape \x1'
,
1
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"[\U00110000]"
,
""
)
self
.
checkPatternError
(
r"[\u123z]"
,
r'incomplete escape \u123'
,
1
)
self
.
checkPatternError
(
r"[\U0001234z]"
,
r'incomplete escape \U0001234'
,
1
)
self
.
checkPatternError
(
r"[\U00110000]"
,
r'bad escape \U00110000'
,
1
)
self
.
assertTrue
(
re
.
match
(
r"[\U0001d49c-\U0001d4b5]"
,
"
\U0001d49e
"
))
self
.
assertTrue
(
re
.
match
(
r"[\U0001d49c-\U0001d4b5]"
,
"
\U0001d49e
"
))
def
test_sre_byte_literals
(
self
):
def
test_sre_byte_literals
(
self
):
...
@@ -927,10 +991,12 @@ class ReTests(unittest.TestCase):
...
@@ -927,10 +991,12 @@ class ReTests(unittest.TestCase):
self
.
assertTrue
(
re
.
match
(
br
"
\0
8"
,
b
"
\000
8"
))
self
.
assertTrue
(
re
.
match
(
br
"
\0
8"
,
b
"
\000
8"
))
self
.
assertTrue
(
re
.
match
(
br
"
\01
"
,
b
"
\001
"
))
self
.
assertTrue
(
re
.
match
(
br
"
\01
"
,
b
"
\001
"
))
self
.
assertTrue
(
re
.
match
(
br
"
\01
8"
,
b
"
\001
8"
))
self
.
assertTrue
(
re
.
match
(
br
"
\01
8"
,
b
"
\001
8"
))
self
.
assertRaises
(
re
.
error
,
re
.
match
,
br
"
\567
"
,
b
""
)
self
.
checkPatternError
(
br
"
\567
"
,
self
.
assertRaises
(
re
.
error
,
re
.
match
,
br
"
\
911"
,
b
""
)
r'octal escape value \567 outside of '
self
.
assertRaises
(
re
.
error
,
re
.
match
,
br
"
\
x1"
,
b
""
)
r'range 0-0o377'
,
0
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
br
"
\
x1z"
,
b
""
)
self
.
checkPatternError
(
br
"
\
911"
,
'invalid group reference'
,
0
)
self
.
checkPatternError
(
br
"
\
x1"
,
r'incomplete escape \x1'
,
0
)
self
.
checkPatternError
(
br
"
\
x1z"
,
r'incomplete escape \x1'
,
0
)
def
test_sre_byte_class_literals
(
self
):
def
test_sre_byte_class_literals
(
self
):
for
i
in
[
0
,
8
,
16
,
32
,
64
,
127
,
128
,
255
]:
for
i
in
[
0
,
8
,
16
,
32
,
64
,
127
,
128
,
255
]:
...
@@ -946,9 +1012,22 @@ class ReTests(unittest.TestCase):
...
@@ -946,9 +1012,22 @@ class ReTests(unittest.TestCase):
self
.
assertTrue
(
re
.
match
(
br
"[
\u1234
]"
,
b
'u'
))
self
.
assertTrue
(
re
.
match
(
br
"[
\u1234
]"
,
b
'u'
))
with
self
.
assertWarns
(
DeprecationWarning
):
with
self
.
assertWarns
(
DeprecationWarning
):
self
.
assertTrue
(
re
.
match
(
br
"[
\U00012345
]"
,
b
'U'
))
self
.
assertTrue
(
re
.
match
(
br
"[
\U00012345
]"
,
b
'U'
))
self
.
assertRaises
(
re
.
error
,
re
.
match
,
br
"[
\567
]"
,
b
""
)
self
.
checkPatternError
(
br
"[
\567
]"
,
self
.
assertRaises
(
re
.
error
,
re
.
match
,
br
"[
\
911]"
,
b
""
)
r'octal escape value \567 outside of '
self
.
assertRaises
(
re
.
error
,
re
.
match
,
br
"[
\
x1z]"
,
b
""
)
r'range 0-0o377'
,
1
)
self
.
checkPatternError
(
br
"[
\
911]"
,
r'bad escape \9'
,
1
)
self
.
checkPatternError
(
br
"[
\
x1z]"
,
r'incomplete escape \x1'
,
1
)
def
test_character_set_errors
(
self
):
self
.
checkPatternError
(
r'['
,
'unterminated character set'
,
0
)
self
.
checkPatternError
(
r'[^'
,
'unterminated character set'
,
0
)
self
.
checkPatternError
(
r'[a'
,
'unterminated character set'
,
0
)
# bug 545855 -- This pattern failed to cause a compile error as it
# should, instead provoking a TypeError.
self
.
checkPatternError
(
r"[a-"
,
'unterminated character set'
,
0
)
self
.
checkPatternError
(
r"[\w-b]"
,
r'bad character range \w-b'
,
1
)
self
.
checkPatternError
(
r"[a-\w]"
,
r'bad character range a-\w'
,
1
)
self
.
checkPatternError
(
r"[b-a]"
,
'bad character range b-a'
,
1
)
def
test_bug_113254
(
self
):
def
test_bug_113254
(
self
):
self
.
assertEqual
(
re
.
match
(
r'(a)|(b)'
,
'b'
)
.
start
(
1
),
-
1
)
self
.
assertEqual
(
re
.
match
(
r'(a)|(b)'
,
'b'
)
.
start
(
1
),
-
1
)
...
@@ -963,11 +1042,6 @@ class ReTests(unittest.TestCase):
...
@@ -963,11 +1042,6 @@ class ReTests(unittest.TestCase):
self
.
assertEqual
(
re
.
match
(
"(?P<a>a(b))"
,
"ab"
)
.
lastgroup
,
'a'
)
self
.
assertEqual
(
re
.
match
(
"(?P<a>a(b))"
,
"ab"
)
.
lastgroup
,
'a'
)
self
.
assertEqual
(
re
.
match
(
"((a))"
,
"a"
)
.
lastindex
,
1
)
self
.
assertEqual
(
re
.
match
(
"((a))"
,
"a"
)
.
lastindex
,
1
)
def
test_bug_545855
(
self
):
# bug 545855 -- This pattern failed to cause a compile error as it
# should, instead provoking a TypeError.
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'foo[a-'
)
def
test_bug_418626
(
self
):
def
test_bug_418626
(
self
):
# bugs 418626 at al. -- Testing Greg Chapman's addition of op code
# bugs 418626 at al. -- Testing Greg Chapman's addition of op code
# SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
# SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
...
@@ -991,6 +1065,24 @@ class ReTests(unittest.TestCase):
...
@@ -991,6 +1065,24 @@ class ReTests(unittest.TestCase):
self
.
assertEqual
(
re
.
match
(
'(x)*y'
,
50000
*
'x'
+
'y'
)
.
group
(
1
),
'x'
)
self
.
assertEqual
(
re
.
match
(
'(x)*y'
,
50000
*
'x'
+
'y'
)
.
group
(
1
),
'x'
)
self
.
assertEqual
(
re
.
match
(
'(x)*?y'
,
50000
*
'x'
+
'y'
)
.
group
(
1
),
'x'
)
self
.
assertEqual
(
re
.
match
(
'(x)*?y'
,
50000
*
'x'
+
'y'
)
.
group
(
1
),
'x'
)
def
test_nothing_to_repeat
(
self
):
for
reps
in
'*'
,
'+'
,
'?'
,
'{1,2}'
:
for
mod
in
''
,
'?'
:
self
.
checkPatternError
(
'
%
s
%
s'
%
(
reps
,
mod
),
'nothing to repeat'
,
0
)
self
.
checkPatternError
(
'(?:
%
s
%
s)'
%
(
reps
,
mod
),
'nothing to repeat'
,
3
)
def
test_multiple_repeat
(
self
):
for
outer_reps
in
'*'
,
'+'
,
'{1,2}'
:
for
outer_mod
in
''
,
'?'
:
outer_op
=
outer_reps
+
outer_mod
for
inner_reps
in
'*'
,
'+'
,
'?'
,
'{1,2}'
:
for
inner_mod
in
''
,
'?'
:
inner_op
=
inner_reps
+
inner_mod
self
.
checkPatternError
(
r'x
%
s
%
s'
%
(
inner_op
,
outer_op
),
'multiple repeat'
,
1
+
len
(
inner_op
))
def
test_unlimited_zero_width_repeat
(
self
):
def
test_unlimited_zero_width_repeat
(
self
):
# Issue #9669
# Issue #9669
self
.
assertIsNone
(
re
.
match
(
r'(?:a?)*y'
,
'z'
))
self
.
assertIsNone
(
re
.
match
(
r'(?:a?)*y'
,
'z'
))
...
@@ -1381,13 +1473,13 @@ class ReTests(unittest.TestCase):
...
@@ -1381,13 +1473,13 @@ class ReTests(unittest.TestCase):
def
test_backref_group_name_in_exception
(
self
):
def
test_backref_group_name_in_exception
(
self
):
# Issue 17341: Poor error message when compiling invalid regex
# Issue 17341: Poor error message when compiling invalid regex
with
self
.
assertRaisesRegex
(
sre_constants
.
error
,
'<foo>'
):
self
.
checkPatternError
(
'(?P=<foo>)'
,
re
.
compile
(
'(?P=<foo>)'
)
"bad character in group name '<foo>'"
,
4
)
def
test_group_name_in_exception
(
self
):
def
test_group_name_in_exception
(
self
):
# Issue 17341: Poor error message when compiling invalid regex
# Issue 17341: Poor error message when compiling invalid regex
with
self
.
assertRaisesRegex
(
sre_constants
.
error
,
'
\
?foo'
):
self
.
checkPatternError
(
'(?P<?foo>)'
,
re
.
compile
(
'(?P<?foo>)'
)
"bad character in group name '?foo'"
,
4
)
def
test_issue17998
(
self
):
def
test_issue17998
(
self
):
for
reps
in
'*'
,
'+'
,
'?'
,
'{1}'
:
for
reps
in
'*'
,
'+'
,
'?'
,
'{1}'
:
...
@@ -1556,6 +1648,19 @@ SUBPATTERN None
...
@@ -1556,6 +1648,19 @@ SUBPATTERN None
self
.
assertIn
(
' at position 77'
,
str
(
err
))
self
.
assertIn
(
' at position 77'
,
str
(
err
))
self
.
assertIn
(
'(line 5, column 17)'
,
str
(
err
))
self
.
assertIn
(
'(line 5, column 17)'
,
str
(
err
))
def
test_misc_errors
(
self
):
self
.
checkPatternError
(
r'('
,
'missing ), unterminated subpattern'
,
0
)
self
.
checkPatternError
(
r'((a|b)'
,
'missing ), unterminated subpattern'
,
0
)
self
.
checkPatternError
(
r'(a|b))'
,
'unbalanced parenthesis'
,
5
)
self
.
checkPatternError
(
r'(?P'
,
'unexpected end of pattern'
,
3
)
self
.
checkPatternError
(
r'(?z)'
,
'unknown extension ?z'
,
1
)
self
.
checkPatternError
(
r'(?iz)'
,
'unknown flag'
,
3
)
self
.
checkPatternError
(
r'(?i'
,
'missing )'
,
3
)
self
.
checkPatternError
(
r'(?#abc'
,
'missing ), unterminated comment'
,
0
)
self
.
checkPatternError
(
r'(?<'
,
'unexpected end of pattern'
,
3
)
self
.
checkPatternError
(
r'(?<>)'
,
'unknown extension ?<>'
,
1
)
self
.
checkPatternError
(
r'(?'
,
'unexpected end of pattern'
,
2
)
class
PatternReprTests
(
unittest
.
TestCase
):
class
PatternReprTests
(
unittest
.
TestCase
):
def
check
(
self
,
pattern
,
expected
):
def
check
(
self
,
pattern
,
expected
):
...
...
Misc/NEWS
Dosyayı görüntüle @
632a77e6
...
@@ -30,6 +30,8 @@ Core and Builtins
...
@@ -30,6 +30,8 @@ Core and Builtins
Library
Library
-------
-------
-
Issue
#
22364
:
Improved
some
re
error
messages
using
regex
for
hints
.
-
Issue
#
23742
:
ntpath
.
expandvars
()
no
longer
loses
unbalanced
single
quotes
.
-
Issue
#
23742
:
ntpath
.
expandvars
()
no
longer
loses
unbalanced
single
quotes
.
-
Issue
#
21717
:
The
zipfile
.
ZipFile
.
open
function
now
supports
'x'
(
exclusive
-
Issue
#
21717
:
The
zipfile
.
ZipFile
.
open
function
now
supports
'x'
(
exclusive
...
...
Modules/_sre.c
Dosyayı görüntüle @
632a77e6
...
@@ -315,7 +315,7 @@ getstring(PyObject* string, Py_ssize_t* p_length,
...
@@ -315,7 +315,7 @@ getstring(PyObject* string, Py_ssize_t* p_length,
/* get pointer to byte string buffer */
/* get pointer to byte string buffer */
if
(
PyObject_GetBuffer
(
string
,
view
,
PyBUF_SIMPLE
)
!=
0
)
{
if
(
PyObject_GetBuffer
(
string
,
view
,
PyBUF_SIMPLE
)
!=
0
)
{
PyErr_SetString
(
PyExc_TypeError
,
"expected string or b
uffer
"
);
PyErr_SetString
(
PyExc_TypeError
,
"expected string or b
ytes-like object
"
);
return
NULL
;
return
NULL
;
}
}
...
@@ -359,12 +359,12 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
...
@@ -359,12 +359,12 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
if
(
isbytes
&&
pattern
->
isbytes
==
0
)
{
if
(
isbytes
&&
pattern
->
isbytes
==
0
)
{
PyErr_SetString
(
PyExc_TypeError
,
PyErr_SetString
(
PyExc_TypeError
,
"can
'
t use a string pattern on a bytes-like object"
);
"can
no
t use a string pattern on a bytes-like object"
);
goto
err
;
goto
err
;
}
}
if
(
!
isbytes
&&
pattern
->
isbytes
>
0
)
{
if
(
!
isbytes
&&
pattern
->
isbytes
>
0
)
{
PyErr_SetString
(
PyExc_TypeError
,
PyErr_SetString
(
PyExc_TypeError
,
"can
'
t use a bytes pattern on a string-like object"
);
"can
no
t use a bytes pattern on a string-like object"
);
goto
err
;
goto
err
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment