Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
632a77e6
Kaydet (Commit)
632a77e6
authored
Mar 25, 2015
tarafından
Serhiy Storchaka
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
Issue #22364: Improved some re error messages using regex for hints.
üst
7c316a18
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
285 additions
and
181 deletions
+285
-181
re.py
Lib/re.py
+1
-1
sre_compile.py
Lib/sre_compile.py
+3
-3
sre_parse.py
Lib/sre_parse.py
+97
-100
test_re.py
Lib/test/test_re.py
+179
-74
NEWS
Misc/NEWS
+2
-0
_sre.c
Modules/_sre.c
+3
-3
No files found.
Lib/re.py
Dosyayı görüntüle @
632a77e6
...
...
@@ -286,7 +286,7 @@ def _compile(pattern, flags):
if
isinstance
(
pattern
,
_pattern_type
):
if
flags
:
raise
ValueError
(
"
C
annot process flags argument with a compiled pattern"
)
"
c
annot process flags argument with a compiled pattern"
)
return
pattern
if
not
sre_compile
.
isstring
(
pattern
):
raise
TypeError
(
"first argument must be string or compiled pattern"
)
...
...
Lib/sre_compile.py
Dosyayı görüntüle @
632a77e6
...
...
@@ -113,7 +113,7 @@ def _compile(code, pattern, flags):
emit
(
ANY
)
elif
op
in
REPEATING_CODES
:
if
flags
&
SRE_FLAG_TEMPLATE
:
raise
error
(
"internal: unsupported template operator
"
)
raise
error
(
"internal: unsupported template operator
%
r"
%
(
op
,)
)
elif
_simple
(
av
)
and
op
is
not
REPEAT
:
if
op
is
MAX_REPEAT
:
emit
(
REPEAT_ONE
)
...
...
@@ -216,7 +216,7 @@ def _compile(code, pattern, flags):
else
:
code
[
skipyes
]
=
_len
(
code
)
-
skipyes
+
1
else
:
raise
ValueError
(
"unsupported operand type"
,
op
)
raise
error
(
"internal: unsupported operand type
%
r"
%
(
op
,)
)
def
_compile_charset
(
charset
,
flags
,
code
,
fixup
=
None
,
fixes
=
None
):
# compile charset subprogram
...
...
@@ -242,7 +242,7 @@ def _compile_charset(charset, flags, code, fixup=None, fixes=None):
else
:
emit
(
av
)
else
:
raise
error
(
"internal: unsupported set operator
"
)
raise
error
(
"internal: unsupported set operator
%
r"
%
(
op
,)
)
emit
(
FAILURE
)
def
_optimize_charset
(
charset
,
fixup
,
fixes
):
...
...
Lib/sre_parse.py
Dosyayı görüntüle @
632a77e6
...
...
@@ -79,7 +79,7 @@ class Pattern:
gid
=
self
.
groups
self
.
subpatterns
.
append
(
None
)
if
self
.
groups
>
MAXGROUPS
:
raise
error
(
"
groups number is too large
"
)
raise
error
(
"
too many groups
"
)
if
name
is
not
None
:
ogid
=
self
.
groupdict
.
get
(
name
,
None
)
if
ogid
is
not
None
:
...
...
@@ -235,7 +235,7 @@ class Tokenizer:
try
:
char
+=
self
.
decoded_string
[
index
]
except
IndexError
:
raise
error
(
"b
ogus escape (end of line
)"
,
raise
error
(
"b
ad escape (end of pattern
)"
,
self
.
string
,
len
(
self
.
string
)
-
1
)
from
None
self
.
index
=
index
+
1
self
.
next
=
char
...
...
@@ -263,8 +263,13 @@ class Tokenizer:
c
=
self
.
next
self
.
__next
()
if
c
is
None
:
raise
self
.
error
(
"unterminated name"
)
if
not
result
:
raise
self
.
error
(
"missing group name"
)
raise
self
.
error
(
"missing
%
s, unterminated name"
%
terminator
,
len
(
result
))
if
c
==
terminator
:
if
not
result
:
raise
self
.
error
(
"missing group name"
,
1
)
break
result
+=
c
return
result
...
...
@@ -318,19 +323,19 @@ def _class_escape(source, escape):
# hexadecimal escape (exactly two digits)
escape
+=
source
.
getwhile
(
2
,
HEXDIGITS
)
if
len
(
escape
)
!=
4
:
raise
ValueError
raise
source
.
error
(
"incomplete escape
%
s"
%
escape
,
len
(
escape
))
return
LITERAL
,
int
(
escape
[
2
:],
16
)
elif
c
==
"u"
and
source
.
istext
:
# unicode escape (exactly four digits)
escape
+=
source
.
getwhile
(
4
,
HEXDIGITS
)
if
len
(
escape
)
!=
6
:
raise
ValueError
raise
source
.
error
(
"incomplete escape
%
s"
%
escape
,
len
(
escape
))
return
LITERAL
,
int
(
escape
[
2
:],
16
)
elif
c
==
"U"
and
source
.
istext
:
# unicode escape (exactly eight digits)
escape
+=
source
.
getwhile
(
8
,
HEXDIGITS
)
if
len
(
escape
)
!=
10
:
raise
ValueError
raise
source
.
error
(
"incomplete escape
%
s"
%
escape
,
len
(
escape
))
c
=
int
(
escape
[
2
:],
16
)
chr
(
c
)
# raise ValueError for invalid code
return
LITERAL
,
c
...
...
@@ -339,7 +344,7 @@ def _class_escape(source, escape):
escape
+=
source
.
getwhile
(
2
,
OCTDIGITS
)
c
=
int
(
escape
[
1
:],
8
)
if
c
>
0
o377
:
raise
source
.
error
(
'octal escape value
%
r
outside of '
raise
source
.
error
(
'octal escape value
%
s
outside of '
'range 0-0o377'
%
escape
,
len
(
escape
))
return
LITERAL
,
c
elif
c
in
DIGITS
:
...
...
@@ -352,7 +357,7 @@ def _class_escape(source, escape):
return
LITERAL
,
ord
(
escape
[
1
])
except
ValueError
:
pass
raise
source
.
error
(
"b
ogus escape:
%
r
"
%
escape
,
len
(
escape
))
raise
source
.
error
(
"b
ad escape
%
s
"
%
escape
,
len
(
escape
))
def
_escape
(
source
,
escape
,
state
):
# handle escape code in expression
...
...
@@ -368,19 +373,19 @@ def _escape(source, escape, state):
# hexadecimal escape
escape
+=
source
.
getwhile
(
2
,
HEXDIGITS
)
if
len
(
escape
)
!=
4
:
raise
ValueError
raise
source
.
error
(
"incomplete escape
%
s"
%
escape
,
len
(
escape
))
return
LITERAL
,
int
(
escape
[
2
:],
16
)
elif
c
==
"u"
and
source
.
istext
:
# unicode escape (exactly four digits)
escape
+=
source
.
getwhile
(
4
,
HEXDIGITS
)
if
len
(
escape
)
!=
6
:
raise
ValueError
raise
source
.
error
(
"incomplete escape
%
s"
%
escape
,
len
(
escape
))
return
LITERAL
,
int
(
escape
[
2
:],
16
)
elif
c
==
"U"
and
source
.
istext
:
# unicode escape (exactly eight digits)
escape
+=
source
.
getwhile
(
8
,
HEXDIGITS
)
if
len
(
escape
)
!=
10
:
raise
ValueError
raise
source
.
error
(
"incomplete escape
%
s"
%
escape
,
len
(
escape
))
c
=
int
(
escape
[
2
:],
16
)
chr
(
c
)
# raise ValueError for invalid code
return
LITERAL
,
c
...
...
@@ -398,7 +403,7 @@ def _escape(source, escape, state):
escape
+=
source
.
get
()
c
=
int
(
escape
[
1
:],
8
)
if
c
>
0
o377
:
raise
source
.
error
(
'octal escape value
%
r
outside of '
raise
source
.
error
(
'octal escape value
%
s
outside of '
'range 0-0o377'
%
escape
,
len
(
escape
))
return
LITERAL
,
c
...
...
@@ -406,11 +411,11 @@ def _escape(source, escape, state):
group
=
int
(
escape
[
1
:])
if
group
<
state
.
groups
:
if
not
state
.
checkgroup
(
group
):
raise
source
.
error
(
"cannot refer to open group"
,
raise
source
.
error
(
"cannot refer to
an
open group"
,
len
(
escape
))
state
.
checklookbehindgroup
(
group
,
source
)
return
GROUPREF
,
group
raise
ValueError
raise
source
.
error
(
"invalid group reference"
,
len
(
escape
))
if
len
(
escape
)
==
2
:
if
c
in
ASCIILETTERS
:
import
warnings
...
...
@@ -419,7 +424,7 @@ def _escape(source, escape, state):
return
LITERAL
,
ord
(
escape
[
1
])
except
ValueError
:
pass
raise
source
.
error
(
"b
ogus escape:
%
r
"
%
escape
,
len
(
escape
))
raise
source
.
error
(
"b
ad escape
%
s
"
%
escape
,
len
(
escape
))
def
_parse_sub
(
source
,
state
,
nested
=
True
):
# parse an alternation: a|b|c
...
...
@@ -427,12 +432,11 @@ def _parse_sub(source, state, nested=True):
items
=
[]
itemsappend
=
items
.
append
sourcematch
=
source
.
match
start
=
source
.
tell
()
while
True
:
itemsappend
(
_parse
(
source
,
state
))
if
not
sourcematch
(
"|"
):
break
if
nested
and
source
.
next
is
not
None
and
source
.
next
!=
")"
:
raise
source
.
error
(
"pattern not properly closed"
)
if
len
(
items
)
==
1
:
return
items
[
0
]
...
...
@@ -480,8 +484,6 @@ def _parse_sub_cond(source, state, condgroup):
raise
source
.
error
(
"conditional backref with more than two branches"
)
else
:
item_no
=
None
if
source
.
next
is
not
None
and
source
.
next
!=
")"
:
raise
source
.
error
(
"pattern not properly closed"
)
subpattern
=
SubPattern
(
state
)
subpattern
.
append
((
GROUPREF_EXISTS
,
(
condgroup
,
item_yes
,
item_no
)))
return
subpattern
...
...
@@ -526,6 +528,7 @@ def _parse(source, state):
subpatternappend
((
LITERAL
,
_ord
(
this
)))
elif
this
==
"["
:
here
=
source
.
tell
()
-
1
# character set
set
=
[]
setappend
=
set
.
append
...
...
@@ -538,7 +541,8 @@ def _parse(source, state):
while
True
:
this
=
sourceget
()
if
this
is
None
:
raise
source
.
error
(
"unexpected end of regular expression"
)
raise
source
.
error
(
"unterminated character set"
,
source
.
tell
()
-
here
)
if
this
==
"]"
and
set
!=
start
:
break
elif
this
[
0
]
==
"
\\
"
:
...
...
@@ -547,25 +551,28 @@ def _parse(source, state):
code1
=
LITERAL
,
_ord
(
this
)
if
sourcematch
(
"-"
):
# potential range
this
=
sourceget
()
if
this
is
None
:
raise
source
.
error
(
"unexpected end of regular expression"
)
if
this
==
"]"
:
that
=
sourceget
()
if
that
is
None
:
raise
source
.
error
(
"unterminated character set"
,
source
.
tell
()
-
here
)
if
that
==
"]"
:
if
code1
[
0
]
is
IN
:
code1
=
code1
[
1
][
0
]
setappend
(
code1
)
setappend
((
LITERAL
,
_ord
(
"-"
)))
break
if
th
is
[
0
]
==
"
\\
"
:
code2
=
_class_escape
(
source
,
th
is
)
if
th
at
[
0
]
==
"
\\
"
:
code2
=
_class_escape
(
source
,
th
at
)
else
:
code2
=
LITERAL
,
_ord
(
th
is
)
code2
=
LITERAL
,
_ord
(
th
at
)
if
code1
[
0
]
!=
LITERAL
or
code2
[
0
]
!=
LITERAL
:
raise
source
.
error
(
"bad character range"
,
len
(
this
))
msg
=
"bad character range
%
s-
%
s"
%
(
this
,
that
)
raise
source
.
error
(
msg
,
len
(
this
)
+
1
+
len
(
that
))
lo
=
code1
[
1
]
hi
=
code2
[
1
]
if
hi
<
lo
:
raise
source
.
error
(
"bad character range"
,
len
(
this
))
msg
=
"bad character range
%
s-
%
s"
%
(
this
,
that
)
raise
source
.
error
(
msg
,
len
(
this
)
+
1
+
len
(
that
))
setappend
((
RANGE
,
(
lo
,
hi
)))
else
:
if
code1
[
0
]
is
IN
:
...
...
@@ -617,10 +624,10 @@ def _parse(source, state):
if
max
>=
MAXREPEAT
:
raise
OverflowError
(
"the repetition number is too large"
)
if
max
<
min
:
raise
source
.
error
(
"
bad repeat interval
"
,
raise
source
.
error
(
"
min repeat greater than max repeat
"
,
source
.
tell
()
-
here
)
else
:
raise
source
.
error
(
"not supported"
,
len
(
this
))
raise
AssertionError
(
"unsupported quantifier
%
r"
%
(
char
,
))
# figure out which item to repeat
if
subpattern
:
item
=
subpattern
[
-
1
:]
...
...
@@ -641,39 +648,32 @@ def _parse(source, state):
subpatternappend
((
ANY
,
None
))
elif
this
==
"("
:
group
=
1
start
=
source
.
tell
()
-
1
group
=
True
name
=
None
condgroup
=
None
if
sourcematch
(
"?"
):
group
=
0
# options
char
=
sourceget
()
if
char
is
None
:
raise
s
elf
.
error
(
"unexpected end of pattern"
)
raise
s
ource
.
error
(
"unexpected end of pattern"
)
if
char
==
"P"
:
# python extensions
if
sourcematch
(
"<"
):
# named group: skip forward to end of name
name
=
source
.
getuntil
(
">"
)
group
=
1
if
not
name
:
raise
source
.
error
(
"missing group name"
,
1
)
if
not
name
.
isidentifier
():
raise
source
.
error
(
"bad character in group name "
"
%
r"
%
name
,
len
(
name
)
+
1
)
msg
=
"bad character in group name
%
r"
%
name
raise
source
.
error
(
msg
,
len
(
name
)
+
1
)
elif
sourcematch
(
"="
):
# named backreference
name
=
source
.
getuntil
(
")"
)
if
not
name
:
raise
source
.
error
(
"missing group name"
,
1
)
if
not
name
.
isidentifier
():
raise
source
.
error
(
"bad character in backref "
"group name
%
r"
%
name
,
len
(
name
)
+
1
)
msg
=
"bad character in group name
%
r"
%
name
raise
source
.
error
(
msg
,
len
(
name
)
+
1
)
gid
=
state
.
groupdict
.
get
(
name
)
if
gid
is
None
:
msg
=
"unknown group name
: {0!r}"
.
format
(
name
)
msg
=
"unknown group name
%
r"
%
name
raise
source
.
error
(
msg
,
len
(
name
)
+
1
)
state
.
checklookbehindgroup
(
gid
,
source
)
subpatternappend
((
GROUPREF
,
gid
))
...
...
@@ -682,16 +682,17 @@ def _parse(source, state):
char
=
sourceget
()
if
char
is
None
:
raise
source
.
error
(
"unexpected end of pattern"
)
raise
source
.
error
(
"unknown
specifier: ?P
%
s"
%
char
,
len
(
char
))
raise
source
.
error
(
"unknown
extension ?P"
+
char
,
len
(
char
)
+
2
)
elif
char
==
":"
:
# non-capturing group
group
=
2
group
=
None
elif
char
==
"#"
:
# comment
while
True
:
if
source
.
next
is
None
:
raise
source
.
error
(
"unbalanced parenthesis"
)
raise
source
.
error
(
"missing ), unterminated comment"
,
source
.
tell
()
-
start
)
if
sourceget
()
==
")"
:
break
continue
...
...
@@ -700,8 +701,11 @@ def _parse(source, state):
dir
=
1
if
char
==
"<"
:
char
=
sourceget
()
if
char
is
None
or
char
not
in
"=!"
:
raise
source
.
error
(
"syntax error"
)
if
char
is
None
:
raise
source
.
error
(
"unexpected end of pattern"
)
if
char
not
in
"=!"
:
raise
source
.
error
(
"unknown extension ?<"
+
char
,
len
(
char
)
+
2
)
dir
=
-
1
# lookbehind
lookbehindgroups
=
state
.
lookbehindgroups
if
lookbehindgroups
is
None
:
...
...
@@ -711,7 +715,8 @@ def _parse(source, state):
if
lookbehindgroups
is
None
:
state
.
lookbehindgroups
=
None
if
not
sourcematch
(
")"
):
raise
source
.
error
(
"unbalanced parenthesis"
)
raise
source
.
error
(
"missing ), unterminated subpattern"
,
source
.
tell
()
-
start
)
if
char
==
"="
:
subpatternappend
((
ASSERT
,
(
dir
,
p
)))
else
:
...
...
@@ -720,13 +725,11 @@ def _parse(source, state):
elif
char
==
"("
:
# conditional backreference group
condname
=
source
.
getuntil
(
")"
)
group
=
2
if
not
condname
:
raise
source
.
error
(
"missing group name"
,
1
)
group
=
None
if
condname
.
isidentifier
():
condgroup
=
state
.
groupdict
.
get
(
condname
)
if
condgroup
is
None
:
msg
=
"unknown group name
: {0!r}"
.
format
(
condname
)
msg
=
"unknown group name
%
r"
%
condname
raise
source
.
error
(
msg
,
len
(
condname
)
+
1
)
else
:
try
:
...
...
@@ -734,50 +737,48 @@ def _parse(source, state):
if
condgroup
<
0
:
raise
ValueError
except
ValueError
:
raise
source
.
error
(
"bad character in group name"
,
len
(
condname
)
+
1
)
msg
=
"bad character in group name
%
r"
%
condname
raise
source
.
error
(
msg
,
len
(
condname
)
+
1
)
from
None
if
not
condgroup
:
raise
source
.
error
(
"bad group number"
,
len
(
condname
)
+
1
)
if
condgroup
>=
MAXGROUPS
:
raise
source
.
error
(
"
the group number is too larg
e"
,
raise
source
.
error
(
"
invalid group referenc
e"
,
len
(
condname
)
+
1
)
state
.
checklookbehindgroup
(
condgroup
,
source
)
elif
char
in
FLAGS
:
# flags
while
True
:
state
.
flags
|=
FLAGS
[
char
]
while
source
.
next
in
FLAGS
:
state
.
flags
|=
FLAGS
[
sourceget
()]
char
=
sourceget
()
if
char
is
None
:
raise
source
.
error
(
"missing )"
)
if
char
==
")"
:
break
if
char
not
in
FLAGS
:
raise
source
.
error
(
"unknown flag"
,
len
(
char
))
verbose
=
state
.
flags
&
SRE_FLAG_VERBOSE
continue
else
:
raise
source
.
error
(
"unexpected end of pattern"
)
if
group
:
raise
source
.
error
(
"unknown extension ?"
+
char
,
len
(
char
)
+
1
)
# parse group contents
if
group
==
2
:
# anonymous group
group
=
None
else
:
if
group
is
not
None
:
try
:
group
=
state
.
opengroup
(
name
)
except
error
as
err
:
raise
source
.
error
(
err
.
msg
,
len
(
name
)
+
1
)
raise
source
.
error
(
err
.
msg
,
len
(
name
)
+
1
)
from
None
if
condgroup
:
p
=
_parse_sub_cond
(
source
,
state
,
condgroup
)
else
:
p
=
_parse_sub
(
source
,
state
)
if
not
sourcematch
(
")"
):
raise
source
.
error
(
"unbalanced parenthesis"
)
if
not
source
.
match
(
")"
):
raise
source
.
error
(
"missing ), unterminated subpattern"
,
source
.
tell
()
-
start
)
if
group
is
not
None
:
state
.
closegroup
(
group
,
p
)
subpatternappend
((
SUBPATTERN
,
(
group
,
p
)))
else
:
while
True
:
char
=
sourceget
()
if
char
is
None
:
raise
source
.
error
(
"unexpected end of pattern"
)
if
char
==
")"
:
break
raise
source
.
error
(
"unknown extension"
,
len
(
char
))
elif
this
==
"^"
:
subpatternappend
((
AT
,
AT_BEGINNING
))
...
...
@@ -786,7 +787,7 @@ def _parse(source, state):
subpattern
.
append
((
AT
,
AT_END
))
else
:
raise
source
.
error
(
"parser error"
,
len
(
this
))
raise
AssertionError
(
"unsupported special character
%
r"
%
(
char
,
))
return
subpattern
...
...
@@ -804,7 +805,7 @@ def fix_flags(src, flags):
raise
ValueError
(
"ASCII and UNICODE flags are incompatible"
)
else
:
if
flags
&
SRE_FLAG_UNICODE
:
raise
ValueError
(
"can
'
t use UNICODE flag with a bytes pattern"
)
raise
ValueError
(
"can
no
t use UNICODE flag with a bytes pattern"
)
if
flags
&
SRE_FLAG_LOCALE
and
flags
&
SRE_FLAG_ASCII
:
import
warnings
warnings
.
warn
(
"ASCII and LOCALE flags are incompatible. "
...
...
@@ -826,11 +827,8 @@ def parse(str, flags=0, pattern=None):
p
.
pattern
.
flags
=
fix_flags
(
str
,
p
.
pattern
.
flags
)
if
source
.
next
is
not
None
:
if
source
.
next
==
")"
:
assert
source
.
next
==
")"
raise
source
.
error
(
"unbalanced parenthesis"
)
else
:
raise
source
.
error
(
"bogus characters at end of regular expression"
,
len
(
tail
))
if
flags
&
SRE_FLAG_DEBUG
:
p
.
dump
()
...
...
@@ -866,26 +864,25 @@ def parse_template(source, pattern):
c
=
this
[
1
]
if
c
==
"g"
:
name
=
""
if
s
.
match
(
"<"
):
if
not
s
.
match
(
"<"
):
raise
s
.
error
(
"missing <"
)
name
=
s
.
getuntil
(
">"
)
if
not
name
:
raise
s
.
error
(
"missing group name"
,
1
)
if
name
.
isidentifier
():
try
:
index
=
pattern
.
groupindex
[
name
]
except
KeyError
:
raise
IndexError
(
"unknown group name
%
r"
%
name
)
else
:
try
:
index
=
int
(
name
)
if
index
<
0
:
raise
s
.
error
(
"negative group number"
,
len
(
name
)
+
1
)
if
index
>=
MAXGROUPS
:
raise
s
.
error
(
"the group number is too large"
,
len
(
name
)
+
1
)
raise
ValueError
except
ValueError
:
if
not
name
.
isidentifier
():
raise
s
.
error
(
"bad character in group name"
,
raise
s
.
error
(
"bad character in group name
%
r"
%
name
,
len
(
name
)
+
1
)
from
None
if
index
>=
MAXGROUPS
:
raise
s
.
error
(
"invalid group reference"
,
len
(
name
)
+
1
)
try
:
index
=
pattern
.
groupindex
[
name
]
except
KeyError
:
msg
=
"unknown group name: {0!r}"
.
format
(
name
)
raise
IndexError
(
msg
)
addgroup
(
index
)
elif
c
==
"0"
:
if
s
.
next
in
OCTDIGITS
:
...
...
@@ -903,7 +900,7 @@ def parse_template(source, pattern):
isoctal
=
True
c
=
int
(
this
[
1
:],
8
)
if
c
>
0
o377
:
raise
s
.
error
(
'octal escape value
%
r
outside of '
raise
s
.
error
(
'octal escape value
%
s
outside of '
'range 0-0o377'
%
this
,
len
(
this
))
lappend
(
chr
(
c
))
if
not
isoctal
:
...
...
Lib/test/test_re.py
Dosyayı görüntüle @
632a77e6
...
...
@@ -38,6 +38,24 @@ class ReTests(unittest.TestCase):
self
.
assertIs
(
type
(
actual
),
type
(
expect
),
msg
)
recurse
(
actual
,
expect
)
def
checkPatternError
(
self
,
pattern
,
errmsg
,
pos
=
None
):
with
self
.
assertRaises
(
re
.
error
)
as
cm
:
re
.
compile
(
pattern
)
with
self
.
subTest
(
pattern
=
pattern
):
err
=
cm
.
exception
self
.
assertEqual
(
err
.
msg
,
errmsg
)
if
pos
is
not
None
:
self
.
assertEqual
(
err
.
pos
,
pos
)
def
checkTemplateError
(
self
,
pattern
,
repl
,
string
,
errmsg
,
pos
=
None
):
with
self
.
assertRaises
(
re
.
error
)
as
cm
:
re
.
sub
(
pattern
,
repl
,
string
)
with
self
.
subTest
(
pattern
=
pattern
,
repl
=
repl
):
err
=
cm
.
exception
self
.
assertEqual
(
err
.
msg
,
errmsg
)
if
pos
is
not
None
:
self
.
assertEqual
(
err
.
pos
,
pos
)
def
test_keep_buffer
(
self
):
# See bug 14212
b
=
bytearray
(
b
'x'
)
...
...
@@ -148,6 +166,7 @@ class ReTests(unittest.TestCase):
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\009'
,
'x'
),
'
\0
'
+
'9'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\111'
,
'x'
),
'
\111
'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\117'
,
'x'
),
'
\117
'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\377'
,
'x'
),
'
\377
'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\1111'
,
'x'
),
'
\111
1'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\1111'
,
'x'
),
'
\111
'
+
'1'
)
...
...
@@ -158,21 +177,25 @@ class ReTests(unittest.TestCase):
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\09'
,
'x'
),
'
\0
'
+
'9'
)
self
.
assertEqual
(
re
.
sub
(
'x'
,
r'\0a'
,
'x'
),
'
\0
'
+
'a'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\400'
,
'x'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\777'
,
'x'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\1'
,
'x'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\8'
,
'x'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\9'
,
'x'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\11'
,
'x'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\18'
,
'x'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\1a'
,
'x'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\90'
,
'x'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\99'
,
'x'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\118'
,
'x'
)
# r'\11' + '8'
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\11a'
,
'x'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\181'
,
'x'
)
# r'\18' + '1'
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'x'
,
r'\800'
,
'x'
)
# r'\80' + '0'
self
.
checkTemplateError
(
'x'
,
r'\400'
,
'x'
,
r'octal escape value \400 outside of '
r'range 0-0o377'
,
0
)
self
.
checkTemplateError
(
'x'
,
r'\777'
,
'x'
,
r'octal escape value \777 outside of '
r'range 0-0o377'
,
0
)
self
.
checkTemplateError
(
'x'
,
r'\1'
,
'x'
,
'invalid group reference'
)
self
.
checkTemplateError
(
'x'
,
r'\8'
,
'x'
,
'invalid group reference'
)
self
.
checkTemplateError
(
'x'
,
r'\9'
,
'x'
,
'invalid group reference'
)
self
.
checkTemplateError
(
'x'
,
r'\11'
,
'x'
,
'invalid group reference'
)
self
.
checkTemplateError
(
'x'
,
r'\18'
,
'x'
,
'invalid group reference'
)
self
.
checkTemplateError
(
'x'
,
r'\1a'
,
'x'
,
'invalid group reference'
)
self
.
checkTemplateError
(
'x'
,
r'\90'
,
'x'
,
'invalid group reference'
)
self
.
checkTemplateError
(
'x'
,
r'\99'
,
'x'
,
'invalid group reference'
)
self
.
checkTemplateError
(
'x'
,
r'\118'
,
'x'
,
'invalid group reference'
)
# r'\11' + '8'
self
.
checkTemplateError
(
'x'
,
r'\11a'
,
'x'
,
'invalid group reference'
)
self
.
checkTemplateError
(
'x'
,
r'\181'
,
'x'
,
'invalid group reference'
)
# r'\18' + '1'
self
.
checkTemplateError
(
'x'
,
r'\800'
,
'x'
,
'invalid group reference'
)
# r'\80' + '0'
# in python2.3 (etc), these loop endlessly in sre_parser.py
self
.
assertEqual
(
re
.
sub
(
'(((((((((((x)))))))))))'
,
r'\11'
,
'x'
),
'x'
)
...
...
@@ -198,47 +221,65 @@ class ReTests(unittest.TestCase):
re
.
compile
(
'(?P<a>x)(?P=a)(?(a)y)'
)
re
.
compile
(
'(?P<a1>x)(?P=a1)(?(a1)y)'
)
re
.
compile
(
'(?P<a1>x)
\1
(?(1)y)'
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?P<a>)(?P<a>)'
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?Px)'
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?P=)'
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?P=1)'
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?P=a)'
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?P=a1)'
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?P=a.)'
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?P<)'
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?P<>)'
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?P<1>)'
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?P<a.>)'
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?())'
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?(a))'
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?(1a))'
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?(a.))'
)
self
.
checkPatternError
(
'(?P<a>)(?P<a>)'
,
"redefinition of group name 'a' as group 2; "
"was group 1"
)
self
.
checkPatternError
(
'(?Pxy)'
,
'unknown extension ?Px'
)
self
.
checkPatternError
(
'(?P<a>)(?P=a'
,
'missing ), unterminated name'
,
11
)
self
.
checkPatternError
(
'(?P='
,
'missing group name'
,
4
)
self
.
checkPatternError
(
'(?P=)'
,
'missing group name'
,
4
)
self
.
checkPatternError
(
'(?P=1)'
,
"bad character in group name '1'"
,
4
)
self
.
checkPatternError
(
'(?P=a)'
,
"unknown group name 'a'"
)
self
.
checkPatternError
(
'(?P=a1)'
,
"unknown group name 'a1'"
)
self
.
checkPatternError
(
'(?P=a.)'
,
"bad character in group name 'a.'"
,
4
)
self
.
checkPatternError
(
'(?P<)'
,
'missing >, unterminated name'
,
4
)
self
.
checkPatternError
(
'(?P<a'
,
'missing >, unterminated name'
,
4
)
self
.
checkPatternError
(
'(?P<'
,
'missing group name'
,
4
)
self
.
checkPatternError
(
'(?P<>)'
,
'missing group name'
,
4
)
self
.
checkPatternError
(
r'(?P<1>)'
,
"bad character in group name '1'"
,
4
)
self
.
checkPatternError
(
r'(?P<a.>)'
,
"bad character in group name 'a.'"
,
4
)
self
.
checkPatternError
(
r'(?('
,
'missing group name'
,
3
)
self
.
checkPatternError
(
r'(?())'
,
'missing group name'
,
3
)
self
.
checkPatternError
(
r'(?(a))'
,
"unknown group name 'a'"
,
3
)
self
.
checkPatternError
(
r'(?(-1))'
,
"bad character in group name '-1'"
,
3
)
self
.
checkPatternError
(
r'(?(1a))'
,
"bad character in group name '1a'"
,
3
)
self
.
checkPatternError
(
r'(?(a.))'
,
"bad character in group name 'a.'"
,
3
)
# New valid/invalid identifiers in Python 3
re
.
compile
(
'(?P<µ>x)(?P=µ)(?(µ)y)'
)
re
.
compile
(
'(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)'
)
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'(?P<©>x)'
)
self
.
checkPatternError
(
'(?P<©>x)'
,
"bad character in group name '©'"
,
4
)
# Support > 100 groups.
pat
=
'|'
.
join
(
'x(?P<a
%
d>
%
x)y'
%
(
i
,
i
)
for
i
in
range
(
1
,
200
+
1
))
pat
=
'(?:
%
s)(?(200)z|t)'
%
pat
self
.
assertEqual
(
re
.
match
(
pat
,
'xc8yz'
)
.
span
(),
(
0
,
5
))
def
test_symbolic_refs
(
self
):
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'(?P<a>x)'
,
'
\
g<a'
,
'xx'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'(?P<a>x)'
,
'
\
g<'
,
'xx'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'(?P<a>x)'
,
'
\
g'
,
'xx'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'(?P<a>x)'
,
'
\
g<a a>'
,
'xx'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'(?P<a>x)'
,
'
\
g<>'
,
'xx'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'(?P<a>x)'
,
'
\
g<1a1>'
,
'xx'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'(?P<a>x)'
,
r'\g<2>'
,
'xx'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'(?P<a>x)'
,
r'\2'
,
'xx'
)
self
.
assertRaises
(
IndexError
,
re
.
sub
,
'(?P<a>x)'
,
'
\
g<ab>'
,
'xx'
)
self
.
checkTemplateError
(
'(?P<a>x)'
,
'
\
g<a'
,
'xx'
,
'missing >, unterminated name'
,
3
)
self
.
checkTemplateError
(
'(?P<a>x)'
,
'
\
g<'
,
'xx'
,
'missing group name'
,
3
)
self
.
checkTemplateError
(
'(?P<a>x)'
,
'
\
g'
,
'xx'
,
'missing <'
,
2
)
self
.
checkTemplateError
(
'(?P<a>x)'
,
'
\
g<a a>'
,
'xx'
,
"bad character in group name 'a a'"
,
3
)
self
.
checkTemplateError
(
'(?P<a>x)'
,
'
\
g<>'
,
'xx'
,
'missing group name'
,
3
)
self
.
checkTemplateError
(
'(?P<a>x)'
,
'
\
g<1a1>'
,
'xx'
,
"bad character in group name '1a1'"
,
3
)
self
.
checkTemplateError
(
'(?P<a>x)'
,
r'\g<2>'
,
'xx'
,
'invalid group reference'
)
self
.
checkTemplateError
(
'(?P<a>x)'
,
r'\2'
,
'xx'
,
'invalid group reference'
)
with
self
.
assertRaisesRegex
(
IndexError
,
"unknown group name 'ab'"
):
re
.
sub
(
'(?P<a>x)'
,
'
\
g<ab>'
,
'xx'
)
self
.
assertEqual
(
re
.
sub
(
'(?P<a>x)|(?P<b>y)'
,
r'\g<b>'
,
'xx'
),
''
)
self
.
assertEqual
(
re
.
sub
(
'(?P<a>x)|(?P<b>y)'
,
r'\2'
,
'xx'
),
''
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'(?P<a>x)'
,
'
\
g<-1>'
,
'xx'
)
self
.
checkTemplateError
(
'(?P<a>x)'
,
'
\
g<-1>'
,
'xx'
,
"bad character in group name '-1'"
,
3
)
# New valid/invalid identifiers in Python 3
self
.
assertEqual
(
re
.
sub
(
'(?P<µ>x)'
,
r'\g<µ>'
,
'xx'
),
'xx'
)
self
.
assertEqual
(
re
.
sub
(
'(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)'
,
r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>'
,
'xx'
),
'xx'
)
self
.
assertRaises
(
re
.
error
,
re
.
sub
,
'(?P<a>x)'
,
r'\g<©>'
,
'xx'
)
self
.
checkTemplateError
(
'(?P<a>x)'
,
'
\
g<©>'
,
'xx'
,
"bad character in group name '©'"
,
3
)
# Support > 100 groups.
pat
=
'|'
.
join
(
'x(?P<a
%
d>
%
x)y'
%
(
i
,
i
)
for
i
in
range
(
1
,
200
+
1
))
self
.
assertEqual
(
re
.
sub
(
pat
,
'
\
g<200>'
,
'xc8yzxc8y'
),
'c8zc8'
)
...
...
@@ -444,6 +485,19 @@ class ReTests(unittest.TestCase):
pat
=
'(?:
%
s)(?(200)z)'
%
pat
self
.
assertEqual
(
re
.
match
(
pat
,
'xc8yz'
)
.
span
(),
(
0
,
5
))
self
.
checkPatternError
(
r'(?P<a>)(?(0))'
,
'bad group number'
,
10
)
self
.
checkPatternError
(
r'()(?(1)a|b'
,
'missing ), unterminated subpattern'
,
2
)
self
.
checkPatternError
(
r'()(?(1)a|b|c)'
,
'conditional backref with more than '
'two branches'
,
10
)
def
test_re_groupref_overflow
(
self
):
self
.
checkTemplateError
(
'()'
,
'
\
g<
%
s>'
%
sre_constants
.
MAXGROUPS
,
'xx'
,
'invalid group reference'
,
3
)
self
.
checkPatternError
(
r'(?P<a>)(?(
%
d))'
%
sre_constants
.
MAXGROUPS
,
'invalid group reference'
,
10
)
def
test_re_groupref
(
self
):
self
.
assertEqual
(
re
.
match
(
r'^(\|)?([^()]+)\1$'
,
'|a|'
)
.
groups
(),
(
'|'
,
'a'
))
...
...
@@ -456,6 +510,8 @@ class ReTests(unittest.TestCase):
self
.
assertEqual
(
re
.
match
(
r'^(?:(a)|c)(\1)?$'
,
'c'
)
.
groups
(),
(
None
,
None
))
self
.
checkPatternError
(
r'(abc\1)'
,
'cannot refer to an open group'
,
4
)
def
test_groupdict
(
self
):
self
.
assertEqual
(
re
.
match
(
'(?P<first>first) (?P<second>second)'
,
'first second'
)
.
groupdict
(),
...
...
@@ -493,6 +549,7 @@ class ReTests(unittest.TestCase):
self
.
assertTrue
(
re
.
match
(
"^x{3}$"
,
"xxx"
))
self
.
assertTrue
(
re
.
match
(
"^x{1,3}$"
,
"xxx"
))
self
.
assertTrue
(
re
.
match
(
"^x{3,3}$"
,
"xxx"
))
self
.
assertTrue
(
re
.
match
(
"^x{1,4}$"
,
"xxx"
))
self
.
assertTrue
(
re
.
match
(
"^x{3,4}?$"
,
"xxx"
))
self
.
assertTrue
(
re
.
match
(
"^x{3}?$"
,
"xxx"
))
...
...
@@ -503,6 +560,9 @@ class ReTests(unittest.TestCase):
self
.
assertIsNone
(
re
.
match
(
"^x{}$"
,
"xxx"
))
self
.
assertTrue
(
re
.
match
(
"^x{}$"
,
"x{}"
))
self
.
checkPatternError
(
r'x{2,1}'
,
'min repeat greater than max repeat'
,
2
)
def
test_getattr
(
self
):
self
.
assertEqual
(
re
.
compile
(
"(?i)(a)(b)"
)
.
pattern
,
"(?i)(a)(b)"
)
self
.
assertEqual
(
re
.
compile
(
"(?i)(a)(b)"
)
.
flags
,
re
.
I
|
re
.
U
)
...
...
@@ -550,7 +610,7 @@ class ReTests(unittest.TestCase):
b
"1aa! a"
,
re
.
LOCALE
)
.
group
(
0
),
b
"1aa! a"
)
def
test_other_escapes
(
self
):
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
"
\\
"
)
self
.
checkPatternError
(
"
\\
"
,
'bad escape (end of pattern)'
,
0
)
self
.
assertEqual
(
re
.
match
(
r"\("
,
'('
)
.
group
(),
'('
)
self
.
assertIsNone
(
re
.
match
(
r"\("
,
')'
))
self
.
assertEqual
(
re
.
match
(
r"\\"
,
'
\\
'
)
.
group
(),
'
\\
'
)
...
...
@@ -875,15 +935,17 @@ class ReTests(unittest.TestCase):
self
.
assertTrue
(
re
.
match
(
r"\08"
,
"
\000
8"
))
self
.
assertTrue
(
re
.
match
(
r"\01"
,
"
\001
"
))
self
.
assertTrue
(
re
.
match
(
r"\018"
,
"
\001
8"
))
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"\567"
,
""
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"\911"
,
""
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"\x1"
,
""
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"\x1z"
,
""
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"\u123"
,
""
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"\u123z"
,
""
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"\U0001234"
,
""
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"\U0001234z"
,
""
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"\U00110000"
,
""
)
self
.
checkPatternError
(
r"\567"
,
r'octal escape value \567 outside of '
r'range 0-0o377'
,
0
)
self
.
checkPatternError
(
r"\911"
,
'invalid group reference'
,
0
)
self
.
checkPatternError
(
r"\x1"
,
r'incomplete escape \x1'
,
0
)
self
.
checkPatternError
(
r"\x1z"
,
r'incomplete escape \x1'
,
0
)
self
.
checkPatternError
(
r"\u123"
,
r'incomplete escape \u123'
,
0
)
self
.
checkPatternError
(
r"\u123z"
,
r'incomplete escape \u123'
,
0
)
self
.
checkPatternError
(
r"\U0001234"
,
r'incomplete escape \U0001234'
,
0
)
self
.
checkPatternError
(
r"\U0001234z"
,
r'incomplete escape \U0001234'
,
0
)
self
.
checkPatternError
(
r"\U00110000"
,
r'bad escape \U00110000'
,
0
)
def
test_sre_character_class_literals
(
self
):
for
i
in
[
0
,
8
,
16
,
32
,
64
,
127
,
128
,
255
,
256
,
0xFFFF
,
0x10000
,
0x10FFFF
]:
...
...
@@ -903,12 +965,14 @@ class ReTests(unittest.TestCase):
self
.
assertTrue
(
re
.
match
(
r"[\U
%08
x]"
%
i
,
chr
(
i
)))
self
.
assertTrue
(
re
.
match
(
r"[\U
%08
x0]"
%
i
,
chr
(
i
)
+
"0"
))
self
.
assertTrue
(
re
.
match
(
r"[\U
%08
xz]"
%
i
,
chr
(
i
)
+
"z"
))
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"[\567]"
,
""
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"[\911]"
,
""
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"[\x1z]"
,
""
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"[\u123z]"
,
""
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"[\U0001234z]"
,
""
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
r"[\U00110000]"
,
""
)
self
.
checkPatternError
(
r"[\567]"
,
r'octal escape value \567 outside of '
r'range 0-0o377'
,
1
)
self
.
checkPatternError
(
r"[\911]"
,
r'bad escape \9'
,
1
)
self
.
checkPatternError
(
r"[\x1z]"
,
r'incomplete escape \x1'
,
1
)
self
.
checkPatternError
(
r"[\u123z]"
,
r'incomplete escape \u123'
,
1
)
self
.
checkPatternError
(
r"[\U0001234z]"
,
r'incomplete escape \U0001234'
,
1
)
self
.
checkPatternError
(
r"[\U00110000]"
,
r'bad escape \U00110000'
,
1
)
self
.
assertTrue
(
re
.
match
(
r"[\U0001d49c-\U0001d4b5]"
,
"
\U0001d49e
"
))
def
test_sre_byte_literals
(
self
):
...
...
@@ -927,10 +991,12 @@ class ReTests(unittest.TestCase):
self
.
assertTrue
(
re
.
match
(
br
"
\0
8"
,
b
"
\000
8"
))
self
.
assertTrue
(
re
.
match
(
br
"
\01
"
,
b
"
\001
"
))
self
.
assertTrue
(
re
.
match
(
br
"
\01
8"
,
b
"
\001
8"
))
self
.
assertRaises
(
re
.
error
,
re
.
match
,
br
"
\567
"
,
b
""
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
br
"
\
911"
,
b
""
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
br
"
\
x1"
,
b
""
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
br
"
\
x1z"
,
b
""
)
self
.
checkPatternError
(
br
"
\567
"
,
r'octal escape value \567 outside of '
r'range 0-0o377'
,
0
)
self
.
checkPatternError
(
br
"
\
911"
,
'invalid group reference'
,
0
)
self
.
checkPatternError
(
br
"
\
x1"
,
r'incomplete escape \x1'
,
0
)
self
.
checkPatternError
(
br
"
\
x1z"
,
r'incomplete escape \x1'
,
0
)
def
test_sre_byte_class_literals
(
self
):
for
i
in
[
0
,
8
,
16
,
32
,
64
,
127
,
128
,
255
]:
...
...
@@ -946,9 +1012,22 @@ class ReTests(unittest.TestCase):
self
.
assertTrue
(
re
.
match
(
br
"[
\u1234
]"
,
b
'u'
))
with
self
.
assertWarns
(
DeprecationWarning
):
self
.
assertTrue
(
re
.
match
(
br
"[
\U00012345
]"
,
b
'U'
))
self
.
assertRaises
(
re
.
error
,
re
.
match
,
br
"[
\567
]"
,
b
""
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
br
"[
\
911]"
,
b
""
)
self
.
assertRaises
(
re
.
error
,
re
.
match
,
br
"[
\
x1z]"
,
b
""
)
self
.
checkPatternError
(
br
"[
\567
]"
,
r'octal escape value \567 outside of '
r'range 0-0o377'
,
1
)
self
.
checkPatternError
(
br
"[
\
911]"
,
r'bad escape \9'
,
1
)
self
.
checkPatternError
(
br
"[
\
x1z]"
,
r'incomplete escape \x1'
,
1
)
def
test_character_set_errors
(
self
):
self
.
checkPatternError
(
r'['
,
'unterminated character set'
,
0
)
self
.
checkPatternError
(
r'[^'
,
'unterminated character set'
,
0
)
self
.
checkPatternError
(
r'[a'
,
'unterminated character set'
,
0
)
# bug 545855 -- This pattern failed to cause a compile error as it
# should, instead provoking a TypeError.
self
.
checkPatternError
(
r"[a-"
,
'unterminated character set'
,
0
)
self
.
checkPatternError
(
r"[\w-b]"
,
r'bad character range \w-b'
,
1
)
self
.
checkPatternError
(
r"[a-\w]"
,
r'bad character range a-\w'
,
1
)
self
.
checkPatternError
(
r"[b-a]"
,
'bad character range b-a'
,
1
)
def
test_bug_113254
(
self
):
self
.
assertEqual
(
re
.
match
(
r'(a)|(b)'
,
'b'
)
.
start
(
1
),
-
1
)
...
...
@@ -963,11 +1042,6 @@ class ReTests(unittest.TestCase):
self
.
assertEqual
(
re
.
match
(
"(?P<a>a(b))"
,
"ab"
)
.
lastgroup
,
'a'
)
self
.
assertEqual
(
re
.
match
(
"((a))"
,
"a"
)
.
lastindex
,
1
)
def
test_bug_545855
(
self
):
# bug 545855 -- This pattern failed to cause a compile error as it
# should, instead provoking a TypeError.
self
.
assertRaises
(
re
.
error
,
re
.
compile
,
'foo[a-'
)
def
test_bug_418626
(
self
):
# bugs 418626 et al. -- Testing Greg Chapman's addition of op code
# SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
...
...
@@ -991,6 +1065,24 @@ class ReTests(unittest.TestCase):
self
.
assertEqual
(
re
.
match
(
'(x)*y'
,
50000
*
'x'
+
'y'
)
.
group
(
1
),
'x'
)
self
.
assertEqual
(
re
.
match
(
'(x)*?y'
,
50000
*
'x'
+
'y'
)
.
group
(
1
),
'x'
)
def
test_nothing_to_repeat
(
self
):
for
reps
in
'*'
,
'+'
,
'?'
,
'{1,2}'
:
for
mod
in
''
,
'?'
:
self
.
checkPatternError
(
'
%
s
%
s'
%
(
reps
,
mod
),
'nothing to repeat'
,
0
)
self
.
checkPatternError
(
'(?:
%
s
%
s)'
%
(
reps
,
mod
),
'nothing to repeat'
,
3
)
def
test_multiple_repeat
(
self
):
for
outer_reps
in
'*'
,
'+'
,
'{1,2}'
:
for
outer_mod
in
''
,
'?'
:
outer_op
=
outer_reps
+
outer_mod
for
inner_reps
in
'*'
,
'+'
,
'?'
,
'{1,2}'
:
for
inner_mod
in
''
,
'?'
:
inner_op
=
inner_reps
+
inner_mod
self
.
checkPatternError
(
r'x
%
s
%
s'
%
(
inner_op
,
outer_op
),
'multiple repeat'
,
1
+
len
(
inner_op
))
def
test_unlimited_zero_width_repeat
(
self
):
# Issue #9669
self
.
assertIsNone
(
re
.
match
(
r'(?:a?)*y'
,
'z'
))
...
...
@@ -1381,13 +1473,13 @@ class ReTests(unittest.TestCase):
def
test_backref_group_name_in_exception
(
self
):
# Issue 17341: Poor error message when compiling invalid regex
with
self
.
assertRaisesRegex
(
sre_constants
.
error
,
'<foo>'
):
re
.
compile
(
'(?P=<foo>)'
)
self
.
checkPatternError
(
'(?P=<foo>)'
,
"bad character in group name '<foo>'"
,
4
)
def
test_group_name_in_exception
(
self
):
# Issue 17341: Poor error message when compiling invalid regex
with
self
.
assertRaisesRegex
(
sre_constants
.
error
,
'
\
?foo'
):
re
.
compile
(
'(?P<?foo>)'
)
self
.
checkPatternError
(
'(?P<?foo>)'
,
"bad character in group name '?foo'"
,
4
)
def
test_issue17998
(
self
):
for
reps
in
'*'
,
'+'
,
'?'
,
'{1}'
:
...
...
@@ -1556,6 +1648,19 @@ SUBPATTERN None
self
.
assertIn
(
' at position 77'
,
str
(
err
))
self
.
assertIn
(
'(line 5, column 17)'
,
str
(
err
))
def
test_misc_errors
(
self
):
self
.
checkPatternError
(
r'('
,
'missing ), unterminated subpattern'
,
0
)
self
.
checkPatternError
(
r'((a|b)'
,
'missing ), unterminated subpattern'
,
0
)
self
.
checkPatternError
(
r'(a|b))'
,
'unbalanced parenthesis'
,
5
)
self
.
checkPatternError
(
r'(?P'
,
'unexpected end of pattern'
,
3
)
self
.
checkPatternError
(
r'(?z)'
,
'unknown extension ?z'
,
1
)
self
.
checkPatternError
(
r'(?iz)'
,
'unknown flag'
,
3
)
self
.
checkPatternError
(
r'(?i'
,
'missing )'
,
3
)
self
.
checkPatternError
(
r'(?#abc'
,
'missing ), unterminated comment'
,
0
)
self
.
checkPatternError
(
r'(?<'
,
'unexpected end of pattern'
,
3
)
self
.
checkPatternError
(
r'(?<>)'
,
'unknown extension ?<>'
,
1
)
self
.
checkPatternError
(
r'(?'
,
'unexpected end of pattern'
,
2
)
class
PatternReprTests
(
unittest
.
TestCase
):
def
check
(
self
,
pattern
,
expected
):
...
...
Misc/NEWS
Dosyayı görüntüle @
632a77e6
...
...
@@ -30,6 +30,8 @@ Core and Builtins
Library
-------
-
Issue
#
22364
:
Improved
some
re
error
messages
using
regex
for
hints
.
-
Issue
#
23742
:
ntpath
.
expandvars
()
no
longer
loses
unbalanced
single
quotes
.
-
Issue
#
21717
:
The
zipfile
.
ZipFile
.
open
function
now
supports
'x'
(
exclusive
...
...
Modules/_sre.c
Dosyayı görüntüle @
632a77e6
...
...
@@ -315,7 +315,7 @@ getstring(PyObject* string, Py_ssize_t* p_length,
/* get pointer to byte string buffer */
if
(
PyObject_GetBuffer
(
string
,
view
,
PyBUF_SIMPLE
)
!=
0
)
{
PyErr_SetString
(
PyExc_TypeError
,
"expected string or b
uffer
"
);
PyErr_SetString
(
PyExc_TypeError
,
"expected string or b
ytes-like object
"
);
return
NULL
;
}
...
...
@@ -359,12 +359,12 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
if
(
isbytes
&&
pattern
->
isbytes
==
0
)
{
PyErr_SetString
(
PyExc_TypeError
,
"can
'
t use a string pattern on a bytes-like object"
);
"can
no
t use a string pattern on a bytes-like object"
);
goto
err
;
}
if
(
!
isbytes
&&
pattern
->
isbytes
>
0
)
{
PyErr_SetString
(
PyExc_TypeError
,
"can
'
t use a bytes pattern on a string-like object"
);
"can
no
t use a bytes pattern on a string-like object"
);
goto
err
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment