Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
ad446d57
Kaydet (Commit)
ad446d57
authored
Kas 10, 2014
tarafından
Serhiy Storchaka
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
Issue #22578: Added attributes to the re.error class.
üst
eb99e515
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
173 additions
and
61 deletions
+173
-61
re.rst
Doc/library/re.rst
+25
-2
sre_constants.py
Lib/sre_constants.py
+29
-1
sre_parse.py
Lib/sre_parse.py
+81
-58
test_re.py
Lib/test/test_re.py
+36
-0
NEWS
Misc/NEWS
+2
-0
No files found.
Doc/library/re.rst
Dosyayı görüntüle @
ad446d57
...
@@ -733,13 +733,36 @@ form.
...
@@ -733,13 +733,36 @@ form.
Clear the regular expression cache.
Clear the regular expression cache.
.. exception:: error
.. exception:: error
(msg, pattern=None, pos=None)
Exception raised when a string passed to one of the functions here is not a
Exception raised when a string passed to one of the functions here is not a
valid regular expression (for example, it might contain unmatched parentheses)
valid regular expression (for example, it might contain unmatched parentheses)
or when some other error occurs during compilation or matching. It is never an
or when some other error occurs during compilation or matching. It is never an
error if a string contains no match for a pattern.
error if a string contains no match for a pattern. The error instance has
the following additional attributes:
.. attribute:: msg
The unformatted error message.
.. attribute:: pattern
The regular expression pattern (may be ``None``).
.. attribute:: pos
The index of *pattern* where compilation failed (may be ``None``).
.. attribute:: lineno
The 1-based line number corresponding to *pos* (may be ``None``).
.. attribute:: colno
The 1-based column number corresponding to *pos* (may be ``None``).
.. versionchanged:: 3.5
Added additional attributes.
.. _re-objects:
.. _re-objects:
...
...
Lib/sre_constants.py
Dosyayı görüntüle @
ad446d57
...
@@ -21,7 +21,35 @@ from _sre import MAXREPEAT, MAXGROUPS
...
@@ -21,7 +21,35 @@ from _sre import MAXREPEAT, MAXGROUPS
# should this really be here?
# should this really be here?
class error(Exception):
    """Exception raised for invalid regular expressions.

    Attributes:
        msg: the unformatted error message.
        pattern: the regular expression pattern (str or bytes), or None.
        pos: the index in *pattern* where compilation failed, or None.
        lineno: the 1-based line corresponding to *pos*, or None.
        colno: the 1-based column corresponding to *pos*, or None.

    The string form of the exception is *msg*, extended with the position
    when both *pattern* and *pos* are known, and with the line/column when
    the pattern spans more than one line.
    """

    def __init__(self, msg, pattern=None, pos=None):
        # Keep the raw message separately; the formatted one goes to
        # Exception.__init__ and is what str(exc) shows.
        self.msg = msg
        self.pattern = pattern
        self.pos = pos
        if pattern is not None and pos is not None:
            msg = '%s at position %d' % (msg, pos)
            # The newline literal must match the pattern type so that
            # count()/rfind() work for both str and bytes patterns.
            if isinstance(pattern, str):
                newline = '\n'
            else:
                newline = b'\n'
            self.lineno = pattern.count(newline, 0, pos) + 1
            # rfind() yields -1 when there is no earlier newline, which
            # makes the column 1-based on the first line as well.
            self.colno = pos - pattern.rfind(newline, 0, pos)
            if newline in pattern:
                # Line/column is only worth reporting for multi-line patterns.
                msg = '%s (line %d, column %d)' % (msg, self.lineno, self.colno)
        else:
            self.lineno = self.colno = None
        super().__init__(msg)
def linecol(doc, pos):
    """Return the 1-based ``(lineno, colno)`` of index *pos* in *doc*.

    *doc* may be a str or a bytes object; the newline literal used for
    counting is chosen to match its type.
    """
    # The original body referred to an undefined name ``pattern`` here and
    # below, raising NameError on every call; the parameter is ``doc``.
    if isinstance(doc, str):
        newline = '\n'
    else:
        newline = b'\n'
    lineno = doc.count(newline, 0, pos) + 1
    if lineno == 1:
        colno = pos + 1
    else:
        # lineno > 1 guarantees a newline before pos, so rindex() cannot fail.
        colno = pos - doc.rindex(newline, 0, pos)
    return lineno, colno
class
_NamedIntConstant
(
int
):
class
_NamedIntConstant
(
int
):
...
...
Lib/sre_parse.py
Dosyayı görüntüle @
ad446d57
...
@@ -81,8 +81,8 @@ class Pattern:
...
@@ -81,8 +81,8 @@ class Pattern:
if
name
is
not
None
:
if
name
is
not
None
:
ogid
=
self
.
groupdict
.
get
(
name
,
None
)
ogid
=
self
.
groupdict
.
get
(
name
,
None
)
if
ogid
is
not
None
:
if
ogid
is
not
None
:
raise
error
(
"redefinition of group name
%
s
as group
%
d; "
raise
error
(
"redefinition of group name
%
r
as group
%
d; "
"was group
%
d"
%
(
repr
(
name
)
,
gid
,
ogid
))
"was group
%
d"
%
(
name
,
gid
,
ogid
))
self
.
groupdict
[
name
]
=
gid
self
.
groupdict
[
name
]
=
gid
return
gid
return
gid
def
closegroup
(
self
,
gid
,
p
):
def
closegroup
(
self
,
gid
,
p
):
...
@@ -206,24 +206,25 @@ class SubPattern:
...
@@ -206,24 +206,25 @@ class SubPattern:
class
Tokenizer
:
class
Tokenizer
:
def
__init__
(
self
,
string
):
def
__init__
(
self
,
string
):
self
.
istext
=
isinstance
(
string
,
str
)
self
.
istext
=
isinstance
(
string
,
str
)
self
.
string
=
string
if
not
self
.
istext
:
if
not
self
.
istext
:
string
=
str
(
string
,
'latin1'
)
string
=
str
(
string
,
'latin1'
)
self
.
string
=
string
self
.
decoded_
string
=
string
self
.
index
=
0
self
.
index
=
0
self
.
__next
()
self
.
__next
()
def
__next
(
self
):
def
__next
(
self
):
index
=
self
.
index
index
=
self
.
index
try
:
try
:
char
=
self
.
string
[
index
]
char
=
self
.
decoded_
string
[
index
]
except
IndexError
:
except
IndexError
:
self
.
next
=
None
self
.
next
=
None
return
return
if
char
==
"
\\
"
:
if
char
==
"
\\
"
:
index
+=
1
index
+=
1
try
:
try
:
char
+=
self
.
string
[
index
]
char
+=
self
.
decoded_
string
[
index
]
except
IndexError
:
except
IndexError
:
raise
error
(
"bogus escape (end of line)"
)
raise
self
.
error
(
"bogus escape (end of line)"
)
from
None
self
.
index
=
index
+
1
self
.
index
=
index
+
1
self
.
next
=
char
self
.
next
=
char
def
match
(
self
,
char
):
def
match
(
self
,
char
):
...
@@ -250,15 +251,19 @@ class Tokenizer:
...
@@ -250,15 +251,19 @@ class Tokenizer:
c
=
self
.
next
c
=
self
.
next
self
.
__next
()
self
.
__next
()
if
c
is
None
:
if
c
is
None
:
raise
error
(
"unterminated name"
)
raise
self
.
error
(
"unterminated name"
)
if
c
==
terminator
:
if
c
==
terminator
:
break
break
result
+=
c
result
+=
c
return
result
return
result
def
tell
(
self
):
def
tell
(
self
):
return
self
.
index
,
self
.
next
return
self
.
index
-
len
(
self
.
next
or
''
)
def
seek
(
self
,
index
):
def
seek
(
self
,
index
):
self
.
index
,
self
.
next
=
index
self
.
index
=
index
self
.
__next
()
def error(self, msg, offset=0):
    """Build (without raising) an ``error`` for the current position.

    The reported position is ``self.tell()`` moved back by *offset*
    characters; the pattern attached to the exception is ``self.string``.
    """
    position = self.tell() - offset
    return error(msg, self.string, position)
# The following three functions are not used in this module anymore, but we keep
# The following three functions are not used in this module anymore, but we keep
# them here (with DeprecationWarnings) for backwards compatibility.
# them here (with DeprecationWarnings) for backwards compatibility.
...
@@ -322,8 +327,8 @@ def _class_escape(source, escape):
...
@@ -322,8 +327,8 @@ def _class_escape(source, escape):
escape
+=
source
.
getwhile
(
2
,
OCTDIGITS
)
escape
+=
source
.
getwhile
(
2
,
OCTDIGITS
)
c
=
int
(
escape
[
1
:],
8
)
c
=
int
(
escape
[
1
:],
8
)
if
c
>
0
o377
:
if
c
>
0
o377
:
raise
error
(
'octal escape value
%
r outside of '
raise
source
.
error
(
'octal escape value
%
r outside of '
'range 0-0o377'
%
escape
)
'range 0-0o377'
%
escape
,
len
(
escape
)
)
return
LITERAL
,
c
return
LITERAL
,
c
elif
c
in
DIGITS
:
elif
c
in
DIGITS
:
raise
ValueError
raise
ValueError
...
@@ -331,7 +336,7 @@ def _class_escape(source, escape):
...
@@ -331,7 +336,7 @@ def _class_escape(source, escape):
return
LITERAL
,
ord
(
escape
[
1
])
return
LITERAL
,
ord
(
escape
[
1
])
except
ValueError
:
except
ValueError
:
pass
pass
raise
error
(
"bogus escape:
%
s"
%
repr
(
escape
))
raise
source
.
error
(
"bogus escape:
%
r"
%
escape
,
len
(
escape
))
def
_escape
(
source
,
escape
,
state
):
def
_escape
(
source
,
escape
,
state
):
# handle escape code in expression
# handle escape code in expression
...
@@ -377,21 +382,23 @@ def _escape(source, escape, state):
...
@@ -377,21 +382,23 @@ def _escape(source, escape, state):
escape
+=
source
.
get
()
escape
+=
source
.
get
()
c
=
int
(
escape
[
1
:],
8
)
c
=
int
(
escape
[
1
:],
8
)
if
c
>
0
o377
:
if
c
>
0
o377
:
raise
error
(
'octal escape value
%
r outside of '
raise
source
.
error
(
'octal escape value
%
r outside of '
'range 0-0o377'
%
escape
)
'range 0-0o377'
%
escape
,
len
(
escape
))
return
LITERAL
,
c
return
LITERAL
,
c
# not an octal escape, so this is a group reference
# not an octal escape, so this is a group reference
group
=
int
(
escape
[
1
:])
group
=
int
(
escape
[
1
:])
if
group
<
state
.
groups
:
if
group
<
state
.
groups
:
if
not
state
.
checkgroup
(
group
):
if
not
state
.
checkgroup
(
group
):
raise
error
(
"cannot refer to open group"
)
raise
source
.
error
(
"cannot refer to open group"
,
len
(
escape
))
return
GROUPREF
,
group
return
GROUPREF
,
group
raise
ValueError
raise
ValueError
if
len
(
escape
)
==
2
:
if
len
(
escape
)
==
2
:
return
LITERAL
,
ord
(
escape
[
1
])
return
LITERAL
,
ord
(
escape
[
1
])
except
ValueError
:
except
ValueError
:
pass
pass
raise
error
(
"bogus escape:
%
s"
%
repr
(
escape
))
raise
source
.
error
(
"bogus escape:
%
r"
%
escape
,
len
(
escape
))
def
_parse_sub
(
source
,
state
,
nested
=
True
):
def
_parse_sub
(
source
,
state
,
nested
=
True
):
# parse an alternation: a|b|c
# parse an alternation: a|b|c
...
@@ -404,7 +411,7 @@ def _parse_sub(source, state, nested=True):
...
@@ -404,7 +411,7 @@ def _parse_sub(source, state, nested=True):
if
not
sourcematch
(
"|"
):
if
not
sourcematch
(
"|"
):
break
break
if
nested
and
source
.
next
is
not
None
and
source
.
next
!=
")"
:
if
nested
and
source
.
next
is
not
None
and
source
.
next
!=
")"
:
raise
error
(
"pattern not properly closed"
)
raise
source
.
error
(
"pattern not properly closed"
)
if
len
(
items
)
==
1
:
if
len
(
items
)
==
1
:
return
items
[
0
]
return
items
[
0
]
...
@@ -449,11 +456,11 @@ def _parse_sub_cond(source, state, condgroup):
...
@@ -449,11 +456,11 @@ def _parse_sub_cond(source, state, condgroup):
if
source
.
match
(
"|"
):
if
source
.
match
(
"|"
):
item_no
=
_parse
(
source
,
state
)
item_no
=
_parse
(
source
,
state
)
if
source
.
next
==
"|"
:
if
source
.
next
==
"|"
:
raise
error
(
"conditional backref with more than two branches"
)
raise
source
.
error
(
"conditional backref with more than two branches"
)
else
:
else
:
item_no
=
None
item_no
=
None
if
source
.
next
is
not
None
and
source
.
next
!=
")"
:
if
source
.
next
is
not
None
and
source
.
next
!=
")"
:
raise
error
(
"pattern not properly closed"
)
raise
source
.
error
(
"pattern not properly closed"
)
subpattern
=
SubPattern
(
state
)
subpattern
=
SubPattern
(
state
)
subpattern
.
append
((
GROUPREF_EXISTS
,
(
condgroup
,
item_yes
,
item_no
)))
subpattern
.
append
((
GROUPREF_EXISTS
,
(
condgroup
,
item_yes
,
item_no
)))
return
subpattern
return
subpattern
...
@@ -510,7 +517,7 @@ def _parse(source, state):
...
@@ -510,7 +517,7 @@ def _parse(source, state):
while
True
:
while
True
:
this
=
sourceget
()
this
=
sourceget
()
if
this
is
None
:
if
this
is
None
:
raise
error
(
"unexpected end of regular expression"
)
raise
source
.
error
(
"unexpected end of regular expression"
)
if
this
==
"]"
and
set
!=
start
:
if
this
==
"]"
and
set
!=
start
:
break
break
elif
this
[
0
]
==
"
\\
"
:
elif
this
[
0
]
==
"
\\
"
:
...
@@ -521,7 +528,7 @@ def _parse(source, state):
...
@@ -521,7 +528,7 @@ def _parse(source, state):
# potential range
# potential range
this
=
sourceget
()
this
=
sourceget
()
if
this
is
None
:
if
this
is
None
:
raise
error
(
"unexpected end of regular expression"
)
raise
source
.
error
(
"unexpected end of regular expression"
)
if
this
==
"]"
:
if
this
==
"]"
:
if
code1
[
0
]
is
IN
:
if
code1
[
0
]
is
IN
:
code1
=
code1
[
1
][
0
]
code1
=
code1
[
1
][
0
]
...
@@ -533,11 +540,11 @@ def _parse(source, state):
...
@@ -533,11 +540,11 @@ def _parse(source, state):
else
:
else
:
code2
=
LITERAL
,
_ord
(
this
)
code2
=
LITERAL
,
_ord
(
this
)
if
code1
[
0
]
!=
LITERAL
or
code2
[
0
]
!=
LITERAL
:
if
code1
[
0
]
!=
LITERAL
or
code2
[
0
]
!=
LITERAL
:
raise
error
(
"bad character range"
)
raise
source
.
error
(
"bad character range"
,
len
(
this
)
)
lo
=
code1
[
1
]
lo
=
code1
[
1
]
hi
=
code2
[
1
]
hi
=
code2
[
1
]
if
hi
<
lo
:
if
hi
<
lo
:
raise
error
(
"bad character range"
)
raise
source
.
error
(
"bad character range"
,
len
(
this
)
)
setappend
((
RANGE
,
(
lo
,
hi
)))
setappend
((
RANGE
,
(
lo
,
hi
)))
else
:
else
:
if
code1
[
0
]
is
IN
:
if
code1
[
0
]
is
IN
:
...
@@ -555,6 +562,7 @@ def _parse(source, state):
...
@@ -555,6 +562,7 @@ def _parse(source, state):
elif
this
in
REPEAT_CHARS
:
elif
this
in
REPEAT_CHARS
:
# repeat previous item
# repeat previous item
here
=
source
.
tell
()
if
this
==
"?"
:
if
this
==
"?"
:
min
,
max
=
0
,
1
min
,
max
=
0
,
1
elif
this
==
"*"
:
elif
this
==
"*"
:
...
@@ -566,7 +574,6 @@ def _parse(source, state):
...
@@ -566,7 +574,6 @@ def _parse(source, state):
if
source
.
next
==
"}"
:
if
source
.
next
==
"}"
:
subpatternappend
((
LITERAL
,
_ord
(
this
)))
subpatternappend
((
LITERAL
,
_ord
(
this
)))
continue
continue
here
=
source
.
tell
()
min
,
max
=
0
,
MAXREPEAT
min
,
max
=
0
,
MAXREPEAT
lo
=
hi
=
""
lo
=
hi
=
""
while
source
.
next
in
DIGITS
:
while
source
.
next
in
DIGITS
:
...
@@ -589,18 +596,21 @@ def _parse(source, state):
...
@@ -589,18 +596,21 @@ def _parse(source, state):
if
max
>=
MAXREPEAT
:
if
max
>=
MAXREPEAT
:
raise
OverflowError
(
"the repetition number is too large"
)
raise
OverflowError
(
"the repetition number is too large"
)
if
max
<
min
:
if
max
<
min
:
raise
error
(
"bad repeat interval"
)
raise
source
.
error
(
"bad repeat interval"
,
source
.
tell
()
-
here
)
else
:
else
:
raise
error
(
"not supported"
)
raise
source
.
error
(
"not supported"
,
len
(
this
)
)
# figure out which item to repeat
# figure out which item to repeat
if
subpattern
:
if
subpattern
:
item
=
subpattern
[
-
1
:]
item
=
subpattern
[
-
1
:]
else
:
else
:
item
=
None
item
=
None
if
not
item
or
(
_len
(
item
)
==
1
and
item
[
0
][
0
]
==
AT
):
if
not
item
or
(
_len
(
item
)
==
1
and
item
[
0
][
0
]
==
AT
):
raise
error
(
"nothing to repeat"
)
raise
source
.
error
(
"nothing to repeat"
,
source
.
tell
()
-
here
+
len
(
this
))
if
item
[
0
][
0
]
in
_REPEATCODES
:
if
item
[
0
][
0
]
in
_REPEATCODES
:
raise
error
(
"multiple repeat"
)
raise
source
.
error
(
"multiple repeat"
,
source
.
tell
()
-
here
+
len
(
this
))
if
sourcematch
(
"?"
):
if
sourcematch
(
"?"
):
subpattern
[
-
1
]
=
(
MIN_REPEAT
,
(
min
,
max
,
item
))
subpattern
[
-
1
]
=
(
MIN_REPEAT
,
(
min
,
max
,
item
))
else
:
else
:
...
@@ -618,7 +628,7 @@ def _parse(source, state):
...
@@ -618,7 +628,7 @@ def _parse(source, state):
# options
# options
char
=
sourceget
()
char
=
sourceget
()
if
char
is
None
:
if
char
is
None
:
raise
error
(
"unexpected end of pattern"
)
raise
self
.
error
(
"unexpected end of pattern"
)
if
char
==
"P"
:
if
char
==
"P"
:
# python extensions
# python extensions
if
sourcematch
(
"<"
):
if
sourcematch
(
"<"
):
...
@@ -626,28 +636,32 @@ def _parse(source, state):
...
@@ -626,28 +636,32 @@ def _parse(source, state):
name
=
source
.
getuntil
(
">"
)
name
=
source
.
getuntil
(
">"
)
group
=
1
group
=
1
if
not
name
:
if
not
name
:
raise
error
(
"missing group name"
)
raise
source
.
error
(
"missing group name"
,
1
)
if
not
name
.
isidentifier
():
if
not
name
.
isidentifier
():
raise
error
(
"bad character in group name
%
r"
%
name
)
raise
source
.
error
(
"bad character in group name "
"
%
r"
%
name
,
len
(
name
)
+
1
)
elif
sourcematch
(
"="
):
elif
sourcematch
(
"="
):
# named backreference
# named backreference
name
=
source
.
getuntil
(
")"
)
name
=
source
.
getuntil
(
")"
)
if
not
name
:
if
not
name
:
raise
error
(
"missing group name"
)
raise
source
.
error
(
"missing group name"
,
1
)
if
not
name
.
isidentifier
():
if
not
name
.
isidentifier
():
raise
error
(
"bad character in backref group name "
raise
source
.
error
(
"bad character in backref "
"
%
r"
%
name
)
"group name
%
r"
%
name
,
len
(
name
)
+
1
)
gid
=
state
.
groupdict
.
get
(
name
)
gid
=
state
.
groupdict
.
get
(
name
)
if
gid
is
None
:
if
gid
is
None
:
msg
=
"unknown group name: {0!r}"
.
format
(
name
)
msg
=
"unknown group name: {0!r}"
.
format
(
name
)
raise
error
(
msg
)
raise
source
.
error
(
msg
,
len
(
name
)
+
1
)
subpatternappend
((
GROUPREF
,
gid
))
subpatternappend
((
GROUPREF
,
gid
))
continue
continue
else
:
else
:
char
=
sourceget
()
char
=
sourceget
()
if
char
is
None
:
if
char
is
None
:
raise
error
(
"unexpected end of pattern"
)
raise
source
.
error
(
"unexpected end of pattern"
)
raise
error
(
"unknown specifier: ?P
%
s"
%
char
)
raise
source
.
error
(
"unknown specifier: ?P
%
s"
%
char
,
len
(
char
))
elif
char
==
":"
:
elif
char
==
":"
:
# non-capturing group
# non-capturing group
group
=
2
group
=
2
...
@@ -655,7 +669,7 @@ def _parse(source, state):
...
@@ -655,7 +669,7 @@ def _parse(source, state):
# comment
# comment
while
True
:
while
True
:
if
source
.
next
is
None
:
if
source
.
next
is
None
:
raise
error
(
"unbalanced parenthesis"
)
raise
source
.
error
(
"unbalanced parenthesis"
)
if
sourceget
()
==
")"
:
if
sourceget
()
==
")"
:
break
break
continue
continue
...
@@ -665,11 +679,11 @@ def _parse(source, state):
...
@@ -665,11 +679,11 @@ def _parse(source, state):
if
char
==
"<"
:
if
char
==
"<"
:
char
=
sourceget
()
char
=
sourceget
()
if
char
is
None
or
char
not
in
"=!"
:
if
char
is
None
or
char
not
in
"=!"
:
raise
error
(
"syntax error"
)
raise
source
.
error
(
"syntax error"
)
dir
=
-
1
# lookbehind
dir
=
-
1
# lookbehind
p
=
_parse_sub
(
source
,
state
)
p
=
_parse_sub
(
source
,
state
)
if
not
sourcematch
(
")"
):
if
not
sourcematch
(
")"
):
raise
error
(
"unbalanced parenthesis"
)
raise
source
.
error
(
"unbalanced parenthesis"
)
if
char
==
"="
:
if
char
==
"="
:
subpatternappend
((
ASSERT
,
(
dir
,
p
)))
subpatternappend
((
ASSERT
,
(
dir
,
p
)))
else
:
else
:
...
@@ -680,23 +694,26 @@ def _parse(source, state):
...
@@ -680,23 +694,26 @@ def _parse(source, state):
condname
=
source
.
getuntil
(
")"
)
condname
=
source
.
getuntil
(
")"
)
group
=
2
group
=
2
if
not
condname
:
if
not
condname
:
raise
error
(
"missing group name"
)
raise
source
.
error
(
"missing group name"
,
1
)
if
condname
.
isidentifier
():
if
condname
.
isidentifier
():
condgroup
=
state
.
groupdict
.
get
(
condname
)
condgroup
=
state
.
groupdict
.
get
(
condname
)
if
condgroup
is
None
:
if
condgroup
is
None
:
msg
=
"unknown group name: {0!r}"
.
format
(
condname
)
msg
=
"unknown group name: {0!r}"
.
format
(
condname
)
raise
error
(
msg
)
raise
source
.
error
(
msg
,
len
(
condname
)
+
1
)
else
:
else
:
try
:
try
:
condgroup
=
int
(
condname
)
condgroup
=
int
(
condname
)
if
condgroup
<
0
:
if
condgroup
<
0
:
raise
ValueError
raise
ValueError
except
ValueError
:
except
ValueError
:
raise
error
(
"bad character in group name"
)
raise
source
.
error
(
"bad character in group name"
,
len
(
condname
)
+
1
)
if
not
condgroup
:
if
not
condgroup
:
raise
error
(
"bad group number"
)
raise
source
.
error
(
"bad group number"
,
len
(
condname
)
+
1
)
if
condgroup
>=
MAXGROUPS
:
if
condgroup
>=
MAXGROUPS
:
raise
error
(
"the group number is too large"
)
raise
source
.
error
(
"the group number is too large"
,
len
(
condname
)
+
1
)
elif
char
in
FLAGS
:
elif
char
in
FLAGS
:
# flags
# flags
state
.
flags
|=
FLAGS
[
char
]
state
.
flags
|=
FLAGS
[
char
]
...
@@ -704,20 +721,23 @@ def _parse(source, state):
...
@@ -704,20 +721,23 @@ def _parse(source, state):
state
.
flags
|=
FLAGS
[
sourceget
()]
state
.
flags
|=
FLAGS
[
sourceget
()]
verbose
=
state
.
flags
&
SRE_FLAG_VERBOSE
verbose
=
state
.
flags
&
SRE_FLAG_VERBOSE
else
:
else
:
raise
error
(
"unexpected end of pattern "
+
char
)
raise
source
.
error
(
"unexpected end of pattern"
)
if
group
:
if
group
:
# parse group contents
# parse group contents
if
group
==
2
:
if
group
==
2
:
# anonymous group
# anonymous group
group
=
None
group
=
None
else
:
else
:
try
:
group
=
state
.
opengroup
(
name
)
group
=
state
.
opengroup
(
name
)
except
error
as
err
:
raise
source
.
error
(
err
.
msg
,
len
(
name
)
+
1
)
if
condgroup
:
if
condgroup
:
p
=
_parse_sub_cond
(
source
,
state
,
condgroup
)
p
=
_parse_sub_cond
(
source
,
state
,
condgroup
)
else
:
else
:
p
=
_parse_sub
(
source
,
state
)
p
=
_parse_sub
(
source
,
state
)
if
not
sourcematch
(
")"
):
if
not
sourcematch
(
")"
):
raise
error
(
"unbalanced parenthesis"
)
raise
source
.
error
(
"unbalanced parenthesis"
)
if
group
is
not
None
:
if
group
is
not
None
:
state
.
closegroup
(
group
,
p
)
state
.
closegroup
(
group
,
p
)
subpatternappend
((
SUBPATTERN
,
(
group
,
p
)))
subpatternappend
((
SUBPATTERN
,
(
group
,
p
)))
...
@@ -725,10 +745,10 @@ def _parse(source, state):
...
@@ -725,10 +745,10 @@ def _parse(source, state):
while
True
:
while
True
:
char
=
sourceget
()
char
=
sourceget
()
if
char
is
None
:
if
char
is
None
:
raise
error
(
"unexpected end of pattern"
)
raise
source
.
error
(
"unexpected end of pattern"
)
if
char
==
")"
:
if
char
==
")"
:
break
break
raise
error
(
"unknown extension"
)
raise
source
.
error
(
"unknown extension"
,
len
(
char
)
)
elif
this
==
"^"
:
elif
this
==
"^"
:
subpatternappend
((
AT
,
AT_BEGINNING
))
subpatternappend
((
AT
,
AT_BEGINNING
))
...
@@ -737,7 +757,7 @@ def _parse(source, state):
...
@@ -737,7 +757,7 @@ def _parse(source, state):
subpattern
.
append
((
AT
,
AT_END
))
subpattern
.
append
((
AT
,
AT_END
))
else
:
else
:
raise
error
(
"parser error"
)
raise
source
.
error
(
"parser error"
,
len
(
this
)
)
return
subpattern
return
subpattern
...
@@ -768,9 +788,10 @@ def parse(str, flags=0, pattern=None):
...
@@ -768,9 +788,10 @@ def parse(str, flags=0, pattern=None):
if
source
.
next
is
not
None
:
if
source
.
next
is
not
None
:
if
source
.
next
==
")"
:
if
source
.
next
==
")"
:
raise
error
(
"unbalanced parenthesis"
)
raise
source
.
error
(
"unbalanced parenthesis"
)
else
:
else
:
raise
error
(
"bogus characters at end of regular expression"
)
raise
source
.
error
(
"bogus characters at end of regular expression"
,
len
(
tail
))
if
flags
&
SRE_FLAG_DEBUG
:
if
flags
&
SRE_FLAG_DEBUG
:
p
.
dump
()
p
.
dump
()
...
@@ -809,16 +830,18 @@ def parse_template(source, pattern):
...
@@ -809,16 +830,18 @@ def parse_template(source, pattern):
if
s
.
match
(
"<"
):
if
s
.
match
(
"<"
):
name
=
s
.
getuntil
(
">"
)
name
=
s
.
getuntil
(
">"
)
if
not
name
:
if
not
name
:
raise
error
(
"missing group name"
)
raise
s
.
error
(
"missing group name"
,
1
)
try
:
try
:
index
=
int
(
name
)
index
=
int
(
name
)
if
index
<
0
:
if
index
<
0
:
raise
error
(
"negative group number"
)
raise
s
.
error
(
"negative group number"
,
len
(
name
)
+
1
)
if
index
>=
MAXGROUPS
:
if
index
>=
MAXGROUPS
:
raise
error
(
"the group number is too large"
)
raise
s
.
error
(
"the group number is too large"
,
len
(
name
)
+
1
)
except
ValueError
:
except
ValueError
:
if
not
name
.
isidentifier
():
if
not
name
.
isidentifier
():
raise
error
(
"bad character in group name"
)
raise
s
.
error
(
"bad character in group name"
,
len
(
name
)
+
1
)
try
:
try
:
index
=
pattern
.
groupindex
[
name
]
index
=
pattern
.
groupindex
[
name
]
except
KeyError
:
except
KeyError
:
...
@@ -841,8 +864,8 @@ def parse_template(source, pattern):
...
@@ -841,8 +864,8 @@ def parse_template(source, pattern):
isoctal
=
True
isoctal
=
True
c
=
int
(
this
[
1
:],
8
)
c
=
int
(
this
[
1
:],
8
)
if
c
>
0
o377
:
if
c
>
0
o377
:
raise
error
(
'octal escape value
%
r outside of '
raise
s
.
error
(
'octal escape value
%
r outside of '
'range 0-0o377'
%
this
)
'range 0-0o377'
%
this
,
len
(
this
)
)
lappend
(
chr
(
c
))
lappend
(
chr
(
c
))
if
not
isoctal
:
if
not
isoctal
:
addgroup
(
int
(
this
[
1
:]))
addgroup
(
int
(
this
[
1
:]))
...
...
Lib/test/test_re.py
Dosyayı görüntüle @
ad446d57
...
@@ -1419,6 +1419,42 @@ SUBPATTERN None
...
@@ -1419,6 +1419,42 @@ SUBPATTERN None
self
.
assertIsNone
(
re
.
match
(
b
'(?Li)
\xc5
'
,
b
'
\xe5
'
))
self
.
assertIsNone
(
re
.
match
(
b
'(?Li)
\xc5
'
,
b
'
\xe5
'
))
self
.
assertIsNone
(
re
.
match
(
b
'(?Li)
\xe5
'
,
b
'
\xc5
'
))
self
.
assertIsNone
(
re
.
match
(
b
'(?Li)
\xe5
'
,
b
'
\xc5
'
))
def
test_error
(
self
):
with
self
.
assertRaises
(
re
.
error
)
as
cm
:
re
.
compile
(
'(
\u20ac
))'
)
err
=
cm
.
exception
self
.
assertIsInstance
(
err
.
pattern
,
str
)
self
.
assertEqual
(
err
.
pattern
,
'(
\u20ac
))'
)
self
.
assertEqual
(
err
.
pos
,
3
)
self
.
assertEqual
(
err
.
lineno
,
1
)
self
.
assertEqual
(
err
.
colno
,
4
)
self
.
assertIn
(
err
.
msg
,
str
(
err
))
self
.
assertIn
(
' at position 3'
,
str
(
err
))
self
.
assertNotIn
(
' at position 3'
,
err
.
msg
)
# Bytes pattern
with
self
.
assertRaises
(
re
.
error
)
as
cm
:
re
.
compile
(
b
'(
\xa4
))'
)
err
=
cm
.
exception
self
.
assertIsInstance
(
err
.
pattern
,
bytes
)
self
.
assertEqual
(
err
.
pattern
,
b
'(
\xa4
))'
)
self
.
assertEqual
(
err
.
pos
,
3
)
# Multiline pattern
with
self
.
assertRaises
(
re
.
error
)
as
cm
:
re
.
compile
(
"""
(
abc
)
)
(
"""
,
re
.
VERBOSE
)
err
=
cm
.
exception
self
.
assertEqual
(
err
.
pos
,
77
)
self
.
assertEqual
(
err
.
lineno
,
5
)
self
.
assertEqual
(
err
.
colno
,
17
)
self
.
assertIn
(
err
.
msg
,
str
(
err
))
self
.
assertIn
(
' at position 77'
,
str
(
err
))
self
.
assertIn
(
'(line 5, column 17)'
,
str
(
err
))
class
PatternReprTests
(
unittest
.
TestCase
):
class
PatternReprTests
(
unittest
.
TestCase
):
def
check
(
self
,
pattern
,
expected
):
def
check
(
self
,
pattern
,
expected
):
...
...
Misc/NEWS
Dosyayı görüntüle @
ad446d57
...
@@ -183,6 +183,8 @@ Core and Builtins
...
@@ -183,6 +183,8 @@ Core and Builtins
Library
Library
-------
-------
-
Issue
#
22578
:
Added
attributes
to
the
re
.
error
class
.
-
Issue
#
12728
:
Different
Unicode
characters
having
the
same
uppercase
but
-
Issue
#
12728
:
Different
Unicode
characters
having
the
same
uppercase
but
different
lowercase
are
now
matched
in
case
-
insensitive
regular
expressions
.
different
lowercase
are
now
matched
in
case
-
insensitive
regular
expressions
.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment