Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
ad446d57
Kaydet (Commit)
ad446d57
authored
Kas 10, 2014
tarafından
Serhiy Storchaka
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
Issue #22578: Added attributes to the re.error class.
üst
eb99e515
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
174 additions
and
62 deletions
+174
-62
re.rst
Doc/library/re.rst
+25
-2
sre_constants.py
Lib/sre_constants.py
+29
-1
sre_parse.py
Lib/sre_parse.py
+82
-59
test_re.py
Lib/test/test_re.py
+36
-0
NEWS
Misc/NEWS
+2
-0
No files found.
Doc/library/re.rst
Dosyayı görüntüle @
ad446d57
...
...
@@ -733,13 +733,36 @@ form.
Clear the regular expression cache.
.. exception:: error
.. exception:: error
(msg, pattern=None, pos=None)
Exception raised when a string passed to one of the functions here is not a
valid regular expression (for example, it might contain unmatched parentheses)
or when some other error occurs during compilation or matching. It is never an
error if a string contains no match for a pattern.
error if a string contains no match for a pattern. The error instance has
the following additional attributes:
.. attribute:: msg
The unformatted error message.
.. attribute:: pattern
The regular expression pattern.
.. attribute:: pos
The index of *pattern* where compilation failed.
.. attribute:: lineno
The line corresponding to *pos*.
.. attribute:: colno
The column corresponding to *pos*.
.. versionchanged:: 3.5
Added additional attributes.
.. _re-objects:
...
...
Lib/sre_constants.py
Dosyayı görüntüle @
ad446d57
...
...
@@ -21,7 +21,35 @@ from _sre import MAXREPEAT, MAXGROUPS
# should this really be here?
class
error
(
Exception
):
pass
def
__init__
(
self
,
msg
,
pattern
=
None
,
pos
=
None
):
self
.
msg
=
msg
self
.
pattern
=
pattern
self
.
pos
=
pos
if
pattern
is
not
None
and
pos
is
not
None
:
msg
=
'
%
s at position
%
d'
%
(
msg
,
pos
)
if
isinstance
(
pattern
,
str
):
newline
=
'
\n
'
else
:
newline
=
b
'
\n
'
self
.
lineno
=
pattern
.
count
(
newline
,
0
,
pos
)
+
1
self
.
colno
=
pos
-
pattern
.
rfind
(
newline
,
0
,
pos
)
if
newline
in
pattern
:
msg
=
'
%
s (line
%
d, column
%
d)'
%
(
msg
,
self
.
lineno
,
self
.
colno
)
else
:
self
.
lineno
=
self
.
colno
=
None
super
()
.
__init__
(
msg
)
def
linecol
(
doc
,
pos
):
if
isinstance
(
pattern
,
str
):
newline
=
'
\n
'
else
:
newline
=
b
'
\n
'
lineno
=
pattern
.
count
(
newline
,
0
,
pos
)
+
1
if
lineno
==
1
:
colno
=
pos
+
1
else
:
colno
=
pos
-
doc
.
rindex
(
newline
,
0
,
pos
)
return
lineno
,
colno
class
_NamedIntConstant
(
int
):
...
...
Lib/sre_parse.py
Dosyayı görüntüle @
ad446d57
...
...
@@ -81,8 +81,8 @@ class Pattern:
if
name
is
not
None
:
ogid
=
self
.
groupdict
.
get
(
name
,
None
)
if
ogid
is
not
None
:
raise
error
(
"redefinition of group name
%
s
as group
%
d; "
"was group
%
d"
%
(
repr
(
name
)
,
gid
,
ogid
))
raise
error
(
"redefinition of group name
%
r
as group
%
d; "
"was group
%
d"
%
(
name
,
gid
,
ogid
))
self
.
groupdict
[
name
]
=
gid
return
gid
def
closegroup
(
self
,
gid
,
p
):
...
...
@@ -206,24 +206,25 @@ class SubPattern:
class
Tokenizer
:
def
__init__
(
self
,
string
):
self
.
istext
=
isinstance
(
string
,
str
)
self
.
string
=
string
if
not
self
.
istext
:
string
=
str
(
string
,
'latin1'
)
self
.
string
=
string
self
.
decoded_
string
=
string
self
.
index
=
0
self
.
__next
()
def
__next
(
self
):
index
=
self
.
index
try
:
char
=
self
.
string
[
index
]
char
=
self
.
decoded_
string
[
index
]
except
IndexError
:
self
.
next
=
None
return
if
char
==
"
\\
"
:
index
+=
1
try
:
char
+=
self
.
string
[
index
]
char
+=
self
.
decoded_
string
[
index
]
except
IndexError
:
raise
error
(
"bogus escape (end of line)"
)
raise
self
.
error
(
"bogus escape (end of line)"
)
from
None
self
.
index
=
index
+
1
self
.
next
=
char
def
match
(
self
,
char
):
...
...
@@ -250,15 +251,19 @@ class Tokenizer:
c
=
self
.
next
self
.
__next
()
if
c
is
None
:
raise
error
(
"unterminated name"
)
raise
self
.
error
(
"unterminated name"
)
if
c
==
terminator
:
break
result
+=
c
return
result
def
tell
(
self
):
return
self
.
index
,
self
.
next
return
self
.
index
-
len
(
self
.
next
or
''
)
def
seek
(
self
,
index
):
self
.
index
,
self
.
next
=
index
self
.
index
=
index
self
.
__next
()
def
error
(
self
,
msg
,
offset
=
0
):
return
error
(
msg
,
self
.
string
,
self
.
tell
()
-
offset
)
# The following three functions are not used in this module anymore, but we keep
# them here (with DeprecationWarnings) for backwards compatibility.
...
...
@@ -322,8 +327,8 @@ def _class_escape(source, escape):
escape
+=
source
.
getwhile
(
2
,
OCTDIGITS
)
c
=
int
(
escape
[
1
:],
8
)
if
c
>
0
o377
:
raise
error
(
'octal escape value
%
r outside of '
'range 0-0o377'
%
escape
)
raise
source
.
error
(
'octal escape value
%
r outside of '
'range 0-0o377'
%
escape
,
len
(
escape
)
)
return
LITERAL
,
c
elif
c
in
DIGITS
:
raise
ValueError
...
...
@@ -331,7 +336,7 @@ def _class_escape(source, escape):
return
LITERAL
,
ord
(
escape
[
1
])
except
ValueError
:
pass
raise
error
(
"bogus escape:
%
s"
%
repr
(
escape
))
raise
source
.
error
(
"bogus escape:
%
r"
%
escape
,
len
(
escape
))
def
_escape
(
source
,
escape
,
state
):
# handle escape code in expression
...
...
@@ -377,21 +382,23 @@ def _escape(source, escape, state):
escape
+=
source
.
get
()
c
=
int
(
escape
[
1
:],
8
)
if
c
>
0
o377
:
raise
error
(
'octal escape value
%
r outside of '
'range 0-0o377'
%
escape
)
raise
source
.
error
(
'octal escape value
%
r outside of '
'range 0-0o377'
%
escape
,
len
(
escape
))
return
LITERAL
,
c
# not an octal escape, so this is a group reference
group
=
int
(
escape
[
1
:])
if
group
<
state
.
groups
:
if
not
state
.
checkgroup
(
group
):
raise
error
(
"cannot refer to open group"
)
raise
source
.
error
(
"cannot refer to open group"
,
len
(
escape
))
return
GROUPREF
,
group
raise
ValueError
if
len
(
escape
)
==
2
:
return
LITERAL
,
ord
(
escape
[
1
])
except
ValueError
:
pass
raise
error
(
"bogus escape:
%
s"
%
repr
(
escape
))
raise
source
.
error
(
"bogus escape:
%
r"
%
escape
,
len
(
escape
))
def
_parse_sub
(
source
,
state
,
nested
=
True
):
# parse an alternation: a|b|c
...
...
@@ -404,7 +411,7 @@ def _parse_sub(source, state, nested=True):
if
not
sourcematch
(
"|"
):
break
if
nested
and
source
.
next
is
not
None
and
source
.
next
!=
")"
:
raise
error
(
"pattern not properly closed"
)
raise
source
.
error
(
"pattern not properly closed"
)
if
len
(
items
)
==
1
:
return
items
[
0
]
...
...
@@ -449,11 +456,11 @@ def _parse_sub_cond(source, state, condgroup):
if
source
.
match
(
"|"
):
item_no
=
_parse
(
source
,
state
)
if
source
.
next
==
"|"
:
raise
error
(
"conditional backref with more than two branches"
)
raise
source
.
error
(
"conditional backref with more than two branches"
)
else
:
item_no
=
None
if
source
.
next
is
not
None
and
source
.
next
!=
")"
:
raise
error
(
"pattern not properly closed"
)
raise
source
.
error
(
"pattern not properly closed"
)
subpattern
=
SubPattern
(
state
)
subpattern
.
append
((
GROUPREF_EXISTS
,
(
condgroup
,
item_yes
,
item_no
)))
return
subpattern
...
...
@@ -510,7 +517,7 @@ def _parse(source, state):
while
True
:
this
=
sourceget
()
if
this
is
None
:
raise
error
(
"unexpected end of regular expression"
)
raise
source
.
error
(
"unexpected end of regular expression"
)
if
this
==
"]"
and
set
!=
start
:
break
elif
this
[
0
]
==
"
\\
"
:
...
...
@@ -521,7 +528,7 @@ def _parse(source, state):
# potential range
this
=
sourceget
()
if
this
is
None
:
raise
error
(
"unexpected end of regular expression"
)
raise
source
.
error
(
"unexpected end of regular expression"
)
if
this
==
"]"
:
if
code1
[
0
]
is
IN
:
code1
=
code1
[
1
][
0
]
...
...
@@ -533,11 +540,11 @@ def _parse(source, state):
else
:
code2
=
LITERAL
,
_ord
(
this
)
if
code1
[
0
]
!=
LITERAL
or
code2
[
0
]
!=
LITERAL
:
raise
error
(
"bad character range"
)
raise
source
.
error
(
"bad character range"
,
len
(
this
)
)
lo
=
code1
[
1
]
hi
=
code2
[
1
]
if
hi
<
lo
:
raise
error
(
"bad character range"
)
raise
source
.
error
(
"bad character range"
,
len
(
this
)
)
setappend
((
RANGE
,
(
lo
,
hi
)))
else
:
if
code1
[
0
]
is
IN
:
...
...
@@ -555,6 +562,7 @@ def _parse(source, state):
elif
this
in
REPEAT_CHARS
:
# repeat previous item
here
=
source
.
tell
()
if
this
==
"?"
:
min
,
max
=
0
,
1
elif
this
==
"*"
:
...
...
@@ -566,7 +574,6 @@ def _parse(source, state):
if
source
.
next
==
"}"
:
subpatternappend
((
LITERAL
,
_ord
(
this
)))
continue
here
=
source
.
tell
()
min
,
max
=
0
,
MAXREPEAT
lo
=
hi
=
""
while
source
.
next
in
DIGITS
:
...
...
@@ -589,18 +596,21 @@ def _parse(source, state):
if
max
>=
MAXREPEAT
:
raise
OverflowError
(
"the repetition number is too large"
)
if
max
<
min
:
raise
error
(
"bad repeat interval"
)
raise
source
.
error
(
"bad repeat interval"
,
source
.
tell
()
-
here
)
else
:
raise
error
(
"not supported"
)
raise
source
.
error
(
"not supported"
,
len
(
this
)
)
# figure out which item to repeat
if
subpattern
:
item
=
subpattern
[
-
1
:]
else
:
item
=
None
if
not
item
or
(
_len
(
item
)
==
1
and
item
[
0
][
0
]
==
AT
):
raise
error
(
"nothing to repeat"
)
raise
source
.
error
(
"nothing to repeat"
,
source
.
tell
()
-
here
+
len
(
this
))
if
item
[
0
][
0
]
in
_REPEATCODES
:
raise
error
(
"multiple repeat"
)
raise
source
.
error
(
"multiple repeat"
,
source
.
tell
()
-
here
+
len
(
this
))
if
sourcematch
(
"?"
):
subpattern
[
-
1
]
=
(
MIN_REPEAT
,
(
min
,
max
,
item
))
else
:
...
...
@@ -618,7 +628,7 @@ def _parse(source, state):
# options
char
=
sourceget
()
if
char
is
None
:
raise
error
(
"unexpected end of pattern"
)
raise
self
.
error
(
"unexpected end of pattern"
)
if
char
==
"P"
:
# python extensions
if
sourcematch
(
"<"
):
...
...
@@ -626,28 +636,32 @@ def _parse(source, state):
name
=
source
.
getuntil
(
">"
)
group
=
1
if
not
name
:
raise
error
(
"missing group name"
)
raise
source
.
error
(
"missing group name"
,
1
)
if
not
name
.
isidentifier
():
raise
error
(
"bad character in group name
%
r"
%
name
)
raise
source
.
error
(
"bad character in group name "
"
%
r"
%
name
,
len
(
name
)
+
1
)
elif
sourcematch
(
"="
):
# named backreference
name
=
source
.
getuntil
(
")"
)
if
not
name
:
raise
error
(
"missing group name"
)
raise
source
.
error
(
"missing group name"
,
1
)
if
not
name
.
isidentifier
():
raise
error
(
"bad character in backref group name "
"
%
r"
%
name
)
raise
source
.
error
(
"bad character in backref "
"group name
%
r"
%
name
,
len
(
name
)
+
1
)
gid
=
state
.
groupdict
.
get
(
name
)
if
gid
is
None
:
msg
=
"unknown group name: {0!r}"
.
format
(
name
)
raise
error
(
msg
)
raise
source
.
error
(
msg
,
len
(
name
)
+
1
)
subpatternappend
((
GROUPREF
,
gid
))
continue
else
:
char
=
sourceget
()
if
char
is
None
:
raise
error
(
"unexpected end of pattern"
)
raise
error
(
"unknown specifier: ?P
%
s"
%
char
)
raise
source
.
error
(
"unexpected end of pattern"
)
raise
source
.
error
(
"unknown specifier: ?P
%
s"
%
char
,
len
(
char
))
elif
char
==
":"
:
# non-capturing group
group
=
2
...
...
@@ -655,7 +669,7 @@ def _parse(source, state):
# comment
while
True
:
if
source
.
next
is
None
:
raise
error
(
"unbalanced parenthesis"
)
raise
source
.
error
(
"unbalanced parenthesis"
)
if
sourceget
()
==
")"
:
break
continue
...
...
@@ -665,11 +679,11 @@ def _parse(source, state):
if
char
==
"<"
:
char
=
sourceget
()
if
char
is
None
or
char
not
in
"=!"
:
raise
error
(
"syntax error"
)
raise
source
.
error
(
"syntax error"
)
dir
=
-
1
# lookbehind
p
=
_parse_sub
(
source
,
state
)
if
not
sourcematch
(
")"
):
raise
error
(
"unbalanced parenthesis"
)
raise
source
.
error
(
"unbalanced parenthesis"
)
if
char
==
"="
:
subpatternappend
((
ASSERT
,
(
dir
,
p
)))
else
:
...
...
@@ -680,23 +694,26 @@ def _parse(source, state):
condname
=
source
.
getuntil
(
")"
)
group
=
2
if
not
condname
:
raise
error
(
"missing group name"
)
raise
source
.
error
(
"missing group name"
,
1
)
if
condname
.
isidentifier
():
condgroup
=
state
.
groupdict
.
get
(
condname
)
if
condgroup
is
None
:
msg
=
"unknown group name: {0!r}"
.
format
(
condname
)
raise
error
(
msg
)
raise
source
.
error
(
msg
,
len
(
condname
)
+
1
)
else
:
try
:
condgroup
=
int
(
condname
)
if
condgroup
<
0
:
raise
ValueError
except
ValueError
:
raise
error
(
"bad character in group name"
)
raise
source
.
error
(
"bad character in group name"
,
len
(
condname
)
+
1
)
if
not
condgroup
:
raise
error
(
"bad group number"
)
raise
source
.
error
(
"bad group number"
,
len
(
condname
)
+
1
)
if
condgroup
>=
MAXGROUPS
:
raise
error
(
"the group number is too large"
)
raise
source
.
error
(
"the group number is too large"
,
len
(
condname
)
+
1
)
elif
char
in
FLAGS
:
# flags
state
.
flags
|=
FLAGS
[
char
]
...
...
@@ -704,20 +721,23 @@ def _parse(source, state):
state
.
flags
|=
FLAGS
[
sourceget
()]
verbose
=
state
.
flags
&
SRE_FLAG_VERBOSE
else
:
raise
error
(
"unexpected end of pattern "
+
char
)
raise
source
.
error
(
"unexpected end of pattern"
)
if
group
:
# parse group contents
if
group
==
2
:
# anonymous group
group
=
None
else
:
group
=
state
.
opengroup
(
name
)
try
:
group
=
state
.
opengroup
(
name
)
except
error
as
err
:
raise
source
.
error
(
err
.
msg
,
len
(
name
)
+
1
)
if
condgroup
:
p
=
_parse_sub_cond
(
source
,
state
,
condgroup
)
else
:
p
=
_parse_sub
(
source
,
state
)
if
not
sourcematch
(
")"
):
raise
error
(
"unbalanced parenthesis"
)
raise
source
.
error
(
"unbalanced parenthesis"
)
if
group
is
not
None
:
state
.
closegroup
(
group
,
p
)
subpatternappend
((
SUBPATTERN
,
(
group
,
p
)))
...
...
@@ -725,10 +745,10 @@ def _parse(source, state):
while
True
:
char
=
sourceget
()
if
char
is
None
:
raise
error
(
"unexpected end of pattern"
)
raise
source
.
error
(
"unexpected end of pattern"
)
if
char
==
")"
:
break
raise
error
(
"unknown extension"
)
raise
source
.
error
(
"unknown extension"
,
len
(
char
)
)
elif
this
==
"^"
:
subpatternappend
((
AT
,
AT_BEGINNING
))
...
...
@@ -737,7 +757,7 @@ def _parse(source, state):
subpattern
.
append
((
AT
,
AT_END
))
else
:
raise
error
(
"parser error"
)
raise
source
.
error
(
"parser error"
,
len
(
this
)
)
return
subpattern
...
...
@@ -768,9 +788,10 @@ def parse(str, flags=0, pattern=None):
if
source
.
next
is
not
None
:
if
source
.
next
==
")"
:
raise
error
(
"unbalanced parenthesis"
)
raise
source
.
error
(
"unbalanced parenthesis"
)
else
:
raise
error
(
"bogus characters at end of regular expression"
)
raise
source
.
error
(
"bogus characters at end of regular expression"
,
len
(
tail
))
if
flags
&
SRE_FLAG_DEBUG
:
p
.
dump
()
...
...
@@ -809,16 +830,18 @@ def parse_template(source, pattern):
if
s
.
match
(
"<"
):
name
=
s
.
getuntil
(
">"
)
if
not
name
:
raise
error
(
"missing group name"
)
raise
s
.
error
(
"missing group name"
,
1
)
try
:
index
=
int
(
name
)
if
index
<
0
:
raise
error
(
"negative group number"
)
raise
s
.
error
(
"negative group number"
,
len
(
name
)
+
1
)
if
index
>=
MAXGROUPS
:
raise
error
(
"the group number is too large"
)
raise
s
.
error
(
"the group number is too large"
,
len
(
name
)
+
1
)
except
ValueError
:
if
not
name
.
isidentifier
():
raise
error
(
"bad character in group name"
)
raise
s
.
error
(
"bad character in group name"
,
len
(
name
)
+
1
)
try
:
index
=
pattern
.
groupindex
[
name
]
except
KeyError
:
...
...
@@ -841,8 +864,8 @@ def parse_template(source, pattern):
isoctal
=
True
c
=
int
(
this
[
1
:],
8
)
if
c
>
0
o377
:
raise
error
(
'octal escape value
%
r outside of '
'range 0-0o377'
%
this
)
raise
s
.
error
(
'octal escape value
%
r outside of '
'range 0-0o377'
%
this
,
len
(
this
)
)
lappend
(
chr
(
c
))
if
not
isoctal
:
addgroup
(
int
(
this
[
1
:]))
...
...
Lib/test/test_re.py
Dosyayı görüntüle @
ad446d57
...
...
@@ -1419,6 +1419,42 @@ SUBPATTERN None
self
.
assertIsNone
(
re
.
match
(
b
'(?Li)
\xc5
'
,
b
'
\xe5
'
))
self
.
assertIsNone
(
re
.
match
(
b
'(?Li)
\xe5
'
,
b
'
\xc5
'
))
def
test_error
(
self
):
with
self
.
assertRaises
(
re
.
error
)
as
cm
:
re
.
compile
(
'(
\u20ac
))'
)
err
=
cm
.
exception
self
.
assertIsInstance
(
err
.
pattern
,
str
)
self
.
assertEqual
(
err
.
pattern
,
'(
\u20ac
))'
)
self
.
assertEqual
(
err
.
pos
,
3
)
self
.
assertEqual
(
err
.
lineno
,
1
)
self
.
assertEqual
(
err
.
colno
,
4
)
self
.
assertIn
(
err
.
msg
,
str
(
err
))
self
.
assertIn
(
' at position 3'
,
str
(
err
))
self
.
assertNotIn
(
' at position 3'
,
err
.
msg
)
# Bytes pattern
with
self
.
assertRaises
(
re
.
error
)
as
cm
:
re
.
compile
(
b
'(
\xa4
))'
)
err
=
cm
.
exception
self
.
assertIsInstance
(
err
.
pattern
,
bytes
)
self
.
assertEqual
(
err
.
pattern
,
b
'(
\xa4
))'
)
self
.
assertEqual
(
err
.
pos
,
3
)
# Multiline pattern
with
self
.
assertRaises
(
re
.
error
)
as
cm
:
re
.
compile
(
"""
(
abc
)
)
(
"""
,
re
.
VERBOSE
)
err
=
cm
.
exception
self
.
assertEqual
(
err
.
pos
,
77
)
self
.
assertEqual
(
err
.
lineno
,
5
)
self
.
assertEqual
(
err
.
colno
,
17
)
self
.
assertIn
(
err
.
msg
,
str
(
err
))
self
.
assertIn
(
' at position 77'
,
str
(
err
))
self
.
assertIn
(
'(line 5, column 17)'
,
str
(
err
))
class
PatternReprTests
(
unittest
.
TestCase
):
def
check
(
self
,
pattern
,
expected
):
...
...
Misc/NEWS
Dosyayı görüntüle @
ad446d57
...
...
@@ -183,6 +183,8 @@ Core and Builtins
Library
-------
-
Issue
#
22578
:
Added
attributes
to
the
re
.
error
class
.
-
Issue
#
12728
:
Different
Unicode
characters
having
the
same
uppercase
but
different
lowercase
are
now
matched
in
case
-
insensitive
regular
expressions
.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment