Commit 698280df, authored by Guido van Rossum

Issue #3756: make re.escape() handle bytes as well as str.

Patch by Andrew McNamara, reviewed and tweaked by myself.
Parent commit: 92f8f3e0
...@@ -211,23 +211,38 @@ def template(pattern, flags=0): ...@@ -211,23 +211,38 @@ def template(pattern, flags=0):
"Compile a template pattern, returning a pattern object" "Compile a template pattern, returning a pattern object"
return _compile(pattern, flags|T) return _compile(pattern, flags|T)
# Characters (and, for bytes patterns, byte values) that escape() leaves
# untouched.  Everything outside these sets gets a backslash in front.
_alphanum_str = frozenset(
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
_alphanum_bytes = frozenset(
    b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")


def escape(pattern):
    """Escape all non-alphanumeric characters in pattern.

    A str pattern yields a str.  Any other pattern is iterated as a
    sequence of byte values (as bytes objects are) and yields bytes.
    ASCII letters and digits are copied unchanged, a NUL is written as
    the four-character escape backslash-000, and every other character
    is prefixed with a single backslash.
    """
    if isinstance(pattern, str):
        alphanum = _alphanum_str
        return "".join(
            c if c in alphanum
            else "\\000" if c == "\000"
            else "\\" + c
            for c in pattern)

    # Iterating a bytes-like pattern produces ints, so membership tests and
    # the escape character itself are handled as integer byte values.
    alphanum = _alphanum_bytes
    esc = ord(b"\\")
    escaped = bytearray()
    for c in pattern:
        if c in alphanum:
            escaped.append(c)
        elif c == 0:
            # NUL is spelled out as an octal escape instead of being
            # embedded raw in the resulting pattern.
            escaped.extend(b"\\000")
        else:
            escaped.append(esc)
            escaped.append(c)
    return bytes(escaped)
# -------------------------------------------------------------------- # --------------------------------------------------------------------
# internals # internals
...@@ -248,7 +263,8 @@ def _compile(*key): ...@@ -248,7 +263,8 @@ def _compile(*key):
pattern, flags = key pattern, flags = key
if isinstance(pattern, _pattern_type): if isinstance(pattern, _pattern_type):
if flags: if flags:
raise ValueError('Cannot process flags argument with a compiled pattern') raise ValueError(
"Cannot process flags argument with a compiled pattern")
return pattern return pattern
if not sre_compile.isstring(pattern): if not sre_compile.isstring(pattern):
raise TypeError("first argument must be string or compiled pattern") raise TypeError("first argument must be string or compiled pattern")
...@@ -325,7 +341,7 @@ class Scanner: ...@@ -325,7 +341,7 @@ class Scanner:
if i == j: if i == j:
break break
action = self.lexicon[m.lastindex-1][1] action = self.lexicon[m.lastindex-1][1]
if hasattr(action, '__call__'): if hasattr(action, "__call__"):
self.match = m self.match = m
action = action(self, m.group()) action = action(self, m.group())
if action is not None: if action is not None:
......
...@@ -416,6 +416,7 @@ class ReTests(unittest.TestCase): ...@@ -416,6 +416,7 @@ class ReTests(unittest.TestCase):
def test_re_escape(self): def test_re_escape(self):
p="" p=""
self.assertEqual(re.escape(p), p)
for i in range(0, 256): for i in range(0, 256):
p = p + chr(i) p = p + chr(i)
self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None, self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
...@@ -426,6 +427,19 @@ class ReTests(unittest.TestCase): ...@@ -426,6 +427,19 @@ class ReTests(unittest.TestCase):
self.assertEqual(pat.match(p) is not None, True) self.assertEqual(pat.match(p) is not None, True)
self.assertEqual(pat.match(p).span(), (0,256)) self.assertEqual(pat.match(p).span(), (0,256))
def test_re_escape_byte(self):
    """Check that re.escape() accepts bytes and escapes them literally."""
    p = b""
    # Escaping an empty bytes pattern returns it unchanged.
    self.assertEqual(re.escape(p), p)
    for i in range(0, 256):
        b = bytes([i])
        p += b
        # Each single byte value, once escaped, must match exactly itself.
        m = re.match(re.escape(b), b)
        self.assertEqual(m is not None, True)
        self.assertEqual(m.span(), (0, 1))
    # The concatenation of all 256 byte values must match as a whole too.
    pat = re.compile(re.escape(p))
    m = pat.match(p)
    self.assertEqual(m is not None, True)
    self.assertEqual(m.span(), (0, 256))
def pickle_test(self, pickle): def pickle_test(self, pickle):
oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)') oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
s = pickle.dumps(oldpat) s = pickle.dumps(oldpat)
......
...@@ -96,6 +96,8 @@ C API ...@@ -96,6 +96,8 @@ C API
Library Library
------- -------
- Issue #3756: make re.escape() handle bytes as well as str.
- Issue #3800: fix filter() related bug in formatter.py. - Issue #3800: fix filter() related bug in formatter.py.
- Issue #874900: fix behaviour of threading module after a fork. - Issue #874900: fix behaviour of threading module after a fork.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment