Commit 22628c4d, authored by Antoine Pitrou

#3231: re.compile fails with some bytes patterns

Parent: 943f3391
...@@ -200,7 +200,7 @@ class Tokenizer: ...@@ -200,7 +200,7 @@ class Tokenizer:
except IndexError: except IndexError:
raise error("bogus escape (end of line)") raise error("bogus escape (end of line)")
if isinstance(self.string, bytes): if isinstance(self.string, bytes):
char = chr(c) c = chr(c)
char = char + c char = char + c
self.index = self.index + len(char) self.index = self.index + len(char)
self.next = char self.next = char
......
...@@ -661,12 +661,8 @@ xyzabc ...@@ -661,12 +661,8 @@ xyzabc
('^([ab]*?)(?<!(a))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'), ('^([ab]*?)(?<!(a))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
] ]
try: u = '\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'
u = eval("u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'") tests.extend([
except SyntaxError:
pass
else:
tests.extend([
# bug 410271: \b broken under locales # bug 410271: \b broken under locales
(r'\b.\b', 'a', SUCCEED, 'found', 'a'), (r'\b.\b', 'a', SUCCEED, 'found', 'a'),
(r'(?u)\b.\b', u, SUCCEED, 'found', u), (r'(?u)\b.\b', u, SUCCEED, 'found', u),
......
...@@ -732,23 +732,25 @@ def run_re_tests(): ...@@ -732,23 +732,25 @@ def run_re_tests():
else: else:
print('=== Failed incorrectly', t) print('=== Failed incorrectly', t)
# Try the match on a unicode string, and check that it # Try the match with both pattern and string converted to
# still succeeds. # bytes, and check that it still succeeds.
try: try:
result = obj.search(str(s, "latin-1")) bpat = bytes(pattern, "ascii")
if result is None: bs = bytes(s, "ascii")
print('=== Fails on unicode match', t) except UnicodeEncodeError:
except NameError: # skip non-ascii tests
continue # 1.5.2 pass
except TypeError: else:
continue # unicode test case try:
bpat = re.compile(bpat)
# Try the match on a unicode pattern, and check that it except Exception:
# still succeeds. print('=== Fails on bytes pattern compile', t)
obj=re.compile(str(pattern, "latin-1")) if verbose:
result = obj.search(s) traceback.print_exc(file=sys.stdout)
if result is None: else:
print('=== Fails on unicode pattern match', t) bytes_result = bpat.search(bs)
if bytes_result is None:
print('=== Fails on bytes pattern match', t)
# Try the match with the search area limited to the extent # Try the match with the search area limited to the extent
# of the match and see if it still succeeds. \B will # of the match and see if it still succeeds. \B will
...@@ -771,6 +773,7 @@ def run_re_tests(): ...@@ -771,6 +773,7 @@ def run_re_tests():
# Try the match with LOCALE enabled, and check that it # Try the match with LOCALE enabled, and check that it
# still succeeds. # still succeeds.
if '(?u)' not in pattern:
obj = re.compile(pattern, re.LOCALE) obj = re.compile(pattern, re.LOCALE)
result = obj.search(s) result = obj.search(s)
if result is None: if result is None:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment