Commit 70ca0210, authored by Serhiy Storchaka

Issue #13169: The maximal repetition number in a regular expression has been
increased from 65534 to 2147483647 (on 32-bit platforms) or 4294967294 (on
64-bit platforms).
Parent commit: b19ed57d
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
import _sre, sys import _sre, sys
import sre_parse import sre_parse
from sre_constants import * from sre_constants import *
from _sre import MAXREPEAT
assert _sre.MAGIC == MAGIC, "SRE module mismatch" assert _sre.MAGIC == MAGIC, "SRE module mismatch"
......
...@@ -15,10 +15,6 @@ ...@@ -15,10 +15,6 @@
MAGIC = 20031017 MAGIC = 20031017
# max code word in this release
MAXREPEAT = 65535
# SRE standard exception (access as sre.error) # SRE standard exception (access as sre.error)
# should this really be here? # should this really be here?
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
import sys import sys
from sre_constants import * from sre_constants import *
from _sre import MAXREPEAT
SPECIAL_CHARS = ".\\[{()*+?^$|" SPECIAL_CHARS = ".\\[{()*+?^$|"
REPEAT_CHARS = "*+?{" REPEAT_CHARS = "*+?{"
...@@ -505,10 +506,14 @@ def _parse(source, state): ...@@ -505,10 +506,14 @@ def _parse(source, state):
continue continue
if lo: if lo:
min = int(lo) min = int(lo)
if min >= MAXREPEAT:
raise OverflowError("the repetition number is too large")
if hi: if hi:
max = int(hi) max = int(hi)
if max < min: if max >= MAXREPEAT:
raise error("bad repeat interval") raise OverflowError("the repetition number is too large")
if max < min:
raise error("bad repeat interval")
else: else:
raise error("not supported") raise error("not supported")
# figure out which item to repeat # figure out which item to repeat
......
from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
cpython_only
import io import io
import re import re
from re import Scanner from re import Scanner
...@@ -883,6 +884,37 @@ class ReTests(unittest.TestCase): ...@@ -883,6 +884,37 @@ class ReTests(unittest.TestCase):
self.assertEqual(n, size + 1) self.assertEqual(n, size + 1)
def test_repeat_minmax_overflow(self):
# Issue #13169
string = "x" * 100000
self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
# 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
@cpython_only
def test_repeat_minmax_overflow_maxrepeat(self):
    # Boundary check against the engine's own limit: MAXREPEAT - 1 is the
    # largest repeat count that compiles, MAXREPEAT itself is rejected.
    try:
        from _sre import MAXREPEAT
    except ImportError:
        self.skipTest('requires _sre.MAXREPEAT constant')
    string = "x" * 100000
    largest = MAXREPEAT - 1
    # A minimum of `largest` repetitions cannot be met by 100000 characters.
    self.assertIsNone(re.match(r".{%d}" % largest, string))
    # As an upper bound only, it simply lets the whole string match.
    match = re.match(r".{,%d}" % largest, string)
    # Guard first so a regression is reported as an assertion failure,
    # not as an AttributeError on None.span().
    self.assertIsNotNone(match)
    self.assertEqual(match.span(), (0, 100000))
    self.assertIsNone(re.match(r".{%d,}?" % largest, string))
    # MAXREPEAT itself must be refused in every position.
    self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT)
    self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT)
    self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT)
def run_re_tests(): def run_re_tests():
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
if verbose: if verbose:
......
...@@ -224,6 +224,10 @@ Core and Builtins ...@@ -224,6 +224,10 @@ Core and Builtins
Library Library
------- -------
- Issue #13169: The maximal repetition number in a regular expression has been
increased from 65534 to 2147483647 (on 32-bit platform) or 4294967294 (on
64-bit).
- Issue #16743: Fix mmap overflow check on 32 bit Windows. - Issue #16743: Fix mmap overflow check on 32 bit Windows.
- Issue #16800: tempfile.gettempdir() no longer left temporary files when - Issue #16800: tempfile.gettempdir() no longer left temporary files when
......
...@@ -517,7 +517,7 @@ SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount) ...@@ -517,7 +517,7 @@ SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
Py_ssize_t i; Py_ssize_t i;
/* adjust end */ /* adjust end */
if (maxcount < end - ptr && maxcount != 65535) if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
end = ptr + maxcount; end = ptr + maxcount;
switch (pattern[0]) { switch (pattern[0]) {
...@@ -1132,7 +1132,7 @@ entrance: ...@@ -1132,7 +1132,7 @@ entrance:
} else { } else {
/* general case */ /* general case */
LASTMARK_SAVE(); LASTMARK_SAVE();
while ((Py_ssize_t)ctx->pattern[2] == 65535 while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
|| ctx->count <= (Py_ssize_t)ctx->pattern[2]) { || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
state->ptr = ctx->ptr; state->ptr = ctx->ptr;
DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one, DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
...@@ -1218,7 +1218,7 @@ entrance: ...@@ -1218,7 +1218,7 @@ entrance:
} }
if ((ctx->count < ctx->u.rep->pattern[2] || if ((ctx->count < ctx->u.rep->pattern[2] ||
ctx->u.rep->pattern[2] == 65535) && ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
state->ptr != ctx->u.rep->last_ptr) { state->ptr != ctx->u.rep->last_ptr) {
/* we may have enough matches, but if we can /* we may have enough matches, but if we can
match another item, do so */ match another item, do so */
...@@ -1296,7 +1296,7 @@ entrance: ...@@ -1296,7 +1296,7 @@ entrance:
LASTMARK_RESTORE(); LASTMARK_RESTORE();
if (ctx->count >= ctx->u.rep->pattern[2] if (ctx->count >= ctx->u.rep->pattern[2]
&& ctx->u.rep->pattern[2] != 65535) && ctx->u.rep->pattern[2] != SRE_MAXREPEAT)
RETURN_FAILURE; RETURN_FAILURE;
ctx->u.rep->count = ctx->count; ctx->u.rep->count = ctx->count;
...@@ -3072,7 +3072,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) ...@@ -3072,7 +3072,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
GET_ARG; max = arg; GET_ARG; max = arg;
if (min > max) if (min > max)
FAIL; FAIL;
if (max > 65535) if (max > SRE_MAXREPEAT)
FAIL; FAIL;
if (!_validate_inner(code, code+skip-4, groups)) if (!_validate_inner(code, code+skip-4, groups))
FAIL; FAIL;
...@@ -3091,7 +3091,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) ...@@ -3091,7 +3091,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
GET_ARG; max = arg; GET_ARG; max = arg;
if (min > max) if (min > max)
FAIL; FAIL;
if (max > 65535) if (max > SRE_MAXREPEAT)
FAIL; FAIL;
if (!_validate_inner(code, code+skip-3, groups)) if (!_validate_inner(code, code+skip-3, groups))
FAIL; FAIL;
...@@ -3979,6 +3979,12 @@ PyMODINIT_FUNC PyInit__sre(void) ...@@ -3979,6 +3979,12 @@ PyMODINIT_FUNC PyInit__sre(void)
Py_DECREF(x); Py_DECREF(x);
} }
x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
if (x) {
PyDict_SetItemString(d, "MAXREPEAT", x);
Py_DECREF(x);
}
x = PyUnicode_FromString(copyright); x = PyUnicode_FromString(copyright);
if (x) { if (x) {
PyDict_SetItemString(d, "copyright", x); PyDict_SetItemString(d, "copyright", x);
......
...@@ -16,6 +16,11 @@ ...@@ -16,6 +16,11 @@
/* size of a code word (must be unsigned short or larger, and /* size of a code word (must be unsigned short or larger, and
large enough to hold a UCS4 character) */ large enough to hold a UCS4 character) */
#define SRE_CODE Py_UCS4 #define SRE_CODE Py_UCS4
/* SRE_MAXREPEAT is the repeat-count value the matcher treats as "no upper
   bound" (it is compared against with == / != rather than counted up to),
   and it is exported to Python as _sre.MAXREPEAT.  The parser rejects any
   explicit count >= MAXREPEAT, so the largest usable count is
   SRE_MAXREPEAT - 1. */
#if SIZEOF_SIZE_T > 4
/* size_t wider than 4 bytes: use the all-ones SRE_CODE value. */
# define SRE_MAXREPEAT (~(SRE_CODE)0)
#else
/* Otherwise: one past PY_SSIZE_T_MAX, computed in unsigned arithmetic. */
# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX + 1u)
#endif
typedef struct { typedef struct {
PyObject_VAR_HEAD PyObject_VAR_HEAD
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment