Commit aa034209, authored by Benjamin Peterson

be extremely careful about overflows in encode_basestring_ascii (closes #28284)

parent de2d4d20
...@@ -42,6 +42,9 @@ Core and Builtins ...@@ -42,6 +42,9 @@ Core and Builtins
Library Library
------- -------
- Issue #28284: Strengthen resistance of ``_json.encode_basestring_ascii()`` to
integer overflow.
- Issue #27611: Fixed support of default root window in the Tix module. - Issue #27611: Fixed support of default root window in the Tix module.
- Issue #24363: When parsing HTTP header fields, if an invalid line is - Issue #24363: When parsing HTTP header fields, if an invalid line is
......
...@@ -203,6 +203,7 @@ ascii_escape_unicode(PyObject *pystr) ...@@ -203,6 +203,7 @@ ascii_escape_unicode(PyObject *pystr)
Py_ssize_t output_size; Py_ssize_t output_size;
Py_ssize_t max_output_size; Py_ssize_t max_output_size;
Py_ssize_t chars; Py_ssize_t chars;
Py_ssize_t incr;
PyObject *rval; PyObject *rval;
char *output; char *output;
Py_UNICODE *input_unicode; Py_UNICODE *input_unicode;
...@@ -210,13 +211,20 @@ ascii_escape_unicode(PyObject *pystr) ...@@ -210,13 +211,20 @@ ascii_escape_unicode(PyObject *pystr)
input_chars = PyUnicode_GET_SIZE(pystr); input_chars = PyUnicode_GET_SIZE(pystr);
input_unicode = PyUnicode_AS_UNICODE(pystr); input_unicode = PyUnicode_AS_UNICODE(pystr);
output_size = input_chars;
incr = 2; /* for quotes */
/* One char input can be up to 6 chars output, estimate 4 of these */ /* One char input can be up to 6 chars output, estimate 4 of these */
if (input_chars > (PY_SSIZE_T_MAX - 2)/ MAX_EXPANSION) { incr += MIN_EXPANSION * 4;
PyErr_SetString(PyExc_OverflowError, "string is too long to escape"); if (PY_SSIZE_T_MAX - incr < output_size) {
PyErr_NoMemory();
return NULL; return NULL;
} }
output_size = 2 + (MIN_EXPANSION * 4) + input_chars; output_size += incr;
max_output_size = 2 + (input_chars * MAX_EXPANSION); if (PY_SSIZE_T_MAX / MAX_EXPANSION < input_chars ||
PY_SSIZE_T_MAX - 2 < input_chars * MAX_EXPANSION)
max_output_size = PY_SSIZE_T_MAX;
else
max_output_size = 2 + (input_chars * MAX_EXPANSION);
rval = PyString_FromStringAndSize(NULL, output_size); rval = PyString_FromStringAndSize(NULL, output_size);
if (rval == NULL) { if (rval == NULL) {
return NULL; return NULL;
...@@ -233,20 +241,20 @@ ascii_escape_unicode(PyObject *pystr) ...@@ -233,20 +241,20 @@ ascii_escape_unicode(PyObject *pystr)
chars = ascii_escape_char(c, output, chars); chars = ascii_escape_char(c, output, chars);
} }
if (output_size - chars < (1 + MAX_EXPANSION)) { if (output_size - chars < (1 + MAX_EXPANSION)) {
/* There's more than four, so let's resize by a lot */ if (output_size == PY_SSIZE_T_MAX) {
Py_ssize_t new_output_size = output_size * 2; Py_DECREF(rval);
/* This is an upper bound */ PyErr_NoMemory();
if (new_output_size > max_output_size) { return NULL;
new_output_size = max_output_size;
} }
/* Make sure that the output size changed before resizing */ /* There's more than four, so let's resize by a lot */
if (new_output_size != output_size) { if (PY_SSIZE_T_MAX / 2 >= output_size && output_size * 2 < max_output_size)
output_size = new_output_size; output_size *= 2;
if (_PyString_Resize(&rval, output_size) == -1) { else
return NULL; output_size = max_output_size;
} if (_PyString_Resize(&rval, output_size) == -1) {
output = PyString_AS_STRING(rval); return NULL;
} }
output = PyString_AS_STRING(rval);
} }
} }
output[chars++] = '"'; output[chars++] = '"';
...@@ -263,7 +271,9 @@ ascii_escape_str(PyObject *pystr) ...@@ -263,7 +271,9 @@ ascii_escape_str(PyObject *pystr)
Py_ssize_t i; Py_ssize_t i;
Py_ssize_t input_chars; Py_ssize_t input_chars;
Py_ssize_t output_size; Py_ssize_t output_size;
Py_ssize_t max_output_size;
Py_ssize_t chars; Py_ssize_t chars;
Py_ssize_t incr;
PyObject *rval; PyObject *rval;
char *output; char *output;
char *input_str; char *input_str;
...@@ -295,14 +305,22 @@ ascii_escape_str(PyObject *pystr) ...@@ -295,14 +305,22 @@ ascii_escape_str(PyObject *pystr)
} }
} }
if (i == input_chars) { output_size = input_chars;
/* Input is already ASCII */ incr = 2; /* for quotes */
output_size = 2 + input_chars; if (i != input_chars) {
}
else {
/* One char input can be up to 6 chars output, estimate 4 of these */ /* One char input can be up to 6 chars output, estimate 4 of these */
output_size = 2 + (MIN_EXPANSION * 4) + input_chars; incr += MIN_EXPANSION * 4;
}
if (PY_SSIZE_T_MAX - incr < output_size) {
PyErr_NoMemory();
return NULL;
} }
output_size += incr;
if (PY_SSIZE_T_MAX / MIN_EXPANSION < input_chars ||
PY_SSIZE_T_MAX - 2 < input_chars * MIN_EXPANSION)
max_output_size = PY_SSIZE_T_MAX;
else
max_output_size = 2 + (input_chars * MIN_EXPANSION);
rval = PyString_FromStringAndSize(NULL, output_size); rval = PyString_FromStringAndSize(NULL, output_size);
if (rval == NULL) { if (rval == NULL) {
return NULL; return NULL;
...@@ -324,11 +342,16 @@ ascii_escape_str(PyObject *pystr) ...@@ -324,11 +342,16 @@ ascii_escape_str(PyObject *pystr)
} }
/* An ASCII char can't possibly expand to a surrogate! */ /* An ASCII char can't possibly expand to a surrogate! */
if (output_size - chars < (1 + MIN_EXPANSION)) { if (output_size - chars < (1 + MIN_EXPANSION)) {
/* There's more than four, so let's resize by a lot */ if (output_size == PY_SSIZE_T_MAX) {
output_size *= 2; Py_DECREF(rval);
if (output_size > 2 + (input_chars * MIN_EXPANSION)) { PyErr_NoMemory();
output_size = 2 + (input_chars * MIN_EXPANSION); return NULL;
} }
/* There's more than four, so let's resize by a lot */
if (PY_SSIZE_T_MAX / 2 >= output_size && output_size * 2 < max_output_size)
output_size *= 2;
else
output_size = max_output_size;
if (_PyString_Resize(&rval, output_size) == -1) { if (_PyString_Resize(&rval, output_size) == -1) {
return NULL; return NULL;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment