Commit 37623ab5, authored by Ezio Melotti

#16009: JSON error messages now provide more information. Patch by Serhiy Storchaka.

parent fd53a5a0
...@@ -188,8 +188,8 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook, ...@@ -188,8 +188,8 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
try: try:
value, end = scan_once(s, end) value, end = scan_once(s, end)
except StopIteration: except StopIteration as err:
raise ValueError(errmsg("Expecting object", s, end)) raise ValueError(errmsg("Expecting value", s, err.value)) from None
pairs_append((key, value)) pairs_append((key, value))
try: try:
nextchar = s[end] nextchar = s[end]
...@@ -232,8 +232,8 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): ...@@ -232,8 +232,8 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
while True: while True:
try: try:
value, end = scan_once(s, end) value, end = scan_once(s, end)
except StopIteration: except StopIteration as err:
raise ValueError(errmsg("Expecting object", s, end)) raise ValueError(errmsg("Expecting value", s, err.value)) from None
_append(value) _append(value)
nextchar = s[end:end + 1] nextchar = s[end:end + 1]
if nextchar in _ws: if nextchar in _ws:
...@@ -243,7 +243,7 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): ...@@ -243,7 +243,7 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
if nextchar == ']': if nextchar == ']':
break break
elif nextchar != ',': elif nextchar != ',':
raise ValueError(errmsg("Expecting ',' delimiter", s, end)) raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))
try: try:
if s[end] in _ws: if s[end] in _ws:
end += 1 end += 1
...@@ -358,6 +358,6 @@ class JSONDecoder(object): ...@@ -358,6 +358,6 @@ class JSONDecoder(object):
""" """
try: try:
obj, end = self.scan_once(s, idx) obj, end = self.scan_once(s, idx)
except StopIteration: except StopIteration as err:
raise ValueError("No JSON object could be decoded") raise ValueError(errmsg("Expecting value", s, err.value)) from None
return obj, end return obj, end
...@@ -29,7 +29,7 @@ def py_make_scanner(context): ...@@ -29,7 +29,7 @@ def py_make_scanner(context):
try: try:
nextchar = string[idx] nextchar = string[idx]
except IndexError: except IndexError:
raise StopIteration raise StopIteration(idx)
if nextchar == '"': if nextchar == '"':
return parse_string(string, idx + 1, strict) return parse_string(string, idx + 1, strict)
...@@ -60,7 +60,7 @@ def py_make_scanner(context): ...@@ -60,7 +60,7 @@ def py_make_scanner(context):
elif nextchar == '-' and string[idx:idx + 9] == '-Infinity': elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
return parse_constant('-Infinity'), idx + 9 return parse_constant('-Infinity'), idx + 9
else: else:
raise StopIteration raise StopIteration(idx)
def scan_once(string, idx): def scan_once(string, idx):
try: try:
......
from test.json_tests import PyTest, CTest from test.json_tests import PyTest, CTest
import re
# 2007-10-05 # 2007-10-05
JSONDOCS = [ JSONDOCS = [
...@@ -100,6 +101,82 @@ class TestFail: ...@@ -100,6 +101,82 @@ class TestFail:
#This is for python encoder #This is for python encoder
self.assertRaises(TypeError, self.dumps, data, indent=True) self.assertRaises(TypeError, self.dumps, data, indent=True)
def test_truncated_input(self):
test_cases = [
('', 'Expecting value', 0),
('[', 'Expecting value', 1),
('[42', "Expecting ',' delimiter", 3),
('[42,', 'Expecting value', 4),
('["', 'Unterminated string starting at', 1),
('["spam', 'Unterminated string starting at', 1),
('["spam"', "Expecting ',' delimiter", 7),
('["spam",', 'Expecting value', 8),
('{', 'Expecting property name enclosed in double quotes', 1),
('{"', 'Unterminated string starting at', 1),
('{"spam', 'Unterminated string starting at', 1),
('{"spam"', "Expecting ':' delimiter", 7),
('{"spam":', 'Expecting value', 8),
('{"spam":42', "Expecting ',' delimiter", 10),
('{"spam":42,', 'Expecting property name enclosed in double quotes', 11),
]
test_cases += [
('"', 'Unterminated string starting at', 0),
('"spam', 'Unterminated string starting at', 0),
]
for data, msg, idx in test_cases:
self.assertRaisesRegex(ValueError,
r'^{0}: line 1 column {1} \(char {1}\)'.format(
re.escape(msg), idx),
self.loads, data)
def test_unexpected_data(self):
test_cases = [
('[,', 'Expecting value', 1),
('{"spam":[}', 'Expecting value', 9),
('[42:', "Expecting ',' delimiter", 3),
('[42 "spam"', "Expecting ',' delimiter", 4),
('[42,]', 'Expecting value', 4),
('{"spam":[42}', "Expecting ',' delimiter", 11),
('["]', 'Unterminated string starting at', 1),
('["spam":', "Expecting ',' delimiter", 7),
('["spam",]', 'Expecting value', 8),
('{:', 'Expecting property name enclosed in double quotes', 1),
('{,', 'Expecting property name enclosed in double quotes', 1),
('{42', 'Expecting property name enclosed in double quotes', 1),
('[{]', 'Expecting property name enclosed in double quotes', 2),
('{"spam",', "Expecting ':' delimiter", 7),
('{"spam"}', "Expecting ':' delimiter", 7),
('[{"spam"]', "Expecting ':' delimiter", 8),
('{"spam":}', 'Expecting value', 8),
('[{"spam":]', 'Expecting value', 9),
('{"spam":42 "ham"', "Expecting ',' delimiter", 11),
('[{"spam":42]', "Expecting ',' delimiter", 11),
('{"spam":42,}', 'Expecting property name enclosed in double quotes', 11),
]
for data, msg, idx in test_cases:
self.assertRaisesRegex(ValueError,
r'^{0}: line 1 column {1} \(char {1}\)'.format(
re.escape(msg), idx),
self.loads, data)
def test_extra_data(self):
test_cases = [
('[]]', 'Extra data', 2),
('{}}', 'Extra data', 2),
('[],[]', 'Extra data', 2),
('{},{}', 'Extra data', 2),
]
test_cases += [
('42,"spam"', 'Extra data', 2),
('"spam",42', 'Extra data', 6),
]
for data, msg, idx in test_cases:
self.assertRaisesRegex(ValueError,
r'^{0}: line 1 column {1} - line 1 column {2}'
r' \(char {1} - {2}\)'.format(
re.escape(msg), idx, len(data)),
self.loads, data)
class TestPyFail(TestFail, PyTest): pass class TestPyFail(TestFail, PyTest): pass
class TestCFail(TestFail, CTest): pass class TestCFail(TestFail, CTest): pass
...@@ -201,6 +201,8 @@ Core and Builtins ...@@ -201,6 +201,8 @@ Core and Builtins
Library Library
------- -------
- Issue #16009: JSON error messages now provide more information.
- Issue #16828: Fix error incorrectly raised by bz2.compress(b'') and - Issue #16828: Fix error incorrectly raised by bz2.compress(b'') and
bz2.BZ2Compressor.compress(b''). Initial patch by Martin Packman. bz2.BZ2Compressor.compress(b''). Initial patch by Martin Packman.
......
...@@ -237,6 +237,16 @@ raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) ...@@ -237,6 +237,16 @@ raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
} }
} }
/* Set a pending StopIteration exception whose value is the integer idx.
 *
 * If the integer object cannot be created, StopIteration is not set and
 * whatever error PyLong_FromSsize_t reported is left pending instead.
 */
static void
raise_stop_iteration(Py_ssize_t idx)
{
    PyObject *index_obj = PyLong_FromSsize_t(idx);

    if (index_obj == NULL) {
        return;
    }
    PyErr_SetObject(PyExc_StopIteration, index_obj);
    /* Drop our own reference to the integer now that the exception is set. */
    Py_DECREF(index_obj);
}
static PyObject * static PyObject *
_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
/* return (rval, idx) tuple, stealing reference to rval */ /* return (rval, idx) tuple, stealing reference to rval */
...@@ -306,7 +316,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next ...@@ -306,7 +316,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
buf = PyUnicode_DATA(pystr); buf = PyUnicode_DATA(pystr);
kind = PyUnicode_KIND(pystr); kind = PyUnicode_KIND(pystr);
if (end < 0 || len <= end) { if (end < 0 || len < end) {
PyErr_SetString(PyExc_ValueError, "end is out of bounds"); PyErr_SetString(PyExc_ValueError, "end is out of bounds");
goto bail; goto bail;
} }
...@@ -604,12 +614,12 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss ...@@ -604,12 +614,12 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++; while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
/* only loop if the object is non-empty */ /* only loop if the object is non-empty */
if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != '}') { if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
while (idx <= end_idx) { while (1) {
PyObject *memokey; PyObject *memokey;
/* read key */ /* read key */
if (PyUnicode_READ(kind, str, idx) != '"') { if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx); raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
goto bail; goto bail;
} }
...@@ -666,11 +676,9 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss ...@@ -666,11 +676,9 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
/* bail if the object is closed or we didn't get the , delimiter */ /* bail if the object is closed or we didn't get the , delimiter */
if (idx > end_idx) break; if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
if (PyUnicode_READ(kind, str, idx) == '}') {
break; break;
} if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
else if (PyUnicode_READ(kind, str, idx) != ',') {
raise_errmsg("Expecting ',' delimiter", pystr, idx); raise_errmsg("Expecting ',' delimiter", pystr, idx);
goto bail; goto bail;
} }
...@@ -681,12 +689,6 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss ...@@ -681,12 +689,6 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
} }
} }
/* verify that idx < end_idx, str[idx] should be '}' */
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
raise_errmsg("Expecting object", pystr, end_idx);
goto bail;
}
*next_idx_ptr = idx + 1; *next_idx_ptr = idx + 1;
if (has_pairs_hook) { if (has_pairs_hook) {
...@@ -738,8 +740,8 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi ...@@ -738,8 +740,8 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
/* only loop if the array is non-empty */ /* only loop if the array is non-empty */
if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != ']') { if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
while (idx <= end_idx) { while (1) {
/* read any JSON term */ /* read any JSON term */
val = scan_once_unicode(s, pystr, idx, &next_idx); val = scan_once_unicode(s, pystr, idx, &next_idx);
...@@ -756,11 +758,9 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi ...@@ -756,11 +758,9 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
/* bail if the array is closed or we didn't get the , delimiter */ /* bail if the array is closed or we didn't get the , delimiter */
if (idx > end_idx) break; if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
if (PyUnicode_READ(kind, str, idx) == ']') {
break; break;
} if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
else if (PyUnicode_READ(kind, str, idx) != ',') {
raise_errmsg("Expecting ',' delimiter", pystr, idx); raise_errmsg("Expecting ',' delimiter", pystr, idx);
goto bail; goto bail;
} }
...@@ -773,7 +773,7 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi ...@@ -773,7 +773,7 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi
/* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */ /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') { if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
raise_errmsg("Expecting object", pystr, end_idx); raise_errmsg("Expecting value", pystr, end_idx);
goto bail; goto bail;
} }
*next_idx_ptr = idx + 1; *next_idx_ptr = idx + 1;
...@@ -841,7 +841,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ ...@@ -841,7 +841,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
if (PyUnicode_READ(kind, str, idx) == '-') { if (PyUnicode_READ(kind, str, idx) == '-') {
idx++; idx++;
if (idx > end_idx) { if (idx > end_idx) {
PyErr_SetNone(PyExc_StopIteration); raise_stop_iteration(start);
return NULL; return NULL;
} }
} }
...@@ -857,7 +857,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ ...@@ -857,7 +857,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
} }
/* no integer digits, error */ /* no integer digits, error */
else { else {
PyErr_SetNone(PyExc_StopIteration); raise_stop_iteration(start);
return NULL; return NULL;
} }
...@@ -950,7 +950,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ ...@@ -950,7 +950,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
length = PyUnicode_GET_LENGTH(pystr); length = PyUnicode_GET_LENGTH(pystr);
if (idx >= length) { if (idx >= length) {
PyErr_SetNone(PyExc_StopIteration); raise_stop_iteration(idx);
return NULL; return NULL;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment