Commit d914e3f8 authored by Bob Ippolito

merge json library with simplejson 2.0.9 (issue 4136)

parent 277859d5
r"""A simple, fast, extensible JSON encoder and decoder r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of
JSON (JavaScript Object Notation) <http://json.org> is a subset of
JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
interchange format. interchange format.
json exposes an API familiar to uses of the standard library :mod:`json` exposes an API familiar to users of the standard library
marshal and pickle modules. :mod:`marshal` and :mod:`pickle` modules. It is the externally maintained
version of the :mod:`json` library contained in Python 2.6, but maintains
compatibility with Python 2.4 and Python 2.5 and (currently) has
significant performance advantages, even without using the optional C
extension for speedups.
Encoding basic Python object hierarchies:: Encoding basic Python object hierarchies::
...@@ -32,23 +34,28 @@ Compact encoding:: ...@@ -32,23 +34,28 @@ Compact encoding::
>>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':'))
'[1,2,3,{"4":5,"6":7}]' '[1,2,3,{"4":5,"6":7}]'
Pretty printing (using repr() because of extraneous whitespace in the output):: Pretty printing::
>>> import json >>> import json
>>> print repr(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4)) >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4)
'{\n "4": 5, \n "6": 7\n}' >>> print '\n'.join([l.rstrip() for l in s.splitlines()])
{
"4": 5,
"6": 7
}
Decoding JSON:: Decoding JSON::
>>> import json >>> import json
>>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
[u'foo', {u'bar': [u'baz', None, 1.0, 2]}] >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj
>>> json.loads('"\\"foo\\bar"') True
u'"foo\x08ar' >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar'
True
>>> from StringIO import StringIO >>> from StringIO import StringIO
>>> io = StringIO('["streaming API"]') >>> io = StringIO('["streaming API"]')
>>> json.load(io) >>> json.load(io)[0] == 'streaming API'
[u'streaming API'] True
Specializing JSON object decoding:: Specializing JSON object decoding::
...@@ -61,43 +68,36 @@ Specializing JSON object decoding:: ...@@ -61,43 +68,36 @@ Specializing JSON object decoding::
>>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}',
... object_hook=as_complex) ... object_hook=as_complex)
(1+2j) (1+2j)
>>> import decimal >>> from decimal import Decimal
>>> json.loads('1.1', parse_float=decimal.Decimal) >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1')
Decimal('1.1') True
Extending JSONEncoder:: Specializing JSON object encoding::
>>> import json >>> import json
>>> class ComplexEncoder(json.JSONEncoder): >>> def encode_complex(obj):
... def default(self, obj): ... if isinstance(obj, complex):
... if isinstance(obj, complex): ... return [obj.real, obj.imag]
... return [obj.real, obj.imag] ... raise TypeError(repr(obj) + " is not JSON serializable")
... return json.JSONEncoder.default(self, obj)
... ...
>>> dumps(2 + 1j, cls=ComplexEncoder) >>> json.dumps(2 + 1j, default=encode_complex)
'[2.0, 1.0]' '[2.0, 1.0]'
>>> ComplexEncoder().encode(2 + 1j) >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j)
'[2.0, 1.0]'
>>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j))
'[2.0, 1.0]' '[2.0, 1.0]'
>>> list(ComplexEncoder().iterencode(2 + 1j))
['[', '2.0', ', ', '1.0', ']']
Using json.tool from the shell to validate and Using json.tool from the shell to validate and pretty-print::
pretty-print::
$ echo '{"json":"obj"}' | python -mjson.tool $ echo '{"json":"obj"}' | python -m json.tool
{ {
"json": "obj" "json": "obj"
} }
$ echo '{ 1.2:3.4}' | python -mjson.tool $ echo '{ 1.2:3.4}' | python -m json.tool
Expecting property name: line 1 column 2 (char 2) Expecting property name: line 1 column 2 (char 2)
Note that the JSON produced by this module's default settings
is a subset of YAML, so it may be used as a serializer for that as well.
""" """
__version__ = '2.0.9'
__version__ = '1.9'
__all__ = [ __all__ = [
'dump', 'dumps', 'load', 'loads', 'dump', 'dumps', 'load', 'loads',
'JSONDecoder', 'JSONEncoder', 'JSONDecoder', 'JSONEncoder',
...@@ -125,28 +125,29 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, ...@@ -125,28 +125,29 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
"""Serialize ``obj`` as a JSON formatted stream to ``fp`` (a """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
``.write()``-supporting file-like object). ``.write()``-supporting file-like object).
If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types If ``skipkeys`` is true then ``dict`` keys that are not basic types
(``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
will be skipped instead of raising a ``TypeError``. will be skipped instead of raising a ``TypeError``.
If ``ensure_ascii`` is ``False``, then some chunks written to ``fp`` If ``ensure_ascii`` is false, then some chunks written to ``fp``
may be ``unicode`` instances, subject to normal Python ``str`` to may be ``unicode`` instances, subject to normal Python ``str`` to
``unicode`` coercion rules. Unless ``fp.write()`` explicitly ``unicode`` coercion rules. Unless ``fp.write()`` explicitly
understands ``unicode`` (as in ``codecs.getwriter()``) this is likely understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
to cause an error. to cause an error.
If ``check_circular`` is ``False``, then the circular reference check If ``check_circular`` is false, then the circular reference check
for container types will be skipped and a circular reference will for container types will be skipped and a circular reference will
result in an ``OverflowError`` (or worse). result in an ``OverflowError`` (or worse).
If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to If ``allow_nan`` is false, then it will be a ``ValueError`` to
serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
in strict compliance of the JSON specification, instead of using the in strict compliance of the JSON specification, instead of using the
JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
If ``indent`` is a non-negative integer, then JSON array elements and object If ``indent`` is a non-negative integer, then JSON array elements and
members will be pretty-printed with that indent level. An indent level object members will be pretty-printed with that indent level. An indent
of 0 will only insert newlines. ``None`` is the most compact representation. level of 0 will only insert newlines. ``None`` is the most compact
representation.
If ``separators`` is an ``(item_separator, dict_separator)`` tuple If ``separators`` is an ``(item_separator, dict_separator)`` tuple
then it will be used instead of the default ``(', ', ': ')`` separators. then it will be used instead of the default ``(', ', ': ')`` separators.
...@@ -163,8 +164,8 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, ...@@ -163,8 +164,8 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
""" """
# cached encoder # cached encoder
if (skipkeys is False and ensure_ascii is True and if (not skipkeys and ensure_ascii and
check_circular is True and allow_nan is True and check_circular and allow_nan and
cls is None and indent is None and separators is None and cls is None and indent is None and separators is None and
encoding == 'utf-8' and default is None and not kw): encoding == 'utf-8' and default is None and not kw):
iterable = _default_encoder.iterencode(obj) iterable = _default_encoder.iterencode(obj)
...@@ -186,19 +187,19 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, ...@@ -186,19 +187,19 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
encoding='utf-8', default=None, **kw): encoding='utf-8', default=None, **kw):
"""Serialize ``obj`` to a JSON formatted ``str``. """Serialize ``obj`` to a JSON formatted ``str``.
If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types If ``skipkeys`` is true then ``dict`` keys that are not basic types
(``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
will be skipped instead of raising a ``TypeError``. will be skipped instead of raising a ``TypeError``.
If ``ensure_ascii`` is ``False``, then the return value will be a If ``ensure_ascii`` is false, then the return value will be a
``unicode`` instance subject to normal Python ``str`` to ``unicode`` ``unicode`` instance subject to normal Python ``str`` to ``unicode``
coercion rules instead of being escaped to an ASCII ``str``. coercion rules instead of being escaped to an ASCII ``str``.
If ``check_circular`` is ``False``, then the circular reference check If ``check_circular`` is false, then the circular reference check
for container types will be skipped and a circular reference will for container types will be skipped and a circular reference will
result in an ``OverflowError`` (or worse). result in an ``OverflowError`` (or worse).
If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to If ``allow_nan`` is false, then it will be a ``ValueError`` to
serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
strict compliance of the JSON specification, instead of using the strict compliance of the JSON specification, instead of using the
JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
...@@ -223,8 +224,8 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, ...@@ -223,8 +224,8 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
""" """
# cached encoder # cached encoder
if (skipkeys is False and ensure_ascii is True and if (not skipkeys and ensure_ascii and
check_circular is True and allow_nan is True and check_circular and allow_nan and
cls is None and indent is None and separators is None and cls is None and indent is None and separators is None and
encoding == 'utf-8' and default is None and not kw): encoding == 'utf-8' and default is None and not kw):
return _default_encoder.encode(obj) return _default_encoder.encode(obj)
...@@ -242,8 +243,8 @@ _default_decoder = JSONDecoder(encoding=None, object_hook=None) ...@@ -242,8 +243,8 @@ _default_decoder = JSONDecoder(encoding=None, object_hook=None)
def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, **kw): parse_int=None, parse_constant=None, **kw):
"""Deserialize ``fp`` (a ``.read()``-supporting file-like object """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
containing a JSON document) to a Python object. a JSON document) to a Python object.
If the contents of ``fp`` is encoded with an ASCII based encoding other If the contents of ``fp`` is encoded with an ASCII based encoding other
than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must
......
This diff is collapsed.
This diff is collapsed.
"""Iterator based sre token scanner """JSON token scanner
""" """
import re import re
import sre_parse try:
import sre_compile from _json import make_scanner as c_make_scanner
import sre_constants except ImportError:
c_make_scanner = None
from re import VERBOSE, MULTILINE, DOTALL
from sre_constants import BRANCH, SUBPATTERN
__all__ = ['Scanner', 'pattern'] __all__ = ['make_scanner']
FLAGS = (VERBOSE | MULTILINE | DOTALL) NUMBER_RE = re.compile(
r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
(re.VERBOSE | re.MULTILINE | re.DOTALL))
class Scanner(object): def py_make_scanner(context):
def __init__(self, lexicon, flags=FLAGS): parse_object = context.parse_object
self.actions = [None] parse_array = context.parse_array
# Combine phrases into a compound pattern parse_string = context.parse_string
s = sre_parse.Pattern() match_number = NUMBER_RE.match
s.flags = flags encoding = context.encoding
p = [] strict = context.strict
for idx, token in enumerate(lexicon): parse_float = context.parse_float
phrase = token.pattern parse_int = context.parse_int
try: parse_constant = context.parse_constant
subpattern = sre_parse.SubPattern(s, object_hook = context.object_hook
[(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))])
except sre_constants.error:
raise
p.append(subpattern)
self.actions.append(token)
s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work def _scan_once(string, idx):
p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) try:
self.scanner = sre_compile.compile(p) nextchar = string[idx]
except IndexError:
raise StopIteration
def iterscan(self, string, idx=0, context=None): if nextchar == '"':
"""Yield match, end_idx for each match return parse_string(string, idx + 1, encoding, strict)
elif nextchar == '{':
return parse_object((string, idx + 1), encoding, strict,
_scan_once, object_hook)
elif nextchar == '[':
return parse_array((string, idx + 1), _scan_once)
elif nextchar == 'n' and string[idx:idx + 4] == 'null':
return None, idx + 4
elif nextchar == 't' and string[idx:idx + 4] == 'true':
return True, idx + 4
elif nextchar == 'f' and string[idx:idx + 5] == 'false':
return False, idx + 5
""" m = match_number(string, idx)
match = self.scanner.scanner(string, idx).match if m is not None:
actions = self.actions integer, frac, exp = m.groups()
lastend = idx if frac or exp:
end = len(string) res = parse_float(integer + (frac or '') + (exp or ''))
while True: else:
m = match() res = parse_int(integer)
if m is None: return res, m.end()
break elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
matchbegin, matchend = m.span() return parse_constant('NaN'), idx + 3
if lastend == matchend: elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
break return parse_constant('Infinity'), idx + 8
action = actions[m.lastindex] elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
if action is not None: return parse_constant('-Infinity'), idx + 9
rval, next_pos = action(m, context) else:
if next_pos is not None and next_pos != matchend: raise StopIteration
# "fast forward" the scanner
matchend = next_pos
match = self.scanner.scanner(string, matchend).match
yield rval, matchend
lastend = matchend
return _scan_once
def pattern(pattern, flags=FLAGS): make_scanner = c_make_scanner or py_make_scanner
def decorator(fn):
fn.pattern = pattern
fn.regex = re.compile(pattern, flags)
return fn
return decorator
from unittest import TestCase
import json
def default_iterable(obj):
    """Fallback serializer hook: expand any iterable into a plain list.

    The tests in this module pass it as ``default=`` to ``json.dumps`` so
    that a value such as ``set()`` is handed back to the encoder as a list.
    """
    return [item for item in obj]
class TestCheckCircular(TestCase):
    """Checks how ``json.dumps`` reacts to self-referential containers
    and to values that need a ``default`` hook."""

    def test_circular_dict(self):
        # A dict stored as one of its own values must be rejected.
        cyclic = {}
        cyclic['a'] = cyclic
        self.assertRaises(ValueError, json.dumps, cyclic)

    def test_circular_list(self):
        # A list that contains itself must be rejected.
        cyclic = []
        cyclic.append(cyclic)
        self.assertRaises(ValueError, json.dumps, cyclic)

    def test_circular_composite(self):
        # Cycle that passes through two container types: dict -> list -> dict.
        outer = {}
        outer['a'] = []
        inner = outer['a']
        inner.append(outer)
        self.assertRaises(ValueError, json.dumps, outer)

    def test_circular_default(self):
        # With the hook the set is serializable; without it, TypeError.
        json.dumps([set()], default=default_iterable)
        self.assertRaises(TypeError, json.dumps, [set()])

    def test_circular_off_default(self):
        # Same expectations with the circular-reference check switched off.
        json.dumps([set()], check_circular=False, default=default_iterable)
        self.assertRaises(TypeError, json.dumps, [set()], check_circular=False)
...@@ -13,3 +13,10 @@ class TestDecode(TestCase): ...@@ -13,3 +13,10 @@ class TestDecode(TestCase):
rval = json.loads('1', parse_int=float) rval = json.loads('1', parse_int=float)
self.assert_(isinstance(rval, float)) self.assert_(isinstance(rval, float))
self.assertEquals(rval, 1.0) self.assertEquals(rval, 1.0)
def test_decoder_optimizations(self):
# Several optimizations were made that skip over calls to
# the whitespace regex, so this test is designed to try and
# exercise the uncommon cases. The array cases are already covered.
rval = json.loads('{ "key" : "value" , "k":"v" }')
self.assertEquals(rval, {"key":"value", "k":"v"})
...@@ -11,3 +11,11 @@ class TestDump(TestCase): ...@@ -11,3 +11,11 @@ class TestDump(TestCase):
def test_dumps(self): def test_dumps(self):
self.assertEquals(json.dumps({}), '{}') self.assertEquals(json.dumps({}), '{}')
def test_encode_truefalse(self):
self.assertEquals(json.dumps(
{True: False, False: True}, sort_keys=True),
'{"false": true, "true": false}')
self.assertEquals(json.dumps(
{2: 3.0, 4.0: 5L, False: 1, 6L: True, "7": 0}, sort_keys=True),
'{"false": 1, "2": 3.0, "4.0": 5, "6": true, "7": 0}')
...@@ -26,10 +26,14 @@ class TestEncodeBaseStringAscii(TestCase): ...@@ -26,10 +26,14 @@ class TestEncodeBaseStringAscii(TestCase):
self._test_encode_basestring_ascii(json.encoder.py_encode_basestring_ascii) self._test_encode_basestring_ascii(json.encoder.py_encode_basestring_ascii)
def test_c_encode_basestring_ascii(self): def test_c_encode_basestring_ascii(self):
if not json.encoder.c_encode_basestring_ascii:
return
self._test_encode_basestring_ascii(json.encoder.c_encode_basestring_ascii) self._test_encode_basestring_ascii(json.encoder.c_encode_basestring_ascii)
def _test_encode_basestring_ascii(self, encode_basestring_ascii): def _test_encode_basestring_ascii(self, encode_basestring_ascii):
fname = encode_basestring_ascii.__name__ fname = encode_basestring_ascii.__name__
for input_string, expect in CASES: for input_string, expect in CASES:
result = encode_basestring_ascii(input_string) result = encode_basestring_ascii(input_string)
self.assertEquals(result, expect) self.assertEquals(result, expect,
'{0!r} != {1!r} for {2}({3!r})'.format(
result, expect, fname, input_string))
...@@ -73,4 +73,4 @@ class TestFail(TestCase): ...@@ -73,4 +73,4 @@ class TestFail(TestCase):
except ValueError: except ValueError:
pass pass
else: else:
self.fail("Expected failure for fail%d.json: %r" % (idx, doc)) self.fail("Expected failure for fail{0}.json: {1!r}".format(idx, doc))
...@@ -5,5 +5,11 @@ import json ...@@ -5,5 +5,11 @@ import json
class TestFloat(TestCase): class TestFloat(TestCase):
def test_floats(self): def test_floats(self):
for num in [1617161771.7650001, math.pi, math.pi**100, math.pi**-100]: for num in [1617161771.7650001, math.pi, math.pi**100, math.pi**-100, 3.1]:
self.assertEquals(float(json.dumps(num)), num) self.assertEquals(float(json.dumps(num)), num)
self.assertEquals(json.loads(json.dumps(num)), num)
def test_ints(self):
for num in [1, 1L, 1<<32, 1<<64]:
self.assertEquals(json.dumps(num), str(num))
self.assertEquals(int(json.dumps(num)), num)
...@@ -51,5 +51,14 @@ class TestUnicode(TestCase): ...@@ -51,5 +51,14 @@ class TestUnicode(TestCase):
def test_unicode_decode(self): def test_unicode_decode(self):
for i in range(0, 0xd7ff): for i in range(0, 0xd7ff):
u = unichr(i) u = unichr(i)
js = '"\\u{0:04x}"'.format(i) s = '"\\u{0:04x}"'.format(i)
self.assertEquals(json.loads(js), u) self.assertEquals(json.loads(s), u)
def test_default_encoding(self):
self.assertEquals(json.loads(u'{"a": "\xe9"}'.encode('utf-8')),
{'a': u'\xe9'})
def test_unicode_preservation(self):
self.assertEquals(type(json.loads(u'""')), unicode)
self.assertEquals(type(json.loads(u'"a"')), unicode)
self.assertEquals(type(json.loads(u'["a"]')[0]), unicode)
...@@ -2,11 +2,11 @@ r"""Command-line tool to validate and pretty-print JSON ...@@ -2,11 +2,11 @@ r"""Command-line tool to validate and pretty-print JSON
Usage:: Usage::
$ echo '{"json":"obj"}' | python -mjson.tool $ echo '{"json":"obj"}' | python -m json.tool
{ {
"json": "obj" "json": "obj"
} }
$ echo '{ 1.2:3.4}' | python -mjson.tool $ echo '{ 1.2:3.4}' | python -m json.tool
Expecting property name: line 1 column 2 (char 2) Expecting property name: line 1 column 2 (char 2)
""" """
...@@ -24,7 +24,7 @@ def main(): ...@@ -24,7 +24,7 @@ def main():
infile = open(sys.argv[1], 'rb') infile = open(sys.argv[1], 'rb')
outfile = open(sys.argv[2], 'wb') outfile = open(sys.argv[2], 'wb')
else: else:
raise SystemExit("{0} [infile [outfile]]".format(sys.argv[0])) raise SystemExit(sys.argv[0] + " [infile [outfile]]")
try: try:
obj = json.load(infile) obj = json.load(infile)
except ValueError, e: except ValueError, e:
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment