r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of
JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
interchange format.

:mod:`json` exposes an API familiar to users of the standard library
:mod:`marshal` and :mod:`pickle` modules.  It is derived from a
version of the externally maintained simplejson library.

Encoding basic Python object hierarchies::

    >>> import json
    >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
    '["foo", {"bar": ["baz", null, 1.0, 2]}]'
    >>> print(json.dumps("\"foo\bar"))
    "\"foo\bar"
    >>> print(json.dumps('\u1234'))
    "\u1234"
    >>> print(json.dumps('\\'))
    "\\"
    >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True))
    {"a": 0, "b": 0, "c": 0}
    >>> from io import StringIO
    >>> io = StringIO()
    >>> json.dump(['streaming API'], io)
    >>> io.getvalue()
    '["streaming API"]'

Compact encoding::

    >>> import json
    >>> mydict = {'4': 5, '6': 7}
    >>> json.dumps([1,2,3,mydict], separators=(',', ':'))
    '[1,2,3,{"4":5,"6":7}]'

Pretty printing::

    >>> import json
    >>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4))
    {
        "4": 5,
        "6": 7
    }

Decoding JSON::

    >>> import json
    >>> obj = ['foo', {'bar': ['baz', None, 1.0, 2]}]
    >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj
    True
    >>> json.loads('"\\"foo\\bar"') == '"foo\x08ar'
    True
    >>> from io import StringIO
    >>> io = StringIO('["streaming API"]')
    >>> json.load(io)[0] == 'streaming API'
    True

Specializing JSON object decoding::

    >>> import json
    >>> def as_complex(dct):
    ...     if '__complex__' in dct:
    ...         return complex(dct['real'], dct['imag'])
    ...     return dct
    ...
    >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}',
    ...     object_hook=as_complex)
    (1+2j)
    >>> from decimal import Decimal
    >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1')
    True

Specializing JSON object encoding::

    >>> import json
    >>> def encode_complex(obj):
    ...     if isinstance(obj, complex):
    ...         return [obj.real, obj.imag]
    ...     raise TypeError(f'Object of type {obj.__class__.__name__} '
    ...                     f'is not JSON serializable')
    ...
    >>> json.dumps(2 + 1j, default=encode_complex)
    '[2.0, 1.0]'
    >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j)
    '[2.0, 1.0]'
    >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j))
    '[2.0, 1.0]'


Using json.tool from the shell to validate and pretty-print::

    $ echo '{"json":"obj"}' | python -m json.tool
    {
        "json": "obj"
    }
    $ echo '{ 1.2:3.4}' | python -m json.tool
    Expecting property name enclosed in double quotes: line 1 column 3 (char 2)
"""
# Package metadata.
__version__ = '2.0.9'
# Names exported by ``from json import *``.
__all__ = [
    'dump', 'dumps', 'load', 'loads',
    'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
]

__author__ = 'Bob Ippolito <bob@redivi.com>'

import codecs

from .decoder import JSONDecoder, JSONDecodeError
from .encoder import JSONEncoder

# Shared module-level encoder.  The option values listed here match the
# parameter defaults of dump() and dumps(); both functions reuse this
# instance when a call leaves every option at its default, instead of
# constructing a new JSONEncoder each time.
_default_encoder = JSONEncoder(
    skipkeys=False,
    ensure_ascii=True,
    check_circular=True,
    allow_nan=True,
    indent=None,
    separators=None,
    default=None,
)

def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True,
        allow_nan=True, cls=None, indent=None, separators=None,
        default=None, sort_keys=False, **kw):
    """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
    ``.write()``-supporting file-like object).

    If ``skipkeys`` is true then ``dict`` keys that are not basic types
    (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped
    instead of raising a ``TypeError``.

    If ``ensure_ascii`` is false, then the strings written to ``fp`` can
    contain non-ASCII characters if they appear in strings contained in
    ``obj``. Otherwise, all such characters are escaped in JSON strings.

    If ``check_circular`` is false, then the circular reference check
    for container types will be skipped and a circular reference will
    result in an ``OverflowError`` (or worse).

    If ``allow_nan`` is false, then it will be a ``ValueError`` to
    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
    in strict compliance of the JSON specification, instead of using the
    JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).

    If ``indent`` is a non-negative integer, then JSON array elements and
    object members will be pretty-printed with that indent level. An indent
    level of 0 will only insert newlines. ``None`` is the most compact
    representation.

    If specified, ``separators`` should be an ``(item_separator, key_separator)``
    tuple.  The default is ``(', ', ': ')`` if *indent* is ``None`` and
    ``(',', ': ')`` otherwise.  To get the most compact JSON representation,
    you should specify ``(',', ':')`` to eliminate whitespace.

    ``default(obj)`` is a function that should return a serializable version
    of obj or raise TypeError. The default simply raises TypeError.

    If *sort_keys* is true (default: ``False``), then the output of
    dictionaries will be sorted by key.

    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
    ``.default()`` method to serialize additional types), specify it with
    the ``cls`` kwarg; otherwise ``JSONEncoder`` is used.

    Any additional keyword arguments are passed through to the constructor
    of the encoder class.

    """
    # Fast path: when every option is left at its default, reuse the
    # module-level encoder instead of building a new one.
    if (not skipkeys and ensure_ascii and
        check_circular and allow_nan and
        cls is None and indent is None and separators is None and
        default is None and not sort_keys and not kw):
        iterable = _default_encoder.iterencode(obj)
    else:
        if cls is None:
            cls = JSONEncoder
        iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
            check_circular=check_circular, allow_nan=allow_nan, indent=indent,
            separators=separators,
            default=default, sort_keys=sort_keys, **kw).iterencode(obj)
    # could accelerate with writelines in some versions of Python, at
    # a debuggability cost
    for chunk in iterable:
        fp.write(chunk)


def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True,
        allow_nan=True, cls=None, indent=None, separators=None,
        default=None, sort_keys=False, **kw):
    """Serialize ``obj`` to a JSON formatted ``str``.

    If ``skipkeys`` is true then ``dict`` keys that are not basic types
    (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped
    instead of raising a ``TypeError``.

    If ``ensure_ascii`` is false, then the return value can contain non-ASCII
    characters if they appear in strings contained in ``obj``. Otherwise, all
    such characters are escaped in JSON strings.

    If ``check_circular`` is false, then the circular reference check
    for container types will be skipped and a circular reference will
    result in an ``OverflowError`` (or worse).

    If ``allow_nan`` is false, then it will be a ``ValueError`` to
    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
    strict compliance of the JSON specification, instead of using the
    JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).

    If ``indent`` is a non-negative integer, then JSON array elements and
    object members will be pretty-printed with that indent level. An indent
    level of 0 will only insert newlines. ``None`` is the most compact
    representation.

    If specified, ``separators`` should be an ``(item_separator, key_separator)``
    tuple.  The default is ``(', ', ': ')`` if *indent* is ``None`` and
    ``(',', ': ')`` otherwise.  To get the most compact JSON representation,
    you should specify ``(',', ':')`` to eliminate whitespace.

    ``default(obj)`` is a function that should return a serializable version
    of obj or raise TypeError. The default simply raises TypeError.

    If *sort_keys* is true (default: ``False``), then the output of
    dictionaries will be sorted by key.

    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
    ``.default()`` method to serialize additional types), specify it with
    the ``cls`` kwarg; otherwise ``JSONEncoder`` is used.

    Any additional keyword arguments are passed through to the constructor
    of the encoder class.

    """
    # Fast path: when every option is left at its default, reuse the
    # module-level encoder instead of building a new one.
    if (not skipkeys and ensure_ascii and
        check_circular and allow_nan and
        cls is None and indent is None and separators is None and
        default is None and not sort_keys and not kw):
        return _default_encoder.encode(obj)
    if cls is None:
        cls = JSONEncoder
    return cls(
        skipkeys=skipkeys, ensure_ascii=ensure_ascii,
        check_circular=check_circular, allow_nan=allow_nan, indent=indent,
        separators=separators, default=default, sort_keys=sort_keys,
        **kw).encode(obj)


# Shared module-level decoder; loads() reuses it when no custom class,
# hooks, or number/constant parsers are supplied.
_default_decoder = JSONDecoder(object_hook=None, object_pairs_hook=None)


def detect_encoding(b):
    """Guess the text encoding of the bytes-like JSON document ``b``.

    A leading byte order mark is checked first.  Without one, the
    positions of zero bytes among the first bytes are used to tell the
    UTF-16 and UTF-32 variants apart; this relies on a JSON text starting
    with ASCII characters.

    Returns a codec name: ``'utf-32'``, ``'utf-16'``, ``'utf-8-sig'``,
    ``'utf-32-be'``, ``'utf-32-le'``, ``'utf-16-be'``, ``'utf-16-le'``,
    or ``'utf-8'`` when nothing else matches.
    """
    bstartswith = b.startswith
    # The UTF-32 BOMs must be tested before the UTF-16 ones because the
    # UTF-32-LE BOM begins with the UTF-16-LE BOM.
    if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
        return 'utf-32'
    if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
        return 'utf-16'
    if bstartswith(codecs.BOM_UTF8):
        return 'utf-8-sig'

    if len(b) >= 4:
        if not b[0]:
            # 00 00 -- -- - utf-32-be
            # 00 XX -- -- - utf-16-be
            return 'utf-16-be' if b[1] else 'utf-32-be'
        if not b[1]:
            # XX 00 00 00 - utf-32-le
            # XX 00 00 XX - utf-16-le
            # XX 00 XX -- - utf-16-le
            return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
    elif len(b) == 2:
        if not b[0]:
            # 00 XX - utf-16-be
            return 'utf-16-be'
        if not b[1]:
            # XX 00 - utf-16-le
            return 'utf-16-le'
    # default
    return 'utf-8'


def load(fp, *, cls=None, object_hook=None, parse_float=None,
        parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
    """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
    a JSON document) to a Python object.

    ``object_hook`` is an optional function that will be called with the
    result of any object literal decode (a ``dict``). The return value of
    ``object_hook`` will be used instead of the ``dict``. This feature
    can be used to implement custom decoders (e.g. JSON-RPC class hinting).

    ``object_pairs_hook`` is an optional function that will be called with the
    result of any object literal decoded with an ordered list of pairs.  The
    return value of ``object_pairs_hook`` will be used instead of the ``dict``.
    This feature can be used to implement custom decoders.  If ``object_hook``
    is also defined, the ``object_pairs_hook`` takes priority.

    To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
    kwarg; otherwise ``JSONDecoder`` is used.

    The whole content of ``fp`` is read with a single ``fp.read()`` call and
    handed to ``loads()``; all keyword arguments, including any extra ones
    in ``kw``, are forwarded unchanged.
    """
    return loads(fp.read(),
        cls=cls, object_hook=object_hook,
        parse_float=parse_float, parse_int=parse_int,
        parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw)


def loads(s, *, encoding=None, cls=None, object_hook=None, parse_float=None,
        parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
    """Deserialize ``s`` (a ``str``, ``bytes`` or ``bytearray`` instance
    containing a JSON document) to a Python object.

    ``object_hook`` is an optional function that will be called with the
    result of any object literal decode (a ``dict``). The return value of
    ``object_hook`` will be used instead of the ``dict``. This feature
    can be used to implement custom decoders (e.g. JSON-RPC class hinting).

    ``object_pairs_hook`` is an optional function that will be called with the
    result of any object literal decoded with an ordered list of pairs.  The
    return value of ``object_pairs_hook`` will be used instead of the ``dict``.
    This feature can be used to implement custom decoders.  If ``object_hook``
    is also defined, the ``object_pairs_hook`` takes priority.

    ``parse_float``, if specified, will be called with the string
    of every JSON float to be decoded. By default this is equivalent to
    float(num_str). This can be used to use another datatype or parser
    for JSON floats (e.g. decimal.Decimal).

    ``parse_int``, if specified, will be called with the string
    of every JSON int to be decoded. By default this is equivalent to
    int(num_str). This can be used to use another datatype or parser
    for JSON integers (e.g. float).

    ``parse_constant``, if specified, will be called with one of the
    following strings: -Infinity, Infinity, NaN.
    This can be used to raise an exception if invalid JSON numbers
    are encountered.

    To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
    kwarg; otherwise ``JSONDecoder`` is used.

    Any additional keyword arguments are passed through to the constructor
    of the decoder class.

    The ``encoding`` argument is ignored and deprecated.

    Raises ``JSONDecodeError`` if ``s`` is a ``str`` that starts with a
    byte order mark, and ``TypeError`` if ``s`` is not a ``str``,
    ``bytes`` or ``bytearray`` instance.
    """
    if isinstance(s, str):
        if s.startswith('\ufeff'):
            raise JSONDecodeError("Unexpected UTF-8 BOM (decode using utf-8-sig)",
                                  s, 0)
    else:
        if not isinstance(s, (bytes, bytearray)):
            raise TypeError('the JSON object must be str, bytes or bytearray, '
                            f'not {s.__class__.__name__}')
        # Binary input: work out the encoding from the leading bytes and
        # decode to str before parsing.
        s = s.decode(detect_encoding(s), 'surrogatepass')

    # Fast path: with no customisation at all, reuse the module-level decoder.
    if (cls is None and object_hook is None and
            parse_int is None and parse_float is None and
            parse_constant is None and object_pairs_hook is None and not kw):
        return _default_decoder.decode(s)
    if cls is None:
        cls = JSONDecoder
    # Only forward the options the caller actually set, so a custom ``cls``
    # is not handed explicit ``None`` values for the rest.
    if object_hook is not None:
        kw['object_hook'] = object_hook
    if object_pairs_hook is not None:
        kw['object_pairs_hook'] = object_pairs_hook
    if parse_float is not None:
        kw['parse_float'] = parse_float
    if parse_int is not None:
        kw['parse_int'] = parse_int
    if parse_constant is not None:
        kw['parse_constant'] = parse_constant
    return cls(**kw).decode(s)