"""Implementation of JSONDecoder | |
""" | |
import re | |
import sys | |
import struct | |
from json import scanner | |
try: | |
from _json import scanstring as c_scanstring | |
except ImportError: | |
c_scanstring = None | |
# Names exported by ``from <this module> import *``.
__all__ = ['JSONDecoder']

# Regex flags shared by the patterns compiled in this module.
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE
def _floatconstants():
    """Build the special float values used for the non-standard JSON constants.

    Returns:
        A ``(nan, inf, -inf)`` tuple of floats.
    """
    # Unpack explicit big-endian IEEE 754 bit patterns.  Spelling the byte
    # order out in the format string avoids any dependence on the host's
    # endianness and on the ``hex`` string codec.
    nan, = struct.unpack('>d', b'\x7f\xf8\x00\x00\x00\x00\x00\x00')
    inf, = struct.unpack('>d', b'\x7f\xf0\x00\x00\x00\x00\x00\x00')
    return nan, inf, -inf


NaN, PosInf, NegInf = _floatconstants()
def linecol(doc, pos):
    """Translate a character offset into a ``(line, column)`` pair.

    Both numbers are 1-based.  Only newline characters found before ``pos``
    are counted, so an offset that sits on a newline still belongs to the
    line that the newline terminates.
    """
    newlines_before = doc.count('\n', 0, pos)
    if newlines_before == 0:
        # Still on the first line: the column is just the offset, 1-based.
        return 1, pos + 1
    # Column is the distance from the closest preceding newline.
    last_newline = doc.rindex('\n', 0, pos)
    return newlines_before + 1, pos - last_newline
def errmsg(msg, doc, pos, end=None):
    """Format ``msg`` together with the location it refers to inside ``doc``.

    With only ``pos`` given, the result names a single line/column/offset.
    When ``end`` is supplied as well, the result describes the span running
    from ``pos`` to ``end``.
    """
    # Note that this function is called from _json
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        end_line, end_col = linecol(doc, end)
        template = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
        return template.format(
            msg, start_line, start_col, end_line, end_col, pos, end)
    template = '{0}: line {1} column {2} (char {3})'
    return template.format(msg, start_line, start_col, pos)
# Non-standard constant literals understood by the decoder, mapped to the
# float values they stand for.
_CONSTANTS = {
    'NaN': NaN,
    'Infinity': PosInf,
    '-Infinity': NegInf,
}

# Group 1 lazily captures a run of ordinary characters; group 2 captures the
# character that ends the run: a double quote, a backslash, or a control
# character in the range 0x00-0x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Single-character escapes (the character after the backslash) and the
# text each one decodes to.
BACKSLASH = {
    '"': u'"',
    '\\': u'\\',
    '/': u'/',
    'b': u'\b',
    'f': u'\f',
    'n': u'\n',
    'r': u'\r',
    't': u'\t',
}

# Encoding assumed for byte strings when the caller does not name one.
DEFAULT_ENCODING = "utf-8"
def _decode_uXXXX(s, pos): | |
esc = s[pos + 1:pos + 5] | |
if len(esc) == 4 and esc[1] not in 'xX': | |
try: | |
return int(esc, 16) | |
except ValueError: | |
pass | |
msg = "Invalid \\uXXXX escape" | |
raise ValueError(errmsg(msg, s, pos)) | |
def py_scanstring(s, end, encoding=None, strict=True,
                  _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Decode the JSON string literal that begins just before ``end`` in ``s``.

    ``end`` is the index of the first character after the opening quote.
    Every valid JSON escape sequence is replaced by the character it
    denotes; an invalid or unterminated string raises ValueError.  Literal
    control characters inside the string are rejected unless ``strict`` is
    false.  Content that is not already ``unicode`` is decoded with
    ``encoding`` (``DEFAULT_ENCODING`` when None).

    Returns a ``(text, index)`` tuple: the decoded string and the index of
    the character following the closing quote.
    """
    if encoding is None:
        encoding = DEFAULT_ENCODING
    pieces = []
    add_piece = pieces.append
    # Index of the opening quote, used when reporting an unterminated string.
    start = end - 1
    while True:
        match = _m(s, end)
        if match is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, start))
        end = match.end()
        text, stop = match.groups()
        # ``text`` holds zero or more characters that need no unescaping.
        if text:
            if not isinstance(text, unicode):
                text = unicode(text, encoding)
            add_piece(text)
        # ``stop`` is the closing quote, a literal control character, or a
        # backslash announcing an escape sequence.
        if stop == '"':
            break
        if stop != '\\':
            if strict:
                msg = "Invalid control character {0!r} at".format(stop)
                raise ValueError(errmsg(msg, s, end))
            add_piece(stop)
            continue
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, start))
        if esc == 'u':
            # Unicode escape sequence: "u" plus four hex digits.
            code = _decode_uXXXX(s, end)
            end += 5
            # On wide builds, fold a high/low surrogate pair into one
            # code point.
            if (sys.maxunicode > 65535 and 0xd800 <= code <= 0xdbff
                    and s[end:end + 2] == '\\u'):
                low = _decode_uXXXX(s, end + 1)
                if 0xdc00 <= low <= 0xdfff:
                    code = 0x10000 + (((code - 0xd800) << 10) | (low - 0xdc00))
                    end += 6
            char = unichr(code)
        else:
            # Any other escape must be one of the single-character ones.
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise ValueError(errmsg(msg, s, end))
            end += 1
        add_piece(char)
    return u''.join(pieces), end
# Prefer the C implementation when it could be imported, otherwise fall back
# to the pure-Python scanner.
scanstring = c_scanstring if c_scanstring else py_scanstring

# JSON whitespace, both as a plain string for membership tests and as a
# compiled pattern for skipping a whole run at once.
WHITESPACE_STR = ' \t\n\r'
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
def JSONObject(s_and_end, encoding, strict, scan_once, object_hook,
               object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the members of a JSON object and build the Python result.

    ``s_and_end`` is an ``(s, end)`` pair where ``end`` is the index at which
    the object's contents begin.  ``encoding`` and ``strict`` are forwarded
    to ``scanstring`` for every key; ``scan_once(s, idx)`` is called for
    every value and must return ``(value, end)``.

    The collected ``(key, value)`` pairs are handed to ``object_pairs_hook``
    when it is not None; otherwise they are turned into a ``dict`` which is
    passed through ``object_hook`` when that is not None.

    Returns a ``(result, end)`` tuple where ``end`` is the index after the
    closing brace.  Raises ValueError on malformed input.
    """
    s, end = s_and_end
    items = []
    add_item = items.append
    # Slicing (rather than indexing) cannot raise IndexError; an empty
    # slice falls through to the more specific ValueError below.
    ch = s[end:end + 1]
    # The common case is that a key's opening quote comes first.
    if ch != '"':
        if ch in _ws:
            end = _w(s, end).end()
            ch = s[end:end + 1]
        if ch == '}':
            # Empty object: nothing to scan.
            if object_pairs_hook is not None:
                return object_pairs_hook(items), end + 1
            empty = {}
            if object_hook is not None:
                empty = object_hook(empty)
            return empty, end + 1
        if ch != '"':
            raise ValueError(errmsg(
                "Expecting property name enclosed in double quotes", s, end))
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)

        # Fast path: the separator is ":" directly after the key; only
        # fall back to the whitespace regex when it is not.
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise ValueError(errmsg("Expecting ':' delimiter", s, end))
        end += 1

        # Skip whitespace before the value; a single space is handled
        # without calling the regex.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        add_item((key, value))

        # Fetch the character after the value, skipping whitespace.
        try:
            ch = s[end]
            if ch in _ws:
                end = _w(s, end + 1).end()
                ch = s[end]
        except IndexError:
            ch = ''
        end += 1

        if ch == '}':
            break
        if ch != ',':
            raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))

        # After a comma the next key's opening quote must follow.
        try:
            ch = s[end]
            if ch in _ws:
                end += 1
                ch = s[end]
                if ch in _ws:
                    end = _w(s, end + 1).end()
                    ch = s[end]
        except IndexError:
            ch = ''

        end += 1
        if ch != '"':
            raise ValueError(errmsg(
                "Expecting property name enclosed in double quotes", s, end - 1))

    if object_pairs_hook is not None:
        return object_pairs_hook(items), end
    mapping = dict(items)
    if object_hook is not None:
        mapping = object_hook(mapping)
    return mapping, end
def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the elements of a JSON array into a Python list.

    Args:
        s_and_end: ``(s, end)`` pair; ``end`` is the index at which the
            array's contents begin.
        scan_once: callable invoked as ``scan_once(s, idx)`` for every
            element; it must return ``(value, end)`` and signals "no value
            here" by raising StopIteration.
        _w: whitespace regex matcher, bound as a default argument.
        _ws: string of whitespace characters, bound as a default argument.

    Returns:
        A ``(values, end)`` tuple where ``end`` is the index after the
        closing bracket.

    Raises:
        ValueError: an element is missing, or an element is followed by
            something other than ``,`` or ``]``.
    """
    s, end = s_and_end
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            # ``end`` has already been advanced past ``nextchar``; report
            # the position of the offending character itself, matching
            # what JSONObject does for the same error.
            raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))
        # Skip whitespace before the next element; a single space is
        # handled without calling the regex.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass
    return values, end
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    By default, decoding maps JSON values to Python values as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity`` and ``-Infinity`` are also accepted and become the
    corresponding ``float`` values, even though the JSON spec does not
    include them.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
                 parse_int=None, parse_constant=None, strict=True,
                 object_pairs_hook=None):
        """Configure how JSON values are turned into Python objects.

        ``encoding`` names the encoding used to interpret ``str`` input
        (utf-8 by default); it has no effect on ``unicode`` input.  Only
        encodings that are a superset of ASCII currently work; text in any
        other encoding should be passed in as ``unicode``.

        ``object_hook``, when given, is called with the ``dict`` built for
        each decoded JSON object, and its return value is used in place of
        that ``dict``.  This allows custom deserializations (e.g. JSON-RPC
        class hinting).

        ``object_pairs_hook``, when given, is called with the ordered list
        of ``(key, value)`` pairs of each decoded JSON object, and its
        return value is used instead of the ``dict``.  This supports
        decoders that depend on the order in which pairs appear (for
        example, collections.OrderedDict remembers insertion order).  When
        ``object_hook`` is supplied too, ``object_pairs_hook`` wins.

        ``parse_float``, when given, is called with the string of each
        JSON float.  The default is equivalent to float(num_str).  Use it
        to substitute another type or parser (e.g. decimal.Decimal).

        ``parse_int``, when given, is called with the string of each JSON
        int.  The default is equivalent to int(num_str).  Use it to
        substitute another type or parser (e.g. float).

        ``parse_constant``, when given, is called with one of the strings
        -Infinity, Infinity, NaN.  It can be used to raise an exception
        when such invalid JSON numbers are encountered.

        If ``strict`` is false (true is the default), control characters
        are allowed inside strings.  Control characters here are those
        with character codes in the 0-31 range, including ``'\\t'`` (tab),
        ``'\\n'``, ``'\\r'`` and ``'\\0'``.

        """
        self.encoding = encoding
        self.strict = strict
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        # A falsy callback selects the built-in default.
        self.parse_float = parse_float if parse_float else float
        self.parse_int = parse_int if parse_int else int
        self.parse_constant = (
            parse_constant if parse_constant else _CONSTANTS.__getitem__)
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        # Kept last: make_scanner receives this instance, presumably to
        # read the attributes assigned above.
        self.scan_once = scanner.make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Decode ``s``, a ``str`` or ``unicode`` instance holding one JSON
        document, and return its Python representation.

        Whitespace before and after the document is ignored; anything else
        after the document raises ValueError.

        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        end = _w(s, end).end()
        if end == len(s):
            return obj
        raise ValueError(errmsg("Extra data", s, end, len(s)))

    def raw_decode(self, s, idx=0):
        """Decode the JSON document found at index ``idx`` of ``s`` (a
        ``str`` or ``unicode``) and return a 2-tuple of its Python
        representation and the index in ``s`` where the document ended.

        Unlike ``decode``, trailing data after the document is not an
        error, so this can be used on a string that has extraneous data at
        the end.

        """
        try:
            obj, end = self.scan_once(s, idx)
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        else:
            return obj, end