|  | /* | 
|  | * JSON Parser | 
|  | * | 
|  | * Copyright IBM, Corp. 2009 | 
|  | * | 
|  | * Authors: | 
|  | *  Anthony Liguori   <aliguori@us.ibm.com> | 
|  | * | 
|  | * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. | 
|  | * See the COPYING.LIB file in the top-level directory. | 
|  | * | 
|  | */ | 
|  |  | 
|  | #include "qemu/osdep.h" | 
|  | #include "qemu/ctype.h" | 
|  | #include "qemu/cutils.h" | 
|  | #include "qemu/unicode.h" | 
|  | #include "qapi/error.h" | 
|  | #include "qapi/qmp/qbool.h" | 
|  | #include "qapi/qmp/qdict.h" | 
|  | #include "qapi/qmp/qlist.h" | 
|  | #include "qapi/qmp/qnull.h" | 
|  | #include "qapi/qmp/qnum.h" | 
|  | #include "qapi/qmp/qstring.h" | 
|  | #include "json-parser-int.h" | 
|  |  | 
|  | struct JSONToken { | 
|  | JSONTokenType type; | 
|  | int x; | 
|  | int y; | 
|  | char str[]; | 
|  | }; | 
|  |  | 
|  | typedef struct JSONParserContext { | 
|  | Error *err; | 
|  | JSONToken *current; | 
|  | GQueue *buf; | 
|  | va_list *ap; | 
|  | } JSONParserContext; | 
|  |  | 
|  | #define BUG_ON(cond) assert(!(cond)) | 
|  |  | 
|  | /** | 
|  | * TODO | 
|  | * | 
|  | * 0) make errors meaningful again | 
|  | * 1) add geometry information to tokens | 
|  | * 3) should we return a parsed size? | 
|  | * 4) deal with premature EOI | 
|  | */ | 
|  |  | 
|  | static QObject *parse_value(JSONParserContext *ctxt); | 
|  |  | 
|  | /** | 
|  | * Error handler | 
|  | */ | 
|  | static void G_GNUC_PRINTF(3, 4) parse_error(JSONParserContext *ctxt, | 
|  | JSONToken *token, const char *msg, ...) | 
|  | { | 
|  | va_list ap; | 
|  | char message[1024]; | 
|  |  | 
|  | if (ctxt->err) { | 
|  | return; | 
|  | } | 
|  | va_start(ap, msg); | 
|  | vsnprintf(message, sizeof(message), msg, ap); | 
|  | va_end(ap); | 
|  | error_setg(&ctxt->err, "JSON parse error, %s", message); | 
|  | } | 
|  |  | 
/**
 * cvt4hex(): Convert four hexadecimal digits to an integer
 *
 * @s: string to convert; only s[0..3] are examined
 *
 * Returns the value (0..0xFFFF) encoded by the four hex digits at the
 * start of @s, or -1 if any of them is not a hex digit.  Scanning stops
 * at the first character that is not a hex digit, so a NUL terminator
 * within the first four characters is never read past.
 */
static int cvt4hex(const char *s)
{
    int cp = 0;
    int i;

    for (i = 0; i < 4; i++) {
        char c = s[i];
        int digit;

        /* A single classification both validates and converts */
        if (c >= '0' && c <= '9') {
            digit = c - '0';
        } else if (c >= 'a' && c <= 'f') {
            digit = 10 + c - 'a';
        } else if (c >= 'A' && c <= 'F') {
            digit = 10 + c - 'A';
        } else {
            return -1;
        }
        cp = (cp << 4) | digit;
    }
    return cp;
}
|  |  | 
|  | /** | 
|  | * parse_string(): Parse a JSON string | 
|  | * | 
|  | * From RFC 8259 "The JavaScript Object Notation (JSON) Data | 
|  | * Interchange Format": | 
|  | * | 
|  | *    char = unescaped / | 
|  | *        escape ( | 
|  | *            %x22 /          ; "    quotation mark  U+0022 | 
|  | *            %x5C /          ; \    reverse solidus U+005C | 
|  | *            %x2F /          ; /    solidus         U+002F | 
|  | *            %x62 /          ; b    backspace       U+0008 | 
|  | *            %x66 /          ; f    form feed       U+000C | 
|  | *            %x6E /          ; n    line feed       U+000A | 
|  | *            %x72 /          ; r    carriage return U+000D | 
|  | *            %x74 /          ; t    tab             U+0009 | 
|  | *            %x75 4HEXDIG )  ; uXXXX                U+XXXX | 
|  | *    escape = %x5C              ; \ | 
|  | *    quotation-mark = %x22      ; " | 
|  | *    unescaped = %x20-21 / %x23-5B / %x5D-10FFFF | 
|  | * | 
|  | * Extensions over RFC 8259: | 
|  | * - Extra escape sequence in strings: | 
|  | *   0x27 (apostrophe) is recognized after escape, too | 
|  | * - Single-quoted strings: | 
|  | *   Like double-quoted strings, except they're delimited by %x27 | 
|  | *   (apostrophe) instead of %x22 (quotation mark), and can't contain | 
|  | *   unescaped apostrophe, but can contain unescaped quotation mark. | 
|  | * | 
|  | * Note: | 
|  | * - Encoding is modified UTF-8. | 
|  | * - Invalid Unicode characters are rejected. | 
|  | * - Control characters \x00..\x1F are rejected by the lexer. | 
|  | */ | 
|  | static QString *parse_string(JSONParserContext *ctxt, JSONToken *token) | 
|  | { | 
|  | const char *ptr = token->str; | 
|  | GString *str; | 
|  | char quote; | 
|  | const char *beg; | 
|  | int cp, trailing; | 
|  | char *end; | 
|  | ssize_t len; | 
|  | char utf8_buf[5]; | 
|  |  | 
|  | assert(*ptr == '"' || *ptr == '\''); | 
|  | quote = *ptr++; | 
|  | str = g_string_new(NULL); | 
|  |  | 
|  | while (*ptr != quote) { | 
|  | assert(*ptr); | 
|  | switch (*ptr) { | 
|  | case '\\': | 
|  | beg = ptr++; | 
|  | switch (*ptr++) { | 
|  | case '"': | 
|  | g_string_append_c(str, '"'); | 
|  | break; | 
|  | case '\'': | 
|  | g_string_append_c(str, '\''); | 
|  | break; | 
|  | case '\\': | 
|  | g_string_append_c(str, '\\'); | 
|  | break; | 
|  | case '/': | 
|  | g_string_append_c(str, '/'); | 
|  | break; | 
|  | case 'b': | 
|  | g_string_append_c(str, '\b'); | 
|  | break; | 
|  | case 'f': | 
|  | g_string_append_c(str, '\f'); | 
|  | break; | 
|  | case 'n': | 
|  | g_string_append_c(str, '\n'); | 
|  | break; | 
|  | case 'r': | 
|  | g_string_append_c(str, '\r'); | 
|  | break; | 
|  | case 't': | 
|  | g_string_append_c(str, '\t'); | 
|  | break; | 
|  | case 'u': | 
|  | cp = cvt4hex(ptr); | 
|  | ptr += 4; | 
|  |  | 
|  | /* handle surrogate pairs */ | 
|  | if (cp >= 0xD800 && cp <= 0xDBFF | 
|  | && ptr[0] == '\\' && ptr[1] == 'u') { | 
|  | /* leading surrogate followed by \u */ | 
|  | cp = 0x10000 + ((cp & 0x3FF) << 10); | 
|  | trailing = cvt4hex(ptr + 2); | 
|  | if (trailing >= 0xDC00 && trailing <= 0xDFFF) { | 
|  | /* followed by trailing surrogate */ | 
|  | cp |= trailing & 0x3FF; | 
|  | ptr += 6; | 
|  | } else { | 
|  | cp = -1; /* invalid */ | 
|  | } | 
|  | } | 
|  |  | 
|  | if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) { | 
|  | parse_error(ctxt, token, | 
|  | "%.*s is not a valid Unicode character", | 
|  | (int)(ptr - beg), beg); | 
|  | goto out; | 
|  | } | 
|  | g_string_append(str, utf8_buf); | 
|  | break; | 
|  | default: | 
|  | parse_error(ctxt, token, "invalid escape sequence in string"); | 
|  | goto out; | 
|  | } | 
|  | break; | 
|  | case '%': | 
|  | if (ctxt->ap) { | 
|  | if (ptr[1] != '%') { | 
|  | parse_error(ctxt, token, "can't interpolate into string"); | 
|  | goto out; | 
|  | } | 
|  | ptr++; | 
|  | } | 
|  | /* fall through */ | 
|  | default: | 
|  | cp = mod_utf8_codepoint(ptr, 6, &end); | 
|  | if (cp < 0) { | 
|  | parse_error(ctxt, token, "invalid UTF-8 sequence in string"); | 
|  | goto out; | 
|  | } | 
|  | ptr = end; | 
|  | len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp); | 
|  | assert(len >= 0); | 
|  | g_string_append(str, utf8_buf); | 
|  | } | 
|  | } | 
|  |  | 
|  | return qstring_from_gstring(str); | 
|  |  | 
|  | out: | 
|  | g_string_free(str, true); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | /* Note: the token object returned by parser_context_peek_token or | 
|  | * parser_context_pop_token is deleted as soon as parser_context_pop_token | 
|  | * is called again. | 
|  | */ | 
|  | static JSONToken *parser_context_pop_token(JSONParserContext *ctxt) | 
|  | { | 
|  | g_free(ctxt->current); | 
|  | ctxt->current = g_queue_pop_head(ctxt->buf); | 
|  | return ctxt->current; | 
|  | } | 
|  |  | 
|  | static JSONToken *parser_context_peek_token(JSONParserContext *ctxt) | 
|  | { | 
|  | return g_queue_peek_head(ctxt->buf); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Parsing rules | 
|  | */ | 
|  | static int parse_pair(JSONParserContext *ctxt, QDict *dict) | 
|  | { | 
|  | QObject *key_obj = NULL; | 
|  | QString *key; | 
|  | QObject *value; | 
|  | JSONToken *peek, *token; | 
|  |  | 
|  | peek = parser_context_peek_token(ctxt); | 
|  | if (peek == NULL) { | 
|  | parse_error(ctxt, NULL, "premature EOI"); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | key_obj = parse_value(ctxt); | 
|  | key = qobject_to(QString, key_obj); | 
|  | if (!key) { | 
|  | parse_error(ctxt, peek, "key is not a string in object"); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | token = parser_context_pop_token(ctxt); | 
|  | if (token == NULL) { | 
|  | parse_error(ctxt, NULL, "premature EOI"); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | if (token->type != JSON_COLON) { | 
|  | parse_error(ctxt, token, "missing : in object pair"); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | value = parse_value(ctxt); | 
|  | if (value == NULL) { | 
|  | parse_error(ctxt, token, "Missing value in dict"); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | if (qdict_haskey(dict, qstring_get_str(key))) { | 
|  | parse_error(ctxt, token, "duplicate key"); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | qdict_put_obj(dict, qstring_get_str(key), value); | 
|  |  | 
|  | qobject_unref(key_obj); | 
|  | return 0; | 
|  |  | 
|  | out: | 
|  | qobject_unref(key_obj); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | static QObject *parse_object(JSONParserContext *ctxt) | 
|  | { | 
|  | QDict *dict = NULL; | 
|  | JSONToken *token, *peek; | 
|  |  | 
|  | token = parser_context_pop_token(ctxt); | 
|  | assert(token && token->type == JSON_LCURLY); | 
|  |  | 
|  | dict = qdict_new(); | 
|  |  | 
|  | peek = parser_context_peek_token(ctxt); | 
|  | if (peek == NULL) { | 
|  | parse_error(ctxt, NULL, "premature EOI"); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | if (peek->type != JSON_RCURLY) { | 
|  | if (parse_pair(ctxt, dict) == -1) { | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | token = parser_context_pop_token(ctxt); | 
|  | if (token == NULL) { | 
|  | parse_error(ctxt, NULL, "premature EOI"); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | while (token->type != JSON_RCURLY) { | 
|  | if (token->type != JSON_COMMA) { | 
|  | parse_error(ctxt, token, "expected separator in dict"); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | if (parse_pair(ctxt, dict) == -1) { | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | token = parser_context_pop_token(ctxt); | 
|  | if (token == NULL) { | 
|  | parse_error(ctxt, NULL, "premature EOI"); | 
|  | goto out; | 
|  | } | 
|  | } | 
|  | } else { | 
|  | (void)parser_context_pop_token(ctxt); | 
|  | } | 
|  |  | 
|  | return QOBJECT(dict); | 
|  |  | 
|  | out: | 
|  | qobject_unref(dict); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | static QObject *parse_array(JSONParserContext *ctxt) | 
|  | { | 
|  | QList *list = NULL; | 
|  | JSONToken *token, *peek; | 
|  |  | 
|  | token = parser_context_pop_token(ctxt); | 
|  | assert(token && token->type == JSON_LSQUARE); | 
|  |  | 
|  | list = qlist_new(); | 
|  |  | 
|  | peek = parser_context_peek_token(ctxt); | 
|  | if (peek == NULL) { | 
|  | parse_error(ctxt, NULL, "premature EOI"); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | if (peek->type != JSON_RSQUARE) { | 
|  | QObject *obj; | 
|  |  | 
|  | obj = parse_value(ctxt); | 
|  | if (obj == NULL) { | 
|  | parse_error(ctxt, token, "expecting value"); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | qlist_append_obj(list, obj); | 
|  |  | 
|  | token = parser_context_pop_token(ctxt); | 
|  | if (token == NULL) { | 
|  | parse_error(ctxt, NULL, "premature EOI"); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | while (token->type != JSON_RSQUARE) { | 
|  | if (token->type != JSON_COMMA) { | 
|  | parse_error(ctxt, token, "expected separator in list"); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | obj = parse_value(ctxt); | 
|  | if (obj == NULL) { | 
|  | parse_error(ctxt, token, "expecting value"); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | qlist_append_obj(list, obj); | 
|  |  | 
|  | token = parser_context_pop_token(ctxt); | 
|  | if (token == NULL) { | 
|  | parse_error(ctxt, NULL, "premature EOI"); | 
|  | goto out; | 
|  | } | 
|  | } | 
|  | } else { | 
|  | (void)parser_context_pop_token(ctxt); | 
|  | } | 
|  |  | 
|  | return QOBJECT(list); | 
|  |  | 
|  | out: | 
|  | qobject_unref(list); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | static QObject *parse_keyword(JSONParserContext *ctxt) | 
|  | { | 
|  | JSONToken *token; | 
|  |  | 
|  | token = parser_context_pop_token(ctxt); | 
|  | assert(token && token->type == JSON_KEYWORD); | 
|  |  | 
|  | if (!strcmp(token->str, "true")) { | 
|  | return QOBJECT(qbool_from_bool(true)); | 
|  | } else if (!strcmp(token->str, "false")) { | 
|  | return QOBJECT(qbool_from_bool(false)); | 
|  | } else if (!strcmp(token->str, "null")) { | 
|  | return QOBJECT(qnull()); | 
|  | } | 
|  | parse_error(ctxt, token, "invalid keyword '%s'", token->str); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | static QObject *parse_interpolation(JSONParserContext *ctxt) | 
|  | { | 
|  | JSONToken *token; | 
|  |  | 
|  | token = parser_context_pop_token(ctxt); | 
|  | assert(token && token->type == JSON_INTERP); | 
|  |  | 
|  | if (!strcmp(token->str, "%p")) { | 
|  | return va_arg(*ctxt->ap, QObject *); | 
|  | } else if (!strcmp(token->str, "%i")) { | 
|  | return QOBJECT(qbool_from_bool(va_arg(*ctxt->ap, int))); | 
|  | } else if (!strcmp(token->str, "%d")) { | 
|  | return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int))); | 
|  | } else if (!strcmp(token->str, "%ld")) { | 
|  | return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long))); | 
|  | } else if (!strcmp(token->str, "%lld")) { | 
|  | return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long long))); | 
|  | } else if (!strcmp(token->str, "%" PRId64)) { | 
|  | return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int64_t))); | 
|  | } else if (!strcmp(token->str, "%u")) { | 
|  | return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned int))); | 
|  | } else if (!strcmp(token->str, "%lu")) { | 
|  | return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long))); | 
|  | } else if (!strcmp(token->str, "%llu")) { | 
|  | return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long long))); | 
|  | } else if (!strcmp(token->str, "%" PRIu64)) { | 
|  | return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, uint64_t))); | 
|  | } else if (!strcmp(token->str, "%s")) { | 
|  | return QOBJECT(qstring_from_str(va_arg(*ctxt->ap, const char *))); | 
|  | } else if (!strcmp(token->str, "%f")) { | 
|  | return QOBJECT(qnum_from_double(va_arg(*ctxt->ap, double))); | 
|  | } | 
|  | parse_error(ctxt, token, "invalid interpolation '%s'", token->str); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | static QObject *parse_literal(JSONParserContext *ctxt) | 
|  | { | 
|  | JSONToken *token; | 
|  |  | 
|  | token = parser_context_pop_token(ctxt); | 
|  | assert(token); | 
|  |  | 
|  | switch (token->type) { | 
|  | case JSON_STRING: | 
|  | return QOBJECT(parse_string(ctxt, token)); | 
|  | case JSON_INTEGER: { | 
|  | /* | 
|  | * Represent JSON_INTEGER as QNUM_I64 if possible, else as | 
|  | * QNUM_U64, else as QNUM_DOUBLE.  Note that qemu_strtoi64() | 
|  | * and qemu_strtou64() fail with ERANGE when it's not | 
|  | * possible. | 
|  | * | 
|  | * qnum_get_int() will then work for any signed 64-bit | 
|  | * JSON_INTEGER, qnum_get_uint() for any unsigned 64-bit | 
|  | * integer, and qnum_get_double() both for any JSON_INTEGER | 
|  | * and any JSON_FLOAT (with precision loss for integers beyond | 
|  | * 53 bits) | 
|  | */ | 
|  | int ret; | 
|  | int64_t value; | 
|  | uint64_t uvalue; | 
|  |  | 
|  | ret = qemu_strtoi64(token->str, NULL, 10, &value); | 
|  | if (!ret) { | 
|  | return QOBJECT(qnum_from_int(value)); | 
|  | } | 
|  | assert(ret == -ERANGE); | 
|  |  | 
|  | if (token->str[0] != '-') { | 
|  | ret = qemu_strtou64(token->str, NULL, 10, &uvalue); | 
|  | if (!ret) { | 
|  | return QOBJECT(qnum_from_uint(uvalue)); | 
|  | } | 
|  | assert(ret == -ERANGE); | 
|  | } | 
|  | } | 
|  | /* fall through to JSON_FLOAT */ | 
|  | case JSON_FLOAT: | 
|  | /* FIXME dependent on locale; a pervasive issue in QEMU */ | 
|  | /* FIXME our lexer matches RFC 8259 in forbidding Inf or NaN, | 
|  | * but those might be useful extensions beyond JSON */ | 
|  | return QOBJECT(qnum_from_double(strtod(token->str, NULL))); | 
|  | default: | 
|  | abort(); | 
|  | } | 
|  | } | 
|  |  | 
|  | static QObject *parse_value(JSONParserContext *ctxt) | 
|  | { | 
|  | JSONToken *token; | 
|  |  | 
|  | token = parser_context_peek_token(ctxt); | 
|  | if (token == NULL) { | 
|  | parse_error(ctxt, NULL, "premature EOI"); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | switch (token->type) { | 
|  | case JSON_LCURLY: | 
|  | return parse_object(ctxt); | 
|  | case JSON_LSQUARE: | 
|  | return parse_array(ctxt); | 
|  | case JSON_INTERP: | 
|  | return parse_interpolation(ctxt); | 
|  | case JSON_INTEGER: | 
|  | case JSON_FLOAT: | 
|  | case JSON_STRING: | 
|  | return parse_literal(ctxt); | 
|  | case JSON_KEYWORD: | 
|  | return parse_keyword(ctxt); | 
|  | default: | 
|  | parse_error(ctxt, token, "expecting value"); | 
|  | return NULL; | 
|  | } | 
|  | } | 
|  |  | 
|  | JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr) | 
|  | { | 
|  | JSONToken *token = g_malloc(sizeof(JSONToken) + tokstr->len + 1); | 
|  |  | 
|  | token->type = type; | 
|  | memcpy(token->str, tokstr->str, tokstr->len); | 
|  | token->str[tokstr->len] = 0; | 
|  | token->x = x; | 
|  | token->y = y; | 
|  | return token; | 
|  | } | 
|  |  | 
|  | QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp) | 
|  | { | 
|  | JSONParserContext ctxt = { .buf = tokens, .ap = ap }; | 
|  | QObject *result; | 
|  |  | 
|  | result = parse_value(&ctxt); | 
|  | assert(ctxt.err || g_queue_is_empty(ctxt.buf)); | 
|  |  | 
|  | error_propagate(errp, ctxt.err); | 
|  |  | 
|  | while (!g_queue_is_empty(ctxt.buf)) { | 
|  | parser_context_pop_token(&ctxt); | 
|  | } | 
|  | g_free(ctxt.current); | 
|  |  | 
|  | return result; | 
|  | } |