json-parser.c (16327B)
1/* 2 * JSON Parser 3 * 4 * Copyright IBM, Corp. 2009 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. 10 * See the COPYING.LIB file in the top-level directory. 11 * 12 */ 13 14#include "qemu/osdep.h" 15#include "qemu/ctype.h" 16#include "qemu/cutils.h" 17#include "qemu/unicode.h" 18#include "qapi/error.h" 19#include "qapi/qmp/qbool.h" 20#include "qapi/qmp/qdict.h" 21#include "qapi/qmp/qlist.h" 22#include "qapi/qmp/qnull.h" 23#include "qapi/qmp/qnum.h" 24#include "qapi/qmp/qstring.h" 25#include "json-parser-int.h" 26 27struct JSONToken { 28 JSONTokenType type; 29 int x; 30 int y; 31 char str[]; 32}; 33 34typedef struct JSONParserContext { 35 Error *err; 36 JSONToken *current; 37 GQueue *buf; 38 va_list *ap; 39} JSONParserContext; 40 41#define BUG_ON(cond) assert(!(cond)) 42 43/** 44 * TODO 45 * 46 * 0) make errors meaningful again 47 * 1) add geometry information to tokens 48 * 3) should we return a parsed size? 49 * 4) deal with premature EOI 50 */ 51 52static QObject *parse_value(JSONParserContext *ctxt); 53 54/** 55 * Error handler 56 */ 57static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt, 58 JSONToken *token, const char *msg, ...) 59{ 60 va_list ap; 61 char message[1024]; 62 63 if (ctxt->err) { 64 return; 65 } 66 va_start(ap, msg); 67 vsnprintf(message, sizeof(message), msg, ap); 68 va_end(ap); 69 error_setg(&ctxt->err, "JSON parse error, %s", message); 70} 71 72static int cvt4hex(const char *s) 73{ 74 int cp, i; 75 76 cp = 0; 77 for (i = 0; i < 4; i++) { 78 if (!qemu_isxdigit(s[i])) { 79 return -1; 80 } 81 cp <<= 4; 82 if (s[i] >= '0' && s[i] <= '9') { 83 cp |= s[i] - '0'; 84 } else if (s[i] >= 'a' && s[i] <= 'f') { 85 cp |= 10 + s[i] - 'a'; 86 } else if (s[i] >= 'A' && s[i] <= 'F') { 87 cp |= 10 + s[i] - 'A'; 88 } else { 89 return -1; 90 } 91 } 92 return cp; 93} 94 95/** 96 * parse_string(): Parse a JSON string 97 * 98 * From RFC 8259 "The JavaScript Object Notation (JSON) Data 99 * Interchange Format": 100 * 101 * char = unescaped / 102 * escape ( 103 * %x22 / ; " quotation mark U+0022 104 * %x5C / ; \ reverse solidus U+005C 105 * %x2F / ; / solidus U+002F 106 * %x62 / ; b backspace U+0008 107 * %x66 / ; f form feed U+000C 108 * %x6E / ; n line feed U+000A 109 * %x72 / ; r carriage return U+000D 110 * %x74 / ; t tab U+0009 111 * %x75 4HEXDIG ) ; uXXXX U+XXXX 112 * escape = %x5C ; \ 113 * quotation-mark = %x22 ; " 114 * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF 115 * 116 * Extensions over RFC 8259: 117 * - Extra escape sequence in strings: 118 * 0x27 (apostrophe) is recognized after escape, too 119 * - Single-quoted strings: 120 * Like double-quoted strings, except they're delimited by %x27 121 * (apostrophe) instead of %x22 (quotation mark), and can't contain 122 * unescaped apostrophe, but can contain unescaped quotation mark. 123 * 124 * Note: 125 * - Encoding is modified UTF-8. 126 * - Invalid Unicode characters are rejected. 127 * - Control characters \x00..\x1F are rejected by the lexer. 128 */ 129static QString *parse_string(JSONParserContext *ctxt, JSONToken *token) 130{ 131 const char *ptr = token->str; 132 GString *str; 133 char quote; 134 const char *beg; 135 int cp, trailing; 136 char *end; 137 ssize_t len; 138 char utf8_buf[5]; 139 140 assert(*ptr == '"' || *ptr == '\''); 141 quote = *ptr++; 142 str = g_string_new(NULL); 143 144 while (*ptr != quote) { 145 assert(*ptr); 146 switch (*ptr) { 147 case '\\': 148 beg = ptr++; 149 switch (*ptr++) { 150 case '"': 151 g_string_append_c(str, '"'); 152 break; 153 case '\'': 154 g_string_append_c(str, '\''); 155 break; 156 case '\\': 157 g_string_append_c(str, '\\'); 158 break; 159 case '/': 160 g_string_append_c(str, '/'); 161 break; 162 case 'b': 163 g_string_append_c(str, '\b'); 164 break; 165 case 'f': 166 g_string_append_c(str, '\f'); 167 break; 168 case 'n': 169 g_string_append_c(str, '\n'); 170 break; 171 case 'r': 172 g_string_append_c(str, '\r'); 173 break; 174 case 't': 175 g_string_append_c(str, '\t'); 176 break; 177 case 'u': 178 cp = cvt4hex(ptr); 179 ptr += 4; 180 181 /* handle surrogate pairs */ 182 if (cp >= 0xD800 && cp <= 0xDBFF 183 && ptr[0] == '\\' && ptr[1] == 'u') { 184 /* leading surrogate followed by \u */ 185 cp = 0x10000 + ((cp & 0x3FF) << 10); 186 trailing = cvt4hex(ptr + 2); 187 if (trailing >= 0xDC00 && trailing <= 0xDFFF) { 188 /* followed by trailing surrogate */ 189 cp |= trailing & 0x3FF; 190 ptr += 6; 191 } else { 192 cp = -1; /* invalid */ 193 } 194 } 195 196 if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) { 197 parse_error(ctxt, token, 198 "%.*s is not a valid Unicode character", 199 (int)(ptr - beg), beg); 200 goto out; 201 } 202 g_string_append(str, utf8_buf); 203 break; 204 default: 205 parse_error(ctxt, token, "invalid escape sequence in string"); 206 goto out; 207 } 208 break; 209 case '%': 210 if (ctxt->ap) { 211 if (ptr[1] != '%') { 212 parse_error(ctxt, token, "can't interpolate into string"); 213 goto out; 214 } 215 ptr++; 216 } 217 /* fall through */ 218 default: 219 cp = mod_utf8_codepoint(ptr, 6, &end); 220 if (cp < 0) { 221 parse_error(ctxt, token, "invalid UTF-8 sequence in string"); 222 goto out; 223 } 224 ptr = end; 225 len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp); 226 assert(len >= 0); 227 g_string_append(str, utf8_buf); 228 } 229 } 230 231 return qstring_from_gstring(str); 232 233out: 234 g_string_free(str, true); 235 return NULL; 236} 237 238/* Note: the token object returned by parser_context_peek_token or 239 * parser_context_pop_token is deleted as soon as parser_context_pop_token 240 * is called again. 241 */ 242static JSONToken *parser_context_pop_token(JSONParserContext *ctxt) 243{ 244 g_free(ctxt->current); 245 ctxt->current = g_queue_pop_head(ctxt->buf); 246 return ctxt->current; 247} 248 249static JSONToken *parser_context_peek_token(JSONParserContext *ctxt) 250{ 251 return g_queue_peek_head(ctxt->buf); 252} 253 254/** 255 * Parsing rules 256 */ 257static int parse_pair(JSONParserContext *ctxt, QDict *dict) 258{ 259 QObject *key_obj = NULL; 260 QString *key; 261 QObject *value; 262 JSONToken *peek, *token; 263 264 peek = parser_context_peek_token(ctxt); 265 if (peek == NULL) { 266 parse_error(ctxt, NULL, "premature EOI"); 267 goto out; 268 } 269 270 key_obj = parse_value(ctxt); 271 key = qobject_to(QString, key_obj); 272 if (!key) { 273 parse_error(ctxt, peek, "key is not a string in object"); 274 goto out; 275 } 276 277 token = parser_context_pop_token(ctxt); 278 if (token == NULL) { 279 parse_error(ctxt, NULL, "premature EOI"); 280 goto out; 281 } 282 283 if (token->type != JSON_COLON) { 284 parse_error(ctxt, token, "missing : in object pair"); 285 goto out; 286 } 287 288 value = parse_value(ctxt); 289 if (value == NULL) { 290 parse_error(ctxt, token, "Missing value in dict"); 291 goto out; 292 } 293 294 if (qdict_haskey(dict, qstring_get_str(key))) { 295 parse_error(ctxt, token, "duplicate key"); 296 goto out; 297 } 298 299 qdict_put_obj(dict, qstring_get_str(key), value); 300 301 qobject_unref(key_obj); 302 return 0; 303 304out: 305 qobject_unref(key_obj); 306 return -1; 307} 308 309static QObject *parse_object(JSONParserContext *ctxt) 310{ 311 QDict *dict = NULL; 312 JSONToken *token, *peek; 313 314 token = parser_context_pop_token(ctxt); 315 assert(token && token->type == JSON_LCURLY); 316 317 dict = qdict_new(); 318 319 peek = parser_context_peek_token(ctxt); 320 if (peek == NULL) { 321 parse_error(ctxt, NULL, "premature EOI"); 322 goto out; 323 } 324 325 if (peek->type != JSON_RCURLY) { 326 if (parse_pair(ctxt, dict) == -1) { 327 goto out; 328 } 329 330 token = parser_context_pop_token(ctxt); 331 if (token == NULL) { 332 parse_error(ctxt, NULL, "premature EOI"); 333 goto out; 334 } 335 336 while (token->type != JSON_RCURLY) { 337 if (token->type != JSON_COMMA) { 338 parse_error(ctxt, token, "expected separator in dict"); 339 goto out; 340 } 341 342 if (parse_pair(ctxt, dict) == -1) { 343 goto out; 344 } 345 346 token = parser_context_pop_token(ctxt); 347 if (token == NULL) { 348 parse_error(ctxt, NULL, "premature EOI"); 349 goto out; 350 } 351 } 352 } else { 353 (void)parser_context_pop_token(ctxt); 354 } 355 356 return QOBJECT(dict); 357 358out: 359 qobject_unref(dict); 360 return NULL; 361} 362 363static QObject *parse_array(JSONParserContext *ctxt) 364{ 365 QList *list = NULL; 366 JSONToken *token, *peek; 367 368 token = parser_context_pop_token(ctxt); 369 assert(token && token->type == JSON_LSQUARE); 370 371 list = qlist_new(); 372 373 peek = parser_context_peek_token(ctxt); 374 if (peek == NULL) { 375 parse_error(ctxt, NULL, "premature EOI"); 376 goto out; 377 } 378 379 if (peek->type != JSON_RSQUARE) { 380 QObject *obj; 381 382 obj = parse_value(ctxt); 383 if (obj == NULL) { 384 parse_error(ctxt, token, "expecting value"); 385 goto out; 386 } 387 388 qlist_append_obj(list, obj); 389 390 token = parser_context_pop_token(ctxt); 391 if (token == NULL) { 392 parse_error(ctxt, NULL, "premature EOI"); 393 goto out; 394 } 395 396 while (token->type != JSON_RSQUARE) { 397 if (token->type != JSON_COMMA) { 398 parse_error(ctxt, token, "expected separator in list"); 399 goto out; 400 } 401 402 obj = parse_value(ctxt); 403 if (obj == NULL) { 404 parse_error(ctxt, token, "expecting value"); 405 goto out; 406 } 407 408 qlist_append_obj(list, obj); 409 410 token = parser_context_pop_token(ctxt); 411 if (token == NULL) { 412 parse_error(ctxt, NULL, "premature EOI"); 413 goto out; 414 } 415 } 416 } else { 417 (void)parser_context_pop_token(ctxt); 418 } 419 420 return QOBJECT(list); 421 422out: 423 qobject_unref(list); 424 return NULL; 425} 426 427static QObject *parse_keyword(JSONParserContext *ctxt) 428{ 429 JSONToken *token; 430 431 token = parser_context_pop_token(ctxt); 432 assert(token && token->type == JSON_KEYWORD); 433 434 if (!strcmp(token->str, "true")) { 435 return QOBJECT(qbool_from_bool(true)); 436 } else if (!strcmp(token->str, "false")) { 437 return QOBJECT(qbool_from_bool(false)); 438 } else if (!strcmp(token->str, "null")) { 439 return QOBJECT(qnull()); 440 } 441 parse_error(ctxt, token, "invalid keyword '%s'", token->str); 442 return NULL; 443} 444 445static QObject *parse_interpolation(JSONParserContext *ctxt) 446{ 447 JSONToken *token; 448 449 token = parser_context_pop_token(ctxt); 450 assert(token && token->type == JSON_INTERP); 451 452 if (!strcmp(token->str, "%p")) { 453 return va_arg(*ctxt->ap, QObject *); 454 } else if (!strcmp(token->str, "%i")) { 455 return QOBJECT(qbool_from_bool(va_arg(*ctxt->ap, int))); 456 } else if (!strcmp(token->str, "%d")) { 457 return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int))); 458 } else if (!strcmp(token->str, "%ld")) { 459 return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long))); 460 } else if (!strcmp(token->str, "%lld")) { 461 return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long long))); 462 } else if (!strcmp(token->str, "%" PRId64)) { 463 return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int64_t))); 464 } else if (!strcmp(token->str, "%u")) { 465 return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned int))); 466 } else if (!strcmp(token->str, "%lu")) { 467 return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long))); 468 } else if (!strcmp(token->str, "%llu")) { 469 return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long long))); 470 } else if (!strcmp(token->str, "%" PRIu64)) { 471 return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, uint64_t))); 472 } else if (!strcmp(token->str, "%s")) { 473 return QOBJECT(qstring_from_str(va_arg(*ctxt->ap, const char *))); 474 } else if (!strcmp(token->str, "%f")) { 475 return QOBJECT(qnum_from_double(va_arg(*ctxt->ap, double))); 476 } 477 parse_error(ctxt, token, "invalid interpolation '%s'", token->str); 478 return NULL; 479} 480 481static QObject *parse_literal(JSONParserContext *ctxt) 482{ 483 JSONToken *token; 484 485 token = parser_context_pop_token(ctxt); 486 assert(token); 487 488 switch (token->type) { 489 case JSON_STRING: 490 return QOBJECT(parse_string(ctxt, token)); 491 case JSON_INTEGER: { 492 /* 493 * Represent JSON_INTEGER as QNUM_I64 if possible, else as 494 * QNUM_U64, else as QNUM_DOUBLE. Note that qemu_strtoi64() 495 * and qemu_strtou64() fail with ERANGE when it's not 496 * possible. 497 * 498 * qnum_get_int() will then work for any signed 64-bit 499 * JSON_INTEGER, qnum_get_uint() for any unsigned 64-bit 500 * integer, and qnum_get_double() both for any JSON_INTEGER 501 * and any JSON_FLOAT (with precision loss for integers beyond 502 * 53 bits) 503 */ 504 int ret; 505 int64_t value; 506 uint64_t uvalue; 507 508 ret = qemu_strtoi64(token->str, NULL, 10, &value); 509 if (!ret) { 510 return QOBJECT(qnum_from_int(value)); 511 } 512 assert(ret == -ERANGE); 513 514 if (token->str[0] != '-') { 515 ret = qemu_strtou64(token->str, NULL, 10, &uvalue); 516 if (!ret) { 517 return QOBJECT(qnum_from_uint(uvalue)); 518 } 519 assert(ret == -ERANGE); 520 } 521 } 522 /* fall through to JSON_FLOAT */ 523 case JSON_FLOAT: 524 /* FIXME dependent on locale; a pervasive issue in QEMU */ 525 /* FIXME our lexer matches RFC 8259 in forbidding Inf or NaN, 526 * but those might be useful extensions beyond JSON */ 527 return QOBJECT(qnum_from_double(strtod(token->str, NULL))); 528 default: 529 abort(); 530 } 531} 532 533static QObject *parse_value(JSONParserContext *ctxt) 534{ 535 JSONToken *token; 536 537 token = parser_context_peek_token(ctxt); 538 if (token == NULL) { 539 parse_error(ctxt, NULL, "premature EOI"); 540 return NULL; 541 } 542 543 switch (token->type) { 544 case JSON_LCURLY: 545 return parse_object(ctxt); 546 case JSON_LSQUARE: 547 return parse_array(ctxt); 548 case JSON_INTERP: 549 return parse_interpolation(ctxt); 550 case JSON_INTEGER: 551 case JSON_FLOAT: 552 case JSON_STRING: 553 return parse_literal(ctxt); 554 case JSON_KEYWORD: 555 return parse_keyword(ctxt); 556 default: 557 parse_error(ctxt, token, "expecting value"); 558 return NULL; 559 } 560} 561 562JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr) 563{ 564 JSONToken *token = g_malloc(sizeof(JSONToken) + tokstr->len + 1); 565 566 token->type = type; 567 memcpy(token->str, tokstr->str, tokstr->len); 568 token->str[tokstr->len] = 0; 569 token->x = x; 570 token->y = y; 571 return token; 572} 573 574QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp) 575{ 576 JSONParserContext ctxt = { .buf = tokens, .ap = ap }; 577 QObject *result; 578 579 result = parse_value(&ctxt); 580 assert(ctxt.err || g_queue_is_empty(ctxt.buf)); 581 582 error_propagate(errp, ctxt.err); 583 584 while (!g_queue_is_empty(ctxt.buf)) { 585 parser_context_pop_token(&ctxt); 586 } 587 g_free(ctxt.current); 588 589 return result; 590}