flatcc_json_parser.h (36496B)
1 #ifndef FLATCC_JSON_PARSE_H 2 #define FLATCC_JSON_PARSE_H 3 4 #ifdef __cplusplus 5 extern "C" { 6 #endif 7 8 /* 9 * JSON RFC: 10 * http://www.ietf.org/rfc/rfc4627.txt?number=4627 11 * 12 * With several flatbuffers specific extensions. 13 */ 14 15 #include <stdlib.h> 16 #include <string.h> 17 18 #include "flatcc_rtconfig.h" 19 #include "flatcc_builder.h" 20 #include "flatcc_unaligned.h" 21 22 #define PDIAGNOSTIC_IGNORE_UNUSED 23 #include "portable/pdiagnostic_push.h" 24 25 typedef uint32_t flatcc_json_parser_flags_t; 26 static const flatcc_json_parser_flags_t flatcc_json_parser_f_skip_unknown = 1; 27 static const flatcc_json_parser_flags_t flatcc_json_parser_f_force_add = 2; 28 static const flatcc_json_parser_flags_t flatcc_json_parser_f_with_size = 4; 29 static const flatcc_json_parser_flags_t flatcc_json_parser_f_skip_array_overflow = 8; 30 static const flatcc_json_parser_flags_t flatcc_json_parser_f_reject_array_underflow = 16; 31 32 #define FLATCC_JSON_PARSE_ERROR_MAP(XX) \ 33 XX(ok, "ok") \ 34 XX(eof, "eof") \ 35 XX(deep_nesting, "deep nesting") \ 36 XX(trailing_comma, "trailing comma") \ 37 XX(expected_colon, "expected colon") \ 38 XX(unexpected_character, "unexpected character") \ 39 XX(invalid_numeric, "invalid numeric") \ 40 XX(overflow, "overflow") \ 41 XX(underflow, "underflow") \ 42 XX(unbalanced_array, "unbalanced array") \ 43 XX(unbalanced_object, "unbalanced object") \ 44 XX(precision_loss, "precision loss") \ 45 XX(float_unexpected, "float unexpected") \ 46 XX(unknown_symbol, "unknown symbol") \ 47 XX(unquoted_symbolic_list, "unquoted list of symbols") \ 48 XX(unknown_union, "unknown union type") \ 49 XX(expected_string, "expected string") \ 50 XX(invalid_character, "invalid character") \ 51 XX(invalid_escape, "invalid escape") \ 52 XX(invalid_type, "invalid type") \ 53 XX(unterminated_string, "unterminated string") \ 54 XX(expected_object, "expected object") \ 55 XX(expected_array, "expected array") \ 56 XX(expected_scalar, "expected literal or symbolic scalar") \ 57 XX(expected_union_type, "expected union type") \ 58 XX(union_none_present, "union present with type NONE") \ 59 XX(union_none_not_null, "union of type NONE is not null") \ 60 XX(union_incomplete, "table has incomplete union") \ 61 XX(duplicate, "table has duplicate field") \ 62 XX(required, "required field missing") \ 63 XX(union_vector_length, "union vector length mismatch") \ 64 XX(base64, "invalid base64 content") \ 65 XX(base64url, "invalid base64url content") \ 66 XX(array_underflow, "fixed length array underflow") \ 67 XX(array_overflow, "fixed length array overflow") \ 68 XX(runtime, "runtime error") \ 69 XX(not_supported, "not supported") 70 71 enum flatcc_json_parser_error_no { 72 #define XX(no, str) flatcc_json_parser_error_##no, 73 FLATCC_JSON_PARSE_ERROR_MAP(XX) 74 #undef XX 75 }; 76 77 const char *flatcc_json_parser_error_string(int err); 78 79 #define flatcc_json_parser_ok flatcc_json_parser_error_ok 80 #define flatcc_json_parser_eof flatcc_json_parser_error_eof 81 82 /* 83 * The struct may be zero initialized in which case the line count will 84 * start at line zero, or the line may be set to 1 initially. The ctx 85 * is only used for error reporting and tracking non-standard unquoted 86 * ctx. 87 * 88 * `ctx` may for example hold a flatcc_builder_t pointer. 89 */ 90 typedef struct flatcc_json_parser_ctx flatcc_json_parser_t; 91 struct flatcc_json_parser_ctx { 92 flatcc_builder_t *ctx; 93 const char *line_start; 94 flatcc_json_parser_flags_t flags; 95 #if FLATCC_JSON_PARSE_ALLOW_UNQUOTED 96 int unquoted; 97 #endif 98 99 int line, pos; 100 int error; 101 const char *start; 102 const char *end; 103 const char *error_loc; 104 /* Set at end of successful parse. */ 105 const char *end_loc; 106 }; 107 108 static inline int flatcc_json_parser_get_error(flatcc_json_parser_t *ctx) 109 { 110 return ctx->error; 111 } 112 113 static inline void flatcc_json_parser_init(flatcc_json_parser_t *ctx, flatcc_builder_t *B, const char *buf, const char *end, flatcc_json_parser_flags_t flags) 114 { 115 memset(ctx, 0, sizeof(*ctx)); 116 ctx->ctx = B; 117 ctx->line_start = buf; 118 ctx->line = 1; 119 ctx->flags = flags; 120 /* These are not needed for parsing, but may be helpful in reporting etc. */ 121 ctx->start = buf; 122 ctx->end = end; 123 ctx->error_loc = buf; 124 } 125 126 const char *flatcc_json_parser_set_error(flatcc_json_parser_t *ctx, const char *loc, const char *end, int reason); 127 128 /* 129 * Wide space is not necessarily beneficial in the typical space, but it 130 * also isn't expensive so it may be added when there are applications 131 * that can benefit. 132 */ 133 const char *flatcc_json_parser_space_ext(flatcc_json_parser_t *ctx, const char *buf, const char *end); 134 135 static inline const char *flatcc_json_parser_space(flatcc_json_parser_t *ctx, const char *buf, const char *end) 136 { 137 if (end - buf > 1) { 138 if (buf[0] > 0x20) { 139 return buf; 140 } 141 if (buf[0] == 0x20 && buf[1] > 0x20) { 142 return buf + 1; 143 } 144 } 145 return flatcc_json_parser_space_ext(ctx, buf, end); 146 } 147 148 149 static inline const char *flatcc_json_parser_string_start(flatcc_json_parser_t *ctx, const char *buf, const char *end) 150 { 151 if (buf == end || *buf != '\"') { 152 return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_expected_string); 153 } 154 return ++buf; 155 } 156 157 static inline const char *flatcc_json_parser_string_end(flatcc_json_parser_t *ctx, const char *buf, const char *end) 158 { 159 if (buf == end || *buf != '\"') { 160 return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_unterminated_string); 161 } 162 return ++buf; 163 } 164 165 /* 166 * Parse a string as a fixed length char array as `s` with length `n`. 167 * and raise errors according to overflow/underflow runtime flags. Zero 168 * and truncate as needed. A trailing zero is not inserted if the input 169 * is at least the same length as the char array. 170 * 171 * Runtime flags: `skip_array_overflow`, `pad_array_underflow`. 172 */ 173 const char *flatcc_json_parser_char_array(flatcc_json_parser_t *ctx, 174 const char *buf, const char *end, char *s, size_t n); 175 176 /* 177 * Creates a string. Returns *ref == 0 on unrecoverable error or 178 * sets *ref to a valid new string reference. 179 */ 180 const char *flatcc_json_parser_build_string(flatcc_json_parser_t *ctx, 181 const char *buf, const char *end, flatcc_builder_ref_t *ref); 182 183 typedef char flatcc_json_parser_escape_buffer_t[5]; 184 /* 185 * If the buffer does not hold a valid escape sequence, an error is 186 * returned with code[0] = 0/ 187 * 188 * Otherwise code[0] the length (1-4) of the remaining 189 * characters in the code, transcoded from the escape sequence 190 * where a length of 4 only happens with escapaped surrogate pairs. 191 * 192 * The JSON extension `\xXX` is supported and may produced invalid UTF-8 193 * characters such as 0xff. The standard JSON escape `\uXXXX` is not 194 * checked for invalid code points and may produce invalid UTF-8. 195 * 196 * Regular characters are expected to valid UTF-8 but they are not checked 197 * and may therefore produce invalid UTF-8. 198 * 199 * Control characters within a string are rejected except in the 200 * standard JSON escpaped form for `\n \r \t \b \f`. 201 * 202 * Additional escape codes as per standard JSON: `\\ \/ \"`. 203 */ 204 const char *flatcc_json_parser_string_escape(flatcc_json_parser_t *ctx, const char *buf, const char *end, flatcc_json_parser_escape_buffer_t code); 205 206 /* 207 * Parses the longest unescaped run of string content followed by either 208 * an escape encoding, string termination, or error. 209 */ 210 const char *flatcc_json_parser_string_part(flatcc_json_parser_t *ctx, const char *buf, const char *end); 211 212 static inline const char *flatcc_json_parser_symbol_start(flatcc_json_parser_t *ctx, const char *buf, const char *end) 213 { 214 if (buf == end) { 215 return buf; 216 } 217 if (*buf == '\"') { 218 ++buf; 219 #if FLATCC_JSON_PARSE_ALLOW_UNQUOTED 220 ctx->unquoted = 0; 221 #endif 222 } else { 223 #if FLATCC_JSON_PARSE_ALLOW_UNQUOTED 224 if (*buf == '.') { 225 return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_unexpected_character); 226 } 227 ctx->unquoted = 1; 228 #else 229 return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_unexpected_character); 230 #endif 231 } 232 return buf; 233 } 234 235 static inline uint64_t flatcc_json_parser_symbol_part_ext(const char *buf, const char *end) 236 { 237 uint64_t w = 0; 238 size_t n = (size_t)(end - buf); 239 const uint8_t *ubuf = (const uint8_t*)buf; 240 241 if (n > 8) { 242 n = 8; 243 } 244 /* This can bloat inlining for a rarely executed case. */ 245 #if 1 246 switch (n) { 247 case 8: 248 w |= ((uint64_t)ubuf[7]) << (0 * 8); 249 goto lbl_n_7; 250 case 7: 251 lbl_n_7: 252 w |= ((uint64_t)ubuf[6]) << (1 * 8); 253 goto lbl_n_6; 254 case 6: 255 lbl_n_6: 256 w |= ((uint64_t)ubuf[5]) << (2 * 8); 257 goto lbl_n_5; 258 case 5: 259 lbl_n_5: 260 w |= ((uint64_t)ubuf[4]) << (3 * 8); 261 goto lbl_n_4; 262 case 4: 263 lbl_n_4: 264 w |= ((uint64_t)ubuf[3]) << (4 * 8); 265 goto lbl_n_3; 266 case 3: 267 lbl_n_3: 268 w |= ((uint64_t)ubuf[2]) << (5 * 8); 269 goto lbl_n_2; 270 case 2: 271 lbl_n_2: 272 w |= ((uint64_t)ubuf[1]) << (6 * 8); 273 goto lbl_n_1; 274 case 1: 275 lbl_n_1: 276 w |= ((uint64_t)ubuf[0]) << (7 * 8); 277 break; 278 case 0: 279 break; 280 } 281 #else 282 /* But this is hardly much of an improvement. */ 283 { 284 size_t i; 285 for (i = 0; i < n; ++i) { 286 w <<= 8; 287 if (i < n) { 288 w = buf[i]; 289 } 290 } 291 } 292 #endif 293 return w; 294 } 295 296 /* 297 * Read out string as a big endian word. This allows for trie lookup, 298 * also when trailing characters are beyond keyword. This assumes the 299 * external words tested against are valid and therefore there need be 300 * no checks here. If a match is not made, the symbol_end function will 301 * consume and check any unmatched content - from _before_ this function 302 * was called - i.e. the returned buffer is tentative for use only if we 303 * accept the part returned here. 304 * 305 * Used for both symbols and symbolic constants. 306 */ 307 static inline uint64_t flatcc_json_parser_symbol_part(const char *buf, const char *end) 308 { 309 size_t n = (size_t)(end - buf); 310 311 #if FLATCC_ALLOW_UNALIGNED_ACCESS 312 if (n >= 8) { 313 return be64toh(*(uint64_t *)buf); 314 } 315 #endif 316 return flatcc_json_parser_symbol_part_ext(buf, end); 317 } 318 319 /* Don't allow space in dot notation neither inside nor outside strings. */ 320 static inline const char *flatcc_json_parser_match_scope(flatcc_json_parser_t *ctx, const char *buf, const char *end, int pos) 321 { 322 const char *mark = buf; 323 324 (void)ctx; 325 326 if (end - buf <= pos) { 327 return mark; 328 } 329 if (buf[pos] != '.') { 330 return mark; 331 } 332 return buf + pos + 1; 333 } 334 335 const char *flatcc_json_parser_match_constant(flatcc_json_parser_t *ctx, const char *buf, const char *end, int pos, int *more); 336 337 /* We allow '.' in unquoted symbols, but not at the start or end. */ 338 static inline const char *flatcc_json_parser_symbol_end(flatcc_json_parser_t *ctx, const char *buf, const char *end) 339 { 340 char c, clast = 0; 341 342 343 #if FLATCC_JSON_PARSE_ALLOW_UNQUOTED 344 if (ctx->unquoted) { 345 while (buf != end && *buf > 0x20) { 346 clast = c = *buf; 347 if (c == '_' || c == '.' || (c & 0x80) || (c >= '0' && c <= '9')) { 348 ++buf; 349 continue; 350 } 351 /* Lower case. */ 352 c |= 0x20; 353 if (c >= 'a' && c <= 'z') { 354 ++buf; 355 continue; 356 } 357 break; 358 } 359 if (clast == '.') { 360 return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_unexpected_character); 361 } 362 } else { 363 #else 364 { 365 #endif 366 while (buf != end && *buf != '\"') { 367 if (*buf == '\\') { 368 if (end - buf < 2) { 369 break; 370 } 371 ++buf; 372 } 373 ++buf; 374 } 375 if (buf == end || *buf != '\"') { 376 return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_unterminated_string); 377 } 378 ++buf; 379 } 380 return buf; 381 } 382 383 static inline const char *flatcc_json_parser_constant_start(flatcc_json_parser_t *ctx, const char *buf, const char *end) 384 { 385 buf = flatcc_json_parser_symbol_start(ctx, buf, end); 386 #if FLATCC_JSON_PARSE_ALLOW_UNQUOTED 387 if (!ctx->unquoted) { 388 #else 389 { 390 #endif 391 buf = flatcc_json_parser_space(ctx, buf, end); 392 } 393 return buf; 394 } 395 396 static inline const char *flatcc_json_parser_object_start(flatcc_json_parser_t *ctx, const char *buf, const char *end, int *more) 397 { 398 if (buf == end || *buf != '{') { 399 *more = 0; 400 return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_expected_object); 401 } 402 buf = flatcc_json_parser_space(ctx, buf + 1, end); 403 if (buf != end && *buf == '}') { 404 *more = 0; 405 return flatcc_json_parser_space(ctx, buf + 1, end); 406 } 407 *more = 1; 408 return buf; 409 } 410 411 static inline const char *flatcc_json_parser_object_end(flatcc_json_parser_t *ctx, const char *buf, 412 const char *end, int *more) 413 { 414 buf = flatcc_json_parser_space(ctx, buf, end); 415 if (buf == end) { 416 *more = 0; 417 return buf; 418 } 419 if (*buf != ',') { 420 *more = 0; 421 if (*buf != '}') { 422 return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_unbalanced_object); 423 } else { 424 return flatcc_json_parser_space(ctx, buf + 1, end); 425 } 426 } 427 buf = flatcc_json_parser_space(ctx, buf + 1, end); 428 if (buf == end) { 429 *more = 0; 430 return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_unbalanced_object); 431 } 432 #if FLATCC_JSON_PARSE_ALLOW_TRAILING_COMMA 433 if (*buf == '}') { 434 *more = 0; 435 return flatcc_json_parser_space(ctx, buf + 1, end); 436 } 437 #endif 438 *more = 1; 439 return buf; 440 } 441 442 static inline const char *flatcc_json_parser_array_start(flatcc_json_parser_t *ctx, const char *buf, const char *end, int *more) 443 { 444 if (buf == end || *buf != '[') { 445 *more = 0; 446 return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_expected_array); 447 } 448 buf = flatcc_json_parser_space(ctx, buf + 1, end); 449 if (buf != end && *buf == ']') { 450 *more = 0; 451 return flatcc_json_parser_space(ctx, buf + 1, end); 452 } 453 *more = 1; 454 return buf; 455 } 456 457 static inline const char *flatcc_json_parser_array_end(flatcc_json_parser_t *ctx, const char *buf, 458 const char *end, int *more) 459 { 460 buf = flatcc_json_parser_space(ctx, buf, end); 461 if (buf == end) { 462 *more = 0; 463 return buf; 464 } 465 if (*buf != ',') { 466 *more = 0; 467 if (*buf != ']') { 468 return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_unbalanced_array); 469 } else { 470 return flatcc_json_parser_space(ctx, buf + 1, end); 471 } 472 } 473 buf = flatcc_json_parser_space(ctx, buf + 1, end); 474 if (buf == end) { 475 *more = 0; 476 return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_unbalanced_array); 477 } 478 #if FLATCC_JSON_PARSE_ALLOW_TRAILING_COMMA 479 if (*buf == ']') { 480 *more = 0; 481 return flatcc_json_parser_space(ctx, buf + 1, end); 482 } 483 #endif 484 *more = 1; 485 return buf; 486 } 487 488 /* 489 * Detects if a symbol terminates at a given `pos` relative to the 490 * buffer pointer, or return fast. 491 * 492 * Failure to match is not an error but a recommendation to try 493 * alternative longer suffixes - only if such do not exist will 494 * there be an error. If a match was not eventually found, 495 * the `flatcc_json_parser_unmatched_symbol` should be called to consume 496 * the symbol and generate error messages. 497 * 498 * If a match was detected, ':' and surrounding space is consumed, 499 * or an error is generated. 500 */ 501 static inline const char *flatcc_json_parser_match_symbol(flatcc_json_parser_t *ctx, const char *buf, 502 const char *end, int pos) 503 { 504 const char *mark = buf; 505 506 if (end - buf <= pos) { 507 return mark; 508 } 509 #if FLATCC_JSON_PARSE_ALLOW_UNQUOTED 510 if (ctx->unquoted) { 511 if (buf[pos] > 0x20 && buf[pos] != ':') { 512 return mark; 513 } 514 buf += pos; 515 ctx->unquoted = 0; 516 } else { 517 #else 518 { 519 #endif 520 if (buf[pos] != '\"') { 521 return mark; 522 } 523 buf += pos + 1; 524 } 525 buf = flatcc_json_parser_space(ctx, buf, end); 526 if (buf != end && *buf == ':') { 527 ++buf; 528 return flatcc_json_parser_space(ctx, buf, end); 529 } 530 return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_expected_colon); 531 } 532 533 static inline const char *flatcc_json_parser_match_type_suffix(flatcc_json_parser_t *ctx, const char *buf, const char *end, int pos) 534 { 535 if (end - buf <= pos + 5) { 536 return buf; 537 } 538 if (memcmp(buf + pos, "_type", 5)) { 539 return buf; 540 } 541 return flatcc_json_parser_match_symbol(ctx, buf, end, pos + 5); 542 } 543 544 const char *flatcc_json_parser_unmatched_symbol(flatcc_json_parser_t *ctx, const char *buf, const char *end); 545 546 static inline const char *flatcc_json_parser_coerce_uint64( 547 flatcc_json_parser_t *ctx, const char *buf, 548 const char *end, int value_sign, uint64_t value, uint64_t *v) 549 { 550 if (value_sign) { 551 return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_underflow); 552 } 553 *v = value; 554 return buf; 555 } 556 557 static inline const char *flatcc_json_parser_coerce_bool(flatcc_json_parser_t *ctx, const char *buf, 558 const char *end, int value_sign, uint64_t value, uint8_t *v) 559 { 560 if (value_sign) { 561 return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_underflow); 562 } 563 *v = (uint8_t)!!value; 564 return buf; 565 } 566 567 #define __flatcc_json_parser_define_coerce_unsigned(type, basetype, uctype) \ 568 static inline const char *flatcc_json_parser_coerce_ ## type( \ 569 flatcc_json_parser_t *ctx, const char *buf, \ 570 const char *end, int value_sign, uint64_t value, basetype *v) \ 571 { \ 572 if (value_sign) { \ 573 return flatcc_json_parser_set_error(ctx, buf, end, \ 574 flatcc_json_parser_error_underflow); \ 575 } \ 576 if (value > uctype ## _MAX) { \ 577 return flatcc_json_parser_set_error(ctx, buf, end, \ 578 flatcc_json_parser_error_overflow); \ 579 } \ 580 *v = (basetype)value; \ 581 return buf; \ 582 } 583 584 __flatcc_json_parser_define_coerce_unsigned(uint32, uint32_t, UINT32) 585 __flatcc_json_parser_define_coerce_unsigned(uint16, uint16_t, UINT16) 586 __flatcc_json_parser_define_coerce_unsigned(uint8, uint8_t, UINT8) 587 588 #define __flatcc_json_parser_define_coerce_signed(type, basetype, uctype) \ 589 static inline const char *flatcc_json_parser_coerce_ ## type( \ 590 flatcc_json_parser_t *ctx, const char *buf, \ 591 const char *end, int value_sign, uint64_t value, basetype *v) \ 592 { \ 593 if (value_sign) { \ 594 if (value > (uint64_t)(uctype ## _MAX) + 1) { \ 595 return flatcc_json_parser_set_error(ctx, buf, end, \ 596 flatcc_json_parser_error_underflow); \ 597 } \ 598 *v = (basetype)-(int64_t)value; \ 599 } else { \ 600 if (value > uctype ## _MAX) { \ 601 return flatcc_json_parser_set_error(ctx, buf, end, \ 602 flatcc_json_parser_error_overflow); \ 603 } \ 604 *v = (basetype)value; \ 605 } \ 606 return buf; \ 607 } 608 609 __flatcc_json_parser_define_coerce_signed(int64, int64_t, INT64) 610 __flatcc_json_parser_define_coerce_signed(int32, int32_t, INT32) 611 __flatcc_json_parser_define_coerce_signed(int16, int16_t, INT16) 612 __flatcc_json_parser_define_coerce_signed(int8, int8_t, INT8) 613 614 static inline const char *flatcc_json_parser_coerce_float( 615 flatcc_json_parser_t *ctx, const char *buf, 616 const char *end, int value_sign, uint64_t value, float *v) 617 { 618 (void)ctx; 619 (void)end; 620 621 *v = value_sign ? -(float)value : (float)value; 622 return buf; 623 } 624 625 static inline const char *flatcc_json_parser_coerce_double( 626 flatcc_json_parser_t *ctx, const char *buf, 627 const char *end, int value_sign, uint64_t value, double *v) 628 { 629 (void)ctx; 630 (void)end; 631 632 *v = value_sign ? -(double)value : (double)value; 633 return buf; 634 } 635 636 const char *flatcc_json_parser_double(flatcc_json_parser_t *ctx, const char *buf, const char *end, double *v); 637 638 const char *flatcc_json_parser_float(flatcc_json_parser_t *ctx, const char *buf, const char *end, float *v); 639 640 /* 641 * If the buffer does not contain a valid start character for a numeric 642 * value, the function will return the the input buffer without failure. 643 * This makes is possible to try a symbolic parse. 644 */ 645 const char *flatcc_json_parser_integer(flatcc_json_parser_t *ctx, const char *buf, const char *end, 646 int *value_sign, uint64_t *value); 647 648 /* Returns unchanged buffer without error if `null` is not matched. */ 649 static inline const char *flatcc_json_parser_null(const char *buf, const char *end) 650 { 651 if (end - buf >= 4 && memcmp(buf, "null", 4) == 0) { 652 return buf + 4; 653 } 654 return buf; 655 } 656 657 static inline const char *flatcc_json_parser_none(flatcc_json_parser_t *ctx, 658 const char *buf, const char *end) 659 { 660 if (end - buf >= 4 && memcmp(buf, "null", 4) == 0) { 661 return buf + 4; 662 } 663 return flatcc_json_parser_set_error(ctx, buf, end, 664 flatcc_json_parser_error_union_none_not_null); 665 } 666 667 /* 668 * `parsers` is a null terminated array of parsers with at least one 669 * valid parser. A numeric literal parser may also be included. 670 */ 671 #define __flatcc_json_parser_define_integral_parser(type, basetype) \ 672 static inline const char *flatcc_json_parser_ ## type( \ 673 flatcc_json_parser_t *ctx, \ 674 const char *buf, const char *end, basetype *v) \ 675 { \ 676 uint64_t value = 0; \ 677 int value_sign = 0; \ 678 const char *mark = buf; \ 679 \ 680 *v = 0; \ 681 if (buf == end) { \ 682 return buf; \ 683 } \ 684 buf = flatcc_json_parser_integer(ctx, buf, end, &value_sign, &value); \ 685 if (buf != mark) { \ 686 return flatcc_json_parser_coerce_ ## type(ctx, \ 687 buf, end, value_sign, value, v); \ 688 } \ 689 return buf; \ 690 } 691 692 __flatcc_json_parser_define_integral_parser(uint64, uint64_t) 693 __flatcc_json_parser_define_integral_parser(uint32, uint32_t) 694 __flatcc_json_parser_define_integral_parser(uint16, uint16_t) 695 __flatcc_json_parser_define_integral_parser(uint8, uint8_t) 696 __flatcc_json_parser_define_integral_parser(int64, int64_t) 697 __flatcc_json_parser_define_integral_parser(int32, int32_t) 698 __flatcc_json_parser_define_integral_parser(int16, int16_t) 699 __flatcc_json_parser_define_integral_parser(int8, int8_t) 700 701 static inline const char *flatcc_json_parser_bool(flatcc_json_parser_t *ctx, const char *buf, const char *end, uint8_t *v) 702 { 703 const char *k; 704 uint8_t tmp; 705 706 k = buf; 707 if (end - buf >= 4 && memcmp(buf, "true", 4) == 0) { 708 *v = 1; 709 return k + 4; 710 } else if (end - buf >= 5 && memcmp(buf, "false", 5) == 0) { 711 *v = 0; 712 return k + 5; 713 } 714 buf = flatcc_json_parser_uint8(ctx, buf, end, &tmp); 715 *v = !!tmp; 716 return buf; 717 } 718 719 /* 720 * The `parsers` argument is a zero terminated array of parser 721 * functions with increasingly general scopes. 722 * 723 * Symbols can be be or'ed together by listing multiple space separated 724 * flags in source being parsed, like `{ x : "Red Blue" }`. 725 * Intended for flags, but generally available. 726 * 727 * `aggregate` means there are more symbols to follow. 728 * 729 * This function does not return input `buf` value if match was 730 * unsuccessful. It will either match or error. 731 */ 732 typedef const char *flatcc_json_parser_integral_symbol_f(flatcc_json_parser_t *ctx, 733 const char *buf, const char *end, int *value_sign, uint64_t *value, int *aggregate); 734 735 /* 736 * Raise an error if a syntax like `color: Red Green` is seen unless 737 * explicitly permitted. `color: "Red Green"` or `"color": "Red Green" 738 * or `color: Red` is permitted if unquoted is permitted but not 739 * unquoted list. Googles flatc JSON parser does not allow multiple 740 * symbolic values unless quoted, so this is the default. 741 */ 742 #if !FLATCC_JSON_PARSE_ALLOW_UNQUOTED || FLATCC_JSON_PARSE_ALLOW_UNQUOTED_LIST 743 #define __flatcc_json_parser_init_check_unquoted_list() 744 #define __flatcc_json_parser_check_unquoted_list() 745 #else 746 #define __flatcc_json_parser_init_check_unquoted_list() int list_count = 0; 747 #define __flatcc_json_parser_check_unquoted_list() \ 748 if (list_count++ && ctx->unquoted) { \ 749 return flatcc_json_parser_set_error(ctx, buf, end, \ 750 flatcc_json_parser_error_unquoted_symbolic_list); \ 751 } 752 #endif 753 754 #define __flatcc_json_parser_define_symbolic_integral_parser(type, basetype)\ 755 static const char *flatcc_json_parser_symbolic_ ## type( \ 756 flatcc_json_parser_t *ctx, \ 757 const char *buf, const char *end, \ 758 flatcc_json_parser_integral_symbol_f *parsers[], \ 759 basetype *v) \ 760 { \ 761 flatcc_json_parser_integral_symbol_f **p; \ 762 const char *mark; \ 763 basetype tmp = 0; \ 764 uint64_t value; \ 765 int value_sign, aggregate; \ 766 __flatcc_json_parser_init_check_unquoted_list() \ 767 \ 768 *v = 0; \ 769 buf = flatcc_json_parser_constant_start(ctx, buf, end); \ 770 if (buf == end) { \ 771 return buf; \ 772 } \ 773 do { \ 774 p = parsers; \ 775 do { \ 776 /* call parser function */ \ 777 buf = (*p)(ctx, (mark = buf), end, \ 778 &value_sign, &value, &aggregate); \ 779 if (buf == end) { \ 780 return buf; \ 781 } \ 782 } while (buf == mark && *++p); \ 783 if (mark == buf) { \ 784 return flatcc_json_parser_set_error(ctx, buf, end, \ 785 flatcc_json_parser_error_expected_scalar); \ 786 } \ 787 __flatcc_json_parser_check_unquoted_list() \ 788 if (end == flatcc_json_parser_coerce_ ## type(ctx, \ 789 buf, end, value_sign, value, &tmp)) { \ 790 return end; \ 791 } \ 792 /* \ 793 * `+=`, not `|=` because we also coerce to float and double, \ 794 * and because we need to handle signed values. This may give \ 795 * unexpected results with duplicate flags. \ 796 */ \ 797 *v += tmp; \ 798 } while (aggregate); \ 799 return buf; \ 800 } 801 802 __flatcc_json_parser_define_symbolic_integral_parser(uint64, uint64_t) 803 __flatcc_json_parser_define_symbolic_integral_parser(uint32, uint32_t) 804 __flatcc_json_parser_define_symbolic_integral_parser(uint16, uint16_t) 805 __flatcc_json_parser_define_symbolic_integral_parser(uint8, uint8_t) 806 __flatcc_json_parser_define_symbolic_integral_parser(int64, int64_t) 807 __flatcc_json_parser_define_symbolic_integral_parser(int32, int32_t) 808 __flatcc_json_parser_define_symbolic_integral_parser(int16, int16_t) 809 __flatcc_json_parser_define_symbolic_integral_parser(int8, int8_t) 810 811 __flatcc_json_parser_define_symbolic_integral_parser(bool, uint8_t) 812 813 /* We still parse integral values, but coerce to float or double. */ 814 __flatcc_json_parser_define_symbolic_integral_parser(float, float) 815 __flatcc_json_parser_define_symbolic_integral_parser(double, double) 816 817 /* Parse vector as a base64 or base64url encoded string with no spaces permitted. */ 818 const char *flatcc_json_parser_build_uint8_vector_base64(flatcc_json_parser_t *ctx, 819 const char *buf, const char *end, flatcc_builder_ref_t *ref, int urlsafe); 820 821 /* 822 * This doesn't do anything other than validate and advance past 823 * a JSON value which may use unquoted symbols. 824 * 825 * Upon call it is assumed that leading space has been stripped and that 826 * a JSON value is expected (i.e. root, or just after ':' in a 827 * container object, or less likely as an array member). Any trailing 828 * comma is assumed to belong to the parent context. Returns a parse 829 * location stripped from space so container should post call expect 830 * ',', '}', or ']', or EOF if the JSON is valid. 831 */ 832 const char *flatcc_json_parser_generic_json(flatcc_json_parser_t *ctx, const char *buf, const char *end); 833 834 /* Parse a JSON table. */ 835 typedef const char *flatcc_json_parser_table_f(flatcc_json_parser_t *ctx, 836 const char *buf, const char *end, flatcc_builder_ref_t *pref); 837 838 /* Parses a JSON struct. */ 839 typedef const char *flatcc_json_parser_struct_f(flatcc_json_parser_t *ctx, 840 const char *buf, const char *end, flatcc_builder_ref_t *pref); 841 842 /* Constructs a table, struct, or string object unless the type is 0 or unknown. */ 843 typedef const char *flatcc_json_parser_union_f(flatcc_json_parser_t *ctx, 844 const char *buf, const char *end, uint8_t type, flatcc_builder_ref_t *pref); 845 846 typedef int flatcc_json_parser_is_known_type_f(uint8_t type); 847 848 /* Called at start by table parsers with at least 1 union. */ 849 const char *flatcc_json_parser_prepare_unions(flatcc_json_parser_t *ctx, 850 const char *buf, const char *end, size_t union_total, size_t *handle); 851 852 const char *flatcc_json_parser_finalize_unions(flatcc_json_parser_t *ctx, 853 const char *buf, const char *end, size_t handle); 854 855 const char *flatcc_json_parser_union(flatcc_json_parser_t *ctx, 856 const char *buf, const char *end, size_t union_index, 857 flatbuffers_voffset_t id, size_t handle, 858 flatcc_json_parser_union_f *union_parser); 859 860 const char *flatcc_json_parser_union_type(flatcc_json_parser_t *ctx, 861 const char *buf, const char *end, size_t union_index, 862 flatbuffers_voffset_t id, size_t handle, 863 flatcc_json_parser_integral_symbol_f *type_parsers[], 864 flatcc_json_parser_union_f *union_parser); 865 866 const char *flatcc_json_parser_union_vector(flatcc_json_parser_t *ctx, 867 const char *buf, const char *end, size_t union_index, 868 flatbuffers_voffset_t id, size_t handle, 869 flatcc_json_parser_union_f *union_parser); 870 871 const char *flatcc_json_parser_union_type_vector(flatcc_json_parser_t *ctx, 872 const char *buf, const char *end, size_t union_index, 873 flatbuffers_voffset_t id, size_t handle, 874 flatcc_json_parser_integral_symbol_f *type_parsers[], 875 flatcc_json_parser_union_f *union_parser, 876 flatcc_json_parser_is_known_type_f accept_type); 877 878 /* 879 * Parses a table as root. 880 * 881 * Use the flag `flatcc_json_parser_f_with_size` to create a buffer with 882 * size prefix. 883 * 884 * `ctx` may be null or an uninitialized json parser to receive parse results. 885 * `builder` must a newly initialized or reset builder object. 886 * `buf`, `bufsiz` may be larger than the parsed json if trailing 887 * space or zeroes are expected, but they must represent a valid memory buffer. 888 * `fid` must be null, or a valid file identifier. 889 * `flags` default to 0. See also `flatcc_json_parser_f_` constants. 890 */ 891 int flatcc_json_parser_table_as_root(flatcc_builder_t *B, flatcc_json_parser_t *ctx, 892 const char *buf, size_t bufsiz, flatcc_json_parser_flags_t flags, const char *fid, 893 flatcc_json_parser_table_f *parser); 894 895 /* 896 * Similar to `flatcc_json_parser_table_as_root` but parses a struct as 897 * root. 898 */ 899 int flatcc_json_parser_struct_as_root(flatcc_builder_t *B, flatcc_json_parser_t *ctx, 900 const char *buf, size_t bufsiz, flatcc_json_parser_flags_t flags, const char *fid, 901 flatcc_json_parser_struct_f *parser); 902 903 #include "portable/pdiagnostic_pop.h" 904 905 #ifdef __cplusplus 906 } 907 #endif 908 909 #endif /* FLATCC_JSON_PARSE_H */