parse.c (31923B)
1 2 #include "util.h" 3 #include "parse.h" 4 #include "parser.h" 5 #include "io.h" 6 7 #include <stdio.h> 8 #include <string.h> 9 #include <ctype.h> 10 #include <stdlib.h> 11 #include <assert.h> 12 #include <tgmath.h> 13 14 #ifdef DEBUG 15 #define tokdebug printf 16 #else 17 #define tokdebug(...) 18 #define print_spaces(...) 19 #endif 20 21 enum known_symbol { 22 S_OBJECT, 23 S_SPACE, 24 S_OBJECTS, 25 S_NAME, 26 S_TYPE, 27 S_SHAPE, 28 S_WIDTH, 29 S_DEPTH, 30 S_HEIGHT, 31 S_CONDITION, 32 S_LOCATION, 33 S_MATERIAL, 34 }; 35 36 enum tok_state { 37 TS_OPEN, 38 TS_CLOSE, 39 TS_ATOM, 40 }; 41 42 union token { 43 struct tok_str str; 44 }; 45 46 47 #ifdef DEBUG 48 #endif 49 static const char *attr_type_str(enum attribute_type type) 50 { 51 switch (type) { 52 case A_CONDITION: return "condition"; 53 case A_DEPTH: return "depth"; 54 case A_HEIGHT: return "height"; 55 case A_ID: return "id"; 56 case A_LOCATION: return "location"; 57 case A_MATERIAL: return "material"; 58 case A_COLOR: return "color"; 59 case A_NAME: return "name"; 60 case A_SHAPE: return "shape"; 61 case A_TYPE: return "type"; 62 case A_WIDTH: return "width"; 63 case A_STATE: return "state"; 64 case A_DATA: return "data"; 65 } 66 67 return "unknown"; 68 } 69 70 static const char *token_error_string(enum token_error err) 71 { 72 switch (err) { 73 case TE_OK: return "all good"; 74 case TE_STR_START_CHAR: return "string didn't start with \""; 75 case TE_SYM_START_CHAR: return "symbol didn't start with a-z"; 76 case TE_NUM_START_CHAR: return "number didn't start with 0-9 or -"; 77 case TE_NAN_START_CHAR: return "nan number didn't start with nan:"; 78 case TE_SYM_CHAR: return "invalid symbol character"; 79 case TE_NUM_CHAR: return "invalid number character"; 80 case TE_SYM_OVERFLOW: return "symbol push overflow"; 81 case TE_UNEXPECTED_TOKEN: return "unexpected token during parsing"; 82 case TE_UNEXPECTED_SYMBOL: return "unexpected symbol during parsing"; 83 } 84 85 return "unknown"; 86 } 87 88 static void copy_token_cursor(struct token_cursor *src, struct token_cursor *dest) 89 { 90 copy_cursor(&src->c, &dest->c); 91 dest->err = src->err; 92 memcpy(&dest->err_data, &src->err_data, sizeof(src->err_data)); 93 } 94 95 static void init_token_cursor(struct token_cursor *cursor) 96 { 97 cursor->err = TE_OK; 98 memset(&cursor->err_data, 0, sizeof(cursor->err_data)); 99 } 100 101 struct attribute *get_attr(struct cursor *attributes, int index) 102 { 103 return (struct attribute*)index_cursor(attributes, index, 104 sizeof(struct attribute)); 105 } 106 107 /* 108 */ 109 static const char *shape_str(enum shape shape) 110 { 111 switch (shape) { 112 case SHAPE_RECTANGLE: return "rectangle"; 113 case SHAPE_CIRCLE: return "circle"; 114 case SHAPE_SQUARE: return "square"; 115 } 116 117 return "unknown"; 118 } 119 120 121 static void print_attribute(struct attribute *attr) 122 { 123 printf("%s ", attr_type_str(attr->type)); 124 125 switch (attr->type) { 126 case A_NAME: 127 printf("%.*s ", attr->data.str.len, attr->data.str.ptr); 128 break; 129 case A_SHAPE: 130 printf("%s ", shape_str(attr->data.shape)); 131 break; 132 default: 133 break; 134 } 135 136 } 137 138 static void print_attributes(struct cursor *attributes, struct cell *cell) 139 { 140 int i; 141 struct attribute *attr; 142 143 printf("%d attrs: ", cell->n_attributes); 144 for (i = 0; i < cell->n_attributes; i++) { 145 attr = get_attr(attributes, cell->attributes[i]); 146 assert(attr); 147 printf("[%d]", cell->attributes[i]); 148 print_attribute(attr); 149 } 150 } 151 152 void print_cell(struct cursor *attributes, struct cell *cell) 153 { 154 const char *name; 155 int name_len; 156 157 if (cell->type == C_GROUP) { 158 printf("---\n"); 159 return; 160 } 161 162 cell_attr_str(attributes, cell, &name, &name_len, A_NAME); 163 164 printf("%.*s%s%s ", name_len, name, name_len > 0?" ":"", 165 cell->type == C_OBJECT 166 ? object_type_str(cell->obj_type) 167 : cell_type_str(cell->type)); 168 169 //print_attributes(attributes, cell); 170 } 171 172 173 static const char *token_type_str(enum token_type type) 174 { 175 switch (type) { 176 case T_OPEN: return "("; 177 case T_CLOSE: return ")"; 178 case T_SYMBOL: return "symbol"; 179 case T_STRING: return "string"; 180 case T_NUMBER: return "number"; 181 } 182 183 printf("UNUSUAL: unknown token_type %d\n", type); 184 185 return "unknown"; 186 } 187 188 void print_token_error(struct token_cursor *cursor) 189 { 190 printf("error [%d]: ", cursor->err_data.pos); 191 if (cursor->err == TE_UNEXPECTED_TOKEN) { 192 printf("%s: expected '%s' got '%s'\n", 193 token_error_string(cursor->err), 194 token_type_str(cursor->err_data.lex.expected), 195 token_type_str(cursor->err_data.lex.got)); 196 } 197 else if (cursor->err == TE_UNEXPECTED_SYMBOL) { 198 printf("%s: expected symbol '%.*s' got '%.*s'\n", 199 token_error_string(cursor->err), 200 cursor->err_data.symbol.expected.len, 201 cursor->err_data.symbol.expected.data, 202 cursor->err_data.symbol.got.len, 203 cursor->err_data.symbol.got.data); 204 } 205 else { 206 int is_chr_data = cursor->err == TE_STR_START_CHAR || 207 cursor->err == TE_SYM_START_CHAR || 208 cursor->err == TE_NUM_START_CHAR || 209 cursor->err == TE_NUM_CHAR || 210 cursor->err == TE_SYM_CHAR; 211 212 printf("\nerror: %s: %.*s\n", token_error_string(cursor->err), 213 is_chr_data?1:0, (char*)&cursor->err_data.c); 214 } 215 216 } 217 218 219 #ifdef DEBUG 220 static void print_spaces(int n) 221 { 222 int i; 223 for (i = 0; i < n; i++) 224 putchar(' '); 225 } 226 #endif 227 228 static int push_token_data(struct token_cursor *tokens, 229 enum token_type token, 230 void *token_data, int token_data_size) 231 { 232 static int depth = 0; 233 #ifdef DEBUG 234 struct tok_str *str; 235 #endif 236 if (!cursor_push_byte(&tokens->c, token)) 237 return 0; 238 239 switch (token) { 240 241 case T_OPEN: 242 depth++; 243 break; 244 245 case T_CLOSE: 246 depth--; 247 break; /* nothing to write after these tokens */ 248 249 case T_NUMBER: 250 case T_SYMBOL: 251 252 /* fallthrough */ 253 case T_STRING: 254 #ifdef DEBUG 255 print_spaces(depth*2); 256 str = (struct tok_str*)token_data; 257 if (token == T_STRING) { 258 tokdebug("str \"%.*s\"", str->len, str->data); 259 } else if (token == T_NUMBER) { 260 tokdebug("num %.*s", str->len, str->data); 261 } else { 262 tokdebug("sym %.*s", str->len, str->data); 263 } 264 #endif 265 if (!cursor_push(&tokens->c, token_data, token_data_size)) { 266 printf("hmm? %d\n", token_data_size); 267 cursor_print_around(&tokens->c, 10); 268 return 0; 269 } 270 #ifdef DEBUG 271 printf("\n"); 272 #endif 273 break; 274 } 275 276 277 return 1; 278 } 279 280 281 static int push_token(struct token_cursor *tokens, enum token_type token) 282 { 283 return push_token_data(tokens, token, NULL, 0); 284 } 285 286 static int push_symbol(struct token_cursor *tokens, struct tok_str symbol) 287 { 288 return push_token_data(tokens, T_SYMBOL, &symbol, sizeof(symbol)); 289 } 290 291 static int push_string(struct token_cursor *tokens, struct tok_str str) 292 { 293 return push_token_data(tokens, T_STRING, &str, sizeof(str)); 294 } 295 296 static int push_number(struct token_cursor *tokens, struct tok_str str) 297 { 298 return push_token_data(tokens, T_NUMBER, &str, sizeof(str)); 299 } 300 301 302 static int is_start_symbol_char(char c) 303 { 304 return (c >= 'a' && c <= 'z') || c == '$' || c == '='; 305 } 306 307 static int is_symbol_char(char c) 308 { 309 return is_start_symbol_char(c) || c == '.' || c == '-' || c == '_' || 310 (c >= '0' && c <= '9'); 311 } 312 313 static int pull_escaped_char(struct cursor *cursor, u8 *c) 314 { 315 int ok; 316 317 ok = pull_byte(cursor, c); 318 if (!ok) 319 return 0; 320 321 if (*c != '\\') { 322 return 1; 323 } 324 325 ok = pull_byte(cursor, c); 326 327 /* we saw an escape char but input ended!? */ 328 if (!ok) { 329 return 0; 330 } 331 332 return 2; 333 } 334 335 static int pull_str(struct cursor *cur, const char *str) 336 { 337 int len = strlen(str); 338 339 if (cur->p + len >= cur->end) 340 return 0; 341 342 if (memcmp(cur->p, str, len) != 0) 343 return 0; 344 345 cur->p += len; 346 return 1; 347 } 348 349 static int pull_nan(struct cursor *cur, enum nantype *type) 350 { 351 u8 *start = cur->p; 352 353 if (!consume_byte(cur, 'n')) goto fail; 354 if (!consume_byte(cur, 'a')) goto fail; 355 if (!consume_byte(cur, 'n')) goto fail; 356 if (!consume_byte(cur, ':')) goto fail; 357 358 if (pull_str(cur, "arithmetic")) { 359 if (type) *type = NAN_ARITHMETIC; 360 return 1; 361 } 362 363 if (pull_str(cur, "canonical")) { 364 if (type) *type = NAN_CANONICAL; 365 return 1; 366 } 367 368 while (cur->p < cur->end) { 369 if (*cur->p == ')' || isspace(*cur->p)) 370 return 1; 371 cur->p++; 372 } 373 374 fail: 375 cur->p = start; 376 return 0; 377 } 378 379 static int pull_inf(struct cursor *cur) { 380 u8 *start = cur->p; 381 u8 c; 382 consume_byte(cur, '-'); 383 consume_byte(cur, '+'); 384 if (!consume_byte(cur, 'i')) goto fail; 385 if (!consume_byte(cur, 'n')) goto fail; 386 if (!consume_byte(cur, 'f')) goto fail; 387 if (pull_byte(cur, &c)) { 388 if (isspace(c) || c == ')') { 389 cur->p--; 390 return 1; 391 } 392 goto fail; 393 } 394 395 return 1; 396 fail: 397 cur->p = start; 398 return 0; 399 } 400 401 static int pull_number(struct token_cursor *cursor) 402 { 403 int ok = 1; 404 int chars = 0; 405 u8 c; 406 u8 *start = cursor->c.p; 407 408 if (pull_nan(&cursor->c, NULL)) 409 return 1; 410 411 if (pull_inf(&cursor->c)) 412 return 1; 413 414 while (1) { 415 if (!pull_byte(&cursor->c, &c)) 416 goto fail; 417 418 /* first char shouldn't start with a letter */ 419 if (chars == 0 && !isdigit(c) && c != '-') { 420 cursor->err = TE_NUM_START_CHAR; 421 cursor->err_data.c = c; 422 goto fail; 423 } else if (chars > 0 && (isspace(c) || c == ')')) { 424 /* we got a number */ 425 break; 426 } else if (chars > 0 && !isxdigit(c) && c != ':' && c != '.' && c != 'n' && c != 'a' && c != 'x' && c != 'p' && c != '-' && c != '+') { 427 cursor->err = TE_NUM_CHAR; 428 cursor->err_data.c = c; 429 goto fail; 430 } 431 432 chars++; 433 434 if (!ok) { 435 cursor->err = TE_SYM_OVERFLOW; 436 goto fail; 437 } 438 } 439 440 if (!ok) { 441 cursor->err = TE_SYM_OVERFLOW; 442 goto fail; 443 } 444 445 cursor->c.p--; 446 cursor->err = TE_OK; 447 448 /* remove the first counted quote since this was not pushed */ 449 return chars; 450 451 fail: 452 cursor->c.p = start; 453 return 0; 454 } 455 456 static int pull_string(struct token_cursor *cursor, u8 **start, int *len) 457 { 458 int ok = 1; 459 int chars = 0; 460 u8 c; 461 462 struct token_cursor temp; 463 464 copy_token_cursor(cursor, &temp); 465 466 while (1) { 467 if (chars == 0) 468 *start = temp.c.p+1; 469 470 ok = pull_escaped_char(&temp.c, &c); 471 if (!ok) return 0; 472 473 474 /* first char should start with a letter */ 475 if (chars == 0 && c != '"') { 476 cursor->err = TE_STR_START_CHAR; 477 cursor->err_data.c = c; 478 return 0; 479 } else if (chars == 0 && c == '"') { 480 /* this increment will get removed at end */ 481 chars++; 482 continue; 483 } else if (chars > 0 && c == '"' && ok == 1) { 484 /* ok == 2 would mean that it was escaped, so 485 we're done here */ 486 break; 487 } 488 489 if (!ok) { 490 cursor->err = TE_SYM_OVERFLOW; 491 return 0; 492 } 493 } 494 495 if (!ok) { 496 cursor->err = TE_SYM_OVERFLOW; 497 return 0; 498 } 499 500 copy_token_cursor(&temp, cursor); 501 502 /* remove the first counted quote since this was not pushed */ 503 *len = temp.c.p - *start - 1; 504 return 1; 505 } 506 507 static int pull_symbol(struct token_cursor *cursor, u8 **start) 508 { 509 int ok = 1; 510 int chars = 0; 511 u8 c; 512 513 struct token_cursor temp; 514 515 copy_token_cursor(cursor, &temp); 516 517 *start = temp.c.p; 518 519 while (1) { 520 ok = pull_byte(&temp.c, &c); 521 if (!ok) return 0; 522 523 /* first char should start with a letter */ 524 if (chars == 0 && !is_start_symbol_char(c)) { 525 cursor->err = TE_SYM_START_CHAR; 526 cursor->err_data.c = c; 527 return 0; 528 } else if (chars > 0 && (isspace(c) || c == ')')) { 529 /* we're done here */ 530 break; 531 } else if (chars > 0 && !is_symbol_char(c)) { 532 cursor->err = TE_SYM_CHAR; 533 cursor->err_data.c = c; 534 return 0; 535 } 536 537 chars++; 538 539 if (!ok) { 540 cursor->err = TE_SYM_OVERFLOW; 541 return 0; 542 } 543 } 544 545 if (!ok) { 546 cursor->err = TE_SYM_OVERFLOW; 547 return 0; 548 } 549 550 temp.c.p--; 551 copy_token_cursor(&temp, cursor); 552 553 return chars; 554 } 555 556 static void init_cell(struct cell *cell) 557 { 558 memset(cell, 0, sizeof(*cell)); 559 } 560 561 static int read_and_push_atom(struct token_cursor *cursor, struct token_cursor *tokens) 562 { 563 struct tok_str str; 564 u8 *start; 565 int ok, len; 566 567 ok = pull_symbol(cursor, &start); 568 if (ok) { 569 str.len = ok; 570 str.data = start; 571 if (!push_symbol(tokens, str)) { 572 cursor_print_around(&tokens->c, 10); 573 printf("read_and_push_atom identifier push overflow\n"); 574 return 0; 575 } 576 return 1; 577 } 578 579 ok = pull_string(cursor, &start, &len); 580 if (ok) { 581 str.len = len; 582 str.data = start; 583 ok = push_string(tokens, str); 584 if (!ok) { 585 printf("read_and_push_atom string push overflow\n"); 586 return 0; 587 } 588 return 1; 589 } 590 591 start = cursor->c.p; 592 if (pull_number(cursor)) { 593 str.len = cursor->c.p - start; 594 str.data = start; 595 if (!push_number(tokens, str)) { 596 printf("read_and_push_atom number push overflow\n"); 597 return 0; 598 } 599 return 1; 600 } 601 602 /* TODO: read_number */ 603 604 return 0; 605 } 606 607 static void consume_whitespace(struct cursor *cur) { 608 while (cur->p < cur->end) { 609 if (!isspace(*cur->p)) 610 break; 611 cur->p++; 612 } 613 } 614 615 static void consume_line(struct cursor *cur) { 616 int line_breaking = 0; 617 while (cur->p < cur->end) { 618 if (*cur->p == '\n' || *cur->p == '\r') { 619 line_breaking = 1; 620 } else if (line_breaking) { 621 break; 622 } 623 cur->p++; 624 } 625 } 626 627 static int consume_comment(struct cursor *cur) { 628 u8* start = cur->p; 629 if (!consume_byte(cur, ';')) 630 goto fail; 631 if (!consume_byte(cur, ';')) 632 goto fail; 633 consume_line(cur); 634 return 1; 635 636 fail: 637 cur->p = start; 638 return 0; 639 } 640 641 static int parse_hex_float(const char *str, int len, double *result) { 642 struct cursor _cur = {0}, *cur = NULL; 643 double significand = 0.0; 644 char *endptr; 645 int significand_digits = 0; 646 int fractional_digits = 0; 647 int found_period = 0; 648 int exponent = 0; 649 int sign = 1; 650 u8 c, *start; 651 652 cur = &_cur; 653 make_cursor((u8*)str, (u8*)str + len, cur); 654 start = cur->p; 655 656 if (!pull_byte(cur, &c)) 657 goto fail; 658 659 if (c == '-') { 660 sign = -1; 661 if (!pull_byte(cur, &c)) goto fail; 662 } else if (c == '+') { 663 if (!pull_byte(cur, &c)) goto fail; 664 } 665 666 if (c != '0') 667 goto fail; 668 669 if (!pull_byte(cur, &c)) 670 goto fail; 671 672 if (tolower(c) != 'x') 673 goto fail; 674 675 while (pull_byte(cur, &c) && (isxdigit(c) || c == '.')) { 676 if (c == '.') { 677 if (found_period) goto fail; 678 found_period = 1; 679 } else { 680 int digit_value = isdigit(c) ? c - '0' : 10 + tolower(c) - 'a'; 681 significand = significand * 16 + digit_value; 682 if (found_period) 683 fractional_digits++; 684 significand_digits++; 685 } 686 } 687 688 if (significand_digits == 0) 689 goto fail; 690 691 if (tolower(c) == 'p') { 692 if (!pull_byte(cur, &c)) 693 goto fail; 694 exponent = strtol((char *)cur->p - 1, &endptr, 10); 695 if ((char *)cur->p - 1 == endptr) 696 goto fail; 697 cur->p = (u8 *)endptr; 698 } else { 699 cur->p--; // Unpull the byte that is not 'p' 700 } 701 702 exponent -= 4 * fractional_digits; 703 *result = sign * ldexp(significand, exponent); 704 705 return 1; 706 707 fail: 708 cur->p = start; 709 return 0; 710 } 711 712 int tokenize_cells(u8 *buf, int buf_size, struct token_cursor *tokens) 713 { 714 enum tok_state state; 715 struct token_cursor cursor; 716 /* u8 *start = buf; */ 717 u8 *token_buf = tokens->c.p; 718 u8 c; 719 720 cursor.c.p = buf; 721 cursor.c.end = buf + buf_size; 722 723 state = TS_OPEN; 724 725 while (cursor.c.p < cursor.c.end) { 726 do { 727 consume_whitespace(&cursor.c); 728 } while (consume_comment(&cursor.c)); 729 730 if (!pull_byte(&cursor.c, &c)) 731 break; 732 733 if (state == TS_OPEN) { 734 if (isspace(c)) 735 continue; 736 737 if (c != '(') { 738 printf("expected '(' or whitespace\n"); 739 return 0; 740 } 741 742 push_token(tokens, T_OPEN); 743 state = TS_ATOM; 744 continue; 745 } 746 else if (state == TS_ATOM) { 747 if (isspace(c)) 748 continue; 749 750 if (c == '(') { 751 push_token(tokens, T_OPEN); 752 continue; 753 } 754 755 if (c == ')') { 756 push_token(tokens, T_CLOSE); 757 continue; 758 } 759 760 cursor.c.p--; 761 /* printf("\nat %c (%ld) before reading atom\n", *cursor.p, cursor.p - start); */ 762 if (!read_and_push_atom(&cursor, tokens)) { 763 print_token_error(&cursor); 764 return 0; 765 } 766 767 } 768 } 769 770 /* just seal the buffer now since we won't be adding to it */ 771 tokens->c.end = tokens->c.p; 772 tokens->c.p = token_buf; 773 774 return 1; 775 } 776 777 778 static int pull_token_data(struct cursor *tokens, union token *token, 779 enum token_type type) 780 { 781 int ok; 782 783 switch (type) { 784 case T_OPEN: 785 case T_CLOSE: 786 return 1; 787 case T_STRING: 788 case T_SYMBOL: 789 case T_NUMBER: 790 ok = cursor_pull(tokens, (void*)&token->str, 791 sizeof(struct tok_str)); 792 return ok; 793 } 794 795 return 0; 796 } 797 798 static int pull_token_type(struct cursor *cursor, enum token_type *type) 799 { 800 int ok; 801 u8 c; 802 803 ok = pull_byte(cursor, &c); 804 if (!ok) return 0; 805 806 *type = (enum token_type)c; 807 return 1; 808 } 809 810 static int pull_token(struct token_cursor *tokens, 811 union token *token, 812 enum token_type expected_type) 813 { 814 struct token_cursor temp; 815 enum token_type type; 816 int ok; 817 818 copy_token_cursor(tokens, &temp); 819 820 ok = pull_token_type(&temp.c, &type); 821 if (!ok) return 0; 822 823 if (type != expected_type) { 824 tokens->err = TE_UNEXPECTED_TOKEN; 825 tokens->err_data.lex.expected = expected_type; 826 tokens->err_data.lex.got = type; 827 tokens->err_data.pos = cursor_count(&temp.c, 1); 828 return 0; 829 } 830 831 ok = pull_token_data(&temp.c, token, type); 832 if (!ok) { 833 return 0; 834 } 835 836 copy_token_cursor(&temp, tokens); 837 838 return 1; 839 } 840 841 #ifdef DEBUG 842 /* 843 static void print_token_data(union token *token, enum token_type type) 844 { 845 switch (type) { 846 case T_OPEN: 847 printf("("); 848 return; 849 case T_CLOSE: 850 printf(")"); 851 return; 852 case T_NUMBER: 853 case T_SYMBOL: 854 printf("%.*s", token->str.len, token->str.data); 855 return; 856 case T_STRING: 857 printf("\"%.*s\"", token->str.len, token->str.data); 858 return; 859 } 860 } 861 862 863 static void print_current_token(struct token_cursor *tokens) 864 { 865 struct token_cursor temp; 866 enum token_type type; 867 union token token; 868 int ok; 869 870 copy_token_cursor(tokens, &temp); 871 872 ok = pull_token_type(&temp.c, &type); 873 if (!ok) { 874 printf("could not peek token\n"); 875 return; 876 } 877 878 printf("current token: %s ", token_type_str(type)); 879 880 ok = pull_token_data(&temp.c, &token, type); 881 if (!ok) { 882 printf("[could not peek token data]\n"); 883 return; 884 } 885 886 print_token_data(&token, type); 887 printf("\n"); 888 } 889 */ 890 #endif 891 892 /* 893 * PARSING 894 */ 895 896 897 static int parse_open(struct token_cursor *tokens) 898 { 899 return pull_token(tokens, NULL, T_OPEN); 900 } 901 902 static int parse_close(struct token_cursor *tokens) 903 { 904 return pull_token(tokens, NULL, T_CLOSE); 905 } 906 907 static int parse_stringy_token(struct token_cursor *tokens, 908 struct tok_str *str, 909 enum token_type type) 910 { 911 union token token; 912 913 if (!pull_token(tokens, &token, type)) 914 return 0; 915 916 str->data = token.str.data; 917 str->len = token.str.len; 918 919 return 1; 920 } 921 922 923 static int pull_symbol_token(struct token_cursor *tokens, struct tok_str *str) 924 { 925 return parse_stringy_token(tokens, str, T_SYMBOL); 926 } 927 928 static int pull_number_token(struct token_cursor *tokens, struct tok_str *str) 929 { 930 return parse_stringy_token(tokens, str, T_NUMBER); 931 } 932 933 static int parse_number(struct token_cursor *tokens, struct number *number) 934 { 935 int ok; 936 struct tok_str str; 937 char *end; 938 939 ok = pull_number_token(tokens, &str); 940 if (!ok) return 0; 941 942 if (parse_hex_float((const char*)str.data, str.len, &number->value.fdouble)) { 943 number->type = NUM_FLOAT; 944 } else { 945 number->type = NUM_INTEGER; 946 number->value.integer = strtol((char*)str.data, &end, 10); 947 948 if ((u8*)end != (str.data + str.len)) { 949 printf("parse_number failed\n"); 950 return 0; 951 } 952 } 953 954 return 1; 955 } 956 957 958 struct cell *get_cell(struct cursor *cells, int index) 959 { 960 return (struct cell*)index_cursor(cells, index, 961 sizeof(struct cell)); 962 } 963 964 965 static int symbol_eq(struct tok_str *a, const char *b, int b_len) 966 { 967 return memeq(a->data, a->len, (char*)b, b_len); 968 } 969 970 static int parse_symbol(struct token_cursor *tokens, const char *match) 971 { 972 struct tok_str str; 973 974 if (!pull_symbol_token(tokens, &str)) 975 return 0; 976 977 if (!symbol_eq(&str, match, strlen(match))) 978 return 0; 979 980 return 1; 981 } 982 983 static int parse_shape(struct token_cursor *tokens, struct attribute *attr) 984 { 985 struct token_cursor temp; 986 struct tok_str str; 987 int ok; 988 989 copy_token_cursor(tokens, &temp); 990 991 ok = parse_symbol(&temp, "shape"); 992 if (!ok) return 0; 993 994 attr->type = A_SHAPE; 995 996 ok = pull_symbol_token(&temp, &str); 997 if (!ok) return 0; 998 999 if (symbol_eq(&str, "rectangle", 9)) { 1000 attr->data.shape = SHAPE_RECTANGLE; 1001 } else if (symbol_eq(&str, "circle", 6)) { 1002 attr->data.shape = SHAPE_CIRCLE; 1003 } else if (symbol_eq(&str, "square", 6)) { 1004 attr->data.shape = SHAPE_SQUARE; 1005 } else { 1006 tokens->err = TE_UNEXPECTED_SYMBOL; 1007 tokens->err_data.symbol.expected.data = (u8*)"rectangle"; 1008 tokens->err_data.symbol.expected.len = 9; 1009 tokens->err_data.symbol.got.data = str.data; 1010 tokens->err_data.symbol.got.len = str.len; 1011 } 1012 1013 copy_token_cursor(&temp, tokens); 1014 1015 return 1; 1016 } 1017 1018 static int parse_str_attr(struct token_cursor *tokens, 1019 struct attribute *attr, 1020 const char *sym, 1021 enum attribute_type type, 1022 enum token_type tok_type) 1023 { 1024 struct token_cursor temp; 1025 struct tok_str str; 1026 struct bufstr *bufstr; 1027 1028 assert(tok_type == T_NUMBER || tok_type == T_SYMBOL || tok_type == T_STRING); 1029 1030 copy_token_cursor(tokens, &temp); 1031 1032 if (sym == NULL) { 1033 if (!pull_symbol_token(&temp, &str)) 1034 return 0; 1035 1036 attr->data.data_attr.sym.ptr = (char*)str.data; 1037 attr->data.data_attr.sym.len = str.len; 1038 1039 bufstr = &attr->data.data_attr.str; 1040 } else { 1041 if (!parse_symbol(&temp, sym)) 1042 return 0; 1043 bufstr = &attr->data.str; 1044 } 1045 1046 if (!parse_stringy_token(&temp, &str, tok_type)) 1047 return 0; 1048 1049 bufstr->ptr = (char*)str.data; 1050 bufstr->len = str.len; 1051 attr->type = type; 1052 1053 #ifdef DEBUG 1054 if (sym == NULL) 1055 tokdebug("attribute %.*s %.*s\n", 1056 attr->data.data_attr.sym.len, 1057 attr->data.data_attr.sym.ptr, 1058 str.len, 1059 str.data); 1060 else 1061 tokdebug("attribute %s %.*s\n", sym, str.len, str.data); 1062 #endif 1063 1064 copy_token_cursor(&temp, tokens); 1065 1066 return 1; 1067 } 1068 1069 static int parse_size(struct token_cursor *tokens, struct attribute *attr) 1070 { 1071 struct token_cursor temp; 1072 struct tok_str str; 1073 int ok; 1074 1075 copy_token_cursor(tokens, &temp); 1076 1077 ok = pull_symbol_token(&temp, &str); 1078 if (!ok) return 0; 1079 1080 if (symbol_eq(&str, "height", 6)) { 1081 attr->type = A_HEIGHT; 1082 } else if (symbol_eq(&str, "width", 5)) { 1083 attr->type = A_WIDTH; 1084 } else if (symbol_eq(&str, "depth", 5)) { 1085 attr->type = A_DEPTH; 1086 } 1087 1088 ok = parse_number(&temp, &attr->data.number); 1089 if (!ok) return 0; 1090 1091 #ifdef DEBUG 1092 if (attr->data.number.type == NUM_INTEGER) { 1093 tokdebug("attribute %s %d\n", 1094 attr_type_str(attr->type), 1095 attr->data.number.value.integer); 1096 } else if (attr->data.number.type == NUM_FLOAT) { 1097 tokdebug("attribute %s %f\n", 1098 attr_type_str(attr->type), 1099 attr->data.number.value.fdouble); 1100 } 1101 #endif 1102 1103 copy_token_cursor(&temp, tokens); 1104 1105 return 1; 1106 } 1107 1108 static struct bufstr *attr_bufstr(struct attribute *attr) 1109 { 1110 if (attr->type == A_DATA) 1111 return &attr->data.data_attr.str; 1112 return &attr->data.str; 1113 } 1114 1115 int cell_attr_str(struct cursor *attributes, struct cell *cell, 1116 const char** name, int *len, enum attribute_type type) 1117 { 1118 int i; 1119 struct attribute *attr; 1120 struct bufstr *bufstr; 1121 *len = 0; 1122 *name = ""; 1123 1124 for (i = 0; i < cell->n_attributes; i++) { 1125 attr = get_attr(attributes, cell->attributes[i]); 1126 if (attr->type == type) { 1127 bufstr = attr_bufstr(attr); 1128 *name = bufstr->ptr; 1129 *len = bufstr->len; 1130 return 1; 1131 } 1132 } 1133 1134 return 0; 1135 } 1136 1137 const char *cell_name(struct cursor *attributes, struct cell *cell, int *len) 1138 { 1139 const char *name = NULL; 1140 if (!cell_attr_str(attributes, cell, &name, len, A_NAME)) 1141 return NULL; 1142 return name; 1143 } 1144 1145 static int parse_attribute(struct token_cursor *tokens, struct attribute *attr) 1146 { 1147 int ok; 1148 struct token_cursor temp; 1149 1150 copy_token_cursor(tokens, &temp); 1151 1152 ok = parse_open(&temp); 1153 if (!ok) return 0; 1154 1155 ok = parse_shape(&temp, attr); 1156 if (ok) goto close; 1157 1158 ok = parse_str_attr(&temp, attr, "id", A_ID, T_SYMBOL); 1159 if (ok) goto close; 1160 1161 ok = parse_str_attr(&temp, attr, "name", A_NAME, T_STRING); 1162 if (ok) { 1163 goto close; 1164 } 1165 1166 ok = parse_str_attr(&temp, attr, "material", A_MATERIAL, T_STRING); 1167 if (ok) goto close; 1168 1169 ok = parse_str_attr(&temp, attr, "color", A_COLOR, T_STRING); 1170 if (ok) goto close; 1171 1172 /* TODO: parse multiple conditions */ 1173 ok = parse_str_attr(&temp, attr, "condition", A_CONDITION, T_STRING); 1174 if (ok) goto close; 1175 1176 ok = parse_str_attr(&temp, attr, "location", A_LOCATION, T_SYMBOL); 1177 if (ok) goto close; 1178 1179 ok = parse_str_attr(&temp, attr, "state", A_STATE, T_SYMBOL); 1180 if (ok) goto close; 1181 1182 ok = parse_str_attr(&temp, attr, NULL, A_DATA, T_STRING); 1183 if (ok) goto close; 1184 1185 ok = parse_size(&temp, attr); 1186 if (ok) goto close; 1187 1188 return 0; 1189 close: 1190 ok = parse_close(&temp); 1191 if (!ok) return 0; 1192 1193 copy_token_cursor(&temp, tokens); 1194 1195 return 1; 1196 } 1197 1198 static int parse_attributes(struct token_cursor *tokens, 1199 struct cursor *attributes, 1200 int attr_inds[2]) 1201 { 1202 int ok; 1203 int index = -1; 1204 int first = 1; 1205 int parsed = 1; 1206 struct attribute attr; 1207 1208 1209 while (1) { 1210 ok = parse_attribute(tokens, &attr); 1211 if (!ok) break; 1212 1213 index = cursor_count(attributes, sizeof(attr)); 1214 ok = cursor_push(attributes, (u8*)&attr, sizeof(attr)); 1215 1216 if (!ok) { 1217 printf("attribute data overflow\n"); 1218 return 0; 1219 } 1220 1221 parsed++; 1222 1223 if (first) { 1224 first = 0; 1225 attr_inds[0] = index; 1226 } 1227 } 1228 1229 attr_inds[1] = index; 1230 1231 return parsed; 1232 } 1233 1234 /* 1235 static int parse_group(struct cursor *tokens) 1236 { 1237 int ok; 1238 struct tok_str str; 1239 1240 ok = pull_symbol_token(tokens, &str); 1241 if (!ok) return 0; 1242 1243 if (!memeq(str.data, str.len, "group", 5)) 1244 return 0; 1245 1246 parse_attributes(&tokens) 1247 } 1248 */ 1249 1250 1251 static int push_cell(struct cursor *cells, struct cell *cell, int *cell_index) 1252 { 1253 int index; 1254 int ok; 1255 1256 index = cursor_count(cells, sizeof(*cell)); 1257 1258 tokdebug("push_cell %d (%zu) %s\n", index, cells->p - cells->start, cell_type_str(cell->type)); 1259 1260 if (index > 0xFFFF) { 1261 /* TODO: actual error message here */ 1262 printf("push_cell_child overflow\n"); 1263 return 0; 1264 } 1265 1266 ok = cursor_push(cells, (u8*)cell, sizeof(*cell)); 1267 if (!ok) return 0; 1268 1269 if (cell_index) 1270 *cell_index = index; 1271 1272 return 1; 1273 } 1274 1275 static void copy_parser(struct parser *from, struct parser *to) 1276 { 1277 copy_token_cursor(&from->tokens, &to->tokens); 1278 copy_cursor(&from->cells, &to->cells); 1279 copy_cursor(&from->attributes, &to->attributes); 1280 } 1281 1282 static int push_cell_child(struct cell *parent, int child_ind) 1283 { 1284 int ok; 1285 struct cursor child_inds; 1286 1287 make_cursor((u8*)parent->children, 1288 (u8*)parent->children + sizeof(parent->children), 1289 &child_inds); 1290 1291 child_inds.p += parent->n_children * sizeof(parent->children[0]); 1292 1293 ok = cursor_push_u16(&child_inds, child_ind); 1294 if (!ok) return 0; 1295 1296 parent->n_children++; 1297 1298 return 1; 1299 } 1300 1301 const char *object_type_str(enum object_type type) 1302 { 1303 switch (type) { 1304 case O_DOOR: return "door"; 1305 case O_TABLE: return "table"; 1306 case O_CHAIR: return "chair"; 1307 case O_LIGHT: return "light"; 1308 case O_OBJECT: return ""; 1309 } 1310 1311 return "unknown"; 1312 } 1313 1314 const char *cell_type_str(enum cell_type type) 1315 { 1316 switch (type) { 1317 case C_GROUP: return "group"; 1318 case C_SPACE: return "space"; 1319 case C_OBJECT: return "object"; 1320 case C_ROOM: return "room"; 1321 } 1322 1323 return "unknown"; 1324 } 1325 1326 static int parse_cell_attrs(struct parser *parser, int *index, struct cell *cell) 1327 { 1328 struct cursor cell_attr_inds; 1329 struct cell *child_cell; 1330 int child_cell_index; 1331 int attr_inds[2] = {0}; 1332 int i, ok; 1333 1334 make_cursor((u8*)cell->attributes, 1335 (u8*)cell->attributes + sizeof(cell->attributes), 1336 &cell_attr_inds); 1337 1338 cell_attr_inds.p += cell->n_attributes * sizeof(cell->attributes[0]); 1339 1340 /* 0 attributes returns 1, 1 attrs returns 2, etc 1341 0 is a real error, an attribute push overflow */ 1342 ok = parse_attributes(&parser->tokens, &parser->attributes, attr_inds); 1343 if (!ok) return 0; 1344 1345 tokdebug("parse_attributes %d\n", ok); 1346 1347 for (i = attr_inds[0]; i <= attr_inds[1]; i++) { 1348 ok = cursor_push_u16(&cell_attr_inds, i); 1349 if (!ok) return 0; 1350 cell->n_attributes++; 1351 } 1352 1353 /* Optional child cell */ 1354 tokdebug("optional child cell in parse_cell_attrs\n"); 1355 ok = parse_cell(parser, &child_cell_index); 1356 if (ok) { 1357 if (!(child_cell = get_cell(&parser->cells, child_cell_index))) 1358 return 0; 1359 tokdebug("parse_cell_attrs push child cell\n"); 1360 if (!push_cell_child(cell, child_cell_index)) 1361 return 0; 1362 1363 } 1364 else { 1365 tokdebug("no child cells found\n"); 1366 } 1367 1368 ok = push_cell(&parser->cells, cell, index); 1369 if (!ok) return 0; 1370 1371 return 1; 1372 } 1373 1374 static int parse_cell_by_name(struct parser *parser, 1375 int *index, 1376 const char *name, 1377 enum cell_type type) 1378 { 1379 int ok; 1380 struct parser backtracked; 1381 struct cell cell; 1382 int ind; 1383 1384 init_cell(&cell); 1385 1386 copy_parser(parser, &backtracked); 1387 1388 cell.type = type; 1389 1390 ok = parse_symbol(&backtracked.tokens, name); 1391 if (!ok) return 0; 1392 1393 ok = parse_cell_attrs(&backtracked, &ind, &cell); 1394 if (!ok) return 0; 1395 1396 if (index) 1397 *index = ind; 1398 1399 copy_parser(&backtracked, parser); 1400 1401 return 1; 1402 } 1403 1404 static int parse_space(struct parser *parser, int *index) 1405 { 1406 return parse_cell_by_name(parser, index, "space", C_SPACE); 1407 } 1408 1409 static int parse_room(struct parser *parser, int *index) 1410 { 1411 return parse_cell_by_name(parser, index, "room", C_ROOM); 1412 } 1413 1414 static int parse_group(struct parser *parser, int *index) 1415 { 1416 int ncells = 0; 1417 int child_ind; 1418 1419 struct parser backtracked; 1420 struct cell group; 1421 struct cell *child_cell; 1422 1423 init_cell(&group); 1424 1425 copy_parser(parser, &backtracked); 1426 1427 if (!parse_symbol(&backtracked.tokens, "group")) 1428 return 0; 1429 1430 while (1) { 1431 if (!parse_cell(&backtracked, &child_ind)) 1432 break; 1433 1434 child_cell = get_cell(&backtracked.cells, child_ind); 1435 if (child_cell == NULL) { 1436 printf("UNUSUAL: group get_cell was NULL\n"); 1437 return 0; 1438 } 1439 1440 tokdebug("group child cell type %s\n", cell_type_str(child_cell->type)); 1441 if (!push_cell_child(&group, child_ind)) 1442 return 0; 1443 1444 ncells++; 1445 } 1446 1447 tokdebug("parse_group cells %d\n", ncells); 1448 1449 if (ncells == 0) 1450 return 0; 1451 1452 group.type = C_GROUP; 1453 1454 if (!push_cell(&backtracked.cells, &group, index)) 1455 return 0; 1456 1457 copy_parser(&backtracked, parser); 1458 1459 return ncells; 1460 } 1461 1462 struct object_def { 1463 const char *name; 1464 enum object_type type; 1465 }; 1466 1467 static struct object_def object_defs[] = { 1468 {"table", O_TABLE}, 1469 {"chair", O_CHAIR}, 1470 {"door", O_DOOR}, 1471 {"light", O_LIGHT}, 1472 {"obj", O_OBJECT}, 1473 }; 1474 1475 static int parse_object(struct parser *parser, int *index) 1476 { 1477 int i; 1478 struct parser backtracked; 1479 struct tok_str str; 1480 struct object_def *def; 1481 struct cell cell; 1482 int ind; 1483 1484 init_cell(&cell); 1485 cell.type = C_OBJECT; 1486 1487 copy_parser(parser, &backtracked); 1488 1489 if (!pull_symbol_token(&backtracked.tokens, &str)) 1490 return 0; 1491 1492 for (i = 0; i < ARRAY_SIZE(object_defs); i++) { 1493 def = &object_defs[i]; 1494 1495 if (symbol_eq(&str, def->name, strlen(def->name))) { 1496 cell.obj_type = def->type; 1497 break; 1498 } 1499 } 1500 1501 if (!parse_cell_attrs(&backtracked, &ind, &cell)) 1502 return 0; 1503 1504 if (index) 1505 *index = ind; 1506 1507 copy_parser(&backtracked, parser); 1508 1509 return 1; 1510 } 1511 1512 int parse_cell(struct parser *parser, int *index) 1513 { 1514 int ok; 1515 struct parser backtracked; 1516 1517 /* mostly needed for parse_open and parse_close */ 1518 copy_parser(parser, &backtracked); 1519 1520 ok = parse_open(&backtracked.tokens); 1521 if (!ok) { 1522 tokdebug("parse_open failed in parse_cell\n"); 1523 return 0; 1524 } 1525 1526 ok = parse_group(&backtracked, index); 1527 if (ok) { 1528 tokdebug("got parse_group\n"); 1529 goto close; 1530 } 1531 1532 /* print_current_token(&backtracked.tokens); */ 1533 1534 ok = parse_room(&backtracked, index); 1535 if (ok) { 1536 tokdebug("got parse_room\n"); 1537 goto close; 1538 } 1539 1540 ok = parse_space(&backtracked, index); 1541 if (ok) { 1542 tokdebug("got parse_space\n"); 1543 goto close; 1544 } 1545 1546 ok = parse_object(&backtracked, index); 1547 if (ok) goto close; 1548 1549 return 0; 1550 close: 1551 ok = parse_close(&backtracked.tokens); 1552 if (!ok) return 0; 1553 1554 copy_parser(&backtracked, parser); 1555 1556 return 1; 1557 } 1558 1559 1560 int init_parser(struct parser *parser) 1561 { 1562 struct cursor mem; 1563 u8 *pmem; 1564 int ok; 1565 1566 int attrs_size = sizeof(struct attribute) * 1024; 1567 int tokens_size = 2048*256; 1568 int cells_size = sizeof(struct cell) * 1024; 1569 int memsize = attrs_size + tokens_size + cells_size; 1570 1571 if (!(pmem = calloc(1, memsize))) { 1572 return 0; 1573 } 1574 1575 make_cursor(pmem, pmem + memsize, &mem); 1576 1577 ok = 1578 cursor_slice(&mem, &parser->cells, cells_size) && 1579 cursor_slice(&mem, &parser->attributes, attrs_size) && 1580 cursor_slice(&mem, &parser->tokens.c, tokens_size); 1581 assert(ok); 1582 1583 init_token_cursor(&parser->tokens); 1584 1585 return 1; 1586 } 1587 1588 int free_parser(struct parser *parser) 1589 { 1590 free(parser->cells.start); 1591 1592 return 1; 1593 } 1594 1595 int parse_buffer(struct parser *parser, u8 *file_buf, int len, int *root) 1596 { 1597 int ok; 1598 1599 ok = tokenize_cells(file_buf, len, &parser->tokens); 1600 1601 if (!ok) { 1602 printf("failed to tokenize\n"); 1603 return 0; 1604 } 1605 1606 ok = parse_cell(parser, root); 1607 if (!ok) { 1608 print_token_error(&parser->tokens); 1609 return 0; 1610 } 1611 1612 return 1; 1613 } 1614 1615 1616 int parse_file(struct parser *parser, const char *filename, int *root, u8 *buf, 1617 u32 bufsize) 1618 { 1619 int count, ok; 1620 1621 ok = read_file(filename, buf, bufsize, &count); 1622 1623 if (!ok) { 1624 printf("failed to load '%s'\n", filename); 1625 return 0; 1626 } 1627 1628 ok = parse_buffer(parser, buf, count, root); 1629 return ok; 1630 } 1631