protoverse

A metaverse protocol
git clone git://jb55.com/protoverse
Log | Files | Refs | README | LICENSE

parse.c (28062B)


      1 
      2 #include "util.h"
      3 #include "parse.h"
      4 #include "io.h"
      5 
      6 #include <stdio.h>
      7 #include <string.h>
      8 #include <ctype.h>
      9 #include <stdlib.h>
     10 #include <assert.h>
     11 
/* Debug tracing helpers. With DEBUG defined, tokdebug is an alias for
   printf. Without it, both tokdebug(...) and print_spaces(...) expand to
   nothing, so trace calls disappear from the build. */
#ifdef DEBUG
#define tokdebug printf
#else
#define tokdebug(...)
#define print_spaces(...)
#endif
     18 
/* Identifiers for symbols the format knows about.
   NOTE(review): none of these constants is referenced in the part of the
   file visible here -- confirm whether the enum is still needed. */
enum known_symbol {
	S_OBJECT,
	S_SPACE,
	S_OBJECTS,
	S_NAME,
	S_TYPE,
	S_SHAPE,
	S_WIDTH,
	S_DEPTH,
	S_HEIGHT,
	S_CONDITION,
	S_LOCATION,
	S_MATERIAL,
};
     33 
/* Tokenizer state used by tokenize_cells: it starts in TS_OPEN and moves
   to TS_ATOM after the first '('.
   NOTE(review): TS_CLOSE is not used in the visible code. */
enum tok_state {
	TS_OPEN,
	TS_CLOSE,
	TS_ATOM,
};
     39 
/* Payload read back from the token buffer; symbols, strings and numbers
   all share the tok_str form. */
union token {
	struct tok_str str;
};
     43 
     44 
     45 #ifdef DEBUG
     46 #endif
     47 static const char *attr_type_str(enum attribute_type type)
     48 {
     49 	switch (type) {
     50 	case A_CONDITION: return "condition";
     51 	case A_DEPTH: return "depth";
     52 	case A_HEIGHT: return "height";
     53 	case A_ID: return "id";
     54 	case A_LOCATION: return "location";
     55 	case A_MATERIAL: return "material";
     56 	case A_COLOR: return "color";
     57 	case A_NAME: return "name";
     58 	case A_SHAPE: return "shape";
     59 	case A_TYPE: return "type";
     60 	case A_WIDTH: return "width";
     61 	case A_STATE: return "state";
     62 	case A_DATA: return "data";
     63 	}
     64 
     65 	return "unknown";
     66 }
     67 
     68 static const char *token_error_string(enum token_error err)
     69 {
     70 	switch (err) {
     71 	case TE_OK: return "all good";
     72 	case TE_STR_START_CHAR: return "string didn't start with \"";
     73 	case TE_SYM_START_CHAR: return "symbol didn't start with a-z";
     74 	case TE_NUM_START_CHAR: return "number didn't start with 0-9 or -";
     75 	case TE_SYM_CHAR: return "invalid symbol character";
     76 	case TE_NUM_CHAR: return "invalid number character";
     77 	case TE_SYM_OVERFLOW: return "symbol push overflow";
     78 	case TE_UNEXPECTED_TOKEN: return "unexpected token during parsing";
     79 	case TE_UNEXPECTED_SYMBOL: return "unexpected symbol during parsing";
     80 	}
     81 
     82 	return "unknown";
     83 }
     84 
     85 static void copy_token_cursor(struct token_cursor *src, struct token_cursor *dest)
     86 {
     87 	copy_cursor(&src->c, &dest->c);
     88 	dest->err = src->err;
     89 	memcpy(&dest->err_data, &src->err_data, sizeof(src->err_data));
     90 }
     91 
     92 static void init_token_cursor(struct token_cursor *cursor)
     93 {
     94 	cursor->err = TE_OK;
     95 	memset(&cursor->err_data, 0, sizeof(cursor->err_data));
     96 }
     97 
     98 struct attribute *get_attr(struct cursor *attributes, int index)
     99 {
    100 	return (struct attribute*)index_cursor(attributes, index,
    101 					       sizeof(struct attribute));
    102 }
    103 
    104 /*
    105 */
    106 static const char *shape_str(enum shape shape)
    107 {
    108 	switch (shape) {
    109 	case SHAPE_RECTANGLE: return "rectangle";
    110 	case SHAPE_CIRCLE: return "circle";
    111 	case SHAPE_SQUARE: return "square";
    112 	}
    113 
    114 	return "unknown";
    115 }
    116 
    117 
    118 static void print_attribute(struct attribute *attr)
    119 {
    120 	printf("%s ", attr_type_str(attr->type));
    121 
    122 	switch (attr->type) {
    123 	case A_NAME:
    124 		printf("%.*s ", attr->data.str.len, attr->data.str.ptr);
    125 	        break;
    126 	case A_SHAPE:
    127 		printf("%s ", shape_str(attr->data.shape));
    128 		break;
    129 	default:
    130 		break;
    131 	}
    132 
    133 }
    134 
    135 static void print_attributes(struct cursor *attributes, struct cell *cell)
    136 {
    137 	int i;
    138 	struct attribute *attr;
    139 
    140 	printf("%d attrs: ", cell->n_attributes);
    141 	for (i = 0; i < cell->n_attributes; i++) {
    142 		attr = get_attr(attributes, cell->attributes[i]);
    143 		assert(attr);
    144 		printf("[%d]", cell->attributes[i]);
    145 		print_attribute(attr);
    146 	}
    147 }
    148 
    149 void print_cell(struct cursor *attributes, struct cell *cell)
    150 {
    151 	const char *name;
    152 	int name_len;
    153 
    154 	if (cell->type == C_GROUP) {
    155 		printf("---\n");
    156 		return;
    157 	}
    158 
    159 	cell_attr_str(attributes, cell, &name, &name_len, A_NAME);
    160 
    161 	printf("%.*s%s%s ", name_len, name, name_len > 0?" ":"",
    162 	       cell->type == C_OBJECT
    163 	       ? object_type_str(cell->obj_type)
    164 	       : cell_type_str(cell->type));
    165 
    166 	//print_attributes(attributes, cell);
    167 }
    168 
    169 
    170 static const char *token_type_str(enum token_type type)
    171 {
    172 	switch (type) {
    173 	case T_OPEN: return "(";
    174 	case T_CLOSE: return ")";
    175 	case T_SYMBOL: return "symbol";
    176 	case T_STRING: return "string";
    177 	case T_NUMBER: return "number";
    178 	}
    179 
    180 	printf("UNUSUAL: unknown token_type %d\n", type);
    181 
    182 	return "unknown";
    183 }
    184 
    185 void print_token_error(struct token_cursor *cursor)
    186 {
    187 	printf("error [%d]: ", cursor->err_data.pos);
    188 	if (cursor->err == TE_UNEXPECTED_TOKEN) {
    189 		printf("%s: expected '%s' got '%s'\n",
    190 		       token_error_string(cursor->err),
    191 		       token_type_str(cursor->err_data.lex.expected),
    192 		       token_type_str(cursor->err_data.lex.got));
    193 	}
    194 	else if (cursor->err == TE_UNEXPECTED_SYMBOL) {
    195 		printf("%s: expected symbol '%.*s' got '%.*s'\n",
    196 		       token_error_string(cursor->err),
    197 		       cursor->err_data.symbol.expected.len,
    198 		       cursor->err_data.symbol.expected.data,
    199 		       cursor->err_data.symbol.got.len,
    200 		       cursor->err_data.symbol.got.data);
    201 	}
    202 	else {
    203 		int is_chr_data = cursor->err == TE_STR_START_CHAR ||
    204 			cursor->err == TE_SYM_START_CHAR ||
    205 			cursor->err == TE_NUM_START_CHAR ||
    206 			cursor->err == TE_NUM_CHAR ||
    207 			cursor->err == TE_SYM_CHAR;
    208 
    209 		printf("\nerror: %s %.*s\n", token_error_string(cursor->err),
    210 		       is_chr_data?1:0, (char*)&cursor->err_data.c);
    211 	}
    212 
    213 }
    214 
    215 
    216 #ifdef DEBUG
    217 static void print_spaces(int n)
    218 {
    219 	int i;
    220 	for (i = 0; i < n; i++)
    221 		putchar(' ');
    222 }
    223 #endif
    224 
    225 static int push_token_data(struct token_cursor *tokens,
    226 			   enum token_type token,
    227 			   void *token_data, int token_data_size)
    228 {
    229 	static int depth = 0;
    230 #ifdef DEBUG
    231 	struct tok_str *str;
    232 #endif
    233 	if (!cursor_push_byte(&tokens->c, token))
    234 		return 0;
    235 
    236 	switch (token) {
    237 
    238 	case T_OPEN:
    239 		depth++;
    240 		break;
    241 
    242 	case T_CLOSE:
    243 		depth--;
    244 		break; /* nothing to write after these tokens */
    245 
    246 	case T_NUMBER:
    247 	case T_SYMBOL:
    248 
    249 		/* fallthrough */
    250 	case T_STRING:
    251 #ifdef DEBUG
    252 		print_spaces(depth*2);
    253 		str = (struct tok_str*)token_data;
    254 		if (token == T_STRING) {
    255 			tokdebug("str \"%.*s\"", str->len, str->data);
    256 		} else if (token == T_NUMBER) {
    257 			tokdebug("num %.*s", str->len, str->data);
    258 		} else {
    259 			tokdebug("sym %.*s", str->len, str->data);
    260 		}
    261 #endif
    262 		if (!cursor_push(&tokens->c, token_data, token_data_size)) {
    263 			printf("hmm? %d\n", token_data_size);
    264 			cursor_print_around(&tokens->c, 10);
    265 			return 0;
    266 		}
    267 #ifdef DEBUG
    268 		printf("\n");
    269 #endif
    270 		break;
    271 	}
    272 
    273 
    274 	return 1;
    275 }
    276 
    277 
    278 static int push_token(struct token_cursor *tokens, enum token_type token)
    279 {
    280 	return push_token_data(tokens, token, NULL, 0);
    281 }
    282 
    283 static int push_symbol(struct token_cursor *tokens, struct tok_str symbol)
    284 {
    285 	return push_token_data(tokens, T_SYMBOL, &symbol, sizeof(symbol));
    286 }
    287 
    288 static int push_string(struct token_cursor *tokens, struct tok_str str)
    289 {
    290 	return push_token_data(tokens, T_STRING, &str, sizeof(str));
    291 }
    292 
    293 static int push_number(struct token_cursor *tokens, struct tok_str str)
    294 {
    295 	return push_token_data(tokens, T_NUMBER, &str, sizeof(str));
    296 }
    297 
    298 
    299 static int is_start_symbol_char(char c)
    300 {
    301 	return c >= 'a' && c <= 'z';
    302 }
    303 
    304 static int is_symbol_char(char c)
    305 {
    306 	return is_start_symbol_char(c) || c == '-' || c == '_' ||
    307 		(c >= '0' && c <= '9');
    308 }
    309 
    310 static int pull_escaped_char(struct cursor *cursor, u8 *c)
    311 {
    312 	int ok;
    313 
    314 	ok = pull_byte(cursor, c);
    315 	if (!ok)
    316 		return 0;
    317 
    318 	if (*c != '\\') {
    319 		return 1;
    320 	}
    321 
    322 	ok = pull_byte(cursor, c);
    323 
    324 	/* we saw an escape char but input ended!? */
    325 	if (!ok) {
    326 		return 0;
    327 	}
    328 
    329 	return 2;
    330 }
    331 
    332 static int pull_number(struct token_cursor *cursor, u8 **start)
    333 {
    334 	int ok = 1;
    335 	int chars = 0;
    336 	u8 c;
    337 
    338 	struct cursor temp;
    339 
    340 	*start = temp.p = cursor->c.p;
    341 	temp.end = cursor->c.end;
    342 
    343 	while (1) {
    344 		ok = pull_byte(&temp, &c);
    345 		if (!ok) return 0;
    346 
    347 		/* first char should start with a letter */
    348 		if (chars == 0 && !isdigit(c) && c != '-') {
    349 			cursor->err = TE_NUM_START_CHAR;
    350 			cursor->err_data.c = c;
    351 			return 0;
    352 		} else if (chars > 0 && (isspace(c) || c == ')')) {
    353 			/* we got a number */
    354 			break;
    355 		} else if (chars > 0 && !isdigit(c) && c != '.') {
    356 			cursor->err = TE_NUM_CHAR;
    357 			cursor->err_data.c = c;
    358 			return 0;
    359 		}
    360 
    361 		chars++;
    362 
    363 		if (!ok) {
    364 			cursor->err = TE_SYM_OVERFLOW;
    365 			return 0;
    366 		}
    367 	}
    368 
    369 	if (!ok) {
    370 		cursor->err = TE_SYM_OVERFLOW;
    371 		return 0;
    372 	}
    373 
    374 	cursor->c.p = temp.p-1;
    375 	cursor->err = TE_OK;
    376 
    377 	/* remove the first counted quote since this was not pushed */
    378 	return chars;
    379 }
    380 
    381 static int pull_string(struct token_cursor *cursor, u8 **start, int *len)
    382 {
    383 	int ok = 1;
    384 	int chars = 0;
    385 	u8 c;
    386 
    387 	struct token_cursor temp;
    388 
    389 	copy_token_cursor(cursor, &temp);
    390 
    391 	while (1) {
    392 		if (chars == 0)
    393 			*start = temp.c.p+1;
    394 
    395 		ok = pull_escaped_char(&temp.c, &c);
    396 		if (!ok) return 0;
    397 
    398 
    399 		/* first char should start with a letter */
    400 		if (chars == 0 && c != '"') {
    401 			cursor->err = TE_STR_START_CHAR;
    402 			cursor->err_data.c = c;
    403 			return 0;
    404 		} else if (chars == 0 && c == '"') {
    405 			/* this increment will get removed at end */
    406 			chars++;
    407 			continue;
    408 		} else if (chars > 0 && c == '"' && ok == 1) {
    409 			/* ok == 2 would mean that it was escaped, so
    410 			   we're done here */
    411 			break;
    412 		}
    413 
    414 		if (!ok) {
    415 			cursor->err = TE_SYM_OVERFLOW;
    416 			return 0;
    417 		}
    418 	}
    419 
    420 	if (!ok) {
    421 		cursor->err = TE_SYM_OVERFLOW;
    422 		return 0;
    423 	}
    424 
    425 	copy_token_cursor(&temp, cursor);
    426 
    427 	/* remove the first counted quote since this was not pushed */
    428 	*len = temp.c.p - *start - 1;
    429 	return 1;
    430 }
    431 
    432 static int pull_symbol(struct token_cursor *cursor, u8 **start)
    433 {
    434 	int ok = 1;
    435 	int chars = 0;
    436 	u8 c;
    437 
    438 	struct token_cursor temp;
    439 
    440 	copy_token_cursor(cursor, &temp);
    441 
    442 	*start = temp.c.p;
    443 
    444 	while (1) {
    445 		ok = pull_byte(&temp.c, &c);
    446 		if (!ok) return 0;
    447 
    448 		/* first char should start with a letter */
    449 		if (chars == 0 && !is_start_symbol_char(c)) {
    450 			cursor->err = TE_SYM_START_CHAR;
    451 			cursor->err_data.c = c;
    452 			return 0;
    453 		} else if (chars > 0 && (isspace(c) || c == ')')) {
    454 			/* we're done here */
    455 			break;
    456 		} else if (chars > 0 && !is_symbol_char(c)) {
    457 			cursor->err = TE_SYM_CHAR;
    458 			cursor->err_data.c = c;
    459 			return 0;
    460 		}
    461 
    462 		chars++;
    463 
    464 		if (!ok) {
    465 			cursor->err = TE_SYM_OVERFLOW;
    466 			return 0;
    467 		}
    468 	}
    469 
    470 	if (!ok) {
    471 		cursor->err = TE_SYM_OVERFLOW;
    472 		return 0;
    473 	}
    474 
    475 	temp.c.p--;
    476 	copy_token_cursor(&temp, cursor);
    477 
    478 	return chars;
    479 }
    480 
    481 static void init_cell(struct cell *cell)
    482 {
    483 	memset(cell, 0, sizeof(*cell));
    484 }
    485 
    486 static int read_and_push_atom(struct token_cursor *cursor, struct token_cursor *tokens)
    487 {
    488 	struct tok_str str;
    489 	u8 *start;
    490 	int ok, len;
    491 
    492 	ok = pull_symbol(cursor, &start);
    493 	if (ok) {
    494 		str.len  = ok;
    495 		str.data = start;
    496 		if (!push_symbol(tokens, str)) {
    497 			cursor_print_around(&tokens->c, 10);
    498 			printf("read_and_push_atom identifier push overflow\n");
    499 			return 0;
    500 		}
    501 		return 1;
    502 	}
    503 
    504 	ok = pull_string(cursor, &start, &len);
    505 	if (ok) {
    506 		str.len  = len;
    507 		str.data = start;
    508 		ok = push_string(tokens, str);
    509 		if (!ok) {
    510 			printf("read_and_push_atom string push overflow\n");
    511 			return 0;
    512 		}
    513 		return 1;
    514 	}
    515 
    516 	start = cursor->c.p;
    517 	ok = pull_number(cursor, &start);
    518 	if (ok) {
    519 		str.len  = ok;
    520 		str.data = start;
    521 		ok = push_number(tokens, str);
    522 		if (!ok) {
    523 			printf("read_and_push_atom number push overflow\n");
    524 			return 0;
    525 		}
    526 		return 1;
    527 	}
    528 
    529 	/* TODO: read_number */
    530 
    531 	return 0;
    532 }
    533 
    534 int tokenize_cells(u8 *buf, int buf_size, struct token_cursor *tokens)
    535 {
    536 	enum tok_state state;
    537 	struct token_cursor cursor;
    538 	/* u8 *start = buf; */
    539 	u8 *token_buf = tokens->c.p;
    540 	u8 c;
    541 	int ok;
    542 
    543 	cursor.c.p = buf;
    544 	cursor.c.end = buf + buf_size;
    545 
    546 	state = TS_OPEN;
    547 
    548 	while (cursor.c.p < cursor.c.end) {
    549 		ok = pull_byte(&cursor.c, &c);
    550 		if (!ok) break;
    551 
    552 		if (state == TS_OPEN) {
    553 			if (isspace(c))
    554 				continue;
    555 
    556 			if (c != '(') {
    557 				printf("expected '(' or whitespace\n");
    558 				return 0;
    559 			}
    560 
    561 			push_token(tokens, T_OPEN);
    562 			state = TS_ATOM;
    563 			continue;
    564 		}
    565 		else if (state == TS_ATOM) {
    566 			if (isspace(c))
    567 				continue;
    568 
    569 			if (c == '(') {
    570 				push_token(tokens, T_OPEN);
    571 				continue;
    572 			}
    573 
    574 			if (c == ')') {
    575 				push_token(tokens, T_CLOSE);
    576 				continue;
    577 			}
    578 
    579 			cursor.c.p--;
    580 			/* printf("\nat %c (%ld) before reading atom\n", *cursor.p, cursor.p - start); */
    581 			ok = read_and_push_atom(&cursor, tokens);
    582 			if (!ok) {
    583 				print_token_error(&cursor);
    584 				return 0;
    585 			}
    586 
    587 		}
    588 	}
    589 
    590 	/* just seal the buffer now since we won't be adding to it */
    591 	tokens->c.end = tokens->c.p;
    592 	tokens->c.p = token_buf;
    593 
    594 	return 1;
    595 }
    596 
    597 
    598 static int pull_token_data(struct cursor *tokens, union token *token,
    599 			   enum token_type type)
    600 {
    601 	int ok;
    602 
    603 	switch (type) {
    604 	case T_OPEN:
    605 	case T_CLOSE:
    606 		return 1;
    607 	case T_STRING:
    608 	case T_SYMBOL:
    609 	case T_NUMBER:
    610 		ok = cursor_pull(tokens, (void*)&token->str,
    611 				 sizeof(struct tok_str));
    612 		return ok;
    613 	}
    614 
    615 	return 0;
    616 }
    617 
    618 static int pull_token_type(struct cursor *cursor, enum token_type *type)
    619 {
    620 	int ok;
    621 	u8 c;
    622 
    623 	ok = pull_byte(cursor, &c);
    624 	if (!ok) return 0;
    625 
    626 	*type = (enum token_type)c;
    627 	return 1;
    628 }
    629 
    630 static int pull_token(struct token_cursor *tokens,
    631 		      union token *token,
    632 		      enum token_type expected_type)
    633 {
    634 	struct token_cursor temp;
    635 	enum token_type type;
    636 	int ok;
    637 
    638 	copy_token_cursor(tokens, &temp);
    639 
    640 	ok = pull_token_type(&temp.c, &type);
    641 	if (!ok) return 0;
    642 
    643 	if (type != expected_type) {
    644 		tokens->err = TE_UNEXPECTED_TOKEN;
    645 		tokens->err_data.lex.expected = expected_type;
    646 		tokens->err_data.lex.got = type;
    647 		tokens->err_data.pos = cursor_count(&temp.c, 1);
    648 		return 0;
    649 	}
    650 
    651 	ok = pull_token_data(&temp.c, token, type);
    652 	if (!ok) {
    653 		return 0;
    654 	}
    655 
    656 	copy_token_cursor(&temp, tokens);
    657 
    658 	return 1;
    659 }
    660 
    661 #ifdef DEBUG
    662 /*
    663 static void print_token_data(union token *token, enum token_type type)
    664 {
    665 	switch (type) {
    666 	case T_OPEN:
    667 		printf("(");
    668 		return;
    669 	case T_CLOSE:
    670 		printf(")");
    671 		return;
    672 	case T_NUMBER:
    673 	case T_SYMBOL:
    674 		printf("%.*s", token->str.len, token->str.data);
    675 		return;
    676 	case T_STRING:
    677 		printf("\"%.*s\"", token->str.len, token->str.data);
    678 		return;
    679 	}
    680 }
    681 
    682 
    683 static void print_current_token(struct token_cursor *tokens)
    684 {
    685 	struct token_cursor temp;
    686 	enum token_type type;
    687 	union token token;
    688 	int ok;
    689 
    690 	copy_token_cursor(tokens, &temp);
    691 
    692 	ok = pull_token_type(&temp.c, &type);
    693 	if (!ok) {
    694 		printf("could not peek token\n");
    695 		return;
    696 	}
    697 
    698 	printf("current token: %s ", token_type_str(type));
    699 
    700 	ok = pull_token_data(&temp.c, &token, type);
    701 	if (!ok) {
    702 		printf("[could not peek token data]\n");
    703 		return;
    704 	}
    705 
    706 	print_token_data(&token, type);
    707 	printf("\n");
    708 }
    709 */
    710 #endif
    711 
    712 /*
    713  *  PARSING
    714  */
    715 
    716 
    717 static int parse_open(struct token_cursor *tokens)
    718 {
    719 	return pull_token(tokens, NULL, T_OPEN);
    720 }
    721 
    722 static int parse_close(struct token_cursor *tokens)
    723 {
    724 	return pull_token(tokens, NULL, T_CLOSE);
    725 }
    726 
    727 static int parse_stringy_token(struct token_cursor *tokens,
    728 			       struct tok_str *str,
    729 			       enum token_type type)
    730 {
    731 	union token token;
    732 
    733 	if (!pull_token(tokens, &token, type))
    734 		return 0;
    735 
    736 	str->data = token.str.data;
    737 	str->len = token.str.len;
    738 
    739 	return 1;
    740 }
    741 
    742 
    743 static int pull_symbol_token(struct token_cursor *tokens, struct tok_str *str)
    744 {
    745 	return parse_stringy_token(tokens, str, T_SYMBOL);
    746 }
    747 
    748 static int pull_number_token(struct token_cursor *tokens, struct tok_str *str)
    749 {
    750 	return parse_stringy_token(tokens, str, T_NUMBER);
    751 }
    752 
    753 static int parse_number(struct token_cursor *tokens, union number *number)
    754 {
    755 	int ok;
    756 	struct tok_str str;
    757 	char *end;
    758 
    759 	ok = pull_number_token(tokens, &str);
    760 	if (!ok) return 0;
    761 
    762 	/* TODO: float numbers */
    763 	number->integer = strtol((char*)str.data, &end, 10);
    764 
    765 	if ((u8*)end != (str.data + str.len)) {
    766 		printf("parse_number failed\n");
    767 		return 0;
    768 	}
    769 
    770 	return 1;
    771 }
    772 
    773 
    774 struct cell *get_cell(struct cursor *cells, int index)
    775 {
    776 	return (struct cell*)index_cursor(cells, index,
    777 					  sizeof(struct cell));
    778 }
    779 
    780 
    781 static int symbol_eq(struct tok_str *a, const char *b, int b_len)
    782 {
    783 	return memeq(a->data, a->len, (char*)b, b_len);
    784 }
    785 
    786 static int parse_symbol(struct token_cursor *tokens, const char *match)
    787 {
    788 	struct tok_str str;
    789 
    790 	if (!pull_symbol_token(tokens, &str))
    791 		return 0;
    792 
    793 	if (!symbol_eq(&str, match, strlen(match)))
    794 		return 0;
    795 
    796 	return 1;
    797 }
    798 
    799 static int parse_shape(struct token_cursor *tokens, struct attribute *attr)
    800 {
    801 	struct token_cursor temp;
    802 	struct tok_str str;
    803 	int ok;
    804 
    805 	copy_token_cursor(tokens, &temp);
    806 
    807 	ok = parse_symbol(&temp, "shape");
    808 	if (!ok) return 0;
    809 
    810 	attr->type = A_SHAPE;
    811 
    812 	ok = pull_symbol_token(&temp, &str);
    813 	if (!ok) return 0;
    814 
    815 	if (symbol_eq(&str, "rectangle", 9)) {
    816 		attr->data.shape = SHAPE_RECTANGLE;
    817 	} else if (symbol_eq(&str, "circle", 6)) {
    818 		attr->data.shape = SHAPE_CIRCLE;
    819 	} else if (symbol_eq(&str, "square", 6)) {
    820 		attr->data.shape = SHAPE_SQUARE;
    821 	} else {
    822 		tokens->err = TE_UNEXPECTED_SYMBOL;
    823 		tokens->err_data.symbol.expected.data = (u8*)"rectangle";
    824 		tokens->err_data.symbol.expected.len = 9;
    825 		tokens->err_data.symbol.got.data = str.data;
    826 		tokens->err_data.symbol.got.len = str.len;
    827 	}
    828 
    829 	copy_token_cursor(&temp, tokens);
    830 
    831 	return 1;
    832 }
    833 
    834 static int parse_str_attr(struct token_cursor *tokens,
    835 			  struct attribute *attr,
    836 			  const char *sym,
    837 			  enum attribute_type type,
    838 			  enum token_type tok_type)
    839 {
    840 	struct token_cursor temp;
    841 	struct tok_str str;
    842 	struct bufstr *bufstr;
    843 
    844 	assert(tok_type == T_NUMBER || tok_type == T_SYMBOL || tok_type == T_STRING);
    845 
    846 	copy_token_cursor(tokens, &temp);
    847 
    848 	if (sym == NULL) {
    849 		if (!pull_symbol_token(&temp, &str))
    850 			return 0;
    851 
    852 		attr->data.data_attr.sym.ptr = (char*)str.data;
    853 		attr->data.data_attr.sym.len = str.len;
    854 
    855 		bufstr = &attr->data.data_attr.str;
    856 	} else {
    857 		if (!parse_symbol(&temp, sym))
    858 			return 0;
    859 		bufstr = &attr->data.str;
    860 	}
    861 
    862 	if (!parse_stringy_token(&temp, &str, tok_type))
    863 		return 0;
    864 
    865 	bufstr->ptr = (char*)str.data;
    866 	bufstr->len = str.len;
    867 	attr->type = type;
    868 
    869 #ifdef DEBUG
    870 	if (sym == NULL)
    871 		tokdebug("attribute %.*s %.*s\n",
    872 				attr->data.data_attr.sym.len,
    873 				attr->data.data_attr.sym.ptr,
    874 				str.len,
    875 				str.data);
    876 	else
    877 		tokdebug("attribute %s %.*s\n", sym, str.len, str.data);
    878 #endif
    879 
    880 	copy_token_cursor(&temp, tokens);
    881 
    882 	return 1;
    883 }
    884 
    885 static int parse_size(struct token_cursor *tokens, struct attribute *attr)
    886 {
    887 	struct token_cursor temp;
    888 	struct tok_str str;
    889 	int ok;
    890 
    891 	copy_token_cursor(tokens, &temp);
    892 
    893 	ok = pull_symbol_token(&temp, &str);
    894 	if (!ok) return 0;
    895 
    896 	if (symbol_eq(&str, "height", 6)) {
    897 		attr->type = A_HEIGHT;
    898 	} else if (symbol_eq(&str, "width", 5)) {
    899 		attr->type = A_WIDTH;
    900 	} else if (symbol_eq(&str, "depth", 5)) {
    901 		attr->type = A_DEPTH;
    902 	}
    903 
    904 	ok = parse_number(&temp, &attr->data.number);
    905 	if (!ok) return 0;
    906 
    907 	tokdebug("attribute %s %d\n",
    908 		 attr_type_str(attr->type),
    909 		 attr->data.number.integer);
    910 
    911 	copy_token_cursor(&temp, tokens);
    912 
    913 	return 1;
    914 }
    915 
    916 static struct bufstr *attr_bufstr(struct attribute *attr)
    917 {
    918 	if (attr->type == A_DATA)
    919 		return &attr->data.data_attr.str;
    920 	return &attr->data.str;
    921 }
    922 
    923 int cell_attr_str(struct cursor *attributes, struct cell *cell,
    924 		const char** name, int *len, enum attribute_type type)
    925 {
    926 	int i;
    927 	struct attribute *attr;
    928 	struct bufstr *bufstr;
    929 	*len = 0;
    930 	*name = "";
    931 
    932 	for (i = 0; i < cell->n_attributes; i++) {
    933 		attr = get_attr(attributes, cell->attributes[i]);
    934 		if (attr->type == type) {
    935 			bufstr = attr_bufstr(attr);
    936 			*name = bufstr->ptr;
    937 			*len = bufstr->len;
    938 			return 1;
    939 		}
    940 	}
    941 
    942 	return 0;
    943 }
    944 
    945 const char *cell_name(struct cursor *attributes, struct cell *cell, int *len)
    946 {
    947 	const char *name = NULL;
    948 	if (!cell_attr_str(attributes, cell, &name, len, A_NAME))
    949 		return NULL;
    950 	return name;
    951 }
    952 
    953 static int parse_attribute(struct token_cursor *tokens, struct attribute *attr)
    954 {
    955 	int ok;
    956 	struct token_cursor temp;
    957 
    958 	copy_token_cursor(tokens, &temp);
    959 
    960 	ok = parse_open(&temp);
    961 	if (!ok) return 0;
    962 
    963 	ok = parse_shape(&temp, attr);
    964 	if (ok) goto close;
    965 
    966 	ok = parse_str_attr(&temp, attr, "id", A_ID, T_SYMBOL);
    967 	if (ok) goto close;
    968 
    969 	ok = parse_str_attr(&temp, attr, "name", A_NAME, T_STRING);
    970 	if (ok) {
    971 		goto close;
    972 	}
    973 
    974 	ok = parse_str_attr(&temp, attr, "material", A_MATERIAL, T_STRING);
    975 	if (ok) goto close;
    976 
    977 	ok = parse_str_attr(&temp, attr, "color", A_COLOR, T_STRING);
    978 	if (ok) goto close;
    979 
    980 	/* TODO: parse multiple conditions */
    981 	ok = parse_str_attr(&temp, attr, "condition", A_CONDITION, T_STRING);
    982 	if (ok) goto close;
    983 
    984 	ok = parse_str_attr(&temp, attr, "location", A_LOCATION, T_SYMBOL);
    985 	if (ok) goto close;
    986 
    987 	ok = parse_str_attr(&temp, attr, "state", A_STATE, T_SYMBOL);
    988 	if (ok) goto close;
    989 
    990 	ok = parse_str_attr(&temp, attr, NULL, A_DATA, T_STRING);
    991 	if (ok) goto close;
    992 
    993 	ok = parse_size(&temp, attr);
    994 	if (ok) goto close;
    995 
    996 	return 0;
    997  close:
    998 	ok = parse_close(&temp);
    999 	if (!ok) return 0;
   1000 
   1001 	copy_token_cursor(&temp, tokens);
   1002 
   1003 	return 1;
   1004 }
   1005 
   1006 static int parse_attributes(struct token_cursor *tokens,
   1007 			    struct cursor *attributes,
   1008 			    int attr_inds[2])
   1009 {
   1010 	int ok;
   1011 	int index = -1;
   1012 	int first = 1;
   1013 	int parsed = 1;
   1014 	struct attribute attr;
   1015 
   1016 
   1017 	while (1) {
   1018 		ok = parse_attribute(tokens, &attr);
   1019 		if (!ok) break;
   1020 
   1021 		index = cursor_count(attributes, sizeof(attr));
   1022 		ok = cursor_push(attributes, (u8*)&attr, sizeof(attr));
   1023 
   1024 		if (!ok) {
   1025 			printf("attribute data overflow\n");
   1026 			return 0;
   1027 		}
   1028 
   1029 		parsed++;
   1030 
   1031 		if (first) {
   1032 			first = 0;
   1033 			attr_inds[0] = index;
   1034 		}
   1035 	}
   1036 
   1037 	attr_inds[1] = index;
   1038 
   1039 	return parsed;
   1040 }
   1041 
   1042 /*
   1043 static int parse_group(struct cursor *tokens)
   1044 {
   1045 	int ok;
   1046 	struct tok_str str;
   1047 
   1048 	ok = pull_symbol_token(tokens, &str);
   1049 	if (!ok) return 0;
   1050 
   1051 	if (!memeq(str.data, str.len, "group", 5))
   1052 		return 0;
   1053 
   1054 	parse_attributes(&tokens)
   1055 }
   1056 */
   1057 
   1058 
   1059 static int push_cell(struct cursor *cells, struct cell *cell, int *cell_index)
   1060 {
   1061 	int index;
   1062 	int ok;
   1063 
   1064 	index = cursor_count(cells, sizeof(*cell));
   1065 
   1066 	tokdebug("push_cell %d (%zu) %s\n", index, cells->p - cells->start, cell_type_str(cell->type));
   1067 
   1068 	if (index > 0xFFFF) {
   1069 		/* TODO: actual error message here */
   1070 		printf("push_cell_child overflow\n");
   1071 		return 0;
   1072 	}
   1073 
   1074 	ok = cursor_push(cells, (u8*)cell, sizeof(*cell));
   1075 	if (!ok) return 0;
   1076 
   1077 	if (cell_index)
   1078 		*cell_index = index;
   1079 
   1080 	return 1;
   1081 }
   1082 
   1083 static void copy_parser(struct parser *from, struct parser *to)
   1084 {
   1085 	copy_token_cursor(&from->tokens, &to->tokens);
   1086 	copy_cursor(&from->cells, &to->cells);
   1087 	copy_cursor(&from->attributes, &to->attributes);
   1088 }
   1089 
   1090 static int push_cell_child(struct cell *parent, int child_ind)
   1091 {
   1092 	int ok;
   1093 	struct cursor child_inds;
   1094 
   1095 	make_cursor((u8*)parent->children,
   1096 		    (u8*)parent->children + sizeof(parent->children),
   1097 		    &child_inds);
   1098 
   1099 	child_inds.p += parent->n_children * sizeof(parent->children[0]);
   1100 
   1101 	ok = cursor_push_u16(&child_inds, child_ind);
   1102 	if (!ok) return 0;
   1103 
   1104 	parent->n_children++;
   1105 
   1106 	return 1;
   1107 }
   1108 
   1109 const char *object_type_str(enum object_type type)
   1110 {
   1111 	switch (type) {
   1112 	case O_DOOR: return "door";
   1113 	case O_TABLE: return "table";
   1114 	case O_CHAIR: return "chair";
   1115 	case O_LIGHT: return "light";
   1116 	case O_OBJECT: return "";
   1117 	}
   1118 
   1119 	return "unknown";
   1120 }
   1121 
   1122 const char *cell_type_str(enum cell_type type)
   1123 {
   1124 	switch (type) {
   1125 	case C_GROUP: return "group";
   1126 	case C_SPACE: return "space";
   1127 	case C_OBJECT: return "object";
   1128 	case C_ROOM: return "room";
   1129 	}
   1130 
   1131 	return "unknown";
   1132 }
   1133 
   1134 static int parse_cell_attrs(struct parser *parser, int *index, struct cell *cell)
   1135 {
   1136 	struct cursor cell_attr_inds;
   1137 	struct cell *child_cell;
   1138 	int child_cell_index;
   1139 	int attr_inds[2] = {0};
   1140 	int i, ok;
   1141 
   1142 	make_cursor((u8*)cell->attributes,
   1143 		    (u8*)cell->attributes + sizeof(cell->attributes),
   1144 		    &cell_attr_inds);
   1145 
   1146 	cell_attr_inds.p += cell->n_attributes * sizeof(cell->attributes[0]);
   1147 
   1148 	/* 0 attributes returns 1, 1 attrs returns 2, etc
   1149 	   0 is a real error, an attribute push overflow */
   1150 	ok = parse_attributes(&parser->tokens, &parser->attributes, attr_inds);
   1151 	if (!ok) return 0;
   1152 
   1153 	tokdebug("parse_attributes %d\n", ok);
   1154 
   1155 	for (i = attr_inds[0]; i <= attr_inds[1]; i++) {
   1156 		ok = cursor_push_u16(&cell_attr_inds, i);
   1157 		if (!ok) return 0;
   1158 		cell->n_attributes++;
   1159 	}
   1160 
   1161 	/* Optional child cell */
   1162 	tokdebug("optional child cell in parse_cell_attrs\n");
   1163 	ok = parse_cell(parser, &child_cell_index);
   1164 	if (ok) {
   1165 		if (!(child_cell = get_cell(&parser->cells, child_cell_index)))
   1166 			return 0;
   1167 		tokdebug("parse_cell_attrs push child cell\n");
   1168 		if (!push_cell_child(cell, child_cell_index))
   1169 			return 0;
   1170 
   1171 	}
   1172 	else {
   1173 		tokdebug("no child cells found\n");
   1174 	}
   1175 
   1176 	ok = push_cell(&parser->cells, cell, index);
   1177 	if (!ok) return 0;
   1178 
   1179 	return 1;
   1180 }
   1181 
   1182 static int parse_cell_by_name(struct parser *parser,
   1183 			      int *index,
   1184 			      const char *name,
   1185 			      enum cell_type type)
   1186 {
   1187 	int ok;
   1188 	struct parser backtracked;
   1189 	struct cell cell;
   1190 	int ind;
   1191 
   1192 	init_cell(&cell);
   1193 
   1194 	copy_parser(parser, &backtracked);
   1195 
   1196 	cell.type = type;
   1197 
   1198 	ok = parse_symbol(&backtracked.tokens, name);
   1199 	if (!ok) return 0;
   1200 
   1201 	ok = parse_cell_attrs(&backtracked, &ind, &cell);
   1202 	if (!ok) return 0;
   1203 
   1204 	if (index)
   1205 		*index = ind;
   1206 
   1207 	copy_parser(&backtracked, parser);
   1208 
   1209 	return 1;
   1210 }
   1211 
   1212 static int parse_space(struct parser *parser, int *index)
   1213 {
   1214 	return parse_cell_by_name(parser, index, "space", C_SPACE);
   1215 }
   1216 
   1217 static int parse_room(struct parser *parser, int *index)
   1218 {
   1219 	return parse_cell_by_name(parser, index, "room", C_ROOM);
   1220 }
   1221 
   1222 static int parse_group(struct parser *parser, int *index)
   1223 {
   1224 	int ncells = 0;
   1225 	int child_ind;
   1226 
   1227 	struct parser backtracked;
   1228 	struct cell group;
   1229 	struct cell *child_cell;
   1230 
   1231 	init_cell(&group);
   1232 
   1233 	copy_parser(parser, &backtracked);
   1234 
   1235 	if (!parse_symbol(&backtracked.tokens, "group"))
   1236 		return 0;
   1237 
   1238 	while (1) {
   1239 		if (!parse_cell(&backtracked, &child_ind))
   1240 			break;
   1241 
   1242 		child_cell = get_cell(&backtracked.cells, child_ind);
   1243 		if (child_cell == NULL) {
   1244 			printf("UNUSUAL: group get_cell was NULL\n");
   1245 			return 0;
   1246 		}
   1247 
   1248 		tokdebug("group child cell type %s\n", cell_type_str(child_cell->type));
   1249 		if (!push_cell_child(&group, child_ind))
   1250 			return 0;
   1251 
   1252 		ncells++;
   1253 	}
   1254 
   1255 	tokdebug("parse_group cells %d\n", ncells);
   1256 
   1257 	if (ncells == 0)
   1258 		return 0;
   1259 
   1260 	group.type = C_GROUP;
   1261 
   1262 	if (!push_cell(&backtracked.cells, &group, index))
   1263 		return 0;
   1264 
   1265 	copy_parser(&backtracked, parser);
   1266 
   1267 	return ncells;
   1268 }
   1269 
   1270 struct object_def {
   1271 	const char *name;
   1272 	enum object_type type;
   1273 };
   1274 
   1275 static struct object_def object_defs[] = {
   1276 	{"table", O_TABLE},
   1277 	{"chair", O_CHAIR},
   1278 	{"door", O_DOOR},
   1279 	{"light", O_LIGHT},
   1280 	{"obj", O_OBJECT},
   1281 };
   1282 
   1283 static int parse_object(struct parser *parser, int *index)
   1284 {
   1285 	int i;
   1286 	struct parser backtracked;
   1287 	struct tok_str str;
   1288 	struct object_def *def;
   1289 	struct cell cell;
   1290 	int ind;
   1291 
   1292 	init_cell(&cell);
   1293 	cell.type = C_OBJECT;
   1294 
   1295 	copy_parser(parser, &backtracked);
   1296 
   1297 	if (!pull_symbol_token(&backtracked.tokens, &str))
   1298 		return 0;
   1299 
   1300 	for (i = 0; i < ARRAY_SIZE(object_defs); i++) {
   1301 		def = &object_defs[i];
   1302 
   1303 		if (symbol_eq(&str, def->name, strlen(def->name))) {
   1304 			cell.obj_type = def->type;
   1305 			break;
   1306 		}
   1307 	}
   1308 
   1309 	if (!parse_cell_attrs(&backtracked, &ind, &cell))
   1310 		return 0;
   1311 
   1312 	if (index)
   1313 		*index = ind;
   1314 
   1315 	copy_parser(&backtracked, parser);
   1316 
   1317 	return 1;
   1318 }
   1319 
   1320 int parse_cell(struct parser *parser, int *index)
   1321 {
   1322 	int ok;
   1323 	struct parser backtracked;
   1324 
   1325 	/* mostly needed for parse_open and parse_close */
   1326 	copy_parser(parser, &backtracked);
   1327 
   1328 	ok = parse_open(&backtracked.tokens);
   1329 	if (!ok) {
   1330 		tokdebug("parse_open failed in parse_cell\n");
   1331 		return 0;
   1332 	}
   1333 
   1334 	ok = parse_group(&backtracked, index);
   1335 	if (ok) {
   1336 		tokdebug("got parse_group\n");
   1337 		goto close;
   1338 	}
   1339 
   1340 	/* print_current_token(&backtracked.tokens); */
   1341 
   1342 	ok = parse_room(&backtracked, index);
   1343 	if (ok) {
   1344 		tokdebug("got parse_room\n");
   1345 		goto close;
   1346 	}
   1347 
   1348 	ok = parse_space(&backtracked, index);
   1349 	if (ok) {
   1350 		tokdebug("got parse_space\n");
   1351 		goto close;
   1352 	}
   1353 
   1354 	ok = parse_object(&backtracked, index);
   1355 	if (ok) goto close;
   1356 
   1357 	return 0;
   1358 close:
   1359 	ok = parse_close(&backtracked.tokens);
   1360 	if (!ok) return 0;
   1361 
   1362 	copy_parser(&backtracked, parser);
   1363 
   1364 	return 1;
   1365 }
   1366 
   1367 
   1368 int init_parser(struct parser *parser)
   1369 {
   1370 	struct cursor mem;
   1371 	u8 *pmem;
   1372 	int ok;
   1373 
   1374 	int attrs_size = sizeof(struct attribute) * 1024;
   1375 	int tokens_size = 2048*32;
   1376 	int cells_size = sizeof(struct cell) * 1024;
   1377 	int memsize = attrs_size + tokens_size + cells_size;
   1378 
   1379 	if (!(pmem = calloc(1, memsize))) {
   1380 		return 0;
   1381 	}
   1382 
   1383 	make_cursor(pmem, pmem + memsize, &mem);
   1384 
   1385 	ok =
   1386 		cursor_slice(&mem, &parser->cells, cells_size) &&
   1387 		cursor_slice(&mem, &parser->attributes, attrs_size) &&
   1388 		cursor_slice(&mem, &parser->tokens.c, tokens_size);
   1389 	assert(ok);
   1390 
   1391 	init_token_cursor(&parser->tokens);
   1392 
   1393 	return 1;
   1394 }
   1395 
   1396 int free_parser(struct parser *parser)
   1397 {
   1398 	free(parser->cells.start);
   1399 
   1400 	return 1;
   1401 }
   1402 
   1403 int parse_buffer(struct parser *parser, u8 *file_buf, int len, int *root)
   1404 {
   1405 	int ok;
   1406 
   1407 	ok = tokenize_cells(file_buf, len, &parser->tokens);
   1408 
   1409 	if (!ok) {
   1410 		printf("failed to tokenize\n");
   1411 		return 0;
   1412 	}
   1413 
   1414 	ok = parse_cell(parser, root);
   1415 	if (!ok) {
   1416 		print_token_error(&parser->tokens);
   1417 		return 0;
   1418 	}
   1419 
   1420 	return 1;
   1421 }
   1422 
   1423 
   1424 int parse_file(struct parser *parser, const char *filename, int *root, u8 *buf,
   1425 		u32 bufsize)
   1426 {
   1427 	int count, ok;
   1428 
   1429 	ok = read_file(filename, buf, bufsize, &count);
   1430 
   1431 	if (!ok) {
   1432 		printf("failed to load '%s'\n", filename);
   1433 		return 0;
   1434 	}
   1435 
   1436 	ok = parse_buffer(parser, buf, count, root);
   1437 	return ok;
   1438 }
   1439