protoverse

A metaverse protocol
git clone git://jb55.com/protoverse
Log | Files | Refs | README | LICENSE

commit 3d63f451583189466536139eeb41b00ac0ee1bc7
parent 4a08f846296f4f635467f479ee093d62fff9bb5b
Author: William Casarin <jb55@jb55.com>
Date:   Mon, 15 Jun 2020 00:28:21 -0700

better error messages, fix crash

Signed-off-by: William Casarin <jb55@jb55.com>

Diffstat:
Mdefault.nix | 2+-
Mparse.c | 270++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------
Mparse.h | 38+++++++++++++++++++++++++++-----------
Mprotoverse.c | 27+++++++++++++++++++++------
Msatoshis-citadel.space | 28++++++++++++----------------
5 files changed, 253 insertions(+), 112 deletions(-)

diff --git a/default.nix b/default.nix @@ -2,5 +2,5 @@ with pkgs; stdenv.mkDerivation { name = "project"; - nativeBuildInputs = [ ]; + nativeBuildInputs = [ gdb ]; } diff --git a/parse.c b/parse.c @@ -22,14 +22,6 @@ enum known_symbol { S_MATERIAL, }; -enum token_type { - T_OPEN, - T_CLOSE, - T_STRING, - T_SYMBOL, - T_NUMBER, -}; - enum tok_state { TS_OPEN, TS_CLOSE, @@ -64,8 +56,23 @@ enum attribute_type { A_LOCATION, }; +union attr_data { + struct { + const char *ptr; + int len; + } str; + int integer; + double fdouble; +}; + +struct attribute { + union attr_data data; + enum attribute_type type; +}; + struct cell { int attributes[MAX_ATTRIBUTES]; + struct cell *child; const char *name; const char *id; @@ -73,6 +80,24 @@ struct cell { enum cell_type type; }; +static void copy_cursor(struct cursor *src, struct cursor *dest) +{ + dest->start = src->start; + dest->p = src->p; + dest->end = src->end; + dest->err = src->err; + memcpy(&dest->err_data, &src->err_data, sizeof(src->err_data)); +} + +void make_cursor(u8 *start, u8 *end, struct cursor *cursor) +{ + cursor->start = start; + cursor->p = start; + cursor->end = end; + cursor->err = TE_OK; + memset(&cursor->err_data, 0, sizeof(cursor->err_data)); +} + static const char *token_error_string(enum token_error err) { switch (err) { @@ -83,20 +108,44 @@ static const char *token_error_string(enum token_error err) case TE_SYM_CHAR: return "invalid symbol character"; case TE_NUM_CHAR: return "invalid number character"; case TE_SYM_OVERFLOW: return "symbol push overflow"; + case TE_UNEXPECTED_TOKEN: return "unexpected token during parsing"; } return "unknown"; } -static void print_token_error(struct cursor *cursor) +static const char *token_type_str(enum token_type type) { - int is_chr_data = cursor->err == TE_STR_START_CHAR || - cursor->err == TE_SYM_START_CHAR || - cursor->err == TE_NUM_START_CHAR || - cursor->err == TE_NUM_CHAR || - cursor->err == TE_SYM_CHAR; - printf("\nerror: %s %.*s\n", token_error_string(cursor->err), - is_chr_data?1:0, (char*)&cursor->err_data.c); + switch (type) { + case T_OPEN: return "("; + case T_CLOSE: return ")"; + case T_SYMBOL: return "symbol"; + case T_STRING: return "string"; + case T_NUMBER: return "number"; + } + + return "unknown"; +} + +void print_token_error(struct cursor *cursor) +{ + if (cursor->err == TE_UNEXPECTED_TOKEN) { + printf("error: %s: expected '%s' got '%s'\n", + token_error_string(cursor->err), + token_type_str(cursor->err_data.parse.expected), + token_type_str(cursor->err_data.parse.got)); + } + else { + int is_chr_data = cursor->err == TE_STR_START_CHAR || + cursor->err == TE_SYM_START_CHAR || + cursor->err == TE_NUM_START_CHAR || + cursor->err == TE_NUM_CHAR || + cursor->err == TE_SYM_CHAR; + + printf("\nerror: %s %.*s\n", token_error_string(cursor->err), + is_chr_data?1:0, (char*)&cursor->err_data.c); + } + } static int pull_byte(struct cursor *cursor, u8 *c) @@ -255,21 +304,17 @@ static int pull_escaped_char(struct cursor *cursor, u8 *c) return 2; } -static int pull_number(struct cursor *cursor, u8 *buf, int buf_len) +static int pull_number(struct cursor *cursor, u8 **start) { int ok = 1; int chars = 0; u8 c; struct cursor temp; - struct cursor buf_cursor; - temp.p = cursor->p; + *start = temp.p = cursor->p; temp.end = cursor->end; - buf_cursor.p = buf; - buf_cursor.end = buf + buf_len; - while (1) { ok = pull_byte(&temp, &c); if (!ok) return 0; @@ -287,7 +332,7 @@ static int pull_number(struct cursor *cursor, u8 *buf, int buf_len) cursor->err_data.c = c; return 0; } - ok = push_byte(&buf_cursor, c); + chars++; if (!ok) { @@ -296,9 +341,6 @@ static int pull_number(struct cursor *cursor, u8 *buf, int buf_len) } } - ok = push_byte(&buf_cursor, 0); - chars++; - if (!ok) { cursor->err = TE_SYM_OVERFLOW; return 0; @@ -311,25 +353,23 @@ static int pull_number(struct cursor *cursor, u8 *buf, int buf_len) return chars; } -static int pull_string(struct cursor *cursor, u8 *buf, int buf_len) +static int pull_string(struct cursor *cursor, u8 **start) { int ok = 1; int chars = 0; u8 c; struct cursor temp; - struct cursor buf_cursor; - temp.p = cursor->p; - temp.end = cursor->end; - - buf_cursor.p = buf; - buf_cursor.end = buf + buf_len; + copy_cursor(cursor, &temp); while (1) { ok = pull_escaped_char(&temp, &c); if (!ok) return 0; + if (chars == 1) + *start = temp.p; + /* first char should start with a letter */ if (chars == 0 && c != '"') { cursor->err = TE_STR_START_CHAR; @@ -345,7 +385,6 @@ static int pull_string(struct cursor *cursor, u8 *buf, int buf_len) break; } - ok = push_byte(&buf_cursor, c); chars++; if (!ok) { @@ -354,35 +393,28 @@ static int pull_string(struct cursor *cursor, u8 *buf, int buf_len) } } - ok = push_byte(&buf_cursor, 0); - chars++; - if (!ok) { cursor->err = TE_SYM_OVERFLOW; return 0; } - cursor->p = temp.p; - cursor->err = TE_OK; + copy_cursor(&temp, cursor); /* remove the first counted quote since this was not pushed */ return --chars; } -static int pull_symbol(struct cursor *cursor, u8 *buf, int buf_len) +static int pull_symbol(struct cursor *cursor, u8 **start) { int ok = 1; int chars = 0; u8 c; struct cursor temp; - struct cursor sym_cursor; - temp.p = cursor->p; - temp.end = cursor->end; + copy_cursor(cursor, &temp); - sym_cursor.p = buf; - sym_cursor.end = buf + buf_len; + *start = temp.p; while (1) { ok = pull_byte(&temp, &c); @@ -402,7 +434,6 @@ static int pull_symbol(struct cursor *cursor, u8 *buf, int buf_len) return 0; } - ok = push_byte(&sym_cursor, c); chars++; if (!ok) { @@ -411,31 +442,27 @@ static int pull_symbol(struct cursor *cursor, u8 *buf, int buf_len) } } - ok = push_byte(&sym_cursor, 0); - chars++; - if (!ok) { cursor->err = TE_SYM_OVERFLOW; return 0; } - cursor->p = temp.p-1; - cursor->end = temp.end; - cursor->err = TE_OK; + temp.p--; + copy_cursor(&temp, cursor); return chars; } static int read_and_push_atom(struct cursor *cursor, struct cursor *tokens) { - u8 buf[255]; struct tok_str str; + u8 *start; int ok; - ok = pull_symbol(cursor, buf, sizeof(buf)); + ok = pull_symbol(cursor, &start); if (ok) { str.len = ok; - str.data = buf; + str.data = start; ok = push_symbol(tokens, str); if (!ok) { printf("read_and_push_atom identifier push overflow\n"); @@ -444,11 +471,10 @@ static int read_and_push_atom(struct cursor *cursor, struct cursor *tokens) return 1; } - - ok = pull_string(cursor, buf, sizeof(buf)); + ok = pull_string(cursor, &start); if (ok) { str.len = ok; - str.data = buf; + str.data = start; ok = push_string(tokens, str); if (!ok) { printf("read_and_push_atom string push overflow\n"); @@ -457,10 +483,11 @@ static int read_and_push_atom(struct cursor *cursor, struct cursor *tokens) return 1; } - ok = pull_number(cursor, buf, sizeof(buf)); + start = cursor->p; + ok = pull_number(cursor, &start); if (ok) { str.len = ok; - str.data = buf; + str.data = start; ok = push_number(tokens, str); if (!ok) { printf("read_and_push_atom number push overflow\n"); @@ -474,26 +501,23 @@ static int read_and_push_atom(struct cursor *cursor, struct cursor *tokens) return 0; } -int tokenize_space(u8 *buf, int buf_size, u8 *token_buf, - int token_buf_size, struct cursor *tokens) +int tokenize_cells(u8 *buf, int buf_size, struct cursor *tokens) { enum tok_state state; struct cursor cursor; /* u8 *start = buf; */ + u8 *token_buf = tokens->p; u8 c; int ok; cursor.p = buf; cursor.end = buf + buf_size; - tokens.p = token_buf; - tokens.end = token_buf + token_buf_size; - state = TS_OPEN; while (cursor.p < cursor.end) { ok = pull_byte(&cursor, &c); - if (!ok) return 0; + if (!ok) break; if (state == TS_OPEN) { if (isspace(c)) @@ -533,6 +557,10 @@ int tokenize_space(u8 *buf, int buf_size, u8 *token_buf, } } + /* just seal the buffer now since we won't be adding to it */ + tokens->end = tokens->p; + tokens->p = token_buf; + return 1; } @@ -566,20 +594,22 @@ static int pull_token(struct cursor *tokens, u8 c; int ok; - temp.p = tokens->p; - temp.end = tokens->end; + copy_cursor(tokens, &temp); ok = pull_byte(&temp, &c); if (!ok) return 0; type = (enum token_type)c; - ok = pull_token_data(&temp, token, type); - if (!ok) { + if (type != expected_type) { + tokens->err = TE_UNEXPECTED_TOKEN; + tokens->err_data.parse.expected = expected_type; + tokens->err_data.parse.got = type; return 0; } - if (type != expected_type) { + ok = pull_token_data(&temp, token, type); + if (!ok) { return 0; } @@ -612,24 +642,108 @@ static int pull_symbol_token(struct cursor *tokens, struct tok_str *str) return 1; } -int parse_cell(struct cursor *tokens) +static int memeq(void *buf, int buf_len, void *buf2, int buf2_len) +{ + if (buf_len != buf2_len) + return 0; + + return memcmp(buf, buf2, buf_len) == 0; +} + +static int parse_attribute(struct cursor *tokens, struct attribute attr) { - struct cursor tokens; + int ok; struct tok_str str; + + (void)attr; + + ok = pull_symbol_token(tokens, &str); + if (!ok) return 0; + return 1; +} + + +/* +static int parse_attributes(struct cursor *tokens, + struct cursor *attributes, + struct cursor *attr_ids, + int *parsed_attrs) +{ + int ok; + struct attribute attr; + + while (1) { + ok = parse_attribute(tokens, &attr); + + if (!ok) return 0; + + push_data() + } + + ok = pull_symbol_token(tokens, &str); + if (!ok) return 0; +} +*/ + +/* +static int parse_group(struct cursor *tokens) +{ + int ok; + struct tok_str str; + + ok = pull_symbol_token(tokens, &str); + if (!ok) return 0; + + if (!memeq(str.data, str.len, "group", 5)) + return 0; + + parse_attributes(&tokens) +} +*/ + +static int parse_room(struct cursor *tokens, struct cell *cell) +{ int ok; + struct tok_str str; - tokens.p = token_buf; - tokens.end = token_buf + token_buf_size; + ok = pull_symbol_token(tokens, &str); + if (!ok) return 0; + + if (!memeq(str.data, str.len, "room", 5)) + return 0; + + parse_attributes(&tokens); + + parse_object(tokens, cell); +} + +static int parse_cell(struct cursor *tokens, struct cell *cell) +{ + int ok; + /* ok = parse_group(tokens, cell); */ + /* if (ok) return 1; */ + + ok = parse_room(tokens, cell); + if (ok) return 1; + + /* ok = parse_object(tokens, cell); */ + + return 0; +} + +int parse_cells(struct cursor *tokens) +{ + struct tok_str str; + int ok; while (1) { - ok = pull_open(&tokens); + ok = pull_open(tokens); if (!ok) return 0; /* cell identifier */ - ok = pull_symbol_token(&tokens, &str); + ok = parse_cell(tokens, &str); if (!ok) return 0; - printf("got token: %.*s\n", str.len, str.data); } diff --git a/parse.h b/parse.h @@ -4,15 +4,6 @@ #include "typedefs.h" -struct cursor { - u8 *p; - u8 *end; - enum token_error err; - union { - char c; - } err_data; -}; - enum token_error { TE_OK, TE_STR_START_CHAR, @@ -20,11 +11,36 @@ enum token_error { TE_SYM_START_CHAR, TE_SYM_CHAR, TE_NUM_CHAR, + TE_UNEXPECTED_TOKEN, TE_SYM_OVERFLOW, }; +enum token_type { + T_OPEN, + T_CLOSE, + T_STRING, + T_SYMBOL, + T_NUMBER, +}; + + +struct cursor { + u8 *start; + u8 *p; + u8 *end; + enum token_error err; + union { + char c; + struct { + enum token_type expected; + enum token_type got; + } parse; + } err_data; +}; -int tokenize_space(unsigned char *buf, int buf_size, u8 *token_buf, int token_buf_size, struct cursor *tokens); -int parse_cell(u8 *token_buf, int token_buf_size); +void make_cursor(u8 *start, u8 *end, struct cursor *cursor); +int tokenize_cells(unsigned char *buf, int buf_size, struct cursor *tokens); +int parse_cells(struct cursor *tokens); +void print_token_error(struct cursor *cursor); #endif /* PROTOVERSE_PARSE_H */ diff --git a/protoverse.c b/protoverse.c @@ -2,25 +2,40 @@ #include "io.h" #include "parse.h" +#include <assert.h> int main(int argc, const char *argv[]) { static u8 file_buf[4096]; static u8 token_buf[2048]; struct cursor tokens; - size_t count; + const char *space; + int ok; + + tokens.p = token_buf; + tokens.end = token_buf + sizeof(token_buf); - const char *space = argc == 2 ? argv[1] : "satoshis-citadel.space"; - int ok = read_file(space, file_buf, sizeof(file_buf), &count); + space = argc == 2 ? argv[1] : "satoshis-citadel.space"; + ok = read_file(space, file_buf, sizeof(file_buf), &count); if (!ok) { printf("failed to load '%s'\n", space); return 1; } - tokenize_space(file_buf, count, token_buf, sizeof(token_buf), - &tokens); - parse_cell(&tokens); + ok = tokenize_cells(file_buf, count, &tokens); + + if (!ok) { + printf("failed to tokenize\n"); + return 1; + } + + assert(tokens.p == token_buf); + + ok = parse_cells(&tokens); + if (!ok) { + print_token_error(&tokens); + } return 0; } diff --git a/satoshis-citadel.space b/satoshis-citadel.space @@ -1,17 +1,13 @@ -(group (name "\"Satoshi's\" Citadel") - (room (name "Foyer") - (shape rectangle) - (width 10) (depth 10) (height 100) - (group - (table - (id welcome-desk) - (name "Welcome desk") - (material marble) - (condition clean new) - (width 1) (depth 2) (height 1) - (location center) - ) - ) - ) -) +(room (name "\"Satoshi's\" Citadel") + (shape rectangle) + (width 10) (depth 10) (height 100) + (group + (table + (id welcome-desk) + (name "Welcome desk") + (material marble) + (condition clean new) + (width 1) (depth 2) (height 1) + (location center) + )))