protoverse

A metaverse protocol
git clone git://jb55.com/protoverse
Log | Files | Refs | README | LICENSE

commit 89054e9f6ed2dd1c00b2affd56357fb401619e3a
parent a27d0c4dc830cda4112a2cc018a062d0edf2766d
Author: William Casarin <jb55@jb55.com>
Date:   Sun, 14 Jun 2020 13:29:20 -0700

example is lexing!

Signed-off-by: William Casarin <jb55@jb55.com>

Diffstat:
Mparse.c | 101++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 87 insertions(+), 14 deletions(-)

diff --git a/parse.c b/parse.c @@ -9,8 +9,10 @@ enum token_error { TE_OK, TE_STR_START_CHAR, + TE_NUM_START_CHAR, TE_SYM_START_CHAR, TE_SYM_CHAR, + TE_NUM_CHAR, TE_SYM_OVERFLOW, }; @@ -67,7 +69,9 @@ static const char *token_error_string(enum token_error err) case TE_OK: return "all good"; case TE_STR_START_CHAR: return "string didn't start with \""; case TE_SYM_START_CHAR: return "symbol didn't start with a-z"; + case TE_NUM_START_CHAR: return "number didn't start with 0-9 or -"; case TE_SYM_CHAR: return "invalid symbol character"; + case TE_NUM_CHAR: return "invalid number character"; case TE_SYM_OVERFLOW: return "symbol push overflow"; } @@ -78,6 +82,8 @@ static void print_token_error(struct cursor *cursor) { int is_chr_data = cursor->err == TE_STR_START_CHAR || cursor->err == TE_SYM_START_CHAR || + cursor->err == TE_NUM_START_CHAR || + cursor->err == TE_NUM_CHAR || cursor->err == TE_SYM_CHAR; printf("\nerror: %s %.*s\n", token_error_string(cursor->err), is_chr_data?1:0, (char*)&cursor->err_data.c); @@ -139,9 +145,6 @@ static int push_token_data(struct cursor *tokens, if (!ok) return 0; switch (token) { - case T_NUMBER: - tokdebug("NUM"); - break; case T_OPEN: depth++; @@ -151,18 +154,19 @@ static int push_token_data(struct cursor *tokens, depth--; break; /* nothing to write after these tokens */ + case T_NUMBER: case T_SYMBOL: - /* tokdebug(""); */ /* fallthrough */ case T_STRING: print_spaces(depth*2); str = (struct tok_str*)token_data; if (token == T_STRING) { - tokdebug("\"%.*s\" ", str->len, str->data); - } - else { - tokdebug("%.*s ", str->len, str->data); + tokdebug("str \"%.*s\"", str->len, str->data); + } else if (token == T_NUMBER) { + tokdebug("num %.*s", str->len, str->data); + } else { + tokdebug("sym %.*s", str->len, str->data); } ok = push_data(tokens, token_data, token_data_size); if (!ok) return 0; @@ -190,6 +194,11 @@ static int push_string(struct cursor *tokens, struct tok_str str) return push_token_data(tokens, T_STRING, &str, sizeof(str)); } +static int push_number(struct cursor *tokens, struct tok_str str) +{ + return push_token_data(tokens, T_NUMBER, &str, sizeof(str)); +} + static int is_start_symbol_char(char c) { @@ -224,6 +233,62 @@ static int pull_escaped_char(struct cursor *cursor, u8 *c) return 2; } +static int pull_number(struct cursor *cursor, u8 *buf, int buf_len) +{ + int ok = 1; + int chars = 0; + u8 c; + + struct cursor temp; + struct cursor buf_cursor; + + temp.p = cursor->p; + temp.end = cursor->end; + + buf_cursor.p = buf; + buf_cursor.end = buf + buf_len; + + while (1) { + ok = pull_byte(&temp, &c); + if (!ok) return 0; + + /* first char should start with a letter */ + if (chars == 0 && !isdigit(c) && c != '-') { + cursor->err = TE_NUM_START_CHAR; + cursor->err_data.c = c; + return 0; + } else if (chars > 0 && (isspace(c) || c == ')')) { + /* we got a number */ + break; + } else if (chars > 0 && !isdigit(c) && c != '.') { + cursor->err = TE_NUM_CHAR; + cursor->err_data.c = c; + return 0; + } + ok = push_byte(&buf_cursor, c); + chars++; + + if (!ok) { + cursor->err = TE_SYM_OVERFLOW; + return 0; + } + } + + ok = push_byte(&buf_cursor, 0); + chars++; + + if (!ok) { + cursor->err = TE_SYM_OVERFLOW; + return 0; + } + + cursor->p = temp.p-1; + cursor->err = TE_OK; + + /* remove the first counted quote since this was not pushed */ + return chars; +} + static int pull_string(struct cursor *cursor, u8 *buf, int buf_len) { int ok = 1; @@ -347,7 +412,7 @@ static int read_and_push_atom(struct cursor *cursor, struct cursor *tokens) ok = pull_symbol(cursor, buf, sizeof(buf)); if (ok) { - str.len = ok; + str.len = ok; str.data = buf; ok = push_symbol(tokens, str); if (!ok) { @@ -356,15 +421,11 @@ static int read_and_push_atom(struct cursor *cursor, struct cursor *tokens) } return 1; } - else { - /* printf("\ndidn't get symbol: "); */ - /* print_token_error(cursor); */ - } ok = pull_string(cursor, buf, sizeof(buf)); if (ok) { - str.len = ok; + str.len = ok; str.data = buf; ok = push_string(tokens, str); if (!ok) { @@ -374,6 +435,18 @@ static int read_and_push_atom(struct cursor *cursor, struct cursor *tokens) return 1; } + ok = pull_number(cursor, buf, sizeof(buf)); + if (ok) { + str.len = ok; + str.data = buf; + ok = push_number(tokens, str); + if (!ok) { + printf("read_and_push_atom number push overflow\n"); + return 0; + } + return 1; + } + /* TODO: read_number */ return 0;