commit 3d63f451583189466536139eeb41b00ac0ee1bc7
parent 4a08f846296f4f635467f479ee093d62fff9bb5b
Author: William Casarin <jb55@jb55.com>
Date: Mon, 15 Jun 2020 00:28:21 -0700
better error messages, fix crash
Signed-off-by: William Casarin <jb55@jb55.com>
Diffstat:
5 files changed, 253 insertions(+), 112 deletions(-)
diff --git a/default.nix b/default.nix
@@ -2,5 +2,5 @@
with pkgs;
stdenv.mkDerivation {
name = "project";
- nativeBuildInputs = [ ];
+ nativeBuildInputs = [ gdb ];
}
diff --git a/parse.c b/parse.c
@@ -22,14 +22,6 @@ enum known_symbol {
S_MATERIAL,
};
-enum token_type {
- T_OPEN,
- T_CLOSE,
- T_STRING,
- T_SYMBOL,
- T_NUMBER,
-};
-
enum tok_state {
TS_OPEN,
TS_CLOSE,
@@ -64,8 +56,23 @@ enum attribute_type {
A_LOCATION,
};
+union attr_data { /* value storage for an attribute; the members share memory */
+ struct {
+ const char *ptr; /* start of the string bytes */
+ int len; /* length that goes with ptr; NOTE(review): presumably not NUL-terminated -- confirm */
+ } str; /* string value as pointer + length */
+ int integer; /* integer value */
+ double fdouble; /* floating-point value */
+};
+
+struct attribute { /* one attribute: a value together with its attribute_type */
+ union attr_data data; /* the value; NOTE(review): presumably `type` decides which member is valid -- confirm */
+ enum attribute_type type; /* which attribute this is (e.g. A_LOCATION) */
+};
+
struct cell {
int attributes[MAX_ATTRIBUTES];
+ struct cell *child;
const char *name;
const char *id;
@@ -73,6 +80,24 @@ struct cell {
enum cell_type type;
};
+/*
+ * Duplicate the complete state of `src` -- buffer pointers, error code
+ * and error payload -- into `dest`.  Note the argument order: the source
+ * comes first, the destination second.
+ */
+static void copy_cursor(struct cursor *src, struct cursor *dest)
+{
+	/* a whole-struct assignment copies every member in one go */
+	*dest = *src;
+}
+
+/*
+ * Initialise `cursor` over the buffer delimited by `start` and `end`:
+ * the current position begins at `start` and the error state is cleared
+ * (TE_OK with zeroed error data).
+ */
+void make_cursor(u8 *start, u8 *end, struct cursor *cursor)
+{
+	/* reset the error state */
+	memset(&cursor->err_data, 0, sizeof cursor->err_data);
+	cursor->err = TE_OK;
+
+	/* point the cursor at the beginning of the buffer */
+	cursor->end = end;
+	cursor->start = cursor->p = start;
+}
+
static const char *token_error_string(enum token_error err)
{
switch (err) {
@@ -83,20 +108,44 @@ static const char *token_error_string(enum token_error err)
case TE_SYM_CHAR: return "invalid symbol character";
case TE_NUM_CHAR: return "invalid number character";
case TE_SYM_OVERFLOW: return "symbol push overflow";
+ case TE_UNEXPECTED_TOKEN: return "unexpected token during parsing";
}
return "unknown";
}
-static void print_token_error(struct cursor *cursor)
+static const char *token_type_str(enum token_type type) /* printable name of a token type; used by print_token_error */
{
- int is_chr_data = cursor->err == TE_STR_START_CHAR ||
- cursor->err == TE_SYM_START_CHAR ||
- cursor->err == TE_NUM_START_CHAR ||
- cursor->err == TE_NUM_CHAR ||
- cursor->err == TE_SYM_CHAR;
- printf("\nerror: %s %.*s\n", token_error_string(cursor->err),
- is_chr_data?1:0, (char*)&cursor->err_data.c);
+ switch (type) {
+ case T_OPEN: return "(";
+ case T_CLOSE: return ")";
+ case T_SYMBOL: return "symbol";
+ case T_STRING: return "string";
+ case T_NUMBER: return "number";
+ }
+
+ return "unknown"; /* reached only when type is none of the enumerators above */
+}
+
+/*
+ * Print the error recorded on `cursor` to stdout.
+ *
+ * TE_UNEXPECTED_TOKEN reports the expected and the received token type.
+ * Every other error prints its description, followed by the offending
+ * character when the error is one of the *_CHAR kinds.
+ */
+void print_token_error(struct cursor *cursor)
+{
+	int char_count;
+
+	if (cursor->err == TE_UNEXPECTED_TOKEN) {
+		printf("error: %s: expected '%s' got '%s'\n",
+		       token_error_string(cursor->err),
+		       token_type_str(cursor->err_data.parse.expected),
+		       token_type_str(cursor->err_data.parse.got));
+		return;
+	}
+
+	/* only the character errors carry a byte in err_data.c */
+	switch (cursor->err) {
+	case TE_STR_START_CHAR:
+	case TE_SYM_START_CHAR:
+	case TE_NUM_START_CHAR:
+	case TE_NUM_CHAR:
+	case TE_SYM_CHAR:
+		char_count = 1;
+		break;
+	default:
+		char_count = 0;
+		break;
+	}
+
+	/* a precision of 0 prints nothing for the non-character errors */
+	printf("\nerror: %s %.*s\n", token_error_string(cursor->err),
+	       char_count, (char*)&cursor->err_data.c);
+}
static int pull_byte(struct cursor *cursor, u8 *c)
@@ -255,21 +304,17 @@ static int pull_escaped_char(struct cursor *cursor, u8 *c)
return 2;
}
-static int pull_number(struct cursor *cursor, u8 *buf, int buf_len)
+static int pull_number(struct cursor *cursor, u8 **start)
{
int ok = 1;
int chars = 0;
u8 c;
struct cursor temp;
- struct cursor buf_cursor;
- temp.p = cursor->p;
+ *start = temp.p = cursor->p;
temp.end = cursor->end;
- buf_cursor.p = buf;
- buf_cursor.end = buf + buf_len;
-
while (1) {
ok = pull_byte(&temp, &c);
if (!ok) return 0;
@@ -287,7 +332,7 @@ static int pull_number(struct cursor *cursor, u8 *buf, int buf_len)
cursor->err_data.c = c;
return 0;
}
- ok = push_byte(&buf_cursor, c);
+
chars++;
if (!ok) {
@@ -296,9 +341,6 @@ static int pull_number(struct cursor *cursor, u8 *buf, int buf_len)
}
}
- ok = push_byte(&buf_cursor, 0);
- chars++;
-
if (!ok) {
cursor->err = TE_SYM_OVERFLOW;
return 0;
@@ -311,25 +353,23 @@ static int pull_number(struct cursor *cursor, u8 *buf, int buf_len)
return chars;
}
-static int pull_string(struct cursor *cursor, u8 *buf, int buf_len)
+static int pull_string(struct cursor *cursor, u8 **start)
{
int ok = 1;
int chars = 0;
u8 c;
struct cursor temp;
- struct cursor buf_cursor;
- temp.p = cursor->p;
- temp.end = cursor->end;
-
- buf_cursor.p = buf;
- buf_cursor.end = buf + buf_len;
+ copy_cursor(cursor, &temp);
while (1) {
ok = pull_escaped_char(&temp, &c);
if (!ok) return 0;
+ if (chars == 1)
+ *start = temp.p;
+
/* first char should start with a letter */
if (chars == 0 && c != '"') {
cursor->err = TE_STR_START_CHAR;
@@ -345,7 +385,6 @@ static int pull_string(struct cursor *cursor, u8 *buf, int buf_len)
break;
}
- ok = push_byte(&buf_cursor, c);
chars++;
if (!ok) {
@@ -354,35 +393,28 @@ static int pull_string(struct cursor *cursor, u8 *buf, int buf_len)
}
}
- ok = push_byte(&buf_cursor, 0);
- chars++;
-
if (!ok) {
cursor->err = TE_SYM_OVERFLOW;
return 0;
}
- cursor->p = temp.p;
- cursor->err = TE_OK;
+ copy_cursor(&temp, cursor);
/* remove the first counted quote since this was not pushed */
return --chars;
}
-static int pull_symbol(struct cursor *cursor, u8 *buf, int buf_len)
+static int pull_symbol(struct cursor *cursor, u8 **start)
{
int ok = 1;
int chars = 0;
u8 c;
struct cursor temp;
- struct cursor sym_cursor;
- temp.p = cursor->p;
- temp.end = cursor->end;
+ copy_cursor(cursor, &temp);
- sym_cursor.p = buf;
- sym_cursor.end = buf + buf_len;
+ *start = temp.p;
while (1) {
ok = pull_byte(&temp, &c);
@@ -402,7 +434,6 @@ static int pull_symbol(struct cursor *cursor, u8 *buf, int buf_len)
return 0;
}
- ok = push_byte(&sym_cursor, c);
chars++;
if (!ok) {
@@ -411,31 +442,27 @@ static int pull_symbol(struct cursor *cursor, u8 *buf, int buf_len)
}
}
- ok = push_byte(&sym_cursor, 0);
- chars++;
-
if (!ok) {
cursor->err = TE_SYM_OVERFLOW;
return 0;
}
- cursor->p = temp.p-1;
- cursor->end = temp.end;
- cursor->err = TE_OK;
+ temp.p--;
+ copy_cursor(&temp, cursor);
return chars;
}
static int read_and_push_atom(struct cursor *cursor, struct cursor *tokens)
{
- u8 buf[255];
struct tok_str str;
+ u8 *start;
int ok;
- ok = pull_symbol(cursor, buf, sizeof(buf));
+ ok = pull_symbol(cursor, &start);
if (ok) {
str.len = ok;
- str.data = buf;
+ str.data = start;
ok = push_symbol(tokens, str);
if (!ok) {
printf("read_and_push_atom identifier push overflow\n");
@@ -444,11 +471,10 @@ static int read_and_push_atom(struct cursor *cursor, struct cursor *tokens)
return 1;
}
-
- ok = pull_string(cursor, buf, sizeof(buf));
+ ok = pull_string(cursor, &start);
if (ok) {
str.len = ok;
- str.data = buf;
+ str.data = start;
ok = push_string(tokens, str);
if (!ok) {
printf("read_and_push_atom string push overflow\n");
@@ -457,10 +483,11 @@ static int read_and_push_atom(struct cursor *cursor, struct cursor *tokens)
return 1;
}
- ok = pull_number(cursor, buf, sizeof(buf));
+ start = cursor->p;
+ ok = pull_number(cursor, &start);
if (ok) {
str.len = ok;
- str.data = buf;
+ str.data = start;
ok = push_number(tokens, str);
if (!ok) {
printf("read_and_push_atom number push overflow\n");
@@ -474,26 +501,23 @@ static int read_and_push_atom(struct cursor *cursor, struct cursor *tokens)
return 0;
}
-int tokenize_space(u8 *buf, int buf_size, u8 *token_buf,
- int token_buf_size, struct cursor *tokens)
+int tokenize_cells(u8 *buf, int buf_size, struct cursor *tokens)
{
enum tok_state state;
struct cursor cursor;
/* u8 *start = buf; */
+ u8 *token_buf = tokens->p;
u8 c;
int ok;
cursor.p = buf;
cursor.end = buf + buf_size;
- tokens.p = token_buf;
- tokens.end = token_buf + token_buf_size;
-
state = TS_OPEN;
while (cursor.p < cursor.end) {
ok = pull_byte(&cursor, &c);
- if (!ok) return 0;
+ if (!ok) break;
if (state == TS_OPEN) {
if (isspace(c))
@@ -533,6 +557,10 @@ int tokenize_space(u8 *buf, int buf_size, u8 *token_buf,
}
}
+ /* just seal the buffer now since we won't be adding to it */
+ tokens->end = tokens->p;
+ tokens->p = token_buf;
+
return 1;
}
@@ -566,20 +594,22 @@ static int pull_token(struct cursor *tokens,
u8 c;
int ok;
- temp.p = tokens->p;
- temp.end = tokens->end;
+ copy_cursor(tokens, &temp);
ok = pull_byte(&temp, &c);
if (!ok) return 0;
type = (enum token_type)c;
- ok = pull_token_data(&temp, token, type);
- if (!ok) {
+ if (type != expected_type) {
+ tokens->err = TE_UNEXPECTED_TOKEN;
+ tokens->err_data.parse.expected = expected_type;
+ tokens->err_data.parse.got = type;
return 0;
}
- if (type != expected_type) {
+ ok = pull_token_data(&temp, token, type);
+ if (!ok) {
return 0;
}
@@ -612,24 +642,108 @@ static int pull_symbol_token(struct cursor *tokens, struct tok_str *str)
return 1;
}
-int parse_cell(struct cursor *tokens)
+/*
+ * Compare two length-delimited buffers.
+ *
+ * Returns 1 when both buffers have the same length and identical bytes,
+ * 0 otherwise.  A negative length never compares equal, and two empty
+ * buffers compare equal without their pointers being dereferenced.
+ */
+static int memeq(const void *buf, int buf_len, const void *buf2, int buf2_len)
+{
+	/* a negative length would become a huge size_t inside memcmp */
+	if (buf_len < 0 || buf_len != buf2_len)
+		return 0;
+
+	/* nothing to compare; also keeps NULL pointers away from memcmp */
+	if (buf_len == 0)
+		return 1;
+
+	return memcmp(buf, buf2, (size_t)buf_len) == 0;
+}
+
+static int parse_attribute(struct cursor *tokens, struct attribute attr) /* stub: pulls one symbol token; returns 1 on success, 0 on failure */
{
- struct cursor tokens;
+ int ok;
struct tok_str str;
+
+ (void)attr; /* attr is not used yet; NOTE(review): it is passed by value, so no result can be returned through it -- confirm intent */
+
+ ok = pull_symbol_token(tokens, &str);
+ if (!ok) return 0;
+ return 1;
+}
+
+
+/*
+static int parse_attributes(struct cursor *tokens,
+ struct cursor *attributes,
+ struct cursor *attr_ids,
+ int *parsed_attrs)
+{
+ int ok;
+ struct attribute attr;
+
+ while (1) {
+ ok = parse_attribute(tokens, &attr);
+
+ if (!ok) return 0;
+
+ push_data()
+ }
+
+ ok = pull_symbol_token(tokens, &str);
+ if (!ok) return 0;
+}
+*/
+
+/*
+static int parse_group(struct cursor *tokens)
+{
+ int ok;
+ struct tok_str str;
+
+ ok = pull_symbol_token(tokens, &str);
+ if (!ok) return 0;
+
+ if (!memeq(str.data, str.len, "group", 5))
+ return 0;
+
+ parse_attributes(&tokens)
+}
+*/
+
+/*
+ * Parse a room cell from the token stream.
+ *
+ * Pulls the leading symbol token and requires it to be exactly "room",
+ * then hands the remainder of the cell to parse_attributes and
+ * parse_object.
+ *
+ * Returns 0 when the symbol cannot be pulled or is not "room"; otherwise
+ * returns the result of parse_object.
+ */
+static int parse_room(struct cursor *tokens, struct cell *cell)
+{
int ok;
+	struct tok_str str;
- tokens.p = token_buf;
- tokens.end = token_buf + token_buf_size;
+	ok = pull_symbol_token(tokens, &str);
+	if (!ok) return 0;
+
+	/* "room" is 4 characters; the token length does not count a NUL */
+	if (!memeq(str.data, str.len, "room", 4))
+		return 0;
+
+	/*
+	 * `tokens` is already a pointer here, so it is passed through as-is.
+	 * NOTE(review): the parse_attributes draft takes further arguments
+	 * (attributes, attr_ids, parsed_attrs) -- supply them once it lands.
+	 */
+	parse_attributes(tokens);
+
+	/*
+	 * Propagate the result instead of falling off the end of a non-void
+	 * function.  NOTE(review): assumes parse_object follows the file's
+	 * 1-on-success / 0-on-failure convention -- confirm.
+	 */
+	return parse_object(tokens, cell);
+}
+
+/*
+ * Parse a single cell from `tokens` into `cell`.
+ *
+ * Only rooms are attempted at the moment; group and object parsing are
+ * not wired in yet.  Returns 1 when parse_room succeeds, 0 otherwise.
+ */
+static int parse_cell(struct cursor *tokens, struct cell *cell)
+{
+	/* TODO: try parse_group(tokens, cell) first */
+
+	if (parse_room(tokens, cell))
+		return 1;
+
+	/* TODO: fall back to parse_object(tokens, cell) */
+
+	return 0;
+}
+
+int parse_cells(struct cursor *tokens)
+{
+ struct tok_str str;
+ int ok;
while (1) {
- ok = pull_open(&tokens);
+ ok = pull_open(tokens);
if (!ok) return 0;
/* cell identifier */
- ok = pull_symbol_token(&tokens, &str);
+ ok = parse_cell(tokens, &str);
if (!ok) return 0;
- printf("got token: %.*s\n", str.len, str.data);
}
diff --git a/parse.h b/parse.h
@@ -4,15 +4,6 @@
#include "typedefs.h"
-struct cursor {
- u8 *p;
- u8 *end;
- enum token_error err;
- union {
- char c;
- } err_data;
-};
-
enum token_error {
TE_OK,
TE_STR_START_CHAR,
@@ -20,11 +11,36 @@ enum token_error {
TE_SYM_START_CHAR,
TE_SYM_CHAR,
TE_NUM_CHAR,
+ TE_UNEXPECTED_TOKEN,
TE_SYM_OVERFLOW,
};
+enum token_type { /* kind tag of a token; pull_token reads it back from a single byte */
+ T_OPEN, /* "(" */
+ T_CLOSE, /* ")" */
+ T_STRING, /* string token */
+ T_SYMBOL, /* symbol token */
+ T_NUMBER, /* number token */
+};
+
+
+struct cursor { /* position within a byte buffer plus the last recorded error */
+ u8 *start; /* beginning of the buffer (make_cursor sets start and p to the same address) */
+ u8 *p; /* current position */
+ u8 *end; /* end of the buffer; tokenize_cells loops while p < end */
+ enum token_error err; /* TE_OK, or the error that was recorded */
+ union {
+ char c; /* offending character, printed for the *_CHAR errors */
+ struct {
+ enum token_type expected; /* token type the parser asked for */
+ enum token_type got; /* token type actually found */
+ } parse; /* filled in together with TE_UNEXPECTED_TOKEN */
+ } err_data; /* extra detail that accompanies err */
+};
-int tokenize_space(unsigned char *buf, int buf_size, u8 *token_buf, int token_buf_size, struct cursor *tokens);
-int parse_cell(u8 *token_buf, int token_buf_size);
+void make_cursor(u8 *start, u8 *end, struct cursor *cursor);
+int tokenize_cells(unsigned char *buf, int buf_size, struct cursor *tokens);
+int parse_cells(struct cursor *tokens);
+void print_token_error(struct cursor *cursor);
#endif /* PROTOVERSE_PARSE_H */
diff --git a/protoverse.c b/protoverse.c
@@ -2,25 +2,40 @@
#include "io.h"
#include "parse.h"
+#include <assert.h>
int main(int argc, const char *argv[]) {
static u8 file_buf[4096];
static u8 token_buf[2048];
struct cursor tokens;
-
size_t count;
+ const char *space;
+ int ok;
+
+ /*
+  * Initialise every cursor field (start, p, end, err, err_data).
+  * Setting only p and end leaves start and err indeterminate, and both
+  * are read later by copy_cursor and print_token_error.
+  */
+ make_cursor(token_buf, token_buf + sizeof(token_buf), &tokens);
- const char *space = argc == 2 ? argv[1] : "satoshis-citadel.space";
- int ok = read_file(space, file_buf, sizeof(file_buf), &count);
+ space = argc == 2 ? argv[1] : "satoshis-citadel.space";
+ ok = read_file(space, file_buf, sizeof(file_buf), &count);
if (!ok) {
printf("failed to load '%s'\n", space);
return 1;
}
- tokenize_space(file_buf, count, token_buf, sizeof(token_buf),
- &tokens);
- parse_cell(&tokens);
+ ok = tokenize_cells(file_buf, count, &tokens);
+
+ if (!ok) {
+ printf("failed to tokenize\n");
+ return 1;
+ }
+
+ /* tokenize_cells rewinds the token cursor to the start of token_buf */
+ assert(tokens.p == token_buf);
+
+ ok = parse_cells(&tokens);
+ if (!ok) {
+ print_token_error(&tokens);
+ }
return 0;
}
diff --git a/satoshis-citadel.space b/satoshis-citadel.space
@@ -1,17 +1,13 @@
-(group (name "\"Satoshi's\" Citadel")
- (room (name "Foyer")
- (shape rectangle)
- (width 10) (depth 10) (height 100)
- (group
- (table
- (id welcome-desk)
- (name "Welcome desk")
- (material marble)
- (condition clean new)
- (width 1) (depth 2) (height 1)
- (location center)
- )
- )
- )
-)
+(room (name "\"Satoshi's\" Citadel")
+ (shape rectangle)
+ (width 10) (depth 10) (height 100)
+ (group
+ (table
+ (id welcome-desk)
+ (name "Welcome desk")
+ (material marble)
+ (condition clean new)
+ (width 1) (depth 2) (height 1)
+ (location center)
+ )))