chibipub

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit 807f763ef25f1303368177479efedda49d7dcdcf
parent 087978681c1aff8cc5dc06e7290a3dda06fa42f5
Author: William Casarin <jb55@jb55.com>
Date:   Sun, 20 Dec 2020 10:56:12 -0800

generalize json parsing

Diffstat:
M Makefile | 4 ++--
M src/json.c | 466 ++++++++-----------------------------------------------------------------------
M src/json.h | 43 ++++++++++++++-----------------------------
M src/test_json.c | 7 ++++---
A src/ubjson.c | 438 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/ubjson.h | 42 ++++++++++++++++++++++++++++++++++++++++++
6 files changed, 544 insertions(+), 456 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,8 +1,8 @@ -CFLAGS = -Wall -Werror -std=gnu99 -Isrc $(shell pkg-config --cflags openssl) +CFLAGS = -Og -ggdb -Wall -Werror -std=gnu99 -Isrc $(shell pkg-config --cflags openssl) LDFLAGS = $(shell pkg-config --libs openssl) -OBJS = src/http.o src/base64.o src/inbox.o src/json.o +OBJS = src/http.o src/base64.o src/inbox.o src/json.o src/ubjson.o HEADERS = $(wildcard src/*.h) diff --git a/src/json.c b/src/json.c @@ -6,16 +6,9 @@ #include <assert.h> #include <ctype.h> -struct json_parser { - struct cursor cur; - struct cursor ubjson; - struct errors errs; -}; - -void init_ubjson(struct ubjson *ubjson, unsigned char *buf, size_t bufsize) +void init_json_handlers(struct json_handlers *h) { - make_cursor(buf, buf + bufsize, &ubjson->cur); - init_errors(&ubjson->errs); + memset(h, 0, sizeof(*h)); } #define max(a,b) ((a) > (b) ? (a) : (b)) @@ -121,55 +114,6 @@ static int parse_utf8_char(struct cursor *cur, unsigned int *chr) return 0; } -static int push_ubjson_num(struct cursor *ubjson, unsigned int len) -{ - if (len <= 0xFF) { - if (!push_byte(ubjson, 'U')) { - return 0; - } - if (!push_byte(ubjson, (unsigned char)len)) { - return 0; - } - } else if (len <= 0xFFFF) { - if (!push_byte(ubjson, 'I')) { - return 0; - } - /* TODO: big-endian push */ - if (!push_u16(ubjson, (unsigned int)len)) { - return 0; - } - } else { - if (!push_byte(ubjson, 'l')) { - return 0; - } - /* TODO: big-endian push */ - if (!push_int(ubjson, len)) { - return 0; - } - } - return 1; -} - -static int push_ubjson_str(struct cursor *ubjson, unsigned char *text, - unsigned int size) -{ - - if (!push_byte(ubjson, 'S')) { - return 0; - } - - if (!push_ubjson_num(ubjson, size)) { - return 0; - } - - /* TODO push string without escapes */ - if (!push_data(ubjson, text, size)) { - return 0; - } - - return 1; -} - static inline int parse_escape(struct json_parser *p) { p->cur.p++; @@ -198,8 +142,8 @@ static int parse_string(struct json_parser *p) } if (chr == '"') { - /* 
TODO @generalize push json string */ - return push_ubjson_str(&p->ubjson, start+1, p->cur.p-start-2); + return (*p->handlers.string)(&p->handlers, start+1, + p->cur.p-start-2); } if (chr == '\\' && !parse_escape(p)) { @@ -285,8 +229,7 @@ static int parse_number(struct json_parser *p) } if (c != '.' && (c < '0' || c > '9')) { /* just a 0 */ - push_ubjson_num(&p->ubjson, 0); - return 1; + return (*p->handlers.number)(&p->handlers, 0); } return parse_fraction(p); } else if (c >= '1' && c <= '9') { @@ -295,11 +238,15 @@ static int parse_number(struct json_parser *p) return 0; } - return push_ubjson_num(&p->ubjson, digits * sign); + /* @generalize */ + return (*p->handlers.number)(&p->handlers, digits * sign); } else { note_error(&p->errs, "expected '-', '0' or '1-9', got '%c'", c) return 0; } + + note_error(&p->errs, "impossibru"); + return 0; } static int parse_value(struct json_parser *p); @@ -325,23 +272,6 @@ static int parse_kv(struct json_parser *p) } -static inline int finalize_container(struct json_parser *p, unsigned int *len, char end) -{ - *len = p->ubjson.p + 1 - ((unsigned char*)len)-4; - p->cur.p++; - return push_byte(&p->ubjson, end); -} - -static inline int finalize_array(struct json_parser *p, unsigned int *len) -{ - return finalize_container(p, len, ']'); -} - -static inline int finalize_object(struct json_parser *p, unsigned int *len) -{ - return finalize_container(p, len, '}'); -} - static int parse_object(struct json_parser *p) { char c; @@ -353,17 +283,10 @@ static int parse_object(struct json_parser *p) return 0; } - if (!push_byte(&p->ubjson, '{')) - return 0; - - if (!push_byte(&p->ubjson, 'l')) - return 0; - - /* fill this in later */ - len = (unsigned int *)p->ubjson.p; - - if (!push_int(&p->ubjson, 0)) + if (!(*p->handlers.open)(&p->handlers, '{', &len)) { + note_error(&p->errs, "obj handle open", c); return 0; + } while(1) { consume_whitespace(&p->cur); @@ -374,8 +297,8 @@ static int parse_object(struct json_parser *p) } if (c == '}') { - /* 
TODO (generalize): finalize_object */ - return finalize_object(p, len); + p->cur.p++; + return (*p->handlers.close)(&p->handlers, '}', len); } if (c != '"') { @@ -399,8 +322,8 @@ static int parse_object(struct json_parser *p) } if (c == '}') { - /* TODO (generalize): finalize_object */ - return finalize_object(p, len); + p->cur.p++; + return (*p->handlers.close)(&p->handlers, '}', len); } note_error(&p->errs, "expected ',' or '}', got '%c'", c); @@ -423,21 +346,8 @@ static int parse_array(struct json_parser *p) return 0; } - if (!push_byte(&p->ubjson, '[')) { - note_error(&p->errs, "push ubjson '[' oob"); - return 0; - } - - if (!push_byte(&p->ubjson, 'l')) { - note_error(&p->errs, "push ubjson 'l' oob"); - return 0; - } - - /* fill this in later */ - len = (unsigned int *)p->ubjson.p; - - if (!push_int(&p->ubjson, 0)) { - note_error(&p->errs, "push ubjson array len oob"); + if (!(*p->handlers.open)(&p->handlers, '[', &len)) { + note_error(&p->errs, "obj handle open", c); return 0; } @@ -450,8 +360,8 @@ static int parse_array(struct json_parser *p) } if (c == ']') { - finalize_array(p, len); - return 1; + p->cur.p++; + return (*p->handlers.close)(&p->handlers, ']', len); } while(1) { @@ -466,7 +376,8 @@ static int parse_array(struct json_parser *p) } if (c == ']') { - return finalize_array(p, len); + p->cur.p++; + return (*p->handlers.close)(&p->handlers, ']', len); } if (c != ',') { @@ -483,22 +394,16 @@ static int parse_array(struct json_parser *p) static inline int parse_bool(struct json_parser *p) { if (parse_str(&p->cur, "true")) { - /* TODO @generalize push true */ - return push_byte(&p->ubjson, 'T'); + return (*p->handlers.boolean)(&p->handlers, 1); } if (parse_str(&p->cur, "false")) { - return push_byte(&p->ubjson, 'F'); + return (*p->handlers.boolean)(&p->handlers, 0); } return 0; } -static inline int push_ubjson_null(struct cursor *ubjson) -{ - return push_byte(ubjson, 'Z'); -} - static int parse_null(struct json_parser *p) { if (!parse_str(&p->cur, 
"null")) { @@ -506,8 +411,7 @@ static int parse_null(struct json_parser *p) return 0; } - /* TODO @generalize push null */ - return push_ubjson_null(&p->ubjson); + return (*p->handlers.null)(&p->handlers); } static int parse_value(struct json_parser *p) @@ -560,313 +464,31 @@ static int parse_array_or_object(struct json_parser *p) return 0; } -static int parse_ubjson_sized_len(struct ubjson *ubjson, unsigned int *len, unsigned int *size) -{ - unsigned char byte; - unsigned short u16; - unsigned char *start; - - start = ubjson->cur.p; - - if (!pull_byte(&ubjson->cur, &byte)) { - note_error(&ubjson->errs, "oob"); - return 0; - } else if (byte == 'U' || byte == 'i') { - if (!pull_byte(&ubjson->cur, &byte)) { - note_error(&ubjson->errs, "pull byte after u8 int"); - } else { - if (size) - *size = 1; - *len = byte; - return 1; - } - } else if (byte == 'I') { - if (!pull_data(&ubjson->cur, (unsigned char*)&u16, sizeof(u16))) { - note_error(&ubjson->errs, "pull byte after u16 int"); - } else { - if (size) - *size = 2; - *len = u16; - return 1; - } - } else if (byte == 'l' || byte == 'L') { - if (!pull_int(&ubjson->cur, (int*)len)) { - note_error(&ubjson->errs, "pull byte after u16 int"); - } else { - if (size) - *size = 4; - return 1; - } - } else { - note_error(&ubjson->errs, "unhandled number tag '%c'", byte); - } - - ubjson->cur.p = start; - return 0; -} - -static inline int parse_ubjson_len(struct ubjson *ubjson, unsigned int *len) -{ - return parse_ubjson_sized_len(ubjson, len, NULL); -} - -static int parse_ubjson_string(struct ubjson *ubjson, struct json *val) -{ - char byte; - byte = 0; - - if (!parse_char(&ubjson->cur, &byte, 'S')) { - note_error(&ubjson->errs, "expected S tag, got '%c'", byte); - return 0; - } - - if (!parse_ubjson_len(ubjson, &val->len)) { - note_error(&ubjson->errs, "size"); - return 0; - } - - val->type = JSON_STRING; - val->string = (char*)ubjson->cur.p; - - ubjson->cur.p += val->len; - - return 1; -} - -static inline int 
is_number_tag(unsigned char tag) -{ - return tag == 'U' || tag == 'l' || tag == 'I' || tag == 'L' || tag == 'i'; -} - -static inline int valid_ubjson_tag(char c) -{ - return c == 'S' || - c == '{' || - c == '[' || - c == 'T' || - c == 'F' || - c == 'Z' || is_number_tag(c); -} - -static inline void copy_ubjson(struct ubjson *src, struct ubjson *dst) { - copy_cursor(&src->cur, &dst->cur); - copy_errors(&src->errs, &dst->errs); - dst->data_end = src->data_end; -} - -static int parse_ubjson_object(struct ubjson *ubjson, struct json *val) -{ - char c; - c = 0; - - val->type = JSON_OBJECT; - assert(ubjson != &val->container); - copy_ubjson(ubjson, &val->container); - - if (!parse_char(&ubjson->cur, &c, '{')) { - note_error(&ubjson->errs, "expected '{' tag, got '%c'", c); - return 0; - } - - if (!parse_ubjson_len(ubjson, &val->len)) { - /* reset */ - ubjson->cur.p = val->container.cur.p; - note_error(&ubjson->errs, "object len"); - return 0; - } - - if (ubjson->cur.p + val->len >= ubjson->data_end) { - /* reset */ - ubjson->cur.p = val->container.cur.p; - note_error(&ubjson->errs, "invalid len %d", val->len); - return 0; - } - - ubjson->cur.p += val->len; - - return 1; -} - -static int parse_ubjson_number(struct ubjson *ubjson, struct json *val) -{ - if (!parse_ubjson_sized_len(ubjson, &val->number_int, &val->len)) { - note_error(&ubjson->errs, ""); - return 0; - } - - val->type = JSON_NUMBER_INT; - return 1; -} - -static int parse_ubjson_bool(struct ubjson *ubjson, struct json *val) -{ - unsigned char byte; - if (!pull_byte(&ubjson->cur, &byte)) { - note_error(&ubjson->errs, "pull byte oob"); - return 0; - } - - if (byte != 'T' && byte != 'F') { - note_error(&ubjson->errs, "invalid bool tag: '%c'", byte); - return 0; - } - - val->type = JSON_BOOL; - val->len = 0; - val->boolean = byte == 'T'; - return 1; -} - -int parse_ubjson_value(struct ubjson *ubjson, struct json *val) -{ - char tag; - assert(&ubjson->cur != &val->container.cur); - if (!peek_char(&ubjson->cur, &tag)) 
{ - note_error(&ubjson->errs, "peek value tag oob"); - return 0; - } - - if (tag == 'Z') { - val->type = JSON_NULL; - ubjson->cur.p++; - return 1; - } else if (tag == 'S') { - return parse_ubjson_string(ubjson, val); - } else if (tag == '{') { - return parse_ubjson_object(ubjson, val); - } else if (is_number_tag(tag)) { - return parse_ubjson_number(ubjson, val); - } else if (tag == 'F' || tag == 'T') { - return parse_ubjson_bool(ubjson, val); - } - - note_error(&ubjson->errs, "unhandled type '%c'", tag); - return 0; -} - -static int ubjson_obj_lookup(struct ubjson *ubjson, const char *path, struct json *val) -{ - char byte; - unsigned int len; - struct json blackhole; - byte = 0; - assert(&ubjson->cur != &val->container.cur); - - if (!parse_char(&ubjson->cur, &byte, '{')) { - note_error(&ubjson->errs, "no object tag, got '%c'", byte); - return 0; - } - - if (!parse_ubjson_len(ubjson, &len)) { - note_error(&ubjson->errs, "object size"); - return 0; - } - - while (1) { - if (!peek_char(&ubjson->cur, &byte)) { - note_error(&ubjson->errs, "oob"); - break; - } - - if (byte == '}') { - note_error(&ubjson->errs, "not found") - break; - } - - if (!parse_ubjson_string(ubjson, val)) { - note_error(&ubjson->errs, "string"); - break; - } - - if (strlen(path) != val->len || - memcmp(path, val->string, val->len)) { - /* skip over value */ - if (!parse_ubjson_value(ubjson, &blackhole)) { - note_error(&ubjson->errs, "skip value"); - return 0; - } - - continue; - } - - return parse_ubjson_value(ubjson, val); - } - note_error(&ubjson->errs, "not found"); - ubjson->cur.p = ubjson->cur.start; - return 0; -} - -int ubjson_lookup(struct ubjson *ubjson, const char **path, int path_len, struct json *val) +static void copy_json_handlers(struct json_handlers *src, + struct json_handlers *dst) { - int i; - const char *seg; - char byte; - struct ubjson next; - - byte = 0; - copy_ubjson(ubjson, &next); - - for (i = 0; i < path_len; i++) { - seg = path[i]; - - if (!ubjson_obj_lookup(&next, seg, 
val)) { - note_error(&ubjson->errs, "lookup path segment: '%s'", seg); - return 0; - } - - /* not at the last segment and don't have an object or array */ - if (i != path_len-1 && val->type != JSON_OBJECT) { - note_error(&ubjson->errs, - "segment '%s' not an object, got '%c'", seg, byte); - return 0; - } else if (val->type == JSON_OBJECT) { - copy_ubjson(&val->container, &next); - } - } - - return 1; + dst->data = src->data; + dst->string = src->string; + dst->boolean = src->boolean; + dst->null = src->null; + dst->number = src->number; + dst->open = src->open; + dst->close = src->close; } -void print_value(struct json *val) +static void init_json_parser(struct json_parser *p, unsigned char *buf, + size_t buf_size, struct json_handlers *h) { - switch (val->type) { - case JSON_STRING: - printf("%.*s", val->len, val->string); - return; - case JSON_NULL: - printf("null"); - return; - case JSON_NUMBER_INT: - printf("%d", val->number_int); - return; - case JSON_NUMBER: - printf("%f", val->number); - return; - case JSON_BOOL: - printf("%s", val->boolean? 
"true" : "false"); - return; - default: - printf("implement print %d", val->type); - } + memset(p, 0, sizeof(*p)); + make_cursor(buf, buf + buf_size, &p->cur); + copy_json_handlers(h, &p->handlers); } -int parse_json(unsigned char *buf, size_t buf_size, struct ubjson *out) +int parse_json(unsigned char *buf, size_t buf_size, struct json_parser *p, + struct json_handlers *handlers) { - int res; - struct json_parser p; - - memset(&p, 0, sizeof(p)); - - make_cursor(buf, buf + buf_size, &p.cur); - copy_cursor(&out->cur, &p.ubjson); - - res = parse_array_or_object(&p); - - out->data_end = p.ubjson.p; - out->cur.p = out->cur.start; - - return res; + init_json_parser(p, buf, buf_size, handlers); + return parse_array_or_object(p); } - diff --git a/src/json.h b/src/json.h @@ -6,38 +6,23 @@ #include "errors.h" #include <stddef.h> -struct ubjson { - struct cursor cur; - unsigned char *data_end; - struct errors errs; -}; - -enum json_value_type { - JSON_NUMBER, - JSON_NUMBER_INT, - JSON_STRING, - JSON_NULL, - JSON_BOOL, - JSON_OBJECT, - JSON_ARRAY, +struct json_handlers { + void *data; + int (*string)(struct json_handlers *data, unsigned char *text, int size); + int (*boolean)(struct json_handlers *data, int val); + int (*null)(struct json_handlers *data); + int (*number)(struct json_handlers *data, unsigned int number); + int (*open)(struct json_handlers *data, char type, unsigned int **len); + int (*close)(struct json_handlers *data, char type, unsigned int *len); }; -struct json { - enum json_value_type type; - unsigned int len; - union { - struct ubjson container; - double number; - unsigned int number_int; - const char *string; - int boolean; - }; +struct json_parser { + struct cursor cur; + struct errors errs; + struct json_handlers handlers; }; -void init_ubjson(struct ubjson *ubjson, unsigned char *buf, size_t bufsize); -int print_ubjson(struct ubjson *json); -int parse_json(unsigned char *buf, size_t buf_size, struct ubjson *out); -int ubjson_lookup(struct ubjson 
*ubjson, const char **path, int path_len, struct json *val); -void print_value(struct json *val); +void init_json_handlers(struct json_handlers *h); +int parse_json(unsigned char *buf, size_t buf_size, struct json_parser *, struct json_handlers *); #endif diff --git a/src/test_json.c b/src/test_json.c @@ -1,5 +1,6 @@ #include "json.h" +#include "ubjson.h" #include "cursor.h" #include <assert.h> @@ -49,11 +50,11 @@ int main(int argc, char *argv[]) init_ubjson(&ubjson, out, outlen); unsigned char empty[] = "{}"; - assert(parse_json(empty, sizeof(empty), &ubjson)); + assert(parse_ubjson(empty, sizeof(empty), &ubjson)); ubjson.cur.p = ubjson.cur.start; unsigned char kv[] = "{ \"key\" : {\"obj\":\"123\"} , \"otherkey\": \"stringy\"}"; - assert(parse_json(kv, sizeof(kv), &ubjson)); + assert(parse_ubjson(kv, sizeof(kv), &ubjson)); const char *path[] = {"key", "obj"}; assert(ubjson_lookup(&ubjson, path, 2, &val)); @@ -65,7 +66,7 @@ int main(int argc, char *argv[]) ubjson.cur.p = ubjson.cur.start; assert(map_file("corpus/math.json", &p, &flen)); assert(ubjson.errs.record); - parse_json(p, flen, &ubjson); + assert(parse_ubjson(p, flen, &ubjson)); printf("\n---\n"); write_data(ubjson.cur.start, ubjson.data_end - ubjson.cur.start); diff --git a/src/ubjson.c b/src/ubjson.c @@ -0,0 +1,438 @@ + +#include "ubjson.h" +#include "json.h" +#include "parse.h" +#include <assert.h> + +static inline int push_ubjson_null(struct cursor *ubjson) +{ + return push_byte(ubjson, 'Z'); +} + +static inline int is_number_tag(unsigned char tag) +{ + return tag == 'U' || tag == 'l' || tag == 'I' || tag == 'L' || tag == 'i'; +} + +static int parse_ubjson_sized_len(struct ubjson *ubjson, unsigned int *len, unsigned int *size) +{ + unsigned char byte; + unsigned short u16; + unsigned char *start; + + start = ubjson->cur.p; + + if (!pull_byte(&ubjson->cur, &byte)) { + note_error(&ubjson->errs, "oob"); + return 0; + } else if (byte == 'U' || byte == 'i') { + if (!pull_byte(&ubjson->cur, &byte)) { + 
note_error(&ubjson->errs, "pull byte after u8 int"); + } else { + if (size) + *size = 1; + *len = byte; + return 1; + } + } else if (byte == 'I') { + if (!pull_data(&ubjson->cur, (unsigned char*)&u16, sizeof(u16))) { + note_error(&ubjson->errs, "pull byte after u16 int"); + } else { + if (size) + *size = 2; + *len = u16; + return 1; + } + } else if (byte == 'l' || byte == 'L') { + if (!pull_int(&ubjson->cur, (int*)len)) { + note_error(&ubjson->errs, "pull byte after u16 int"); + } else { + if (size) + *size = 4; + return 1; + } + } else { + note_error(&ubjson->errs, "unhandled number tag '%c'", byte); + } + + ubjson->cur.p = start; + return 0; +} + +static inline int parse_ubjson_len(struct ubjson *ubjson, unsigned int *len) +{ + return parse_ubjson_sized_len(ubjson, len, NULL); +} + +static int parse_ubjson_string(struct ubjson *ubjson, struct json *val) +{ + char byte; + byte = 0; + + if (!parse_char(&ubjson->cur, &byte, 'S')) { + note_error(&ubjson->errs, "expected S tag, got '%c'", byte); + return 0; + } + + if (!parse_ubjson_len(ubjson, &val->len)) { + note_error(&ubjson->errs, "size"); + return 0; + } + + val->type = JSON_STRING; + val->string = (char*)ubjson->cur.p; + + ubjson->cur.p += val->len; + + return 1; +} + +static inline int push_ubjson_close(struct cursor *ubjson, unsigned int *len, char end) +{ + *len = ubjson->p + 1 - ((unsigned char*)len)-4; + return push_byte(ubjson, end); +} + +static int push_ubjson_open(struct cursor *ubjson, char typ, unsigned int **len) +{ + if (!push_byte(ubjson, typ)) + return 0; + + if (!push_byte(ubjson, 'l')) + return 0; + + /* fill this in later */ + *len = (unsigned int *)ubjson->p; + + if (!push_int(ubjson, 0)) + return 0; + + return 1; +} + +static int push_ubjson_num(struct cursor *ubjson, unsigned int len) +{ + if (len <= 0xFF) { + if (!push_byte(ubjson, 'U')) { + return 0; + } + if (!push_byte(ubjson, (unsigned char)len)) { + return 0; + } + } else if (len <= 0xFFFF) { + if (!push_byte(ubjson, 'I')) { + return 
0; + } + /* TODO: big-endian push */ + if (!push_u16(ubjson, (unsigned int)len)) { + return 0; + } + } else { + if (!push_byte(ubjson, 'l')) { + return 0; + } + /* TODO: big-endian push */ + if (!push_int(ubjson, len)) { + return 0; + } + } + return 1; +} + +static int push_ubjson_str(struct cursor *ubjson, unsigned char *text, + unsigned int size) +{ + + if (!push_byte(ubjson, 'S')) { + return 0; + } + + if (!push_ubjson_num(ubjson, size)) { + return 0; + } + + /* TODO push string without escapes */ + if (!push_data(ubjson, text, size)) { + return 0; + } + + return 1; +} + +static inline int valid_ubjson_tag(char c) +{ + return c == 'S' || + c == '{' || + c == '[' || + c == 'T' || + c == 'F' || + c == 'Z' || is_number_tag(c); +} + +static inline void copy_ubjson(struct ubjson *src, struct ubjson *dst) { + copy_cursor(&src->cur, &dst->cur); + copy_errors(&src->errs, &dst->errs); + dst->data_end = src->data_end; +} + +static int parse_ubjson_object(struct ubjson *ubjson, struct json *val) +{ + char c; + c = 0; + + val->type = JSON_OBJECT; + assert(ubjson != &val->container); + copy_ubjson(ubjson, &val->container); + + if (!parse_char(&ubjson->cur, &c, '{')) { + note_error(&ubjson->errs, "expected '{' tag, got '%c'", c); + return 0; + } + + if (!parse_ubjson_len(ubjson, &val->len)) { + /* reset */ + ubjson->cur.p = val->container.cur.p; + note_error(&ubjson->errs, "object len"); + return 0; + } + + if (ubjson->cur.p + val->len >= ubjson->data_end) { + /* reset */ + ubjson->cur.p = val->container.cur.p; + note_error(&ubjson->errs, "invalid len %d", val->len); + return 0; + } + + ubjson->cur.p += val->len; + + return 1; +} + +static int parse_ubjson_number(struct ubjson *ubjson, struct json *val) +{ + if (!parse_ubjson_sized_len(ubjson, &val->number_int, &val->len)) { + note_error(&ubjson->errs, ""); + return 0; + } + + val->type = JSON_NUMBER_INT; + return 1; +} + +static int parse_ubjson_bool(struct ubjson *ubjson, struct json *val) +{ + unsigned char byte; + if 
(!pull_byte(&ubjson->cur, &byte)) { + note_error(&ubjson->errs, "pull byte oob"); + return 0; + } + + if (byte != 'T' && byte != 'F') { + note_error(&ubjson->errs, "invalid bool tag: '%c'", byte); + return 0; + } + + val->type = JSON_BOOL; + val->len = 0; + val->boolean = byte == 'T'; + return 1; +} + +int parse_ubjson_value(struct ubjson *ubjson, struct json *val) +{ + char tag; + assert(&ubjson->cur != &val->container.cur); + if (!peek_char(&ubjson->cur, &tag)) { + note_error(&ubjson->errs, "peek value tag oob"); + return 0; + } + + if (tag == 'Z') { + val->type = JSON_NULL; + ubjson->cur.p++; + return 1; + } else if (tag == 'S') { + return parse_ubjson_string(ubjson, val); + } else if (tag == '{') { + return parse_ubjson_object(ubjson, val); + } else if (is_number_tag(tag)) { + return parse_ubjson_number(ubjson, val); + } else if (tag == 'F' || tag == 'T') { + return parse_ubjson_bool(ubjson, val); + } + + note_error(&ubjson->errs, "unhandled type '%c'", tag); + return 0; +} + +static int ubjson_obj_lookup(struct ubjson *ubjson, const char *path, struct json *val) +{ + char byte; + unsigned int len; + struct json blackhole; + byte = 0; + assert(&ubjson->cur != &val->container.cur); + + if (!parse_char(&ubjson->cur, &byte, '{')) { + note_error(&ubjson->errs, "no object tag, got '%c'", byte); + return 0; + } + + if (!parse_ubjson_len(ubjson, &len)) { + note_error(&ubjson->errs, "object size"); + return 0; + } + + while (1) { + if (!peek_char(&ubjson->cur, &byte)) { + note_error(&ubjson->errs, "oob"); + break; + } + + if (byte == '}') { + note_error(&ubjson->errs, "not found") + break; + } + + if (!parse_ubjson_string(ubjson, val)) { + note_error(&ubjson->errs, "string"); + break; + } + + if (strlen(path) != val->len || + memcmp(path, val->string, val->len)) { + /* skip over value */ + if (!parse_ubjson_value(ubjson, &blackhole)) { + note_error(&ubjson->errs, "skip value"); + return 0; + } + + continue; + } + + return parse_ubjson_value(ubjson, val); + } + + 
note_error(&ubjson->errs, "not found"); + ubjson->cur.p = ubjson->cur.start; + return 0; +} + +int ubjson_lookup(struct ubjson *ubjson, const char **path, int path_len, struct json *val) +{ + int i; + const char *seg; + char byte; + struct ubjson next; + + byte = 0; + copy_ubjson(ubjson, &next); + + for (i = 0; i < path_len; i++) { + seg = path[i]; + + if (!ubjson_obj_lookup(&next, seg, val)) { + note_error(&ubjson->errs, "lookup path segment: '%s'", seg); + return 0; + } + + /* not at the last segment and don't have an object or array */ + if (i != path_len-1 && val->type != JSON_OBJECT) { + note_error(&ubjson->errs, + "segment '%s' not an object, got '%c'", seg, byte); + return 0; + } else if (val->type == JSON_OBJECT) { + copy_ubjson(&val->container, &next); + } + } + + return 1; +} + + +static int handle_ubjson_str(struct json_handlers *h, unsigned char *text, int size) +{ + return push_ubjson_str((struct cursor*)h->data, text, size); +} + +static int handle_ubjson_bool(struct json_handlers *h, int val) +{ + return push_byte((struct cursor*)h->data, val ? 
'T' : 'F'); +} + +static int handle_ubjson_null(struct json_handlers *h) +{ + return push_ubjson_null((struct cursor*)h->data); +} + +static int handle_ubjson_number(struct json_handlers *h, unsigned int num) +{ + return push_ubjson_num((struct cursor*)h->data, num); +} + +static int handle_ubjson_open(struct json_handlers *h, char type, unsigned int **len) +{ + return push_ubjson_open((struct cursor*)h->data, type, len); +} + +static int handle_ubjson_close(struct json_handlers *h, char type, unsigned int *len) +{ + return push_ubjson_close((struct cursor*)h->data, len, type); +} + +static void make_ubjson_handlers(struct json_handlers *h, struct cursor *ubjson) +{ + h->data = (void*)ubjson; + + h->string = handle_ubjson_str; + h->boolean = handle_ubjson_bool; + h->null = handle_ubjson_null; + h->number = handle_ubjson_number; + h->open = handle_ubjson_open; + h->close = handle_ubjson_close; +} + +int parse_ubjson(unsigned char *buf, size_t buf_size, struct ubjson *out) +{ + struct json_parser p; + struct json_handlers h; + int ok; + + make_ubjson_handlers(&h, &out->cur); + + ok = parse_json(buf, buf_size, &p, &h); + + out->data_end = out->cur.p; + out->cur.p = out->cur.start; + + return ok; +} + +void init_ubjson(struct ubjson *ubjson, unsigned char *buf, size_t bufsize) +{ + make_cursor(buf, buf + bufsize, &ubjson->cur); + init_errors(&ubjson->errs); +} + +void print_value(struct json *val) +{ + switch (val->type) { + case JSON_STRING: + printf("%.*s", val->len, val->string); + return; + case JSON_NULL: + printf("null"); + return; + case JSON_NUMBER_INT: + printf("%d", val->number_int); + return; + case JSON_NUMBER: + printf("%f", val->number); + return; + case JSON_BOOL: + printf("%s", val->boolean? 
"true" : "false"); + return; + default: + printf("implement print %d", val->type); + } +} diff --git a/src/ubjson.h b/src/ubjson.h @@ -0,0 +1,42 @@ + +#ifndef WOLFSOCKS_UBJSON +#define WOLFSOCKS_UBJSON + +#include "cursor.h" +#include "errors.h" + +struct ubjson { + struct cursor cur; + unsigned char *data_end; + struct errors errs; +}; + +enum json_value_type { + JSON_NUMBER, + JSON_NUMBER_INT, + JSON_STRING, + JSON_NULL, + JSON_BOOL, + JSON_OBJECT, + JSON_ARRAY, +}; + +struct json { + enum json_value_type type; + unsigned int len; + union { + struct ubjson container; + double number; + unsigned int number_int; + const char *string; + int boolean; + }; +}; + +void init_ubjson(struct ubjson *ubjson, unsigned char *buf, size_t bufsize); +int print_ubjson(struct ubjson *json); +int parse_ubjson(unsigned char *buf, size_t buf_size, struct ubjson *out); +int ubjson_lookup(struct ubjson *ubjson, const char **path, int path_len, struct json *val); +void print_value(struct json *val); + +#endif /* WOLFSOCKS_UBJSON */