chibipub

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit 207b9b30f42f03270001fbba4419b41093073e98
parent 807f763ef25f1303368177479efedda49d7dcdcf
Author: William Casarin <jb55@jb55.com>
Date:   Sun, 20 Dec 2020 11:37:22 -0800

compact json writer

Diffstat:
M src/json.c | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
M src/json.h | 11 +++++++++--
M src/test_json.c | 49 ++++++++++++++++++++++++++++++++++---------------
M src/ubjson.c | 25 +++++++++++++++++--------
4 files changed, 162 insertions(+), 35 deletions(-)

diff --git a/src/json.c b/src/json.c @@ -6,6 +6,13 @@ #include <assert.h> #include <ctype.h> +void init_json_pusher(struct json_pusher *p, unsigned char *buf, size_t size) +{ + memset(p, 0, sizeof(*p)); + init_errors(&p->errs); + make_cursor(buf, buf+size, &p->cur); +} + void init_json_handlers(struct json_handlers *h) { memset(h, 0, sizeof(*h)); @@ -254,6 +261,7 @@ static int parse_value(struct json_parser *p); static int parse_kv(struct json_parser *p) { char c; + unsigned int *len; c = 0; if (!parse_string(p)) { @@ -268,6 +276,11 @@ static int parse_kv(struct json_parser *p) return 0; } + if (!(*p->handlers.token)(&p->handlers, ':', &len)) { + note_error(&p->errs, "':' handler"); + return 0; + } + return parse_value(p); } @@ -283,7 +296,7 @@ static int parse_object(struct json_parser *p) return 0; } - if (!(*p->handlers.open)(&p->handlers, '{', &len)) { + if (!(*p->handlers.token)(&p->handlers, '{', &len)) { note_error(&p->errs, "obj handle open", c); return 0; } @@ -298,7 +311,11 @@ static int parse_object(struct json_parser *p) if (c == '}') { p->cur.p++; - return (*p->handlers.close)(&p->handlers, '}', len); + if (!(*p->handlers.token)(&p->handlers, '}', &len)) { + note_error(&p->errs, "'}' handler 1"); + return 0; + } + return 1; } if (c != '"') { @@ -317,13 +334,21 @@ static int parse_object(struct json_parser *p) } if (c == ',') { + if (!(*p->handlers.token)(&p->handlers, ',', &len)) { + note_error(&p->errs, "',' handler"); + return 0; + } p->cur.p++; continue; } if (c == '}') { p->cur.p++; - return (*p->handlers.close)(&p->handlers, '}', len); + if (!(*p->handlers.token)(&p->handlers, '}', &len)) { + note_error(&p->errs, "'}' handler 2"); + return 0; + } + return 1; } note_error(&p->errs, "expected ',' or '}', got '%c'", c); @@ -346,8 +371,8 @@ static int parse_array(struct json_parser *p) return 0; } - if (!(*p->handlers.open)(&p->handlers, '[', &len)) { - note_error(&p->errs, "obj handle open", c); + if (!(*p->handlers.token)(&p->handlers, '[', &len)) { 
+ note_error(&p->errs, "array handle open", c); return 0; } @@ -361,7 +386,7 @@ static int parse_array(struct json_parser *p) if (c == ']') { p->cur.p++; - return (*p->handlers.close)(&p->handlers, ']', len); + return (*p->handlers.token)(&p->handlers, ']', &len); } while(1) { @@ -377,7 +402,7 @@ static int parse_array(struct json_parser *p) if (c == ']') { p->cur.p++; - return (*p->handlers.close)(&p->handlers, ']', len); + return (*p->handlers.token)(&p->handlers, ']', &len); } if (c != ',') { @@ -385,6 +410,11 @@ static int parse_array(struct json_parser *p) return 0; } + if (!(*p->handlers.token)(&p->handlers, ',', &len)) { + note_error(&p->errs, "']' handler"); + return 0; + } + p->cur.p++; } @@ -457,8 +487,9 @@ static int parse_array_or_object(struct json_parser *p) return parse_object(p); } - if (c == '[') + if (c == '[') { return parse_array(p); + } note_error(&p->errs, "expected '{' or '[', got '%c'", c); return 0; @@ -473,8 +504,68 @@ static void copy_json_handlers(struct json_handlers *src, dst->boolean = src->boolean; dst->null = src->null; dst->number = src->number; - dst->open = src->open; - dst->close = src->close; + dst->token = src->token; +} + +static int handle_string(struct json_handlers *h, unsigned char *text, int size) +{ + struct json_pusher *p = (struct json_pusher*)h->data; + + if (!push_byte(&p->cur, '"')) { + note_error(&p->errs, "oob"); + return 0; + } + + /* TODO: re-escape when we have unescaped chars in here? */ + if (!push_data(&p->cur, text, size)) { + note_error(&p->errs, "oob"); + return 0; + } + + if (!push_byte(&p->cur, '"')) { + note_error(&p->errs, "oob"); + return 0; + } + + return 1; +} + +static int handle_bool(struct json_handlers *h, int val) +{ + struct json_pusher *p = (struct json_pusher*)h->data; + return push_str(&p->cur, val? 
"true" : "false"); +} + +static int handle_null(struct json_handlers *h) +{ + struct json_pusher *p = (struct json_pusher*)h->data; + return push_data(&p->cur, (unsigned char*)"null", 4); +} + +static int handle_number(struct json_handlers *h, unsigned int number) +{ + struct json_pusher *p = (struct json_pusher*)h->data; + static char digits[16]; + int c; + + c = snprintf(digits, sizeof(digits), "%d", number); + return push_data(&p->cur, (unsigned char*)digits, c); +} + +static inline int handle_token(struct json_handlers *h, char token, unsigned int **len) +{ + struct json_pusher *p = (struct json_pusher*)h->data; + return push_byte(&p->cur, token); +} + +void make_compact_handlers(struct json_handlers *h, struct json_pusher *out) +{ + h->data = out; + h->string = handle_string; + h->boolean = handle_bool; + h->null = handle_null; + h->number = handle_number; + h->token = handle_token; } static void init_json_parser(struct json_parser *p, unsigned char *buf, @@ -482,6 +573,7 @@ static void init_json_parser(struct json_parser *p, unsigned char *buf, { memset(p, 0, sizeof(*p)); make_cursor(buf, buf + buf_size, &p->cur); + init_errors(&p->errs); copy_json_handlers(h, &p->handlers); } diff --git a/src/json.h b/src/json.h @@ -6,14 +6,18 @@ #include "errors.h" #include <stddef.h> +struct json_pusher { + struct errors errs; + struct cursor cur; +}; + struct json_handlers { void *data; int (*string)(struct json_handlers *data, unsigned char *text, int size); int (*boolean)(struct json_handlers *data, int val); int (*null)(struct json_handlers *data); int (*number)(struct json_handlers *data, unsigned int number); - int (*open)(struct json_handlers *data, char type, unsigned int **len); - int (*close)(struct json_handlers *data, char type, unsigned int *len); + int (*token)(struct json_handlers *data, char type, unsigned int **len); }; struct json_parser { @@ -22,7 +26,10 @@ struct json_parser { struct json_handlers handlers; }; + +void init_json_pusher(struct json_pusher 
*p, unsigned char *buf, size_t size); void init_json_handlers(struct json_handlers *h); +void make_compact_handlers(struct json_handlers *h, struct json_pusher *out); int parse_json(unsigned char *buf, size_t buf_size, struct json_parser *, struct json_handlers *); #endif diff --git a/src/test_json.c b/src/test_json.c @@ -5,6 +5,7 @@ #include <assert.h> #include <stdio.h> +#include <string.h> #include <sys/mman.h> #include <sys/stat.h> #include <sys/types.h> @@ -36,16 +37,12 @@ static void write_data(unsigned char *data, int data_size) fclose(file); } -int main(int argc, char *argv[]) +static void test_ubjson_handler(unsigned char *out, size_t outlen) { - unsigned char *out; unsigned char *p; struct ubjson ubjson; struct json val; - size_t flen, outlen; - - outlen = 1024 * 1024 * 80; - out = malloc(outlen); + size_t flen; init_ubjson(&ubjson, out, outlen); @@ -53,28 +50,50 @@ int main(int argc, char *argv[]) assert(parse_ubjson(empty, sizeof(empty), &ubjson)); ubjson.cur.p = ubjson.cur.start; - unsigned char kv[] = "{ \"key\" : {\"obj\":\"123\"} , \"otherkey\": \"stringy\"}"; + unsigned char kv[] = "{ \"key\" : {\"obj\":123} , \"otherkey\": \"stringy\"}"; assert(parse_ubjson(kv, sizeof(kv), &ubjson)); const char *path[] = {"key", "obj"}; assert(ubjson_lookup(&ubjson, path, 2, &val)); - - printf("found val: "); - print_value(&val); - printf("\n---\n"); - + assert(val.number_int == 123); ubjson.cur.p = ubjson.cur.start; + assert(map_file("corpus/math.json", &p, &flen)); assert(ubjson.errs.record); assert(parse_ubjson(p, flen, &ubjson)); - printf("\n---\n"); write_data(ubjson.cur.start, ubjson.data_end - ubjson.cur.start); const char *path2[] = {"meta", "view", "oid"}; assert(ubjson_lookup(&ubjson, path2, 3, &val)); + assert(val.number_int == 26867540); +} + +static void test_compact_handler(unsigned char *out, size_t outlen) +{ + struct json_parser p; + struct json_handlers h; + struct json_pusher pusher; + + init_json_pusher(&pusher, out, outlen); + 
make_compact_handlers(&h, &pusher); + + unsigned char kv[] = "\n{ \"key\" : {\"obj\":\"123\"} , \"otherkey\": \"stringy\"}\n"; + assert(parse_json(kv, sizeof(kv), &p, &h)); + const char *expected = "{\"key\":{\"obj\":\"123\"},\"otherkey\":\"stringy\"}"; + assert(!memcmp(expected, pusher.cur.start, strlen(expected))); +} + +int main(int argc, char *argv[]) +{ + unsigned char *out; + size_t outlen; + + outlen = 1024 * 1024 * 80; + out = malloc(outlen); + + test_ubjson_handler(out, outlen); + test_compact_handler(out, outlen); - print_value(&val); - printf("\n"); free(out); } diff --git a/src/ubjson.c b/src/ubjson.c @@ -350,36 +350,46 @@ int ubjson_lookup(struct ubjson *ubjson, const char **path, int path_len, struct } -static int handle_ubjson_str(struct json_handlers *h, unsigned char *text, int size) +static inline int handle_ubjson_str(struct json_handlers *h, unsigned char *text, int size) { return push_ubjson_str((struct cursor*)h->data, text, size); } -static int handle_ubjson_bool(struct json_handlers *h, int val) +static inline int handle_ubjson_bool(struct json_handlers *h, int val) { return push_byte((struct cursor*)h->data, val ? 
'T' : 'F'); } -static int handle_ubjson_null(struct json_handlers *h) +static inline int handle_ubjson_null(struct json_handlers *h) { return push_ubjson_null((struct cursor*)h->data); } -static int handle_ubjson_number(struct json_handlers *h, unsigned int num) +static inline int handle_ubjson_number(struct json_handlers *h, unsigned int num) { return push_ubjson_num((struct cursor*)h->data, num); } -static int handle_ubjson_open(struct json_handlers *h, char type, unsigned int **len) +static inline int handle_ubjson_open(struct json_handlers *h, char type, unsigned int **len) { return push_ubjson_open((struct cursor*)h->data, type, len); } -static int handle_ubjson_close(struct json_handlers *h, char type, unsigned int *len) +static inline int handle_ubjson_close(struct json_handlers *h, char type, unsigned int *len) { return push_ubjson_close((struct cursor*)h->data, len, type); } +static int handle_ubjson_token(struct json_handlers *h, char token, unsigned int **len) +{ + if (token == '{' || token == '[') + return handle_ubjson_open(h, token, len); + else if (token == '}' || token == ']') + return handle_ubjson_close(h, token, *len); + else + return 1; +} + static void make_ubjson_handlers(struct json_handlers *h, struct cursor *ubjson) { h->data = (void*)ubjson; @@ -388,8 +398,7 @@ static void make_ubjson_handlers(struct json_handlers *h, struct cursor *ubjson) h->boolean = handle_ubjson_bool; h->null = handle_ubjson_null; h->number = handle_ubjson_number; - h->open = handle_ubjson_open; - h->close = handle_ubjson_close; + h->token = handle_ubjson_token; } int parse_ubjson(unsigned char *buf, size_t buf_size, struct ubjson *out)