commit 807f763ef25f1303368177479efedda49d7dcdcf
parent 087978681c1aff8cc5dc06e7290a3dda06fa42f5
Author: William Casarin <jb55@jb55.com>
Date: Sun, 20 Dec 2020 10:56:12 -0800
generalize json parsing
Diffstat:
M | Makefile | | | 4 | ++-- |
M | src/json.c | | | 466 | ++++++++----------------------------------------------------------------------- |
M | src/json.h | | | 43 | ++++++++++++++----------------------------- |
M | src/test_json.c | | | 7 | ++++--- |
A | src/ubjson.c | | | 438 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | src/ubjson.h | | | 42 | ++++++++++++++++++++++++++++++++++++++++++ |
6 files changed, 544 insertions(+), 456 deletions(-)
diff --git a/Makefile b/Makefile
@@ -1,8 +1,8 @@
-CFLAGS = -Wall -Werror -std=gnu99 -Isrc $(shell pkg-config --cflags openssl)
+CFLAGS = -Og -ggdb -Wall -Werror -std=gnu99 -Isrc $(shell pkg-config --cflags openssl)
LDFLAGS = $(shell pkg-config --libs openssl)
-OBJS = src/http.o src/base64.o src/inbox.o src/json.o
+OBJS = src/http.o src/base64.o src/inbox.o src/json.o src/ubjson.o
HEADERS = $(wildcard src/*.h)
diff --git a/src/json.c b/src/json.c
@@ -6,16 +6,9 @@
#include <assert.h>
#include <ctype.h>
-struct json_parser {
- struct cursor cur;
- struct cursor ubjson;
- struct errors errs;
-};
-
-void init_ubjson(struct ubjson *ubjson, unsigned char *buf, size_t bufsize)
+void init_json_handlers(struct json_handlers *h) /* reset a handler table so every callback and the data pointer start out zeroed */
{
- make_cursor(buf, buf + bufsize, &ubjson->cur);
- init_errors(&ubjson->errs);
+ memset(h, 0, sizeof(*h)); /* all-zero bytes; callers must install callbacks before parsing, the parser calls them unchecked */
}
#define max(a,b) ((a) > (b) ? (a) : (b))
@@ -121,55 +114,6 @@ static int parse_utf8_char(struct cursor *cur, unsigned int *chr)
return 0;
}
-static int push_ubjson_num(struct cursor *ubjson, unsigned int len)
-{
- if (len <= 0xFF) {
- if (!push_byte(ubjson, 'U')) {
- return 0;
- }
- if (!push_byte(ubjson, (unsigned char)len)) {
- return 0;
- }
- } else if (len <= 0xFFFF) {
- if (!push_byte(ubjson, 'I')) {
- return 0;
- }
- /* TODO: big-endian push */
- if (!push_u16(ubjson, (unsigned int)len)) {
- return 0;
- }
- } else {
- if (!push_byte(ubjson, 'l')) {
- return 0;
- }
- /* TODO: big-endian push */
- if (!push_int(ubjson, len)) {
- return 0;
- }
- }
- return 1;
-}
-
-static int push_ubjson_str(struct cursor *ubjson, unsigned char *text,
- unsigned int size)
-{
-
- if (!push_byte(ubjson, 'S')) {
- return 0;
- }
-
- if (!push_ubjson_num(ubjson, size)) {
- return 0;
- }
-
- /* TODO push string without escapes */
- if (!push_data(ubjson, text, size)) {
- return 0;
- }
-
- return 1;
-}
-
static inline int parse_escape(struct json_parser *p)
{
p->cur.p++;
@@ -198,8 +142,8 @@ static int parse_string(struct json_parser *p)
}
if (chr == '"') {
- /* TODO @generalize push json string */
- return push_ubjson_str(&p->ubjson, start+1, p->cur.p-start-2);
+ return (*p->handlers.string)(&p->handlers, start+1,
+ p->cur.p-start-2);
}
if (chr == '\\' && !parse_escape(p)) {
@@ -285,8 +229,7 @@ static int parse_number(struct json_parser *p)
}
if (c != '.' && (c < '0' || c > '9')) {
/* just a 0 */
- push_ubjson_num(&p->ubjson, 0);
- return 1;
+ return (*p->handlers.number)(&p->handlers, 0);
}
return parse_fraction(p);
} else if (c >= '1' && c <= '9') {
@@ -295,11 +238,15 @@ static int parse_number(struct json_parser *p)
return 0;
}
- return push_ubjson_num(&p->ubjson, digits * sign);
+ /* @generalize */
+ return (*p->handlers.number)(&p->handlers, digits * sign);
} else {
note_error(&p->errs, "expected '-', '0' or '1-9', got '%c'", c)
return 0;
}
+
+ note_error(&p->errs, "impossibru");
+ return 0;
}
static int parse_value(struct json_parser *p);
@@ -325,23 +272,6 @@ static int parse_kv(struct json_parser *p)
}
-static inline int finalize_container(struct json_parser *p, unsigned int *len, char end)
-{
- *len = p->ubjson.p + 1 - ((unsigned char*)len)-4;
- p->cur.p++;
- return push_byte(&p->ubjson, end);
-}
-
-static inline int finalize_array(struct json_parser *p, unsigned int *len)
-{
- return finalize_container(p, len, ']');
-}
-
-static inline int finalize_object(struct json_parser *p, unsigned int *len)
-{
- return finalize_container(p, len, '}');
-}
-
static int parse_object(struct json_parser *p)
{
char c;
@@ -353,17 +283,10 @@ static int parse_object(struct json_parser *p)
return 0;
}
- if (!push_byte(&p->ubjson, '{'))
- return 0;
-
- if (!push_byte(&p->ubjson, 'l'))
- return 0;
-
- /* fill this in later */
- len = (unsigned int *)p->ubjson.p;
-
- if (!push_int(&p->ubjson, 0))
+ if (!(*p->handlers.open)(&p->handlers, '{', &len)) {
+ note_error(&p->errs, "obj handle open", c);
return 0;
+ }
while(1) {
consume_whitespace(&p->cur);
@@ -374,8 +297,8 @@ static int parse_object(struct json_parser *p)
}
if (c == '}') {
- /* TODO (generalize): finalize_object */
- return finalize_object(p, len);
+ p->cur.p++;
+ return (*p->handlers.close)(&p->handlers, '}', len);
}
if (c != '"') {
@@ -399,8 +322,8 @@ static int parse_object(struct json_parser *p)
}
if (c == '}') {
- /* TODO (generalize): finalize_object */
- return finalize_object(p, len);
+ p->cur.p++;
+ return (*p->handlers.close)(&p->handlers, '}', len);
}
note_error(&p->errs, "expected ',' or '}', got '%c'", c);
@@ -423,21 +346,8 @@ static int parse_array(struct json_parser *p)
return 0;
}
- if (!push_byte(&p->ubjson, '[')) {
- note_error(&p->errs, "push ubjson '[' oob");
- return 0;
- }
-
- if (!push_byte(&p->ubjson, 'l')) {
- note_error(&p->errs, "push ubjson 'l' oob");
- return 0;
- }
-
- /* fill this in later */
- len = (unsigned int *)p->ubjson.p;
-
- if (!push_int(&p->ubjson, 0)) {
- note_error(&p->errs, "push ubjson array len oob");
+ if (!(*p->handlers.open)(&p->handlers, '[', &len)) {
+ note_error(&p->errs, "obj handle open", c);
return 0;
}
@@ -450,8 +360,8 @@ static int parse_array(struct json_parser *p)
}
if (c == ']') {
- finalize_array(p, len);
- return 1;
+ p->cur.p++;
+ return (*p->handlers.close)(&p->handlers, ']', len);
}
while(1) {
@@ -466,7 +376,8 @@ static int parse_array(struct json_parser *p)
}
if (c == ']') {
- return finalize_array(p, len);
+ p->cur.p++;
+ return (*p->handlers.close)(&p->handlers, ']', len);
}
if (c != ',') {
@@ -483,22 +394,16 @@ static int parse_array(struct json_parser *p)
static inline int parse_bool(struct json_parser *p) /* match "true" or "false" at the cursor and report it through the boolean handler */
{
if (parse_str(&p->cur, "true")) {
- /* TODO @generalize push true */
- return push_byte(&p->ubjson, 'T');
+ return (*p->handlers.boolean)(&p->handlers, 1); /* handler result becomes the parse result */
}
if (parse_str(&p->cur, "false")) {
- return push_byte(&p->ubjson, 'F');
+ return (*p->handlers.boolean)(&p->handlers, 0); /* 0 stands for false */
}
return 0; /* neither literal matched; no error is noted here */
}
-static inline int push_ubjson_null(struct cursor *ubjson)
-{
- return push_byte(ubjson, 'Z');
-}
-
static int parse_null(struct json_parser *p)
{
if (!parse_str(&p->cur, "null")) {
@@ -506,8 +411,7 @@ static int parse_null(struct json_parser *p)
return 0;
}
- /* TODO @generalize push null */
- return push_ubjson_null(&p->ubjson);
+ return (*p->handlers.null)(&p->handlers);
}
static int parse_value(struct json_parser *p)
@@ -560,313 +464,31 @@ static int parse_array_or_object(struct json_parser *p)
return 0;
}
-static int parse_ubjson_sized_len(struct ubjson *ubjson, unsigned int *len, unsigned int *size)
-{
- unsigned char byte;
- unsigned short u16;
- unsigned char *start;
-
- start = ubjson->cur.p;
-
- if (!pull_byte(&ubjson->cur, &byte)) {
- note_error(&ubjson->errs, "oob");
- return 0;
- } else if (byte == 'U' || byte == 'i') {
- if (!pull_byte(&ubjson->cur, &byte)) {
- note_error(&ubjson->errs, "pull byte after u8 int");
- } else {
- if (size)
- *size = 1;
- *len = byte;
- return 1;
- }
- } else if (byte == 'I') {
- if (!pull_data(&ubjson->cur, (unsigned char*)&u16, sizeof(u16))) {
- note_error(&ubjson->errs, "pull byte after u16 int");
- } else {
- if (size)
- *size = 2;
- *len = u16;
- return 1;
- }
- } else if (byte == 'l' || byte == 'L') {
- if (!pull_int(&ubjson->cur, (int*)len)) {
- note_error(&ubjson->errs, "pull byte after u16 int");
- } else {
- if (size)
- *size = 4;
- return 1;
- }
- } else {
- note_error(&ubjson->errs, "unhandled number tag '%c'", byte);
- }
-
- ubjson->cur.p = start;
- return 0;
-}
-
-static inline int parse_ubjson_len(struct ubjson *ubjson, unsigned int *len)
-{
- return parse_ubjson_sized_len(ubjson, len, NULL);
-}
-
-static int parse_ubjson_string(struct ubjson *ubjson, struct json *val)
-{
- char byte;
- byte = 0;
-
- if (!parse_char(&ubjson->cur, &byte, 'S')) {
- note_error(&ubjson->errs, "expected S tag, got '%c'", byte);
- return 0;
- }
-
- if (!parse_ubjson_len(ubjson, &val->len)) {
- note_error(&ubjson->errs, "size");
- return 0;
- }
-
- val->type = JSON_STRING;
- val->string = (char*)ubjson->cur.p;
-
- ubjson->cur.p += val->len;
-
- return 1;
-}
-
-static inline int is_number_tag(unsigned char tag)
-{
- return tag == 'U' || tag == 'l' || tag == 'I' || tag == 'L' || tag == 'i';
-}
-
-static inline int valid_ubjson_tag(char c)
-{
- return c == 'S' ||
- c == '{' ||
- c == '[' ||
- c == 'T' ||
- c == 'F' ||
- c == 'Z' || is_number_tag(c);
-}
-
-static inline void copy_ubjson(struct ubjson *src, struct ubjson *dst) {
- copy_cursor(&src->cur, &dst->cur);
- copy_errors(&src->errs, &dst->errs);
- dst->data_end = src->data_end;
-}
-
-static int parse_ubjson_object(struct ubjson *ubjson, struct json *val)
-{
- char c;
- c = 0;
-
- val->type = JSON_OBJECT;
- assert(ubjson != &val->container);
- copy_ubjson(ubjson, &val->container);
-
- if (!parse_char(&ubjson->cur, &c, '{')) {
- note_error(&ubjson->errs, "expected '{' tag, got '%c'", c);
- return 0;
- }
-
- if (!parse_ubjson_len(ubjson, &val->len)) {
- /* reset */
- ubjson->cur.p = val->container.cur.p;
- note_error(&ubjson->errs, "object len");
- return 0;
- }
-
- if (ubjson->cur.p + val->len >= ubjson->data_end) {
- /* reset */
- ubjson->cur.p = val->container.cur.p;
- note_error(&ubjson->errs, "invalid len %d", val->len);
- return 0;
- }
-
- ubjson->cur.p += val->len;
-
- return 1;
-}
-
-static int parse_ubjson_number(struct ubjson *ubjson, struct json *val)
-{
- if (!parse_ubjson_sized_len(ubjson, &val->number_int, &val->len)) {
- note_error(&ubjson->errs, "");
- return 0;
- }
-
- val->type = JSON_NUMBER_INT;
- return 1;
-}
-
-static int parse_ubjson_bool(struct ubjson *ubjson, struct json *val)
-{
- unsigned char byte;
- if (!pull_byte(&ubjson->cur, &byte)) {
- note_error(&ubjson->errs, "pull byte oob");
- return 0;
- }
-
- if (byte != 'T' && byte != 'F') {
- note_error(&ubjson->errs, "invalid bool tag: '%c'", byte);
- return 0;
- }
-
- val->type = JSON_BOOL;
- val->len = 0;
- val->boolean = byte == 'T';
- return 1;
-}
-
-int parse_ubjson_value(struct ubjson *ubjson, struct json *val)
-{
- char tag;
- assert(&ubjson->cur != &val->container.cur);
- if (!peek_char(&ubjson->cur, &tag)) {
- note_error(&ubjson->errs, "peek value tag oob");
- return 0;
- }
-
- if (tag == 'Z') {
- val->type = JSON_NULL;
- ubjson->cur.p++;
- return 1;
- } else if (tag == 'S') {
- return parse_ubjson_string(ubjson, val);
- } else if (tag == '{') {
- return parse_ubjson_object(ubjson, val);
- } else if (is_number_tag(tag)) {
- return parse_ubjson_number(ubjson, val);
- } else if (tag == 'F' || tag == 'T') {
- return parse_ubjson_bool(ubjson, val);
- }
-
- note_error(&ubjson->errs, "unhandled type '%c'", tag);
- return 0;
-}
-
-static int ubjson_obj_lookup(struct ubjson *ubjson, const char *path, struct json *val)
-{
- char byte;
- unsigned int len;
- struct json blackhole;
- byte = 0;
- assert(&ubjson->cur != &val->container.cur);
-
- if (!parse_char(&ubjson->cur, &byte, '{')) {
- note_error(&ubjson->errs, "no object tag, got '%c'", byte);
- return 0;
- }
-
- if (!parse_ubjson_len(ubjson, &len)) {
- note_error(&ubjson->errs, "object size");
- return 0;
- }
-
- while (1) {
- if (!peek_char(&ubjson->cur, &byte)) {
- note_error(&ubjson->errs, "oob");
- break;
- }
-
- if (byte == '}') {
- note_error(&ubjson->errs, "not found")
- break;
- }
-
- if (!parse_ubjson_string(ubjson, val)) {
- note_error(&ubjson->errs, "string");
- break;
- }
-
- if (strlen(path) != val->len ||
- memcmp(path, val->string, val->len)) {
- /* skip over value */
- if (!parse_ubjson_value(ubjson, &blackhole)) {
- note_error(&ubjson->errs, "skip value");
- return 0;
- }
-
- continue;
- }
-
- return parse_ubjson_value(ubjson, val);
- }
- note_error(&ubjson->errs, "not found");
- ubjson->cur.p = ubjson->cur.start;
- return 0;
-}
-
-int ubjson_lookup(struct ubjson *ubjson, const char **path, int path_len, struct json *val)
+static void copy_json_handlers(struct json_handlers *src,
+ struct json_handlers *dst) /* copy the data pointer and all six callbacks from *src into *dst */
{
- int i;
- const char *seg;
- char byte;
- struct ubjson next;
-
- byte = 0;
- copy_ubjson(ubjson, &next);
-
- for (i = 0; i < path_len; i++) {
- seg = path[i];
-
- if (!ubjson_obj_lookup(&next, seg, val)) {
- note_error(&ubjson->errs, "lookup path segment: '%s'", seg);
- return 0;
- }
-
- /* not at the last segment and don't have an object or array */
- if (i != path_len-1 && val->type != JSON_OBJECT) {
- note_error(&ubjson->errs,
- "segment '%s' not an object, got '%c'", seg, byte);
- return 0;
- } else if (val->type == JSON_OBJECT) {
- copy_ubjson(&val->container, &next);
- }
- }
-
- return 1;
+ dst->data = src->data; /* pointer is shared, not duplicated */
+ dst->string = src->string;
+ dst->boolean = src->boolean;
+ dst->null = src->null;
+ dst->number = src->number;
+ dst->open = src->open;
+ dst->close = src->close;
}
-void print_value(struct json *val)
+static void init_json_parser(struct json_parser *p, unsigned char *buf,
+ size_t buf_size, struct json_handlers *h) /* prepare *p to parse buf[0..buf_size) using a private copy of the handlers in *h */
{
- switch (val->type) {
- case JSON_STRING:
- printf("%.*s", val->len, val->string);
- return;
- case JSON_NULL:
- printf("null");
- return;
- case JSON_NUMBER_INT:
- printf("%d", val->number_int);
- return;
- case JSON_NUMBER:
- printf("%f", val->number);
- return;
- case JSON_BOOL:
- printf("%s", val->boolean? "true" : "false");
- return;
- default:
- printf("implement print %d", val->type);
- }
+ memset(p, 0, sizeof(*p)); /* also zeroes p->errs; init_errors is not called here */
+ make_cursor(buf, buf + buf_size, &p->cur); /* input cursor spans the whole buffer */
+ copy_json_handlers(h, &p->handlers); /* later changes to *h do not affect the parser */
}
-int parse_json(unsigned char *buf, size_t buf_size, struct ubjson *out)
+int parse_json(unsigned char *buf, size_t buf_size, struct json_parser *p,
+ struct json_handlers *handlers) /* parse one top-level JSON array or object from buf, emitting values through the handlers */
{
- int res;
- struct json_parser p;
-
- memset(&p, 0, sizeof(p));
-
- make_cursor(buf, buf + buf_size, &p.cur);
- copy_cursor(&out->cur, &p.ubjson);
-
- res = parse_array_or_object(&p);
-
- out->data_end = p.ubjson.p;
- out->cur.p = out->cur.start;
-
- return res;
+ init_json_parser(p, buf, buf_size, handlers); /* *p is overwritten; any previous parser state is discarded */
+ return parse_array_or_object(p); /* result of the top-level parse; errors are noted in p->errs */
}
-
diff --git a/src/json.h b/src/json.h
@@ -6,38 +6,23 @@
#include "errors.h"
#include <stddef.h>
-struct ubjson {
- struct cursor cur;
- unsigned char *data_end;
- struct errors errs;
-};
-
-enum json_value_type {
- JSON_NUMBER,
- JSON_NUMBER_INT,
- JSON_STRING,
- JSON_NULL,
- JSON_BOOL,
- JSON_OBJECT,
- JSON_ARRAY,
+struct json_handlers { /* callbacks the JSON parser invokes for each value it recognises; the parser returns each callback's result */
+ void *data; /* opaque pointer for the handler implementation; the parser only copies it */
+ int (*string)(struct json_handlers *data, unsigned char *text, int size); /* text/size cover the bytes between the quotes, as they appear in the input */
+ int (*boolean)(struct json_handlers *data, int val); /* val is 1 for true, 0 for false */
+ int (*null)(struct json_handlers *data); /* a null literal was parsed */
+ int (*number)(struct json_handlers *data, unsigned int number); /* an integer value */
+ int (*open)(struct json_handlers *data, char type, unsigned int **len); /* type is '{' or '['; *len is set by the handler and handed back to close */
+ int (*close)(struct json_handlers *data, char type, unsigned int *len); /* type is '}' or ']'; len is the pointer produced by the matching open */
};
-struct json {
- enum json_value_type type;
- unsigned int len;
- union {
- struct ubjson container;
- double number;
- unsigned int number_int;
- const char *string;
- int boolean;
- };
+struct json_parser { /* state for one parse_json run */
+ struct cursor cur; /* position within the JSON input buffer */
+ struct errors errs; /* errors noted while parsing */
+ struct json_handlers handlers; /* private copy of the caller's handler table */
};
-void init_ubjson(struct ubjson *ubjson, unsigned char *buf, size_t bufsize);
-int print_ubjson(struct ubjson *json);
-int parse_json(unsigned char *buf, size_t buf_size, struct ubjson *out);
-int ubjson_lookup(struct ubjson *ubjson, const char **path, int path_len, struct json *val);
-void print_value(struct json *val);
+void init_json_handlers(struct json_handlers *h); /* zero every field of *h */
+int parse_json(unsigned char *buf, size_t buf_size, struct json_parser *, struct json_handlers *); /* reinitialise the parser over buf, then parse one top-level array or object through the handlers */
#endif
diff --git a/src/test_json.c b/src/test_json.c
@@ -1,5 +1,6 @@
#include "json.h"
+#include "ubjson.h"
#include "cursor.h"
#include <assert.h>
@@ -49,11 +50,11 @@ int main(int argc, char *argv[])
init_ubjson(&ubjson, out, outlen);
unsigned char empty[] = "{}";
- assert(parse_json(empty, sizeof(empty), &ubjson));
+ assert(parse_ubjson(empty, sizeof(empty), &ubjson));
ubjson.cur.p = ubjson.cur.start;
unsigned char kv[] = "{ \"key\" : {\"obj\":\"123\"} , \"otherkey\": \"stringy\"}";
- assert(parse_json(kv, sizeof(kv), &ubjson));
+ assert(parse_ubjson(kv, sizeof(kv), &ubjson));
const char *path[] = {"key", "obj"};
assert(ubjson_lookup(&ubjson, path, 2, &val));
@@ -65,7 +66,7 @@ int main(int argc, char *argv[])
ubjson.cur.p = ubjson.cur.start;
assert(map_file("corpus/math.json", &p, &flen));
assert(ubjson.errs.record);
- parse_json(p, flen, &ubjson);
+ assert(parse_ubjson(p, flen, &ubjson));
printf("\n---\n");
write_data(ubjson.cur.start, ubjson.data_end - ubjson.cur.start);
diff --git a/src/ubjson.c b/src/ubjson.c
@@ -0,0 +1,438 @@
+
+#include "ubjson.h"
+#include "json.h"
+#include "parse.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Append the null marker ('Z') to the output cursor; returns push_byte's result. */
+static inline int push_ubjson_null(struct cursor *ubjson)
+{
+	const unsigned char null_tag = 'Z';
+
+	return push_byte(ubjson, null_tag);
+}
+
+/* Returns 1 when `tag` is one of the integer markers 'i', 'U', 'I', 'l' or 'L', else 0. */
+static inline int is_number_tag(unsigned char tag)
+{
+	switch (tag) {
+	case 'i':
+	case 'U':
+	case 'I':
+	case 'l':
+	case 'L':
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Read one tagged integer at the cursor.
+ *
+ * On success returns 1, stores the value in *len and, when `size` is not
+ * NULL, stores the payload width in bytes (1, 2 or 4) in *size.
+ * On an unknown tag or a short payload an error is noted, the cursor is put
+ * back where it was on entry and 0 is returned.
+ *
+ * NOTE(review): 'i' is stored as an unsigned byte and 'L' is pulled with
+ * pull_int exactly like 'l'; confirm that matches the intended encoding.
+ * Multi-byte payloads are read in host order (see the big-endian TODOs on
+ * the writer side).
+ */
+static int parse_ubjson_sized_len(struct ubjson *ubjson, unsigned int *len, unsigned int *size)
+{
+	unsigned char *start = ubjson->cur.p;
+	unsigned char byte;
+	unsigned short u16;
+	unsigned int width;
+
+	if (!pull_byte(&ubjson->cur, &byte)) {
+		note_error(&ubjson->errs, "oob");
+		return 0;
+	}
+
+	switch (byte) {
+	case 'U':
+	case 'i':
+		if (!pull_byte(&ubjson->cur, &byte)) {
+			note_error(&ubjson->errs, "pull byte after u8 int");
+			goto fail;
+		}
+		*len = byte;
+		width = 1;
+		break;
+	case 'I':
+		if (!pull_data(&ubjson->cur, (unsigned char*)&u16, sizeof(u16))) {
+			note_error(&ubjson->errs, "pull byte after u16 int");
+			goto fail;
+		}
+		*len = u16;
+		width = 2;
+		break;
+	case 'l':
+	case 'L':
+		if (!pull_int(&ubjson->cur, (int*)len)) {
+			/* this branch used to report the u16 message */
+			note_error(&ubjson->errs, "pull int after i32 tag");
+			goto fail;
+		}
+		width = 4;
+		break;
+	default:
+		note_error(&ubjson->errs, "unhandled number tag '%c'", byte);
+		goto fail;
+	}
+
+	if (size) {
+		*size = width;
+	}
+	return 1;
+
+fail:
+	ubjson->cur.p = start;
+	return 0;
+}
+
+/* Read a tagged integer into *len when the caller does not need its encoded width. */
+static inline int parse_ubjson_len(struct ubjson *ubjson, unsigned int *len)
+{
+	unsigned int *no_width = NULL;
+
+	return parse_ubjson_sized_len(ubjson, len, no_width);
+}
+
+/*
+ * Read an 'S'-tagged string at the cursor.
+ *
+ * On success returns 1 with val->type = JSON_STRING, val->len set to the
+ * byte length and val->string pointing into the buffer (not copied and not
+ * NUL-terminated); the cursor is left just past the string bytes.
+ * Returns 0 with an error noted when the tag or length cannot be read, or
+ * when the declared length runs past ubjson->data_end; in that last case the
+ * cursor is put back on the 'S' tag.
+ */
+static int parse_ubjson_string(struct ubjson *ubjson, struct json *val)
+{
+	unsigned char *start = ubjson->cur.p;
+	char byte = 0;
+
+	if (!parse_char(&ubjson->cur, &byte, 'S')) {
+		note_error(&ubjson->errs, "expected S tag, got '%c'", byte);
+		return 0;
+	}
+
+	if (!parse_ubjson_len(ubjson, &val->len)) {
+		note_error(&ubjson->errs, "size");
+		return 0;
+	}
+
+	/* the length comes from the data: never step the cursor past data_end */
+	if (ubjson->cur.p > ubjson->data_end ||
+	    val->len > (size_t)(ubjson->data_end - ubjson->cur.p)) {
+		ubjson->cur.p = start;
+		note_error(&ubjson->errs, "string len %u oob", val->len);
+		return 0;
+	}
+
+	val->type = JSON_STRING;
+	val->string = (char*)ubjson->cur.p;
+
+	ubjson->cur.p += val->len;
+
+	return 1;
+}
+
+/*
+ * Finish a container started by push_ubjson_open: patch the length
+ * placeholder that `len` points at, then append the closing marker `end`.
+ *
+ * The stored length is the number of bytes from the end of the 4-byte
+ * placeholder up to and including the closing marker.
+ * The placeholder sits at an arbitrary byte offset in the output, so it is
+ * filled with memcpy instead of a store through the unsigned int pointer.
+ * Returns push_byte's result for the closing marker.
+ */
+static inline int push_ubjson_close(struct cursor *ubjson, unsigned int *len, char end)
+{
+	unsigned char *slot = (unsigned char *)len;
+	unsigned int span = (unsigned int)(ubjson->p + 1 - slot - 4);
+
+	memcpy(slot, &span, sizeof(span));
+	return push_byte(ubjson, end);
+}
+
+/*
+ * Start a container: push the marker `typ`, an 'l' length tag and a zero
+ * placeholder. *len receives the address of the placeholder so that
+ * push_ubjson_close can fill it in. Returns 1 on success, 0 if any push fails.
+ */
+static int push_ubjson_open(struct cursor *ubjson, char typ, unsigned int **len)
+{
+	if (!push_byte(ubjson, typ) || !push_byte(ubjson, 'l')) {
+		return 0;
+	}
+
+	/* remember where the placeholder goes before it is written */
+	*len = (unsigned int *)ubjson->p;
+
+	return push_int(ubjson, 0) ? 1 : 0;
+}
+
+/*
+ * Append `len` using the narrowest tag that holds it: 'U' + 1 byte up to
+ * 0xFF, 'I' + push_u16 up to 0xFFFF, otherwise 'l' + push_int.
+ * Returns 1 when both the tag and the payload were pushed, else 0.
+ */
+static int push_ubjson_num(struct cursor *ubjson, unsigned int len)
+{
+	if (len <= 0xFF) {
+		return push_byte(ubjson, 'U') &&
+		       push_byte(ubjson, (unsigned char)len);
+	}
+
+	if (len <= 0xFFFF) {
+		/* TODO: big-endian push */
+		return push_byte(ubjson, 'I') &&
+		       push_u16(ubjson, (unsigned int)len);
+	}
+
+	/* TODO: big-endian push */
+	return push_byte(ubjson, 'l') && push_int(ubjson, len);
+}
+
+/*
+ * Append a string: the 'S' tag, the tagged length `size`, then `size` bytes
+ * of `text`. Returns 1 when all three pushes succeed, else 0; pushing stops
+ * at the first failure.
+ */
+static int push_ubjson_str(struct cursor *ubjson, unsigned char *text,
+			   unsigned int size)
+{
+	/* TODO push string without escapes */
+	return push_byte(ubjson, 'S') &&
+	       push_ubjson_num(ubjson, size) &&
+	       push_data(ubjson, text, size);
+}
+
+/* Returns 1 when `c` is a value marker: 'S', '{', '[', 'T', 'F', 'Z' or an integer tag; else 0. */
+static inline int valid_ubjson_tag(char c)
+{
+	switch (c) {
+	case 'S':
+	case '{':
+	case '[':
+	case 'T':
+	case 'F':
+	case 'Z':
+		return 1;
+	default:
+		return is_number_tag(c) ? 1 : 0;
+	}
+}
+
+/* Copy the cursor, the error state and data_end of *src into *dst. */
+static inline void copy_ubjson(struct ubjson *src, struct ubjson *dst)
+{
+	struct cursor *from = &src->cur;
+	struct cursor *to = &dst->cur;
+
+	copy_cursor(from, to);
+	copy_errors(&src->errs, &dst->errs);
+	dst->data_end = src->data_end;
+}
+
+/*
+ * Step over an object at the cursor without descending into it.
+ *
+ * val->type is set to JSON_OBJECT and val->container receives a copy of
+ * *ubjson taken before the '{' is consumed, so it can be used to look inside
+ * the object later. On success val->len holds the stored length, the cursor
+ * is advanced by that many bytes past the length field and 1 is returned.
+ * On a missing '{' tag 0 is returned; on a bad or out-of-range length the
+ * cursor is put back on the '{' and 0 is returned.
+ */
+static int parse_ubjson_object(struct ubjson *ubjson, struct json *val)
+{
+	char c = 0;
+
+	val->type = JSON_OBJECT;
+	assert(ubjson != &val->container);
+	copy_ubjson(ubjson, &val->container);
+
+	if (!parse_char(&ubjson->cur, &c, '{')) {
+		note_error(&ubjson->errs, "expected '{' tag, got '%c'", c);
+		return 0;
+	}
+
+	if (!parse_ubjson_len(ubjson, &val->len)) {
+		/* reset */
+		ubjson->cur.p = val->container.cur.p;
+		note_error(&ubjson->errs, "object len");
+		return 0;
+	}
+
+	/*
+	 * Compare against the bytes that remain instead of forming p + len
+	 * first, which can overflow the pointer for a hostile length. An
+	 * object may end exactly at data_end, so only a longer one is rejected.
+	 */
+	if (ubjson->cur.p > ubjson->data_end ||
+	    val->len > (size_t)(ubjson->data_end - ubjson->cur.p)) {
+		/* reset */
+		ubjson->cur.p = val->container.cur.p;
+		note_error(&ubjson->errs, "invalid len %u", val->len);
+		return 0;
+	}
+
+	ubjson->cur.p += val->len;
+
+	return 1;
+}
+
+/*
+ * Read a tagged integer into val->number_int, with its payload width in
+ * val->len, and mark the value JSON_NUMBER_INT. Returns 1 on success; on
+ * failure notes an error and returns 0 without touching val->type.
+ */
+static int parse_ubjson_number(struct ubjson *ubjson, struct json *val)
+{
+	if (!parse_ubjson_sized_len(ubjson, &val->number_int, &val->len)) {
+		/* was an empty message, which says nothing in the error trail */
+		note_error(&ubjson->errs, "number");
+		return 0;
+	}
+
+	val->type = JSON_NUMBER_INT;
+	return 1;
+}
+
+/*
+ * Consume one byte and interpret it as a boolean marker: 'T' gives 1, 'F'
+ * gives 0. On success val becomes a JSON_BOOL with len 0 and 1 is returned.
+ * Any other byte, or an empty cursor, notes an error and returns 0; the byte
+ * that was pulled is not put back.
+ */
+static int parse_ubjson_bool(struct ubjson *ubjson, struct json *val)
+{
+	unsigned char tag;
+	int is_true;
+
+	if (!pull_byte(&ubjson->cur, &tag)) {
+		note_error(&ubjson->errs, "pull byte oob");
+		return 0;
+	}
+
+	is_true = (tag == 'T');
+	if (!is_true && tag != 'F') {
+		note_error(&ubjson->errs, "invalid bool tag: '%c'", tag);
+		return 0;
+	}
+
+	val->type = JSON_BOOL;
+	val->len = 0;
+	val->boolean = is_true;
+	return 1;
+}
+
+/*
+ * Read the value at the cursor into *val, dispatching on its tag:
+ * 'Z' null, 'S' string, '{' object, 'T'/'F' boolean, integer tags number.
+ * Returns the sub-parser's result, or 0 with an error noted when the cursor
+ * is empty or the tag is not handled ('[' is not handled here).
+ */
+int parse_ubjson_value(struct ubjson *ubjson, struct json *val)
+{
+	char tag;
+
+	assert(&ubjson->cur != &val->container.cur);
+
+	if (!peek_char(&ubjson->cur, &tag)) {
+		note_error(&ubjson->errs, "peek value tag oob");
+		return 0;
+	}
+
+	switch (tag) {
+	case 'Z':
+		val->type = JSON_NULL;
+		ubjson->cur.p++;
+		return 1;
+	case 'S':
+		return parse_ubjson_string(ubjson, val);
+	case '{':
+		return parse_ubjson_object(ubjson, val);
+	case 'T':
+	case 'F':
+		return parse_ubjson_bool(ubjson, val);
+	default:
+		break;
+	}
+
+	if (is_number_tag(tag)) {
+		return parse_ubjson_number(ubjson, val);
+	}
+
+	note_error(&ubjson->errs, "unhandled type '%c'", tag);
+	return 0;
+}
+
+/*
+ * Find the key `path` among the direct members of the object at the cursor.
+ *
+ * Keys are compared by length and bytes. On a match the member's value is
+ * parsed into *val and that parse result is returned. Non-matching members
+ * are skipped by parsing their value into a scratch struct; if that skip
+ * fails, 0 is returned with the cursor left where the failure happened.
+ * When the closing '}' is reached, the data runs out, or a key cannot be
+ * read, "not found" is noted once, the cursor is rewound to cur.start and 0
+ * is returned. Note that *val is also used as scratch for each key read.
+ */
+static int ubjson_obj_lookup(struct ubjson *ubjson, const char *path, struct json *val)
+{
+	/* the key length does not change while scanning, so measure it once */
+	const size_t path_len = strlen(path);
+	struct json blackhole;
+	unsigned int len;
+	char byte = 0;
+
+	assert(&ubjson->cur != &val->container.cur);
+
+	if (!parse_char(&ubjson->cur, &byte, '{')) {
+		note_error(&ubjson->errs, "no object tag, got '%c'", byte);
+		return 0;
+	}
+
+	if (!parse_ubjson_len(ubjson, &len)) {
+		note_error(&ubjson->errs, "object size");
+		return 0;
+	}
+
+	for (;;) {
+		if (!peek_char(&ubjson->cur, &byte)) {
+			note_error(&ubjson->errs, "oob");
+			break;
+		}
+
+		if (byte == '}') {
+			/* "not found" is noted once, after the loop */
+			break;
+		}
+
+		if (!parse_ubjson_string(ubjson, val)) {
+			note_error(&ubjson->errs, "string");
+			break;
+		}
+
+		if (path_len == val->len &&
+		    !memcmp(path, val->string, val->len)) {
+			return parse_ubjson_value(ubjson, val);
+		}
+
+		/* skip over value */
+		if (!parse_ubjson_value(ubjson, &blackhole)) {
+			note_error(&ubjson->errs, "skip value");
+			return 0;
+		}
+	}
+
+	note_error(&ubjson->errs, "not found");
+	ubjson->cur.p = ubjson->cur.start;
+	return 0;
+}
+
+/*
+ * Resolve a path of `path_len` object keys, starting from the object at the
+ * cursor of *ubjson, and leave the final value in *val.
+ *
+ * The walk runs on a private copy, so the cursor of *ubjson is not moved.
+ * Every segment except the last must resolve to an object. Returns 1 on
+ * success and 0 with an error noted in ubjson->errs otherwise. With
+ * path_len <= 0 nothing is looked up: *val is untouched and 1 is returned.
+ */
+int ubjson_lookup(struct ubjson *ubjson, const char **path, int path_len, struct json *val)
+{
+	struct ubjson next;
+	int i;
+
+	copy_ubjson(ubjson, &next);
+
+	for (i = 0; i < path_len; i++) {
+		const char *seg = path[i];
+		const int last = (i == path_len - 1);
+
+		if (!ubjson_obj_lookup(&next, seg, val)) {
+			note_error(&ubjson->errs, "lookup path segment: '%s'", seg);
+			return 0;
+		}
+
+		if (val->type == JSON_OBJECT) {
+			/* descend: the next segment is searched inside this object */
+			copy_ubjson(&val->container, &next);
+		} else if (!last) {
+			/*
+			 * Report the type actually found; the old message
+			 * formatted a byte that was always 0 with %c.
+			 */
+			note_error(&ubjson->errs,
+				   "segment '%s' not an object, got type %d",
+				   seg, (int)val->type);
+			return 0;
+		}
+	}
+
+	return 1;
+}
+
+
+/* json_handlers.string adapter: append the string to the cursor stored in h->data. */
+static int handle_ubjson_str(struct json_handlers *h, unsigned char *text, int size)
+{
+	struct cursor *out = h->data;
+
+	return push_ubjson_str(out, text, size);
+}
+
+/* json_handlers.boolean adapter: append 'T' for a nonzero val, 'F' otherwise. */
+static int handle_ubjson_bool(struct json_handlers *h, int val)
+{
+	struct cursor *out = h->data;
+
+	if (val) {
+		return push_byte(out, 'T');
+	}
+	return push_byte(out, 'F');
+}
+
+/* json_handlers.null adapter: append the null marker to the cursor stored in h->data. */
+static int handle_ubjson_null(struct json_handlers *h)
+{
+	struct cursor *out = h->data;
+
+	return push_ubjson_null(out);
+}
+
+/* json_handlers.number adapter: append `num` as a tagged integer. */
+static int handle_ubjson_number(struct json_handlers *h, unsigned int num)
+{
+	struct cursor *out = h->data;
+
+	return push_ubjson_num(out, num);
+}
+
+/* json_handlers.open adapter: start a container of the given type; *len receives the length placeholder. */
+static int handle_ubjson_open(struct json_handlers *h, char type, unsigned int **len)
+{
+	struct cursor *out = h->data;
+
+	return push_ubjson_open(out, type, len);
+}
+
+/* json_handlers.close adapter: patch the placeholder from open and append the closing marker `type`. */
+static int handle_ubjson_close(struct json_handlers *h, char type, unsigned int *len)
+{
+	struct cursor *out = h->data;
+
+	/* the helper takes (cursor, len, marker), not the handler's order */
+	return push_ubjson_close(out, len, type);
+}
+
+/*
+ * Fill *h with the adapters in this file and point h->data at the output
+ * cursor they write to. Every field of *h is assigned.
+ */
+static void make_ubjson_handlers(struct json_handlers *h, struct cursor *ubjson)
+{
+	*h = (struct json_handlers){
+		.data    = ubjson,
+		.string  = handle_ubjson_str,
+		.boolean = handle_ubjson_bool,
+		.null    = handle_ubjson_null,
+		.number  = handle_ubjson_number,
+		.open    = handle_ubjson_open,
+		.close   = handle_ubjson_close,
+	};
+}
+
+/*
+ * Parse the JSON text in buf[0..buf_size) and write its encoding through
+ * out->cur, starting at the cursor's current position.
+ *
+ * Afterwards, whether or not parsing succeeded, out->data_end marks where
+ * writing stopped and the cursor is rewound to cur.start. Returns
+ * parse_json's result. The parser and its error state are local to this
+ * call and are not copied into out->errs.
+ */
+int parse_ubjson(unsigned char *buf, size_t buf_size, struct ubjson *out)
+{
+	struct json_handlers handlers;
+	struct json_parser parser;
+	int res;
+
+	make_ubjson_handlers(&handlers, &out->cur);
+	res = parse_json(buf, buf_size, &parser, &handlers);
+
+	out->data_end = out->cur.p;
+	out->cur.p = out->cur.start;
+
+	return res;
+}
+
+/*
+ * Prepare *ubjson to use buf[0..bufsize) as its buffer and reset its error
+ * state. data_end starts at the beginning of the buffer, meaning no data has
+ * been written yet, so a read attempted before parse_ubjson sees a defined
+ * value instead of an indeterminate pointer.
+ */
+void init_ubjson(struct ubjson *ubjson, unsigned char *buf, size_t bufsize)
+{
+	make_cursor(buf, buf + bufsize, &ubjson->cur);
+	ubjson->data_end = buf;
+	init_errors(&ubjson->errs);
+}
+
+/*
+ * Print *val to stdout with no trailing newline. Strings are written with an
+ * explicit length because they are not NUL-terminated. Types without a
+ * printer (objects, arrays) print a placeholder with the numeric type.
+ */
+void print_value(struct json *val)
+{
+	switch (val->type) {
+	case JSON_STRING:
+		/* the precision argument of %.*s must be an int */
+		printf("%.*s", (int)val->len, val->string);
+		return;
+	case JSON_NULL:
+		printf("null");
+		return;
+	case JSON_NUMBER_INT:
+		/* number_int is unsigned */
+		printf("%u", val->number_int);
+		return;
+	case JSON_NUMBER:
+		printf("%f", val->number);
+		return;
+	case JSON_BOOL:
+		printf("%s", val->boolean? "true" : "false");
+		return;
+	default:
+		printf("implement print %d", val->type);
+	}
+}
diff --git a/src/ubjson.h b/src/ubjson.h
@@ -0,0 +1,42 @@
+
+#ifndef WOLFSOCKS_UBJSON
+#define WOLFSOCKS_UBJSON
+
+#include "cursor.h"
+#include "errors.h"
+
+struct ubjson { /* a byte buffer holding encoded data, with a position and an error log */
+ struct cursor cur; /* buffer bounds and current position; set up by init_ubjson */
+ unsigned char *data_end; /* where writing stopped in the last parse_ubjson call */
+ struct errors errs; /* errors noted while reading this buffer */
+};
+
+enum json_value_type { /* discriminator for struct json */
+ JSON_NUMBER, /* value is in the double member `number` */
+ JSON_NUMBER_INT, /* value is in the unsigned member `number_int` */
+ JSON_STRING, /* `string` points at `len` bytes */
+ JSON_NULL, /* no payload */
+ JSON_BOOL, /* value is in `boolean` */
+ JSON_OBJECT, /* `container` gives access to the object's bytes */
+ JSON_ARRAY, /* NOTE(review): presumably for arrays; confirm which reader produces it */
+};
+
+struct json { /* one decoded value; `type` selects the active union member */
+ enum json_value_type type; /* which union member below is meaningful */
+ unsigned int len; /* string byte length, integer payload width, or stored object length, depending on type */
+ union { /* anonymous: members are accessed directly on struct json */
+ struct ubjson container; /* JSON_OBJECT: buffer view used to look inside the object */
+ double number; /* JSON_NUMBER */
+ unsigned int number_int; /* JSON_NUMBER_INT */
+ const char *string; /* JSON_STRING: points into the buffer, `len` bytes, not NUL-terminated */
+ int boolean; /* JSON_BOOL: nonzero for true */
+ };
+};
+
+void init_ubjson(struct ubjson *ubjson, unsigned char *buf, size_t bufsize); /* attach buf[0..bufsize) as the buffer and reset the error state */
+int print_ubjson(struct ubjson *json); /* NOTE(review): no definition visible alongside this declaration; confirm it exists */
+int parse_ubjson(unsigned char *buf, size_t buf_size, struct ubjson *out); /* encode the JSON text in buf into out; returns the parser's result */
+int ubjson_lookup(struct ubjson *ubjson, const char **path, int path_len, struct json *val); /* resolve a path of object keys; 1 with *val filled on success, 0 otherwise */
+void print_value(struct json *val); /* print *val to stdout */
+
+#endif /* WOLFSOCKS_UBJSON */