commit 1402b081bb2e991d5a12ad41ee8464009cfa2477
parent dec699b16d793d2c71f9118ae304e0fe64ff99a4
Author: William Casarin <jb55@jb55.com>
Date: Mon, 7 Dec 2020 15:36:57 -0800
ubjson traversal working
Diffstat:
M | src/json.c | | | 298 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- |
M | src/json.h | | | 26 | ++++++++++++++++++++++++-- |
M | src/test_json.c | | | 20 | +++++++++++++++----- |
3 files changed, 330 insertions(+), 14 deletions(-)
diff --git a/src/json.c b/src/json.c
@@ -10,8 +10,15 @@
struct json_parser {
struct cursor cur;
struct cursor ubjson;
+ int record_errors; /* parse_value sets this to 0 while it tries each value parser in turn and back to 1 afterwards; presumably consulted by note_error -- TODO confirm */
};
+/*
+static void note_error_(struct json_parser *p, ...)
+{
+}
+*/
+
static void consume_whitespace(struct cursor *cur)
{
for (; cur->p < cur->end; cur->p++) {
@@ -116,7 +123,7 @@ static int push_ubjson_len(struct cursor *ubjson, unsigned int len)
return 1;
}
-static int push_ubjson_str(struct cursor *ubjson, unsigned char *text,
+static int push_ubjson_str(struct cursor *ubjson, unsigned char *text,
unsigned int size)
{
@@ -208,9 +215,16 @@ static int parse_kv(struct json_parser *p)
return parse_value(p);
}
+/*
+ * Close the object currently being emitted: back-patch the 4-byte length
+ * placeholder that parse_object reserved right after the 'l' tag, then
+ * append the closing '}' tag.
+ *
+ * The stored length counts every byte that follows the placeholder, up to
+ * and including the '}' pushed here.
+ *
+ * `len` points into the ubjson byte buffer, which carries no alignment
+ * guarantee, so the value is written with memcpy instead of being stored
+ * through the unsigned int pointer.
+ *
+ * Returns whatever push_byte returns for the closing tag.
+ */
+static inline int finalize_object(struct json_parser *p, unsigned int *len)
+{
+	unsigned char *slot = (unsigned char *)len;
+	unsigned int size = (unsigned int)(p->ubjson.p + 1 - slot - 4);
+
+	memcpy(slot, &size, sizeof(size));
+	return push_byte(&p->ubjson, '}');
+}
+
static int parse_object(struct json_parser *p)
{
char c;
+ unsigned int *len;
c = 0;
if (!parse_char(&p->cur, &c, '{')) {
@@ -221,6 +235,15 @@ static int parse_object(struct json_parser *p)
if (!push_byte(&p->ubjson, '{'))
return 0;
+ if (!push_byte(&p->ubjson, 'l'))
+ return 0;
+
+ /* fill this in later */
+ len = (unsigned int *)p->ubjson.p;
+
+ if (!push_int(&p->ubjson, 0))
+ return 0;
+
while(1) {
consume_whitespace(&p->cur);
@@ -231,7 +254,7 @@ static int parse_object(struct json_parser *p)
if (c == '}') {
/* TODO (generalize): finalize_object */
- return push_byte(&p->ubjson, '}');
+ return finalize_object(p, len);
}
if (c != '"') {
@@ -256,7 +279,7 @@ static int parse_object(struct json_parser *p)
if (c == '}') {
/* TODO (generalize): finalize_object */
- return push_byte(&p->ubjson, '}');
+ return finalize_object(p, len);
}
note_error(p, "expected ',' or '}', got '%c'", c);
@@ -311,13 +334,22 @@ static int parse_value(struct json_parser *p)
consume_whitespace(&p->cur);
- res = parse_string(p) ||
- parse_number(p) ||
+ p->record_errors = 0;
+
+ res = parse_string(p) ||
parse_object(p) ||
+ parse_number(p) ||
parse_array(p) ||
parse_bool(p) ||
parse_null(p);
+ p->record_errors = 1;
+
+ if (!res) {
+ note_error(p, "couldn't parse json value");
+ return 0;
+ }
+
consume_whitespace(&p->cur);
return res;
@@ -345,6 +377,255 @@ static int parse_array_or_object(struct json_parser *p)
return 0;
}
+/*
+ * Read a tagged integer at the cursor and store it in *len as a size.
+ *
+ * Handled tags:
+ *   'U' / 'i'  one byte, taken as an unsigned value
+ *   'I'        an unsigned short read with pull_data
+ *   'l' / 'L'  an int read with pull_int; negative values are rejected
+ *
+ * NOTE(review): 'i' is read as unsigned and 'L' is read with the same
+ * int-sized pull as 'l' -- confirm this matches what the writer emits.
+ *
+ * Returns 1 on success with the cursor past the number. On any failure an
+ * error is noted, the cursor is restored to where it was on entry, and 0
+ * is returned.
+ */
+static int parse_ubjson_size(struct ubjson *ubjson, unsigned int *len)
+{
+	unsigned char byte;
+	unsigned short u16;
+	int i32;
+	unsigned char *start;
+
+	start = ubjson->cur.p;
+
+	if (!pull_byte(&ubjson->cur, &byte)) {
+		note_error(ubjson->errors, "oob");
+	} else if (byte == 'U' || byte == 'i') {
+		if (!pull_byte(&ubjson->cur, &byte)) {
+			note_error(ubjson->errors, "pull byte after u8 int");
+		} else {
+			*len = byte;
+			return 1;
+		}
+	} else if (byte == 'I') {
+		if (!pull_data(&ubjson->cur, (unsigned char*)&u16, sizeof(u16))) {
+			note_error(ubjson->errors, "pull byte after u16 int");
+		} else {
+			*len = u16;
+			return 1;
+		}
+	} else if (byte == 'l' || byte == 'L') {
+		if (!pull_int(&ubjson->cur, &i32)) {
+			note_error(ubjson->errors, "pull int after i32 tag");
+		} else if (i32 < 0) {
+			/* a negative length would wrap to a huge unsigned size */
+			note_error(ubjson->errors, "negative size %d", i32);
+		} else {
+			*len = (unsigned int)i32;
+			return 1;
+		}
+	} else {
+		note_error(ubjson->errors, "unhandled number tag '%c'", byte);
+	}
+
+	ubjson->cur.p = start;
+	return 0;
+}
+
+/*
+ * Parse a string header ('S' tag followed by a size) at the cursor.
+ *
+ * On success val->type is JSON_STRING, val->len is the string's byte
+ * length and val->string points at the string bytes inside the buffer.
+ * The cursor is left at the first string byte, not past it; callers that
+ * want to step over the string add val->len themselves.
+ *
+ * The length is checked against the end of the cursor's buffer so callers
+ * can read or skip val->len bytes without running past it.
+ *
+ * Returns 1 on success, 0 (with an error noted) otherwise.
+ */
+static int parse_ubjson_string(struct ubjson *ubjson, struct json *val)
+{
+	char byte;
+	byte = 0;
+
+	if (!parse_char(&ubjson->cur, &byte, 'S')) {
+		note_error(ubjson->errors, "expected S tag, got '%c'", byte);
+		return 0;
+	}
+
+	if (!parse_ubjson_size(ubjson, &val->len)) {
+		note_error(ubjson->errors, "size");
+		return 0;
+	}
+
+	if (val->len > (size_t)(ubjson->cur.end - ubjson->cur.p)) {
+		note_error(ubjson->errors, "string size oob");
+		return 0;
+	}
+
+	val->type = JSON_STRING;
+	val->string = (char*)ubjson->cur.p;
+	return 1;
+}
+
+/*
+ * Report whether `c` is one of the value tags this reader accepts:
+ * 'S', '{', '[' or 'Z'. Returns 1 if so, 0 otherwise.
+ */
+static inline int valid_ubjson_tag(char c)
+{
+	switch (c) {
+	case 'S':
+	case '{':
+	case '[':
+	case 'Z':
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Skip over one value at the cursor without decoding it.
+ *
+ * Reads the tag byte, then (for every accepted tag except 'Z') a size via
+ * parse_ubjson_size, and advances the cursor by that many bytes. 'Z' has
+ * a zero-length payload.
+ *
+ * The size is compared against the number of bytes remaining rather than
+ * by forming `p + len`, which would be undefined if it ran past the end
+ * of the buffer.
+ *
+ * Returns 1 on success, 0 (with an error noted) otherwise. The cursor is
+ * not rewound on failure.
+ */
+static int consume_ubjson_value(struct ubjson *u)
+{
+	unsigned char c;
+	unsigned int len;
+
+	c = 0;
+
+	if (!pull_byte(&u->cur, &c)) {
+		note_error(u->errors, "oob");
+		return 0;
+	}
+	if (!valid_ubjson_tag(c)) {
+		note_error(u->errors, "invalid value tag '%c'", c);
+		return 0;
+	}
+
+	if (c == 'Z') {
+		len = 0;
+	} else if (!parse_ubjson_size(u, &len)) {
+		note_error(u->errors, "value size for tag '%c'", c);
+		return 0;
+	}
+
+	if (len > (size_t)(u->cur.end - u->cur.p)) {
+		note_error(u->errors, "value size oob");
+		return 0;
+	}
+
+	u->cur.p += len;
+	return 1;
+}
+
+/*
+ * Duplicate a ubjson view from `src` into `dst`: data_end is carried over
+ * and the cursor is copied with copy_cursor.
+ */
+static inline void copy_ubjson(struct ubjson *src, struct ubjson *dst)
+{
+	dst->data_end = src->data_end;
+	copy_cursor(&src->cur, &dst->cur);
+}
+
+/*
+ * Parse an object header at the cursor and step over the whole object.
+ *
+ * val->type is set to JSON_OBJECT and val->container receives a copy of
+ * the view taken before the '{' tag is consumed, so the container's
+ * cursor still points at the '{'. val->len receives the size read after
+ * the tag, and on success the cursor is advanced by that many bytes.
+ *
+ * An object that ends exactly at data_end is accepted. The size is
+ * compared against the bytes remaining before data_end instead of
+ * forming `p + len`, which would be undefined past the buffer.
+ *
+ * Returns 1 on success. On a bad size or length the cursor is put back
+ * at the '{' tag, an error is noted and 0 is returned.
+ */
+static int parse_ubjson_object(struct ubjson *ubjson, struct json *val)
+{
+	char c;
+	c = 0;
+
+	val->type = JSON_OBJECT;
+	copy_ubjson(ubjson, &val->container);
+
+	if (!parse_char(&ubjson->cur, &c, '{')) {
+		note_error(ubjson->errors, "expected '{' tag, got '%c'", c);
+		return 0;
+	}
+
+	if (!parse_ubjson_size(ubjson, &val->len)) {
+		/* reset */
+		ubjson->cur.p = val->container.cur.p;
+		note_error(ubjson->errors, "object len");
+		return 0;
+	}
+
+	if (ubjson->cur.p > ubjson->data_end ||
+	    val->len > (size_t)(ubjson->data_end - ubjson->cur.p)) {
+		/* reset */
+		ubjson->cur.p = val->container.cur.p;
+		note_error(ubjson->errors, "invalid len %u", val->len);
+		return 0;
+	}
+
+	ubjson->cur.p += val->len;
+
+	return 1;
+}
+
+/*
+ * Decode the value at the cursor into `val`, dispatching on its tag:
+ *   'Z'  null; the tag byte is consumed
+ *   'S'  string, handled by parse_ubjson_string
+ *   '{'  object, handled by parse_ubjson_object
+ * Any other tag is reported as unhandled.
+ *
+ * Returns 1 on success, 0 (with an error noted) otherwise.
+ */
+int parse_ubjson_value(struct ubjson *ubjson, struct json *val)
+{
+	char tag;
+
+	if (!peek_char(&ubjson->cur, &tag)) {
+		note_error(ubjson->errors, "peek value tag oob");
+		return 0;
+	}
+
+	switch (tag) {
+	case 'Z':
+		ubjson->cur.p++;
+		val->type = JSON_NULL;
+		return 1;
+	case 'S':
+		return parse_ubjson_string(ubjson, val);
+	case '{':
+		return parse_ubjson_object(ubjson, val);
+	default:
+		break;
+	}
+
+	note_error(ubjson->errors, "unhandled type '%c'", tag);
+	return 0;
+}
+
+/*
+ * Find the member named `path` in the object at the cursor and decode its
+ * value into `val`.
+ *
+ * The cursor must be at an object's '{' tag. Members are scanned in
+ * order: each key is parsed as a string (using `val` as scratch space),
+ * compared against `path`, and non-matching values are skipped with
+ * consume_ubjson_value. On a match the value is decoded with
+ * parse_ubjson_value and its result is returned.
+ *
+ * Returns 0 when the object header is malformed, when skipping a value
+ * fails, or when the scan stops (closing '}' reached, read out of bounds,
+ * bad key). When the scan stops, the cursor is reset to cur.start.
+ */
+static int ubjson_obj_lookup(struct ubjson *ubjson, const char *path, struct json *val)
+{
+	char byte;
+	unsigned int len;
+	size_t path_len;
+
+	byte = 0;
+	/* invariant across the scan, so measure the key once */
+	path_len = strlen(path);
+
+	if (!parse_char(&ubjson->cur, &byte, '{')) {
+		note_error(ubjson->errors, "no object tag, got '%c'", byte);
+		return 0;
+	}
+
+	if (!parse_ubjson_size(ubjson, &len)) {
+		note_error(ubjson->errors, "object size");
+		return 0;
+	}
+
+	while (1) {
+		if (!peek_char(&ubjson->cur, &byte)) {
+			note_error(ubjson->errors, "oob");
+			break;
+		}
+
+		if (byte == '}') {
+			break;
+		}
+
+		if (!parse_ubjson_string(ubjson, val)) {
+			note_error(ubjson->errors, "string");
+			break;
+		}
+
+		/* the key bytes are compared and stepped over below */
+		if (val->len > (size_t)(ubjson->cur.end - ubjson->cur.p)) {
+			note_error(ubjson->errors, "key size oob");
+			break;
+		}
+
+		ubjson->cur.p += val->len;
+
+		if (path_len != val->len ||
+		    memcmp(path, val->string, val->len)) {
+			/* skip over value */
+			if (!consume_ubjson_value(ubjson)) {
+				note_error(ubjson->errors, "skip value");
+				return 0;
+			}
+			continue;
+		}
+
+		return parse_ubjson_value(ubjson, val);
+	}
+
+	ubjson->cur.p = ubjson->cur.start;
+	return 0;
+}
+
+/*
+ * Walk `path` (an array of `path_len` key names) down through nested
+ * objects starting at `ubjson`, leaving the value found at the final
+ * segment in `val`.
+ *
+ * The first segment is looked up through the caller's `ubjson`, so its
+ * cursor is advanced by the lookup. Each deeper segment is looked up
+ * through a local copy of the container found at the previous step.
+ * The copy is needed because the lookup overwrites `val`, and
+ * val->container shares union storage with val->string.
+ *
+ * Returns 1 on success. Returns 0 (with an error noted) when the path is
+ * empty, when a segment is not found, or when a non-final segment is not
+ * an object.
+ */
+int read_ubjson_object(struct ubjson *ubjson, const char **path, int path_len, struct json *val)
+{
+	int i;
+	const char *seg;
+	struct ubjson scope;
+	struct ubjson *next;
+
+	if (path_len <= 0) {
+		note_error(ubjson->errors, "empty path");
+		return 0;
+	}
+
+	next = ubjson;
+
+	for (i = 0; i < path_len; i++) {
+		seg = path[i];
+
+		if (!ubjson_obj_lookup(next, seg, val)) {
+			note_error(ubjson->errors, "lookup path segment: '%s'", seg);
+			return 0;
+		}
+
+		/* not at the last segment and don't have an object or array */
+		if (i != path_len-1 && val->type != JSON_OBJECT) {
+			note_error(ubjson->errors,
+				   "segment '%s' not an object, got type %d",
+				   seg, (int)val->type);
+			return 0;
+		} else if (val->type == JSON_OBJECT) {
+			/* detach from val before the next lookup reuses it */
+			scope = val->container;
+			next = &scope;
+		}
+	}
+
+	return 1;
+}
+
+/*
+ * Print a decoded value to stdout. Strings are printed using val->len as
+ * the precision, followed by a newline; every other type prints a
+ * placeholder naming the numeric type.
+ */
+void print_value(struct json *val)
+{
+	if (val->type == JSON_STRING) {
+		printf("%.*s\n", val->len, val->string);
+		return;
+	}
+
+	printf("implement print %d", val->type);
+}
+
int parse_json(unsigned char *buf, size_t buf_size, struct ubjson *out)
{
int res;
@@ -353,10 +634,13 @@ int parse_json(unsigned char *buf, size_t buf_size, struct ubjson *out)
memset(&p, 0, sizeof(p));
make_cursor(buf, buf + buf_size, &p.cur);
- make_cursor(out->data, out->data + out->data_size, &p.ubjson);
+ copy_cursor(&out->cur, &p.ubjson);
res = parse_array_or_object(&p);
- out->data_size = p.ubjson.p - p.ubjson.start;
+
+ out->data_end = p.ubjson.p;
+ out->cur.p = out->cur.start;
+
return res;
}
diff --git a/src/json.h b/src/json.h
@@ -2,15 +2,37 @@
#ifndef WOLFSOCKS_JSON
#define WOLFSOCKS_JSON
+#include "cursor.h"
#include <stddef.h>
struct ubjson {
- unsigned char *data;
- size_t data_size;
+ struct cursor cur; /* read/write position over the encoded buffer */
+ unsigned char *data_end; /* set by parse_json to the position just past the last byte it wrote */
};
+/*
+ * Kind of value held in a struct json.
+ * NOTE(review): the reader functions in this change only ever produce
+ * JSON_STRING, JSON_NULL and JSON_OBJECT.
+ */
+enum json_value_type {
+ JSON_NUMBER,
+ JSON_STRING,
+ JSON_NULL,
+ JSON_BOOL,
+ JSON_OBJECT,
+ JSON_ARRAY,
+};
+
+/*
+ * A decoded value handed back by the ubjson reader.
+ *
+ * `len` is the size read from the encoded stream: the byte length for a
+ * string, or the size that follows the '{' tag for an object.
+ *
+ * The payload members share storage (anonymous union), so writing one of
+ * them -- e.g. `string` -- overwrites the others, including `container`.
+ */
+struct json {
+ enum json_value_type type;
+ unsigned int len;
+ union {
+ struct ubjson container; /* for JSON_OBJECT: view whose cursor points at the object's '{' tag */
+ double number;
+ const char *string; /* for JSON_STRING: points into the encoded buffer; use `len` for its extent */
+ int boolean;
+ };
+};
int print_ubjson(struct ubjson *json);
int parse_json(unsigned char *buf, size_t buf_size, struct ubjson *out);
+int read_ubjson_object(struct ubjson *ubjson, const char **path, int path_len, struct json *val);
+void print_value(struct json *val);
#endif
diff --git a/src/test_json.c b/src/test_json.c
@@ -1,5 +1,6 @@
#include "json.h"
+#include "cursor.h"
#include <assert.h>
#include <stdio.h>
@@ -17,9 +18,9 @@ int main(int argc, char *argv[])
{
static unsigned char out[1024] = {0};
struct ubjson ubjson;
+ struct json val;
- ubjson.data = out;
- ubjson.data_size = sizeof(out);
+ make_cursor(out, out + sizeof(out), &ubjson.cur);
//unsigned char bad[] = "{a}";
//assert(!parse_json(bad, sizeof(bad), &ubjson));
@@ -27,10 +28,19 @@ int main(int argc, char *argv[])
unsigned char empty[] = "{}";
assert(parse_json(empty, sizeof(empty), &ubjson));
- ubjson.data_size = sizeof(out);
+ ubjson.cur.p = ubjson.cur.start;
- unsigned char kv[] = "{ \"key\" : null , \"key2\": \"stringy\"}";
+ unsigned char kv[] = "{ \"key\" : {\"a\": \"b\"} , \"otherkey\": \"stringy\"}";
assert(parse_json(kv, sizeof(kv), &ubjson));
- write_data(ubjson.data, ubjson.data_size);
+ printf("\n");
+ const char *path[] = {"key", "a"};
+ assert(read_ubjson_object(&ubjson, path, 2, &val));
+
+ printf("found val: ");
+ print_value(&val);
+ printf("\n");
+
+ write_data(ubjson.cur.start, ubjson.data_end - ubjson.cur.start);
+
}