commit 57f13bbc561517eaec1e7b48e1e38c4abb470306
parent f29b59ae633b2fec66409f9cc2f6fcb753ecd14b
Author: William Casarin <jb55@jb55.com>
Date: Sun, 6 Dec 2020 16:28:15 -0800
json progress
Diffstat:
9 files changed, 345 insertions(+), 157 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,5 @@
/wolfsocks
*.o
/tags
+/test_out.ubjson
+src/test_json
diff --git a/Makefile b/Makefile
@@ -5,13 +5,17 @@ OBJS = src/http.o src/base64.o src/inbox.o src/json.o
HEADERS = $(wildcard src/*.h)
-DEPS = src/wolfsocks.c $(OBJS)
+wolfsocks: src/wolfsocks.c $(OBJS) $(HEADERS)
+ $(CC) $(CFLAGS) $< $(OBJS) $(LDFLAGS) -o $@
-wolfsocks: $(DEPS) $(HEADERS)
- $(CC) $(CFLAGS) $(DEPS) $(LDFLAGS) -o $@
+src/test_json: src/test_json.c $(OBJS) $(HEADERS)
+ $(CC) $(CFLAGS) $< $(OBJS) $(LDFLAGS) -o $@
+
+check: src/test_json
+ ./src/test_json
clean: fake
- rm -f $(OBJS) wolfsocks
+ rm -f $(OBJS) wolfsocks src/test_json
tags: fake
ctags src/*.c src/*.h
diff --git a/src/cursor.c b/src/cursor.c
@@ -1,137 +0,0 @@
-
-#include "cursor.h"
-
-#include <stdio.h>
-#include <string.h>
-
-void copy_cursor(struct cursor *src, struct cursor *dest)
-{
- dest->start = src->start;
- dest->p = src->p;
- dest->end = src->end;
-}
-
-void make_cursor(unsigned char *start, unsigned char *end, struct cursor *cursor)
-{
- cursor->start = start;
- cursor->p = start;
- cursor->end = end;
-}
-
-int cursor_index(struct cursor *cursor, int elem_size)
-{
- return (cursor->p - cursor->start) / elem_size;
-}
-
-
-int pull_byte(struct cursor *cursor, unsigned char *c)
-{
- if (cursor->p + 1 > cursor->end)
- return 0;
-
- *c = *cursor->p;
- cursor->p++;
-
- return 1;
-}
-
-
-int push_byte(struct cursor *cursor, unsigned char c)
-{
- if (cursor->p + 1 >= cursor->end) {
- return 0;
- }
-
- *cursor->p = c;
- cursor->p++;
-
- return 1;
-}
-
-int pull_data_into_cursor(struct cursor *cursor,
- struct cursor *dest,
- unsigned char **data,
- int len)
-{
- int ok;
-
- if (dest->p + len >= dest->end) {
- printf("not enough room in dest buffer\n");
- return 0;
- }
-
- ok = pull_data(cursor, dest->p, len);
- if (!ok) return 0;
-
- *data = dest->p;
- dest->p += len;
-
- return 1;
-}
-
-int pull_data(struct cursor *cursor, unsigned char *data, int len)
-{
- if (cursor->p + len >= cursor->end) {
- return 0;
- }
-
- memcpy(data, cursor->p, len);
- cursor->p += len;
-
- return 1;
-}
-
-int push_data(struct cursor *cursor, unsigned char *data, int len)
-{
- if (cursor->p + len > cursor->end) {
- printf("push_data oob\n");
- return 0;
- }
-
- memcpy(cursor->p, data, len);
- cursor->p += len;
-
- return 1;
-}
-
-int push_int(struct cursor *cursor, int i)
-{
- return push_data(cursor, (unsigned char*)&i, sizeof(i));
-}
-
-int pull_int(struct cursor *cursor, int *i)
-{
- return pull_data(cursor, (unsigned char*)i, sizeof(*i));
-}
-
-int push_u16(struct cursor *cursor, unsigned short i)
-{
- return push_data(cursor, (unsigned char*)&i, sizeof(i));
-}
-
-void *index_cursor(struct cursor *cursor, unsigned short index, int elem_size)
-{
- unsigned char *p;
- p = &cursor->start[elem_size * index];
-
- if (p > cursor->end)
- return NULL;
-
- return (void*)p;
-}
-
-
-int push_sized_str(struct cursor *cursor, const char *str, int len)
-{
- return push_data(cursor, (unsigned char*)str, len);
-}
-
-int push_str(struct cursor *cursor, const char *str)
-{
- return push_data(cursor, (unsigned char*)str, strlen(str));
-}
-
-int cursor_remaining_capacity(struct cursor *cursor)
-{
- return cursor->end - cursor->p;
-}
diff --git a/src/cursor.h b/src/cursor.h
@@ -63,7 +63,7 @@ static inline int cursor_index(struct cursor *cursor, int elem_size)
static inline int pull_byte(struct cursor *cursor, unsigned char *c)
{
- if (unlikely(cursor->p + 1 > cursor->end))
+ if (unlikely(cursor->p + 1 >= cursor->end))
return 0;
*c = *cursor->p;
@@ -75,7 +75,7 @@ static inline int pull_byte(struct cursor *cursor, unsigned char *c)
static inline int push_byte(struct cursor *cursor, unsigned char c)
{
- if (unlikely(cursor->p + 1 > cursor->end)) {
+ if (unlikely(cursor->p + 1 >= cursor->end)) {
return 0;
}
diff --git a/src/http.c b/src/http.c
@@ -1,5 +1,7 @@
#include "http.h"
#include "util.h"
+#include "parse.h"
+
#include <stdio.h>
#include <assert.h>
@@ -26,12 +28,6 @@ static int parse_segment_with(struct parser *p, char **seg, char delim)
return 1;
}
-static int consume_char(struct cursor *cur, char match)
-{
- char c;
- return pull_byte(cur, (unsigned char*)&c) && c == match;
-}
-
static int parse_segment(struct parser *p, char **seg)
{
return parse_segment_with(p, seg, ' ');
diff --git a/src/json.c b/src/json.c
@@ -1,7 +1,11 @@
#include "cursor.h"
+#include "json.h"
+#include "parse.h"
+
#include <ctype.h>
+/*
+ * Print a diagnostic to stderr, prefixed with the name of the calling function.
+ * `p` is accepted at every call site but is not used by the expansion.
+ * The expansion is a single expression without a trailing semicolon, so
+ * `note_error(...);` is exactly one statement, even in an unbraced if/else.
+ */
+#define note_error(p, msg, ...) fprintf(stderr, "%s: " msg "\n", __func__, ##__VA_ARGS__)
struct json_parser {
struct cursor cur;
@@ -16,16 +20,192 @@ static void consume_whitespace(struct cursor *cur)
}
}
+/*
+ * Pull one UTF-8 continuation byte (bit pattern 10xxxxxx) from `cur`.
+ *
+ * On success the six payload bits are stored in *c and 1 is returned.
+ * Returns 0 when no byte could be pulled, or when the byte pulled is not a
+ * continuation byte (in which case *c holds the raw byte).
+ */
+static int pull_utf8_cont(struct cursor *cur, unsigned char *c)
+{
+	if (pull_byte(cur, c) && (*c & 0xC0) == 0x80) {
+		*c &= 0x3F;
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Decode one UTF-8 encoded code point from `cur` into *chr.
+ *
+ * Returns 1 on success. Returns 0 when a byte cannot be pulled, a
+ * continuation byte is malformed, the lead byte is not a valid start byte,
+ * or the decoded value is an overlong encoding, a UTF-16 surrogate
+ * (U+D800..U+DFFF) or above U+10FFFF. *chr may have been written even when
+ * 0 is returned. This function never rewinds `cur`; callers that want to
+ * retry must restore the cursor position themselves.
+ */
+static int parse_utf8_char(struct cursor *cur, unsigned int *chr)
+{
+	unsigned char c[4];
+
+	if (!pull_byte(cur, &c[0]))
+		return 0;
+
+	/* 0xxxxxxx: single-byte (ASCII) */
+	if ((c[0] & 0x80) == 0) {
+		*chr = c[0];
+		return 1;
+	}
+
+	/* 110xxxxx 10xxxxxx */
+	if ((c[0] & 0xE0) == 0xC0) {
+		if (!pull_utf8_cont(cur, &c[1]))
+			return 0;
+
+		*chr = ((c[0] & 0x1F) << 6) | c[1];
+
+		/* anything below U+0080 is an overlong encoding */
+		return *chr >= 0x80;
+	}
+
+	/* 1110xxxx 10xxxxxx 10xxxxxx */
+	if ((c[0] & 0xF0) == 0xE0) {
+		if (!(pull_utf8_cont(cur, &c[1]) &&
+		      pull_utf8_cont(cur, &c[2])))
+			return 0;
+
+		*chr = ((c[0] & 0x0F) << 12) | (c[1] << 6) | c[2];
+
+		/* reject overlong forms and the surrogate range */
+		return *chr >= 0x800 && (*chr < 0xD800 || *chr > 0xDFFF);
+	}
+
+	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
+	if ((c[0] & 0xF8) == 0xF0) {
+		if (!(pull_utf8_cont(cur, &c[1]) &&
+		      pull_utf8_cont(cur, &c[2]) &&
+		      pull_utf8_cont(cur, &c[3])))
+			return 0;
+
+		/* the lead byte carries three payload bits, hence mask 0x07 */
+		*chr = ((c[0] & 0x07) << 18) | (c[1] << 12)
+		     | (c[2] << 6) | c[3];
+
+		/* reject overlong forms and values past U+10FFFF */
+		return *chr >= 0x10000 && *chr <= 0x10FFFF;
+	}
+
+	/* stray continuation byte or invalid lead byte (0xF8..0xFF) */
+	return 0;
+}
+
+/* Append the low `nbytes` bytes of `value` to `cur`, most significant first. */
+static int push_ubjson_be(struct cursor *cur, unsigned long long value,
+			  int nbytes)
+{
+	while (nbytes-- > 0) {
+		if (!push_byte(cur, (unsigned char)(value >> (8 * nbytes))))
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Append `len` as a UBJSON integer: a one-byte type marker followed by the
+ * value in big-endian byte order, using the smallest type that can hold it.
+ *
+ *   'U' uint8  : 0 .. 0xFF
+ *   'I' int16  : up to 0x7FFF      (signed, so 0x8000..0xFFFF do not fit)
+ *   'l' int32  : up to 0x7FFFFFFF
+ *   'L' int64  : everything larger
+ *
+ * Returns 1 on success, 0 if a push_byte call fails.
+ */
+static int push_ubjson_len(struct cursor *ubjson, unsigned int len)
+{
+	unsigned char marker;
+	int nbytes;
+
+	if (len <= 0xFF) {
+		marker = 'U';
+		nbytes = 1;
+	} else if (len <= 0x7FFF) {
+		marker = 'I';
+		nbytes = 2;
+	} else if (len <= 0x7FFFFFFF) {
+		marker = 'l';
+		nbytes = 4;
+	} else {
+		marker = 'L';
+		nbytes = 8;
+	}
+
+	return push_byte(ubjson, marker) &&
+	       push_ubjson_be(ubjson, len, nbytes);
+}
+
+/*
+ * Append a UBJSON string value to `ubjson`: the 'S' marker, the length as
+ * written by push_ubjson_len, then `size` bytes copied from `text`.
+ *
+ * Returns 1 if all three pushes succeed, 0 as soon as one of them fails
+ * (earlier pushes are not undone).
+ */
+static int push_ubjson_str(struct cursor *ubjson, unsigned char *text,
+			   unsigned int size)
+{
+	/* TODO push string without escapes */
+	return push_byte(ubjson, 'S') &&
+	       push_ubjson_len(ubjson, size) &&
+	       push_data(ubjson, text, size);
+}
+
+/*
+ * Placeholder for JSON escape-sequence handling: it only reports that it is
+ * unimplemented and always returns 0.
+ */
+static int parse_escape(struct json_parser *p)
+{
+	note_error(p, "implement parse_escape");
+	(void)p;
+
+	return 0;
+}
+
+/*
+ * Parse a double-quoted JSON string at the cursor and append it to p->ubjson
+ * through push_ubjson_str.
+ *
+ * Returns the result of push_ubjson_str once the closing quote is found.
+ * Returns 0 if the opening quote is missing, a UTF-8 sequence fails to
+ * decode, or an escape cannot be parsed; in the latter two cases the cursor
+ * is reset to where the string began.
+ */
static int parse_string(struct json_parser *p)
{
+ char c;
+ unsigned int chr;
+ unsigned char *start;
+ c = 0;
+ chr = 0;
+
+ /* remember the starting position so a failed parse can rewind to it */
+ start = p->cur.p;
+
if (!parse_char(&p->cur, &c, '"')) {
note_error(p, "expected '\"', got '%c'", c);
return 0;
}
+
+ while (1) {
+ if (!parse_utf8_char(&p->cur, &chr)) {
+ note_error(p, "invalid utf-8 codepoint");
+ break;
+ }
+
+ if (chr == '"') {
+ /* TODO @generalize push json string */
+ /* start+1 and the -2 drop the two quote bytes; this assumes parse_char
+  * consumed exactly the opening quote -- TODO confirm */
+ return push_ubjson_str(&p->ubjson, start+1, p->cur.p-start-2);
+ }
+
+ /* NOTE(review): `c` is only ever handed to parse_char above, so this
+  * message does not show the character that followed the backslash */
+ if (chr == '\\' && !parse_escape(p)) {
+ note_error(p, "invalid escape char '%c'", c);
+ break;
+ }
+ }
+
+ /* failure after the opening quote: rewind to the start of the string */
+ p->cur.p = start;
+ return 0;
+}
+
+/* Number parsing is not implemented yet: logs a diagnostic and always returns 0. */
+static int parse_number(struct json_parser *p)
+{
+ (void)p;
+ note_error(p, "not implemented");
+ return 0;
}
+static int parse_value(struct json_parser *p);
+
+/*
+ * Parse one `"key" : value` member: a string key, optional whitespace, a
+ * ':' separator, then a value via parse_value.
+ *
+ * Returns 0 if the key or the separator is missing, otherwise whatever
+ * parse_value returns.
+ */
static int parse_kv(struct json_parser *p)
{
+	char sep = 0;
+
+	if (!parse_string(p)) {
+		note_error(p, "key");
+		return 0;
+	}
+
+	consume_whitespace(&p->cur);
+
+	if (parse_char(&p->cur, &sep, ':'))
+		return parse_value(p);
+
+	note_error(p, "expected ':', got %c", sep);
+	return 0;
}
static int parse_object(struct json_parser *p)
@@ -38,6 +218,9 @@ static int parse_object(struct json_parser *p)
return 0;
}
+ if (!push_byte(&p->ubjson, '{'))
+ return 0;
+
while(1) {
consume_whitespace(&p->cur);
@@ -84,19 +267,77 @@ static int parse_object(struct json_parser *p)
return 0;
}
+/*
+ * Placeholder for JSON array parsing: it only reports that it is
+ * unimplemented and always returns 0.
+ */
+static int parse_array(struct json_parser *p)
+{
+	note_error(p, "implement parse_array");
+	(void)p;
+
+	return 0;
+}
+
+/*
+ * Parse the literal `true` or `false` at the cursor and append the matching
+ * UBJSON marker ('T' or 'F') to p->ubjson.
+ *
+ * Returns 0 if neither literal matches, otherwise the result of push_byte.
+ */
+static inline int parse_bool(struct json_parser *p)
+{
+	unsigned char marker;
+
+	if (parse_str(&p->cur, "true"))
+		marker = 'T';
+	else if (parse_str(&p->cur, "false"))
+		marker = 'F';
+	else
+		return 0;
+
+	/* TODO @generalize push true */
+	return push_byte(&p->ubjson, marker);
+}
+
+/* Append the UBJSON null marker ('Z') to `ubjson`; returns push_byte's result. */
+static inline int push_ubjson_null(struct cursor *ubjson)
+{
+	const unsigned char null_marker = 'Z';
+
+	return push_byte(ubjson, null_marker);
+}
+
+/*
+ * Parse the literal `null` at the cursor and append the UBJSON null marker.
+ *
+ * Returns 0 (after logging what was found instead) if the literal does not
+ * match, otherwise the result of push_ubjson_null.
+ */
+static int parse_null(struct json_parser *p)
+{
+	ptrdiff_t left;
+	int shown;
+
+	if (!parse_str(&p->cur, "null")) {
+		/*
+		 * Show at most four bytes, but never read past the end of the
+		 * input: it is not guaranteed to be NUL-terminated and may
+		 * hold fewer than four bytes.
+		 */
+		left = p->cur.end - p->cur.p;
+		shown = left < 0 ? 0 : (left > 4 ? 4 : (int)left);
+
+		fprintf(stderr, "got %.*s instead of null\n", shown,
+			(const char *)p->cur.p);
+		note_error(p, "not null");
+		return 0;
+	}
+
+	/* TODO @generalize push null */
+	return push_ubjson_null(&p->ubjson);
+}
+
+/*
+ * Parse one JSON value, skipping whitespace before and after it.
+ *
+ * The value parsers are tried in a fixed order (string, number, object,
+ * array, bool, null) and the first one that succeeds wins. Returns 1 if any
+ * of them succeeded, 0 otherwise.
+ */
+static int parse_value(struct json_parser *p)
+{
+	int matched = 0;
+
+	consume_whitespace(&p->cur);
+
+	if (parse_string(p))
+		matched = 1;
+	else if (parse_number(p))
+		matched = 1;
+	else if (parse_object(p))
+		matched = 1;
+	else if (parse_array(p))
+		matched = 1;
+	else if (parse_bool(p))
+		matched = 1;
+	else if (parse_null(p))
+		matched = 1;
+
+	consume_whitespace(&p->cur);
+
+	return matched;
+}
+
+
static int parse_array_or_object(struct json_parser *p)
{
char c;
- consume_whitespace(p);
+ consume_whitespace(&p->cur);
if (!peek_char(&p->cur, &c)) {
note_error(p, "oob");
return 0;
}
- if (c == '{')
+ if (c == '{') {
return parse_object(p);
-
+ }
+
if (c == '[')
return parse_array(p);
@@ -104,14 +345,19 @@ static int parse_array_or_object(struct json_parser *p)
return 0;
}
-int parse_json(unsigned char *buf, size_t buf_size, unsigned char *mem,
- size_t mem_size)
+/*
+ * Parse the JSON text in buf[0 .. buf_size) and write its UBJSON encoding
+ * into out->data.
+ *
+ * On entry out->data_size is the size of the output buffer. On return it is
+ * overwritten with the number of bytes written, whether or not parsing
+ * succeeded. Returns the result of parse_array_or_object.
+ */
+int parse_json(unsigned char *buf, size_t buf_size, struct ubjson *out)
{
+ int res;
struct json_parser p;
+
memset(&p, 0, sizeof(p));
- make_cursor(buf, buf + buf_size, &p->cur);
- make_cursor(buf, mem + mem_size, &p->mem);
+ make_cursor(buf, buf + buf_size, &p.cur);
+ make_cursor(out->data, out->data + out->data_size, &p.ubjson);
- return parse_array_or_object(&p);
+ res = parse_array_or_object(&p);
+ /* report how much of the output buffer was filled */
+ out->data_size = p.ubjson.p - p.ubjson.start;
+ return res;
}
+
+
diff --git a/src/json.h b/src/json.h
@@ -0,0 +1,16 @@
+
+#ifndef WOLFSOCKS_JSON
+#define WOLFSOCKS_JSON
+
+#include <stddef.h>
+
+/*
+ * A caller-owned byte buffer holding UBJSON output.
+ * parse_json reads data_size as the buffer size on entry and overwrites it
+ * with the number of bytes written.
+ */
+struct ubjson {
+ unsigned char *data; /* start of the buffer */
+ size_t data_size; /* size of the buffer, in bytes */
+};
+
+
+int print_ubjson(struct ubjson *json);
+int parse_json(unsigned char *buf, size_t buf_size, struct ubjson *out);
+
+#endif
diff --git a/src/parse.h b/src/parse.h
@@ -19,4 +19,29 @@ static inline int parse_char(struct cursor *cur, char *out, char match)
return 0;
}
+/*
+ * Match the NUL-terminated literal `match` at the cursor.
+ *
+ * If the next strlen(match) bytes equal `match`, the cursor is advanced past
+ * them and 1 is returned. Otherwise the cursor is left untouched and 0 is
+ * returned. A literal that ends exactly at the end of the buffer matches.
+ */
+static inline int parse_str(struct cursor *cur, const char *match)
+{
+	size_t len = strlen(match);
+
+	/* compare against the bytes remaining rather than forming p + len */
+	if (cur->p > cur->end || (size_t)(cur->end - cur->p) < len)
+		return 0;
+
+	if (memcmp(match, cur->p, len) != 0)
+		return 0;
+
+	cur->p += len;
+	return 1;
+}
+
+/*
+ * Pull one byte from `cur` and report whether it equals `match`.
+ * Returns 0 if no byte could be pulled or the byte differs from `match`;
+ * a pulled byte is not given back on mismatch.
+ */
+static inline int consume_char(struct cursor *cur, char match)
+{
+	unsigned char got;
+
+	if (!pull_byte(cur, &got))
+		return 0;
+
+	return (char)got == match;
+}
+
+
+
#endif /* WOLFSOCKS_PARSE */
diff --git a/src/test_json.c b/src/test_json.c
@@ -0,0 +1,36 @@
+
+#include "json.h"
+
+#include <assert.h>
+#include <stdio.h>
+
+/*
+ * Write `data_size` bytes from `data` to the file "test_out.ubjson",
+ * replacing any previous contents. Failures to open, write or close the
+ * file are reported on stderr; nothing is written when data_size <= 0.
+ */
+static void write_data(unsigned char *data, int data_size)
+{
+	FILE *file;
+
+	file = fopen("test_out.ubjson", "wb");
+	if (!file) {
+		perror("test_out.ubjson");
+		return;
+	}
+
+	if (data_size > 0 && fwrite(data, (size_t)data_size, 1, file) != 1)
+		perror("fwrite");
+
+	/* fclose flushes buffered output, so its result matters here */
+	if (fclose(file) != 0)
+		perror("fclose");
+}
+
+/*
+ * Smoke test for parse_json: parses an empty object and a small key/value
+ * object, then dumps the UBJSON produced by the last parse to disk.
+ *
+ * The parse calls are made outside assert() so they still run when the
+ * test is built with NDEBUG. Returns 0 on success, 1 if a parse fails.
+ */
+int main(int argc, char *argv[])
+{
+	static unsigned char out[1024] = {0};
+	struct ubjson ubjson;
+
+	(void)argc;
+	(void)argv;
+
+	ubjson.data = out;
+	ubjson.data_size = sizeof(out);
+
+	//unsigned char bad[] = "{a}";
+	//assert(!parse_json(bad, sizeof(bad), &ubjson));
+	//ubjson.data_size = sizeof(out);
+
+	unsigned char empty[] = "{}";
+	if (!parse_json(empty, sizeof(empty), &ubjson)) {
+		fprintf(stderr, "test_json: failed to parse empty object\n");
+		return 1;
+	}
+	/* parse_json overwrote data_size; restore the full capacity */
+	ubjson.data_size = sizeof(out);
+
+	unsigned char kv[] = "{ \"key\" : null , \"key2\": \"stringy\"}";
+	if (!parse_json(kv, sizeof(kv), &ubjson)) {
+		fprintf(stderr, "test_json: failed to parse key/value object\n");
+		return 1;
+	}
+
+	write_data(ubjson.data, (int)ubjson.data_size);
+	return 0;
+}