json progress - chibipub - Unnamed repository; edit this file 'description' to name the repository.

commit 57f13bbc561517eaec1e7b48e1e38c4abb470306
parent f29b59ae633b2fec66409f9cc2f6fcb753ecd14b
Author: William Casarin <jb55@jb55.com>
Date:   Sun,  6 Dec 2020 16:28:15 -0800

json progress

Diffstat:
M .gitignore  | 2 ++
M Makefile  | 12 ++++++++----
D src/cursor.c  | 137 -------------------------------------------------------------------------------
M src/cursor.h  | 4 ++--
M src/http.c  | 8 ++------
M src/json.c  | 262 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
A src/json.h  | 16 ++++++++++++++++
M src/parse.h  | 25 +++++++++++++++++++++++++
A src/test_json.c  | 36 ++++++++++++++++++++++++++++++++++++

9 files changed, 345 insertions(+), 157 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,5 @@
 /wolfsocks
 *.o
 /tags
+/test_out.ubjson
+src/test_json
diff --git a/Makefile b/Makefile
@@ -5,13 +5,17 @@ OBJS = src/http.o src/base64.o src/inbox.o src/json.o
 
 HEADERS = $(wildcard src/*.h)
 
-DEPS = src/wolfsocks.c $(OBJS)
+wolfsocks: src/wolfsocks.c $(OBJS) $(HEADERS)
+	$(CC) $(CFLAGS) $< $(OBJS) $(LDFLAGS) -o $@
 
-wolfsocks: $(DEPS) $(HEADERS)
-	$(CC) $(CFLAGS) $(DEPS) $(LDFLAGS) -o $@
+src/test_json: src/test_json.c $(OBJS) $(HEADERS)
+	$(CC) $(CFLAGS) $< $(OBJS) $(LDFLAGS) -o $@
+
+check: src/test_json
+	./src/test_json
 
 clean: fake
-	rm -f $(OBJS) wolfsocks
+	rm -f $(OBJS) wolfsocks src/test_json
 
 tags: fake
 	ctags src/*.c src/*.h
diff --git a/src/cursor.c b/src/cursor.c
@@ -1,137 +0,0 @@
-
-#include "cursor.h"
-
-#include <stdio.h>
-#include <string.h>
-
-void copy_cursor(struct cursor *src, struct cursor *dest)
-{
-	dest->start = src->start;
-	dest->p = src->p;
-	dest->end = src->end;
-}
-
-void make_cursor(unsigned char *start, unsigned char *end, struct cursor *cursor)
-{
-	cursor->start = start;
-	cursor->p = start;
-	cursor->end = end;
-}
-
-int cursor_index(struct cursor *cursor, int elem_size)
-{
-	return (cursor->p - cursor->start) / elem_size;
-}
-
-
-int pull_byte(struct cursor *cursor, unsigned char *c)
-{
-	if (cursor->p + 1 > cursor->end)
-		return 0;
-
-	*c = *cursor->p;
-	cursor->p++;
-
-	return 1;
-}
-
-
-int push_byte(struct cursor *cursor, unsigned char c)
-{
-	if (cursor->p + 1 >= cursor->end) {
-		return 0;
-	}
-
-	*cursor->p = c;
-	cursor->p++;
-
-	return 1;
-}
-
-int pull_data_into_cursor(struct cursor *cursor,
-			  struct cursor *dest,
-			  unsigned char **data,
-			  int len)
-{
-	int ok;
-
-	if (dest->p + len >= dest->end) {
-		printf("not enough room in dest buffer\n");
-		return 0;
-	}
-
-	ok = pull_data(cursor, dest->p, len);
-	if (!ok) return 0;
-
-	*data = dest->p;
-	dest->p += len;
-
-	return 1;
-}
-
-int pull_data(struct cursor *cursor, unsigned char *data, int len)
-{
-	if (cursor->p + len >= cursor->end) {
-		return 0;
-	}
-
-	memcpy(data, cursor->p, len);
-	cursor->p += len;
-
-	return 1;
-}
-
-int push_data(struct cursor *cursor, unsigned char *data, int len)
-{
-	if (cursor->p + len > cursor->end) {
-		printf("push_data oob\n");
-		return 0;
-	}
-
-	memcpy(cursor->p, data, len);
-	cursor->p += len;
-
-	return 1;
-}
-
-int push_int(struct cursor *cursor, int i)
-{
-	return push_data(cursor, (unsigned char*)&i, sizeof(i));
-}
-
-int pull_int(struct cursor *cursor, int *i)
-{
-	return pull_data(cursor, (unsigned char*)i, sizeof(*i));
-}
-
-int push_u16(struct cursor *cursor, unsigned short i)
-{
-	return push_data(cursor, (unsigned char*)&i, sizeof(i));
-}
-
-void *index_cursor(struct cursor *cursor, unsigned short index, int elem_size)
-{
-	unsigned char *p;
-	p = &cursor->start[elem_size * index];
-
-	if (p > cursor->end)
-		return NULL;
-
-	return (void*)p;
-}
-
-
-int push_sized_str(struct cursor *cursor, const char *str, int len)
-{
-	return push_data(cursor, (unsigned char*)str, len);
-}
-
-int push_str(struct cursor *cursor, const char *str)
-{
-	return push_data(cursor, (unsigned char*)str, strlen(str));
-}
-
-int cursor_remaining_capacity(struct cursor *cursor)
-{
-	return cursor->end - cursor->p;
-}
diff --git a/src/cursor.h b/src/cursor.h
@@ -63,7 +63,7 @@ static inline int cursor_index(struct cursor *cursor, int elem_size)
 
 static inline int pull_byte(struct cursor *cursor, unsigned char *c)
 {
-	if (unlikely(cursor->p + 1 > cursor->end))
+	if (unlikely(cursor->p + 1 >= cursor->end))
 		return 0;
 
 	*c = *cursor->p;
@@ -75,7 +75,7 @@ static inline int pull_byte(struct cursor *cursor, unsigned char *c)
 
 static inline int push_byte(struct cursor *cursor, unsigned char c)
 {
-	if (unlikely(cursor->p + 1 > cursor->end)) {
+	if (unlikely(cursor->p + 1 >= cursor->end)) {
 		return 0;
 	}
 
diff --git a/src/http.c b/src/http.c
@@ -1,5 +1,7 @@
 #include "http.h"
 #include "util.h"
+#include "parse.h"
+
 #include <stdio.h>
 #include <assert.h>
 
@@ -26,12 +28,6 @@ static int parse_segment_with(struct parser *p, char **seg, char delim)
 	return 1;
 }
 
-static int consume_char(struct cursor *cur, char match)
-{
-	char c;
-	return pull_byte(cur, (unsigned char*)&c) && c == match;
-}
-
 static int parse_segment(struct parser *p, char **seg)
 {
 	return parse_segment_with(p, seg, ' ');
diff --git a/src/json.c b/src/json.c
@@ -1,7 +1,11 @@
 
 #include "cursor.h"
+#include "json.h"
+#include "parse.h"
+
 #include <ctype.h>
 
+/*
+ * Print a diagnostic to stderr, prefixed with the name of the calling
+ * function. `p` is accepted at every call site but is not expanded.
+ * The body is wrapped in do/while(0) and carries no trailing semicolon so
+ * that `if (x) note_error(...); else ...` expands to a single statement.
+ */
+#define note_error(p, msg, ...) \
+	do { fprintf(stderr, "%s: " msg "\n", __FUNCTION__, ##__VA_ARGS__); } while (0)
 
 struct json_parser {
 	struct cursor cur;
@@ -16,16 +20,192 @@ static void consume_whitespace(struct cursor *cur)
 	}
 }
 
+/*
+ * Pull one UTF-8 continuation byte (bit pattern 10xxxxxx) from the cursor.
+ * On success the six payload bits are left in *c and 1 is returned.
+ * Returns 0 when no byte could be pulled, or when the pulled byte is not a
+ * continuation byte (in which case *c keeps the raw byte).
+ */
+static int pull_utf8_cont(struct cursor *cur, unsigned char *c)
+{
+	int is_cont;
+
+	if (!pull_byte(cur, c)) {
+		return 0;
+	}
+
+	is_cont = (*c & 0xC0) == 0x80;
+	if (is_cont) {
+		/* strip the 10 marker bits, keep the payload */
+		*c &= 0x3F;
+	}
+
+	return is_cont;
+}
+
+/*
+ * Decode one UTF-8 encoded code point from the cursor into *chr.
+ *
+ * Returns 1 on success. Returns 0 when a byte cannot be pulled, the lead
+ * byte or a continuation byte is malformed, the encoding is overlong, the
+ * value is a UTF-16 surrogate (U+D800..U+DFFF), or the value is above
+ * U+10FFFF. Bytes pulled before a failure is detected stay consumed, so a
+ * caller that wants to backtrack has to restore the cursor itself.
+ */
+static int parse_utf8_char(struct cursor *cur, unsigned int *chr)
+{
+	unsigned char c[4];
+
+	if (!pull_byte(cur, &c[0])) {
+		return 0;
+	}
+
+	/* 0xxxxxxx: one byte */
+	if ((c[0] & 0x80) == 0) {
+		*chr = c[0];
+		return 1;
+	}
+
+	/* 110xxxxx 10xxxxxx: two bytes */
+	if ((c[0] & 0xE0) == 0xC0) {
+		if (!pull_utf8_cont(cur, &c[1])) {
+			return 0;
+		}
+		*chr = ((unsigned int)(c[0] & 0x1F) << 6) | c[1];
+		/* anything below U+0080 here is an overlong encoding */
+		return *chr >= 0x80;
+	}
+
+	/* 1110xxxx 10xxxxxx 10xxxxxx: three bytes */
+	if ((c[0] & 0xF0) == 0xE0) {
+		if (!(pull_utf8_cont(cur, &c[1]) &&
+		      pull_utf8_cont(cur, &c[2]))) {
+			return 0;
+		}
+		*chr = ((unsigned int)(c[0] & 0x0F) << 12) |
+		       ((unsigned int)c[1] << 6) | c[2];
+		/* reject overlong forms and the surrogate range */
+		return *chr >= 0x800 && (*chr < 0xD800 || *chr > 0xDFFF);
+	}
+
+	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: four bytes */
+	if ((c[0] & 0xF8) == 0xF0) {
+		if (!(pull_utf8_cont(cur, &c[1]) &&
+		      pull_utf8_cont(cur, &c[2]) &&
+		      pull_utf8_cont(cur, &c[3]))) {
+			return 0;
+		}
+		/* the lead byte carries its payload in the low three bits */
+		*chr = ((unsigned int)(c[0] & 0x07) << 18) |
+		       ((unsigned int)c[1] << 12) |
+		       ((unsigned int)c[2] << 6) | c[3];
+		/* reject overlong forms and values past U+10FFFF */
+		return *chr >= 0x10000 && *chr <= 0x10FFFF;
+	}
+
+	/* stray continuation byte or an invalid lead byte (0xF8..0xFF) */
+	return 0;
+}
+
+/*
+ * Append a UBJSON length: a one-byte type marker followed by the value,
+ * using the smallest integer type that can hold `len`.
+ *
+ *   'U'  uint8   0 .. 0xFF
+ *   'I'  int16   0x100 .. 0x7FFF (signed, so 0x8000 and up do not fit)
+ *   'l'  int32   0x8000 .. 0x7FFFFFFF
+ *
+ * Returns 1 on success, 0 if the output cursor is full or `len` does not
+ * fit in an int32.
+ */
+static int push_ubjson_len(struct cursor *ubjson, unsigned int len)
+{
+	if (len <= 0xFF) {
+		if (!push_byte(ubjson, 'U')) {
+			return 0;
+		}
+		if (!push_byte(ubjson, (unsigned char)len)) {
+			return 0;
+		}
+	} else if (len <= 0x7FFF) {
+		if (!push_byte(ubjson, 'I')) {
+			return 0;
+		}
+		/* TODO: big-endian push */
+		if (!push_u16(ubjson, (unsigned short)len)) {
+			return 0;
+		}
+	} else if (len <= 0x7FFFFFFF) {
+		if (!push_byte(ubjson, 'l')) {
+			return 0;
+		}
+		/* TODO: big-endian push */
+		if (!push_int(ubjson, (int)len)) {
+			return 0;
+		}
+	} else {
+		/* would turn negative when written as an int32 */
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Append a UBJSON string: the 'S' marker, the length, then `size` raw bytes
+ * taken from `text`. Stops at the first step that fails.
+ * Returns 1 when all three pushes succeed, 0 otherwise.
+ */
+static int push_ubjson_str(struct cursor *ubjson, unsigned char *text,
+		unsigned int size)
+{
+	/* TODO push string without escapes */
+	return push_byte(ubjson, 'S') &&
+	       push_ubjson_len(ubjson, size) &&
+	       push_data(ubjson, text, size);
+}
+
+/*
+ * Placeholder for JSON escape-sequence parsing. Nothing is parsed yet: it
+ * logs a diagnostic and always returns 0 (failure).
+ */
+static int parse_escape(struct json_parser *p)
+{
+	(void)p; /* note_error() does not expand its first argument */
+	note_error(p, "implement parse_escape");
+	return 0;
+}
+
+/*
+ * Parse a double-quoted JSON string at the cursor and emit it as a UBJSON
+ * string.
+ *
+ * Returns the result of push_ubjson_str() once the closing quote is found.
+ * Returns 0 if the opening quote is missing, or, after resetting the cursor
+ * to where it started, if a code point fails to decode or an escape is
+ * rejected.
+ */
 static int parse_string(struct json_parser *p)
 {
+	char c;
+	unsigned int chr;
+	unsigned char *start;
+	c = 0;
+	chr = 0;
+
+	/* remember the opening position so a failure can rewind to it */
+	start = p->cur.p;
+
 	if (!parse_char(&p->cur, &c, '"')) {
 		note_error(p, "expected '\"', got '%c'", c);
 		return 0;
 	}
+
+	while (1) {
+		if (!parse_utf8_char(&p->cur, &chr)) {
+			note_error(p, "invalid utf-8 codepoint");
+			break;
+		}
+
+		if (chr == '"') {
+			/* TODO @generalize push json string */
+			/*
+			 * start+1 skips the opening quote; the cursor is now
+			 * just past the closing quote, so subtracting 2 leaves
+			 * both quotes out of the length.
+			 */
+			return push_ubjson_str(&p->ubjson, start+1, p->cur.p-start-2);
+		}
+
+		/*
+		 * NOTE(review): in this function `c` is only handed to
+		 * parse_char() above, so this message does not appear to
+		 * print the escape character itself - confirm.
+		 */
+		if (chr == '\\' && !parse_escape(p)) {
+			note_error(p, "invalid escape char '%c'", c);
+			break;
+		}
+	}
+
+	p->cur.p = start;
+	return 0;
+}
+
+/*
+ * Placeholder for JSON number parsing. Nothing is parsed yet: it logs a
+ * diagnostic and always returns 0 (failure).
+ */
+static int parse_number(struct json_parser *p)
+{
+	(void)p; /* note_error() does not expand its first argument */
+	note_error(p, "not implemented");
+	return 0;
 }
 
+static int parse_value(struct json_parser *p);
+
 static int parse_kv(struct json_parser *p)
 {
+	char c;
+	c = 0;
+
+	if (!parse_string(p)) {
+		note_error(p, "key");
+		return 0;
+	}
+
+	consume_whitespace(&p->cur);
+
+	if (!parse_char(&p->cur, &c, ':')) {
+		note_error(p, "expected ':', got %c", c);
+		return 0;
+	}
+
+	return parse_value(p);
 }
 
 static int parse_object(struct json_parser *p)
@@ -38,6 +218,9 @@ static int parse_object(struct json_parser *p)
 		return 0;
 	}
 
+	if (!push_byte(&p->ubjson, '{'))
+		return 0;
+
 	while(1) {
 		consume_whitespace(&p->cur);
 
@@ -84,19 +267,77 @@ static int parse_object(struct json_parser *p)
 	return 0;
 }
 
+/*
+ * Placeholder for JSON array parsing. Nothing is parsed yet: it logs a
+ * diagnostic and always returns 0 (failure).
+ */
+static int parse_array(struct json_parser *p)
+{
+	(void)p; /* note_error() does not expand its first argument */
+	note_error(p, "implement parse_array");
+	return 0;
+}
+
+/*
+ * Parse the literal `true` or `false` at the cursor and append the matching
+ * UBJSON marker ('T' or 'F'). Returns push_byte()'s result when a literal
+ * matched, 0 when neither literal is present.
+ */
+static inline int parse_bool(struct json_parser *p)
+{
+	unsigned char marker;
+
+	if (parse_str(&p->cur, "true")) {
+		marker = 'T';
+	} else if (parse_str(&p->cur, "false")) {
+		marker = 'F';
+	} else {
+		return 0;
+	}
+
+	/* TODO @generalize push true/false */
+	return push_byte(&p->ubjson, marker);
+}
+
+/* Append the UBJSON null marker 'Z'; returns push_byte()'s result. */
+static inline int push_ubjson_null(struct cursor *ubjson)
+{
+	return push_byte(ubjson, 'Z');
+}
+
+/*
+ * Parse the literal `null` at the cursor and append the UBJSON null marker.
+ * Returns push_ubjson_null()'s result on a match. On a mismatch it logs up
+ * to four bytes of the offending input and returns 0.
+ */
+static int parse_null(struct json_parser *p)
+{
+	if (!parse_str(&p->cur, "null")) {
+		/*
+		 * Clamp the preview to the bytes actually left in the input
+		 * so the diagnostic never reads past the end of the buffer.
+		 * A negative precision would mean "no limit", hence the
+		 * clamp to zero as well.
+		 */
+		long avail = (long)(p->cur.end - p->cur.p);
+		int shown = avail < 0 ? 0 : (avail < 4 ? (int)avail : 4);
+
+		fprintf(stderr, "got %.*s instead of null\n", shown,
+			(const char *)p->cur.p);
+		note_error(p, "not null");
+		return 0;
+	}
+
+	/* TODO @generalize push null */
+	return push_ubjson_null(&p->ubjson);
+}
+
+/*
+ * Parse any JSON value at the cursor, skipping whitespace before and after.
+ * The value parsers are tried in a fixed order and the first one that
+ * succeeds wins. Returns 1 if one of them succeeded, 0 if all failed.
+ */
+static int parse_value(struct json_parser *p)
+{
+	int (*const attempts[])(struct json_parser *) = {
+		parse_string, parse_number, parse_object,
+		parse_array, parse_bool, parse_null,
+	};
+	unsigned int i;
+	int matched;
+
+	consume_whitespace(&p->cur);
+
+	matched = 0;
+	for (i = 0; !matched && i < sizeof(attempts) / sizeof(attempts[0]); i++) {
+		matched = attempts[i](p) ? 1 : 0;
+	}
+
+	consume_whitespace(&p->cur);
+
+	return matched;
+}
+
+
 static int parse_array_or_object(struct json_parser *p)
 {
 	char c;
-	consume_whitespace(p);
+	consume_whitespace(&p->cur);
 
 	if (!peek_char(&p->cur, &c)) {
 		note_error(p, "oob");
 		return 0;
 	}
 
-	if (c == '{')
+	if (c == '{') {
 		return parse_object(p);
-	
+	}
+
 	if (c == '[')
 		return parse_array(p);
 
@@ -104,14 +345,19 @@ static int parse_array_or_object(struct json_parser *p)
 	return 0;
 }
 
-int parse_json(unsigned char *buf, size_t buf_size, unsigned char *mem, 
-               size_t mem_size)
+/*
+ * Parse the JSON text in buf[0 .. buf_size) and write the result into the
+ * buffer described by `out`.
+ *
+ * On entry out->data_size is the size of out->data. On return it is set to
+ * the number of bytes written to out->data, whether or not parsing
+ * succeeded. Returns the result of parse_array_or_object().
+ */
+int parse_json(unsigned char *buf, size_t buf_size, struct ubjson *out)
 {
+	int res;
 	struct json_parser p;
+
 	memset(&p, 0, sizeof(p));
 
-	make_cursor(buf, buf + buf_size, &p->cur);
-	make_cursor(buf, mem + mem_size, &p->mem);
+	/* input cursor over the JSON text, output cursor over out->data */
+	make_cursor(buf, buf + buf_size, &p.cur);
+	make_cursor(out->data, out->data + out->data_size, &p.ubjson);
 
-	return parse_array_or_object(&p);
+	res = parse_array_or_object(&p);
+	/* distance the output cursor advanced == bytes written */
+	out->data_size = p.ubjson.p - p.ubjson.start;
+	return res;
 }
+
+
diff --git a/src/json.h b/src/json.h
@@ -0,0 +1,16 @@
+
+#ifndef WOLFSOCKS_JSON
+#define WOLFSOCKS_JSON
+
+#include <stddef.h>
+
+/*
+ * Byte buffer that receives UBJSON output.
+ * parse_json() treats `data_size` as the size of `data` on entry and
+ * overwrites it with the number of bytes it wrote before returning.
+ */
+struct ubjson {
+	unsigned char *data;	/* start of the buffer */
+	size_t data_size;	/* size in bytes; see above */
+};
+
+
+int print_ubjson(struct ubjson *json);
+int parse_json(unsigned char *buf, size_t buf_size, struct ubjson *out);
+
+#endif
diff --git a/src/parse.h b/src/parse.h
@@ -19,4 +19,29 @@ static inline int parse_char(struct cursor *cur, char *out, char match)
 	return 0;
 }
 
+/*
+ * Match the literal string `match` at the cursor.
+ *
+ * On a match the cursor is advanced past the literal and 1 is returned.
+ * Otherwise the cursor is left where it was and 0 is returned. `cur->end`
+ * is treated as one past the last readable byte, so a literal that ends
+ * exactly at `cur->end` still matches.
+ */
+static inline int parse_str(struct cursor *cur, const char *match)
+{
+	size_t len = strlen(match);
+
+	/*
+	 * Compare against the bytes remaining rather than computing
+	 * cur->p + len, which could point beyond the buffer.
+	 */
+	if (cur->p > cur->end || len > (size_t)(cur->end - cur->p)) {
+		return 0;
+	}
+
+	if (memcmp(match, cur->p, len) != 0) {
+		return 0;
+	}
+
+	cur->p += len;
+	return 1;
+}
+
+/*
+ * Pull one byte from the cursor and report whether it equals `match`.
+ * Returns 0 when no byte could be pulled or the byte differs.
+ */
+static inline int consume_char(struct cursor *cur, char match)
+{
+	char got;
+
+	if (!pull_byte(cur, (unsigned char *)&got)) {
+		return 0;
+	}
+
+	return got == match;
+}
+
+
+
 #endif /* WOLFSOCKS_PARSE */
diff --git a/src/test_json.c b/src/test_json.c
@@ -0,0 +1,36 @@
+
+#include "json.h"
+
+#include <assert.h>
+#include <stdio.h>
+
+/*
+ * Write `data_size` bytes of `data` to the file "test_out.ubjson",
+ * replacing any previous contents. Failures to open, write or close the
+ * file are reported on stderr; fopen() returns NULL on failure and that
+ * must not be passed on to fwrite()/fclose().
+ */
+static void write_data(unsigned char *data, int data_size)
+{
+	FILE *file;
+
+	file = fopen("test_out.ubjson", "wb");
+	if (!file) {
+		perror("test_out.ubjson");
+		return;
+	}
+
+	/* one item of data_size bytes: fwrite returns 1 only if all was written */
+	if (data_size > 0 && fwrite(data, (size_t)data_size, 1, file) != 1) {
+		perror("fwrite");
+	}
+
+	/* fclose flushes, so a failure here also means lost data */
+	if (fclose(file) != 0) {
+		perror("fclose");
+	}
+}
+
+/*
+ * Smoke test for parse_json(): parses an empty object and a small
+ * key/value object into a static 1024-byte buffer, then writes the output
+ * of the second parse to disk via write_data().
+ */
+int main(int argc, char *argv[])
+{
+	static unsigned char out[1024] = {0};
+	struct ubjson ubjson;
+	int ok;
+
+	(void)argc;
+	(void)argv;
+
+	ubjson.data = out;
+	ubjson.data_size = sizeof(out);
+
+	//unsigned char bad[] = "{a}";
+	//assert(!parse_json(bad, sizeof(bad), &ubjson));
+	//ubjson.data_size = sizeof(out);
+
+	/*
+	 * Call parse_json() outside assert(): with NDEBUG defined the assert
+	 * argument is not evaluated, which would silently skip the parse.
+	 */
+	unsigned char empty[] = "{}";
+	ok = parse_json(empty, sizeof(empty), &ubjson);
+	assert(ok);
+	/* parse_json() overwrote data_size with the bytes written; reset it */
+	ubjson.data_size = sizeof(out);
+
+	unsigned char kv[] = "{ \"key\" : null , \"key2\": \"stringy\"}";
+	ok = parse_json(kv, sizeof(kv), &ubjson);
+	assert(ok);
+	(void)ok; /* keeps NDEBUG builds free of unused-variable warnings */
+
+	write_data(ubjson.data, (int)ubjson.data_size);
+	return 0;
+}

	chibipub Unnamed repository; edit this file 'description' to name the repository.
	Log \| Files \| Refs \| README \| LICENSE

M	.gitignore	\|	2	++
M	Makefile	\|	12	++++++++----
D	src/cursor.c	\|	137	-------------------------------------------------------------------------------
M	src/cursor.h	\|	4	++--
M	src/http.c	\|	8	++------
M	src/json.c	\|	262	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
A	src/json.h	\|	16	++++++++++++++++
M	src/parse.h	\|	25	+++++++++++++++++++++++++
A	src/test_json.c	\|	36	++++++++++++++++++++++++++++++++++++