chibipub

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit 57f13bbc561517eaec1e7b48e1e38c4abb470306
parent f29b59ae633b2fec66409f9cc2f6fcb753ecd14b
Author: William Casarin <jb55@jb55.com>
Date:   Sun,  6 Dec 2020 16:28:15 -0800

json progress

Diffstat:
M .gitignore | 2 ++
M Makefile | 12 ++++++++----
D src/cursor.c | 137 -------------------------------------------------------------------------------
M src/cursor.h | 4 ++--
M src/http.c | 8 ++------
M src/json.c | 262 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
A src/json.h | 16 ++++++++++++++++
M src/parse.h | 25 +++++++++++++++++++++++++
A src/test_json.c | 36 ++++++++++++++++++++++++++++++++++++
9 files changed, 345 insertions(+), 157 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -2,3 +2,5 @@ /wolfsocks *.o /tags +/test_out.ubjson +src/test_json diff --git a/Makefile b/Makefile @@ -5,13 +5,17 @@ OBJS = src/http.o src/base64.o src/inbox.o src/json.o HEADERS = $(wildcard src/*.h) -DEPS = src/wolfsocks.c $(OBJS) +wolfsocks: src/wolfsocks.c $(OBJS) $(HEADERS) + $(CC) $(CFLAGS) $< $(OBJS) $(LDFLAGS) -o $@ -wolfsocks: $(DEPS) $(HEADERS) - $(CC) $(CFLAGS) $(DEPS) $(LDFLAGS) -o $@ +src/test_json: src/test_json.c $(OBJS) $(HEADERS) + $(CC) $(CFLAGS) $< $(OBJS) $(LDFLAGS) -o $@ + +check: src/test_json + ./src/test_json clean: fake - rm -f $(OBJS) wolfsocks + rm -f $(OBJS) wolfsocks src/test_json tags: fake ctags src/*.c src/*.h diff --git a/src/cursor.c b/src/cursor.c @@ -1,137 +0,0 @@ - -#include "cursor.h" - -#include <stdio.h> -#include <string.h> - -void copy_cursor(struct cursor *src, struct cursor *dest) -{ - dest->start = src->start; - dest->p = src->p; - dest->end = src->end; -} - -void make_cursor(unsigned char *start, unsigned char *end, struct cursor *cursor) -{ - cursor->start = start; - cursor->p = start; - cursor->end = end; -} - -int cursor_index(struct cursor *cursor, int elem_size) -{ - return (cursor->p - cursor->start) / elem_size; -} - - -int pull_byte(struct cursor *cursor, unsigned char *c) -{ - if (cursor->p + 1 > cursor->end) - return 0; - - *c = *cursor->p; - cursor->p++; - - return 1; -} - - -int push_byte(struct cursor *cursor, unsigned char c) -{ - if (cursor->p + 1 >= cursor->end) { - return 0; - } - - *cursor->p = c; - cursor->p++; - - return 1; -} - -int pull_data_into_cursor(struct cursor *cursor, - struct cursor *dest, - unsigned char **data, - int len) -{ - int ok; - - if (dest->p + len >= dest->end) { - printf("not enough room in dest buffer\n"); - return 0; - } - - ok = pull_data(cursor, dest->p, len); - if (!ok) return 0; - - *data = dest->p; - dest->p += len; - - return 1; -} - -int pull_data(struct cursor *cursor, unsigned char *data, int len) -{ - if (cursor->p + len >= 
cursor->end) { - return 0; - } - - memcpy(data, cursor->p, len); - cursor->p += len; - - return 1; -} - -int push_data(struct cursor *cursor, unsigned char *data, int len) -{ - if (cursor->p + len > cursor->end) { - printf("push_data oob\n"); - return 0; - } - - memcpy(cursor->p, data, len); - cursor->p += len; - - return 1; -} - -int push_int(struct cursor *cursor, int i) -{ - return push_data(cursor, (unsigned char*)&i, sizeof(i)); -} - -int pull_int(struct cursor *cursor, int *i) -{ - return pull_data(cursor, (unsigned char*)i, sizeof(*i)); -} - -int push_u16(struct cursor *cursor, unsigned short i) -{ - return push_data(cursor, (unsigned char*)&i, sizeof(i)); -} - -void *index_cursor(struct cursor *cursor, unsigned short index, int elem_size) -{ - unsigned char *p; - p = &cursor->start[elem_size * index]; - - if (p > cursor->end) - return NULL; - - return (void*)p; -} - - -int push_sized_str(struct cursor *cursor, const char *str, int len) -{ - return push_data(cursor, (unsigned char*)str, len); -} - -int push_str(struct cursor *cursor, const char *str) -{ - return push_data(cursor, (unsigned char*)str, strlen(str)); -} - -int cursor_remaining_capacity(struct cursor *cursor) -{ - return cursor->end - cursor->p; -} diff --git a/src/cursor.h b/src/cursor.h @@ -63,7 +63,7 @@ static inline int cursor_index(struct cursor *cursor, int elem_size) static inline int pull_byte(struct cursor *cursor, unsigned char *c) { - if (unlikely(cursor->p + 1 > cursor->end)) + if (unlikely(cursor->p + 1 >= cursor->end)) return 0; *c = *cursor->p; @@ -75,7 +75,7 @@ static inline int pull_byte(struct cursor *cursor, unsigned char *c) static inline int push_byte(struct cursor *cursor, unsigned char c) { - if (unlikely(cursor->p + 1 > cursor->end)) { + if (unlikely(cursor->p + 1 >= cursor->end)) { return 0; } diff --git a/src/http.c b/src/http.c @@ -1,5 +1,7 @@ #include "http.h" #include "util.h" +#include "parse.h" + #include <stdio.h> #include <assert.h> @@ -26,12 +28,6 @@ static int 
parse_segment_with(struct parser *p, char **seg, char delim) return 1; } -static int consume_char(struct cursor *cur, char match) -{ - char c; - return pull_byte(cur, (unsigned char*)&c) && c == match; -} - static int parse_segment(struct parser *p, char **seg) { return parse_segment_with(p, seg, ' '); diff --git a/src/json.c b/src/json.c @@ -1,7 +1,11 @@ #include "cursor.h" +#include "json.h" +#include "parse.h" + #include <ctype.h> +#define note_error(p, msg, ...) fprintf(stderr, "%s: " msg "\n", __FUNCTION__, ##__VA_ARGS__); struct json_parser { struct cursor cur; @@ -16,16 +20,192 @@ static void consume_whitespace(struct cursor *cur) } } +static int pull_utf8_cont(struct cursor *cur, unsigned char *c) +{ + if (!pull_byte(cur, c)) { + return 0; + } + + if ((*c & 0xC0) != 0x80) { + return 0; + } + + *c = *c & 0x3F; + + return 1; +} + +static int parse_utf8_char(struct cursor *cur, unsigned int *chr) +{ + unsigned char c[4]; + + if (!pull_byte(cur, &c[0])) { + return 0; + } + + if ((c[0] & 0x80) == 0) { + *chr = c[0]; + return 1; + } + + if ((c[0] & 0xE0) == 0xC0) { + if (!pull_utf8_cont(cur, &c[1])) { + return 0; + } + if (c[1] >= 0) { + *chr = ((c[0] & 0x1F) << 6) | c[1]; + if (*chr >= 128) { + return 1; + } + } + } else if ((c[0] & 0xF0) == 0xE0) { + if (!(pull_utf8_cont(cur, &c[1]) && + pull_utf8_cont(cur, &c[2]))) { + return 0; + } + if ((c[1] | c[2]) >= 0) { + *chr = ((c[0] & 0x0F) << 12) | (c[1] << 6) | c[2]; + if (*chr >= 2048 && (*chr < 55296 || *chr > 57343)) { + return 1; + } + } + } else if ((c[0] & 0xF8) == 0xF0) { + if (!(pull_utf8_cont(cur, &c[1]) && + pull_utf8_cont(cur, &c[2]) && + pull_utf8_cont(cur, &c[3]))) { + return 0; + } + if ((c[1] | c[2] | c[3]) >= 0) { + *chr = ((c[0] & 0x08) << 18) | (c[1] << 12) + | (c[2] << 6) | c[3]; + if (*chr >= 65536 && *chr <= 1114111) { + return 1; + } + } + } + + return 0; +} + +static int push_ubjson_len(struct cursor *ubjson, unsigned int len) +{ + if (len <= 0xFF) { + if (!push_byte(ubjson, 'U')) { + return 
0; + } + if (!push_byte(ubjson, (unsigned char)len)) { + return 0; + } + } else if (len <= 0xFFFF) { + if (!push_byte(ubjson, 'I')) { + return 0; + } + /* TODO: big-endian push */ + if (!push_u16(ubjson, (unsigned int)len)) { + return 0; + } + } else { + if (!push_byte(ubjson, 'l')) { + return 0; + } + /* TODO: big-endian push */ + if (!push_int(ubjson, len)) { + return 0; + } + } + return 1; +} + +static int push_ubjson_str(struct cursor *ubjson, unsigned char *text, + unsigned int size) +{ + + if (!push_byte(ubjson, 'S')) { + return 0; + } + + if (!push_ubjson_len(ubjson, size)) { + return 0; + } + + /* TODO push string without escapes */ + if (!push_data(ubjson, text, size)) { + return 0; + } + + return 1; +} + +static int parse_escape(struct json_parser *p) +{ + (void)p; + note_error(p, "implement parse_escape"); + return 0; +} + static int parse_string(struct json_parser *p) { + char c; + unsigned int chr; + unsigned char *start; + c = 0; + chr = 0; + + start = p->cur.p; + if (!parse_char(&p->cur, &c, '"')) { note_error(p, "expected '\"', got '%c'", c); return 0; } + + while (1) { + if (!parse_utf8_char(&p->cur, &chr)) { + note_error(p, "invalid utf-8 codepoint"); + break; + } + + if (chr == '"') { + /* TODO @generalize push json string */ + return push_ubjson_str(&p->ubjson, start+1, p->cur.p-start-2); + } + + if (chr == '\\' && !parse_escape(p)) { + note_error(p, "invalid escape char '%c'", c); + break; + } + } + + p->cur.p = start; + return 0; +} + +static int parse_number(struct json_parser *p) +{ + (void)p; + note_error(p, "not implemented"); + return 0; } +static int parse_value(struct json_parser *p); + static int parse_kv(struct json_parser *p) { + char c; + c = 0; + + if (!parse_string(p)) { + note_error(p, "key"); + return 0; + } + + consume_whitespace(&p->cur); + + if (!parse_char(&p->cur, &c, ':')) { + note_error(p, "expected ':', got %c", c); + return 0; + } + + return parse_value(p); } static int parse_object(struct json_parser *p) @@ -38,6 
+218,9 @@ static int parse_object(struct json_parser *p) return 0; } + if (!push_byte(&p->ubjson, '{')) + return 0; + while(1) { consume_whitespace(&p->cur); @@ -84,19 +267,77 @@ static int parse_object(struct json_parser *p) return 0; } +static int parse_array(struct json_parser *p) +{ + (void)p; + note_error(p, "implement parse_array"); + return 0; +} + +static inline int parse_bool(struct json_parser *p) +{ + if (parse_str(&p->cur, "true")) { + /* TODO @generalize push true */ + return push_byte(&p->ubjson, 'T'); + } + + if (parse_str(&p->cur, "false")) { + return push_byte(&p->ubjson, 'F'); + } + + return 0; +} + +static inline int push_ubjson_null(struct cursor *ubjson) +{ + return push_byte(ubjson, 'Z'); +} + +static int parse_null(struct json_parser *p) +{ + if (!parse_str(&p->cur, "null")) { + fprintf(stderr, "got %.*s instead of null\n", 4, p->cur.p); + note_error(p, "not null"); + return 0; + } + + /* TODO @generalize push null */ + return push_ubjson_null(&p->ubjson); +} + +static int parse_value(struct json_parser *p) +{ + int res; + + consume_whitespace(&p->cur); + + res = parse_string(p) || + parse_number(p) || + parse_object(p) || + parse_array(p) || + parse_bool(p) || + parse_null(p); + + consume_whitespace(&p->cur); + + return res; +} + + static int parse_array_or_object(struct json_parser *p) { char c; - consume_whitespace(p); + consume_whitespace(&p->cur); if (!peek_char(&p->cur, &c)) { note_error(p, "oob"); return 0; } - if (c == '{') + if (c == '{') { return parse_object(p); - + } + if (c == '[') return parse_array(p); @@ -104,14 +345,19 @@ static int parse_array_or_object(struct json_parser *p) return 0; } -int parse_json(unsigned char *buf, size_t buf_size, unsigned char *mem, - size_t mem_size) +int parse_json(unsigned char *buf, size_t buf_size, struct ubjson *out) { + int res; struct json_parser p; + memset(&p, 0, sizeof(p)); - make_cursor(buf, buf + buf_size, &p->cur); - make_cursor(buf, mem + mem_size, &p->mem); + make_cursor(buf, buf + 
buf_size, &p.cur); + make_cursor(out->data, out->data + out->data_size, &p.ubjson); - return parse_array_or_object(&p); + res = parse_array_or_object(&p); + out->data_size = p.ubjson.p - p.ubjson.start; + return res; } + + diff --git a/src/json.h b/src/json.h @@ -0,0 +1,16 @@ + +#ifndef WOLFSOCKS_JSON +#define WOLFSOCKS_JSON + +#include <stddef.h> + +struct ubjson { + unsigned char *data; + size_t data_size; +}; + + +int print_ubjson(struct ubjson *json); +int parse_json(unsigned char *buf, size_t buf_size, struct ubjson *out); + +#endif diff --git a/src/parse.h b/src/parse.h @@ -19,4 +19,29 @@ static inline int parse_char(struct cursor *cur, char *out, char match) return 0; } +static inline int parse_str(struct cursor *cur, const char *match) +{ + int len; + len = strlen(match); + + if (cur->p + len >= cur->end) { + return 0; + } + + if (!memcmp(match, cur->p, len)) { + cur->p += len; + return 1; + } + + return 0; +} + +static inline int consume_char(struct cursor *cur, char match) +{ + char c; + return pull_byte(cur, (unsigned char*)&c) && c == match; +} + + + #endif /* WOLFSOCKS_PARSE */ diff --git a/src/test_json.c b/src/test_json.c @@ -0,0 +1,36 @@ + +#include "json.h" + +#include <assert.h> +#include <stdio.h> + +static void write_data(unsigned char *data, int data_size) +{ + FILE *file; + + file = fopen("test_out.ubjson", "wb"); + fwrite(data, data_size, 1, file) ; + fclose(file); +} + +int main(int argc, char *argv[]) +{ + static unsigned char out[1024] = {0}; + struct ubjson ubjson; + + ubjson.data = out; + ubjson.data_size = sizeof(out); + + //unsigned char bad[] = "{a}"; + //assert(!parse_json(bad, sizeof(bad), &ubjson)); + //ubjson.data_size = sizeof(out); + + unsigned char empty[] = "{}"; + assert(parse_json(empty, sizeof(empty), &ubjson)); + ubjson.data_size = sizeof(out); + + unsigned char kv[] = "{ \"key\" : null , \"key2\": \"stringy\"}"; + assert(parse_json(kv, sizeof(kv), &ubjson)); + + write_data(ubjson.data, ubjson.data_size); +}