chibipub

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit c05b6ec8c3b521e2caaf855dd8fdfbab2ac9de48
parent 88b7b87269ddeb58c0fe2dfe1e546c30a330f69d
Author: William Casarin <jb55@jb55.com>
Date:   Wed,  9 Dec 2020 06:37:14 -0800

numbers

Diffstat:
M.gitignore | 1+
MMakefile | 5++++-
Msrc/errors.h | 8+++++++-
Msrc/json.c | 257+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------
Msrc/json.h | 3+++
Msrc/test_json.c | 37+++++++++++++++++++++++++++++++++++--
6 files changed, 257 insertions(+), 54 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -4,3 +4,4 @@ /tags /test_out.ubjson src/test_json +/corpus/math.json diff --git a/Makefile b/Makefile @@ -7,10 +7,13 @@ HEADERS = $(wildcard src/*.h) all: wolfsocks +corpus/math.json: + curl -sL 'https://data.cityofnewyork.us/api/views/x4ai-kstz/rows.json?accessType=DOWNLOAD' > $@ + wolfsocks: src/wolfsocks.c $(OBJS) $(HEADERS) $(CC) $(CFLAGS) $< $(OBJS) $(LDFLAGS) -o $@ -src/test_json: src/test_json.c $(OBJS) $(HEADERS) +src/test_json: src/test_json.c $(OBJS) $(HEADERS) corpus/math.json $(CC) $(CFLAGS) $< $(OBJS) $(LDFLAGS) -o $@ check: src/test_json diff --git a/src/errors.h b/src/errors.h @@ -10,6 +10,11 @@ struct errors { int record; }; +static inline void copy_errors(struct errors *src, struct errors *dst) +{ + dst->record = src->record; +} + static inline void init_errors(struct errors *errs) { errs->record = 1; @@ -17,8 +22,9 @@ static inline void init_errors(struct errors *errs) static inline void note_error_(struct errors *errs, const char *fmt, ...) { - if (!errs->record) + if (!errs->record) { return; + } va_list ap; va_start(ap, fmt); diff --git a/src/json.c b/src/json.c @@ -3,6 +3,7 @@ #include "json.h" #include "parse.h" +#include <assert.h> #include <ctype.h> struct json_parser { @@ -11,6 +12,40 @@ struct json_parser { struct errors errs; }; +void init_ubjson(struct ubjson *ubjson, unsigned char *buf, size_t bufsize) +{ + make_cursor(buf, buf + bufsize, &ubjson->cur); + init_errors(&ubjson->errs); +} + +#define max(a,b) ((a) > (b) ? (a) : (b)) +static void print_around(struct cursor *cur, int range) +{ + unsigned char *c; + + c = max(cur->p - range, cur->start); + for (; c < cur->end && c < (cur->p + range); c++) { + if (*c < 32) + printf("%02x", *c); + else + printf("%c", *c); + } + printf("\n"); + + c = max(cur->p - range, cur->start); + for (; c < cur->end && c < (cur->p + range); c++) { + if (c == cur->p) { + printf("^"); + continue; + } + if (*c < 32) + printf(" "); + else + printf(" "); + } + printf("\n"); +} + static void consume_whitespace(struct cursor *cur) { for (; cur->p < cur->end; cur->p++) { @@ -86,7 +121,7 @@ static int parse_utf8_char(struct cursor *cur, unsigned int *chr) return 0; } -static int push_ubjson_len(struct cursor *ubjson, unsigned int len) +static int push_ubjson_num(struct cursor *ubjson, unsigned int len) { if (len <= 0xFF) { if (!push_byte(ubjson, 'U')) { @@ -123,7 +158,7 @@ static int push_ubjson_str(struct cursor *ubjson, unsigned char *text, return 0; } - if (!push_ubjson_len(ubjson, size)) { + if (!push_ubjson_num(ubjson, size)) { return 0; } @@ -178,13 +213,96 @@ static int parse_string(struct json_parser *p) return 0; } -static int parse_number(struct json_parser *p) +static int parse_digits(struct json_parser *p, unsigned int *out) { - (void)p; - note_error(&p->errs, "not implemented"); + char c; + + if (!peek_char(&p->cur, &c)) { + note_error(&p->errs, "oob"); + return 0; + } + + if (c < '1' || c > '9') { + note_error(&p->errs, "expected 1-9, got '%c'", c); + return 0; + } + + for (*out = 0; p->cur.p < p->cur.end; p->cur.p++) { + c = *p->cur.p; + + if (c < '0' || c > '9') { + return 1; + } + + *out = 10*(*out) + (c-'0'); + } + + note_error(&p->errs, "oob"); return 0; } +static int parse_fraction(struct json_parser *p) +{ + note_error(&p->errs, "floating point numbers not supported"); + return 0; + /* + if (!peek_char(&p->cur, &c)) { + note_error(&p->errs, "oob"); + return 0; + } + + if (c !== '.') { + p->cur.p++; + } else (parse_digits(p)) { + } + */ +} + +static int parse_number(struct json_parser *p) +{ + char c; + int sign = 1; + unsigned int digits; + + if (!peek_char(&p->cur, &c)) { + note_error(&p->errs, "oob"); + return 0; + } + + if (c == '-') { + sign = -1; + p->cur.p++; + if (!peek_char(&p->cur, &c)) { + note_error(&p->errs, "oob"); + return 0; + } + } + + if (c == '0') { + p->cur.p++; + if (!peek_char(&p->cur, &c)) { + note_error(&p->errs, "oob"); + return 0; + } + if (c != '.' && (c < '0' || c > '9')) { + /* just a 0 */ + push_ubjson_num(&p->ubjson, 0); + return 1; + } + return parse_fraction(p); + } else if (c >= '1' && c <= '9') { + if (!parse_digits(p, &digits)) { + note_error(&p->errs, "digits"); + return 0; + } + + return push_ubjson_num(&p->ubjson, digits * sign); + } else { + note_error(&p->errs, "expected '-', '0' or '1-9', got '%c'", c) + return 0; + } +} + static int parse_value(struct json_parser *p); static int parse_kv(struct json_parser *p) @@ -311,7 +429,6 @@ static inline int push_ubjson_null(struct cursor *ubjson) static int parse_null(struct json_parser *p) { if (!parse_str(&p->cur, "null")) { - fprintf(stderr, "got %.*s instead of null\n", 4, p->cur.p); note_error(&p->errs, "not null"); return 0; } @@ -369,7 +486,7 @@ static int parse_array_or_object(struct json_parser *p) return 0; } -static int parse_ubjson_size(struct ubjson *ubjson, unsigned int *len) +static int parse_ubjson_sized_len(struct ubjson *ubjson, unsigned int *len, unsigned int *size) { unsigned char byte; unsigned short u16; @@ -379,10 +496,13 @@ static int parse_ubjson_size(struct ubjson *ubjson, unsigned int *len) if (!pull_byte(&ubjson->cur, &byte)) { note_error(&ubjson->errs, "oob"); + return 0; } else if (byte == 'U' || byte == 'i') { if (!pull_byte(&ubjson->cur, &byte)) { note_error(&ubjson->errs, "pull byte after u8 int"); } else { + if (size) + *size = 1; *len = byte; return 1; } @@ -390,6 +510,8 @@ static int parse_ubjson_size(struct ubjson *ubjson, unsigned int *len) if (!pull_data(&ubjson->cur, (unsigned char*)&u16, sizeof(u16))) { note_error(&ubjson->errs, "pull byte after u16 int"); } else { + if (size) + *size = 2; *len = u16; return 1; } @@ -397,6 +519,8 @@ static int parse_ubjson_size(struct ubjson *ubjson, unsigned int *len) if (!pull_int(&ubjson->cur, (int*)len)) { note_error(&ubjson->errs, "pull byte after u16 int"); } else { + if (size) + *size = 4; return 1; } } else { @@ -407,23 +531,32 @@ static int parse_ubjson_size(struct ubjson *ubjson, unsigned int *len) return 0; } +static inline int parse_ubjson_len(struct ubjson *ubjson, unsigned int *len) +{ + return parse_ubjson_sized_len(ubjson, len, NULL); +} + static int parse_ubjson_string(struct ubjson *ubjson, struct json *val) { char byte; byte = 0; if (!parse_char(&ubjson->cur, &byte, 'S')) { + print_around(&ubjson->cur, 10); note_error(&ubjson->errs, "expected S tag, got '%c'", byte); return 0; } - if (!parse_ubjson_size(ubjson, &val->len)) { + if (!parse_ubjson_len(ubjson, &val->len)) { note_error(&ubjson->errs, "size"); return 0; } val->type = JSON_STRING; val->string = (char*)ubjson->cur.p; + + ubjson->cur.p += val->len; + return 1; } @@ -432,43 +565,13 @@ static inline int valid_ubjson_tag(char c) return c == 'S' || c == '{' || c == '[' || + c == 'U' || c == 'Z'; } -static int consume_ubjson_value(struct ubjson *u) -{ - unsigned char c; - unsigned int len; - - c = 0; - - if (!pull_byte(&u->cur, &c)) { - note_error(&u->errs, "oob"); - return 0; - } - if (!valid_ubjson_tag(c)) { - note_error(&u->errs, "invalid value tag '%c'", c); - return 0; - } - - if (c == 'Z') { - len = 0; - } else if (!parse_ubjson_size(u, &len)) { - note_error(&u->errs, "value size for tag '%c'", c); - return 0; - } - - if (u->cur.p + len > u->cur.end) { - note_error(&u->errs, "value size oob"); - return 0; - } - - u->cur.p += len; - return 1; -} - static inline void copy_ubjson(struct ubjson *src, struct ubjson *dst) { copy_cursor(&src->cur, &dst->cur); + copy_errors(&src->errs, &dst->errs); dst->data_end = src->data_end; } @@ -478,6 +581,7 @@ static int parse_ubjson_object(struct ubjson *ubjson, struct json *val) c = 0; val->type = JSON_OBJECT; + assert(ubjson != &val->container); copy_ubjson(ubjson, &val->container); if (!parse_char(&ubjson->cur, &c, '{')) { @@ -485,7 +589,7 @@ static int parse_ubjson_object(struct ubjson *ubjson, struct json *val) return 0; } - if (!parse_ubjson_size(ubjson, &val->len)) { + if (!parse_ubjson_len(ubjson, &val->len)) { /* reset */ ubjson->cur.p = val->container.cur.p; note_error(&ubjson->errs, "object len"); @@ -504,9 +608,45 @@ static int parse_ubjson_object(struct ubjson *ubjson, struct json *val) return 1; } +static int parse_ubjson_number(struct ubjson *ubjson, struct json *val) +{ + if (!parse_ubjson_sized_len(ubjson, &val->number_int, &val->len)) { + note_error(&ubjson->errs, ""); + return 0; + } + + val->type = JSON_NUMBER_INT; + return 1; +} + +static inline int is_number_tag(unsigned char tag) +{ + return tag == 'U' || tag == 'l' || tag == 'I' || tag == 'L' || tag == 'i'; +} + +static int parse_ubjson_bool(struct ubjson *ubjson, struct json *val) +{ + unsigned char byte; + if (!pull_byte(&ubjson->cur, &byte)) { + note_error(&ubjson->errs, "pull byte oob"); + return 0; + } + + if (byte != 'T' && byte != 'F') { + note_error(&ubjson->errs, "invalid bool tag: '%c'", byte); + return 0; + } + + val->type = JSON_BOOL; + val->len = 0; + val->boolean = byte == 'T'; + return 1; +} + int parse_ubjson_value(struct ubjson *ubjson, struct json *val) { char tag; + assert(&ubjson->cur != &val->container.cur); if (!peek_char(&ubjson->cur, &tag)) { note_error(&ubjson->errs, "peek value tag oob"); return 0; @@ -520,6 +660,10 @@ int parse_ubjson_value(struct ubjson *ubjson, struct json *val) return parse_ubjson_string(ubjson, val); } else if (tag == '{') { return parse_ubjson_object(ubjson, val); + } else if (is_number_tag(tag)) { + return parse_ubjson_number(ubjson, val); + } else if (tag == 'F' || tag == 'T') { + return parse_ubjson_bool(ubjson, val); } note_error(&ubjson->errs, "unhandled type '%c'", tag); @@ -530,14 +674,16 @@ static int ubjson_obj_lookup(struct ubjson *ubjson, const char *path, struct jso { char byte; unsigned int len; + struct json blackhole; byte = 0; + assert(&ubjson->cur != &val->container.cur); if (!parse_char(&ubjson->cur, &byte, '{')) { note_error(&ubjson->errs, "no object tag, got '%c'", byte); return 0; } - if (!parse_ubjson_size(ubjson, &len)) { + if (!parse_ubjson_len(ubjson, &len)) { note_error(&ubjson->errs, "object size"); return 0; } @@ -549,6 +695,7 @@ static int ubjson_obj_lookup(struct ubjson *ubjson, const char *path, struct jso } if (byte == '}') { + note_error(&ubjson->errs, "not found") break; } @@ -557,21 +704,21 @@ static int ubjson_obj_lookup(struct ubjson *ubjson, const char *path, struct jso break; } - ubjson->cur.p += val->len; - if (strlen(path) != val->len || memcmp(path, val->string, val->len)) { /* skip over value */ - if (!consume_ubjson_value(ubjson)) { + if (!parse_ubjson_value(ubjson, &blackhole)) { note_error(&ubjson->errs, "skip value"); return 0; } + continue; } return parse_ubjson_value(ubjson, val); } + note_error(&ubjson->errs, "not found"); ubjson->cur.p = ubjson->cur.start; return 0; } @@ -581,26 +728,27 @@ int ubjson_lookup(struct ubjson *ubjson, const char **path, int path_len, struct int i; const char *seg; char byte; - struct ubjson *next; + struct ubjson next; byte = 0; - next = ubjson; + copy_ubjson(ubjson, &next); for (i = 0; i < path_len; i++) { seg = path[i]; - if (!ubjson_obj_lookup(next, seg, val)) { + if (!ubjson_obj_lookup(&next, seg, val)) { + print_around(&ubjson->cur, 10); note_error(&ubjson->errs, "lookup path segment: '%s'", seg); return 0; } /* not at the last segment and don't have an object or array */ if (i != path_len-1 && val->type != JSON_OBJECT) { - note_error(&ubjson->errs, + note_error(&ubjson->errs, "segment '%s' not an object, got '%c'", seg, byte); return 0; } else if (val->type == JSON_OBJECT) { - next = &val->container; + copy_ubjson(&val->container, &next); } } @@ -616,6 +764,15 @@ void print_value(struct json *val) case JSON_NULL: printf("null"); return; + case JSON_NUMBER_INT: + printf("%d", val->number_int); + return; + case JSON_NUMBER: + printf("%f", val->number); + return; + case JSON_BOOL: + printf("%s", val->boolean? "true" : "false"); + return; default: printf("implement print %d", val->type); } diff --git a/src/json.h b/src/json.h @@ -14,6 +14,7 @@ struct ubjson { enum json_value_type { JSON_NUMBER, + JSON_NUMBER_INT, JSON_STRING, JSON_NULL, JSON_BOOL, @@ -27,11 +28,13 @@ struct json { union { struct ubjson container; double number; + unsigned int number_int; const char *string; int boolean; }; }; +void init_ubjson(struct ubjson *ubjson, unsigned char *buf, size_t bufsize); int print_ubjson(struct ubjson *json); int parse_json(unsigned char *buf, size_t buf_size, struct ubjson *out); int ubjson_lookup(struct ubjson *ubjson, const char **path, int path_len, struct json *val); diff --git a/src/test_json.c b/src/test_json.c @@ -4,6 +4,26 @@ #include <assert.h> #include <stdio.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <fcntl.h> +#include <unistd.h> + +int map_file(const char *filename, unsigned char **p, size_t *flen) +{ + struct stat st; + int des; + stat(filename, &st); + *flen = st.st_size; + + des = open(filename, O_RDONLY); + + *p = mmap(NULL, *flen, PROT_READ, MAP_PRIVATE, des, 0); + close(des); + + return *p != MAP_FAILED; +} static void write_data(unsigned char *data, int data_size) { @@ -17,10 +37,12 @@ static void write_data(unsigned char *data, int data_size) int main(int argc, char *argv[]) { static unsigned char out[1024] = {0}; + unsigned char *p; struct ubjson ubjson; struct json val; + size_t flen; - make_cursor(out, out + sizeof(out), &ubjson.cur); + init_ubjson(&ubjson, out, sizeof(out)); //unsigned char bad[] = "{a}"; //assert(!parse_json(bad, sizeof(bad), &ubjson)); @@ -38,8 +60,19 @@ int main(int argc, char *argv[]) printf("found val: "); print_value(&val); - printf("\n"); + printf("\n---\n"); + + ubjson.cur.p = ubjson.cur.start; + assert(map_file("corpus/math.json", &p, &flen)); + assert(ubjson.errs.record); + parse_json(p, flen, &ubjson); + printf("\n---\n"); write_data(ubjson.cur.start, ubjson.data_end - ubjson.cur.start); + const char *path2[] = {"meta", "view", "oid"}; + assert(ubjson_lookup(&ubjson, path2, 3, &val)); + + print_value(&val); + printf("\n"); }