commit c05b6ec8c3b521e2caaf855dd8fdfbab2ac9de48
parent 88b7b87269ddeb58c0fe2dfe1e546c30a330f69d
Author: William Casarin <jb55@jb55.com>
Date: Wed, 9 Dec 2020 06:37:14 -0800
numbers
Diffstat:
6 files changed, 257 insertions(+), 54 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -4,3 +4,4 @@
/tags
/test_out.ubjson
src/test_json
+/corpus/math.json
diff --git a/Makefile b/Makefile
@@ -7,10 +7,13 @@ HEADERS = $(wildcard src/*.h)
all: wolfsocks
+# Fetch the JSON corpus that src/test_json depends on.
+# The download goes to a temporary name first so that a failed or partial
+# transfer never leaves a target file that make would treat as up to date.
+corpus/math.json:
+	mkdir -p $(@D)
+	curl -fsSL 'https://data.cityofnewyork.us/api/views/x4ai-kstz/rows.json?accessType=DOWNLOAD' -o $@.tmp
+	mv $@.tmp $@
+
wolfsocks: src/wolfsocks.c $(OBJS) $(HEADERS)
$(CC) $(CFLAGS) $< $(OBJS) $(LDFLAGS) -o $@
-src/test_json: src/test_json.c $(OBJS) $(HEADERS)
+src/test_json: src/test_json.c $(OBJS) $(HEADERS) corpus/math.json
$(CC) $(CFLAGS) $< $(OBJS) $(LDFLAGS) -o $@
check: src/test_json
diff --git a/src/errors.h b/src/errors.h
@@ -10,6 +10,11 @@ struct errors {
int record;
};
+/* Copy the error-recording flag of `src` into `dst`; only `record` is copied. */
+static inline void copy_errors(struct errors *src, struct errors *dst)
+{
+	const int record = src->record;
+
+	dst->record = record;
+}
+
static inline void init_errors(struct errors *errs)
{
errs->record = 1;
@@ -17,8 +22,9 @@ static inline void init_errors(struct errors *errs)
static inline void note_error_(struct errors *errs, const char *fmt, ...)
{
- if (!errs->record)
+ if (!errs->record) {
return;
+ }
va_list ap;
va_start(ap, fmt);
diff --git a/src/json.c b/src/json.c
@@ -3,6 +3,7 @@
#include "json.h"
#include "parse.h"
+#include <assert.h>
#include <ctype.h>
struct json_parser {
@@ -11,6 +12,40 @@ struct json_parser {
struct errors errs;
};
+/*
+ * Set up `ubjson` over a caller-supplied buffer: error recording is switched
+ * on and the range [buf, buf + bufsize) is handed to make_cursor.
+ *
+ * NOTE(review): data_end is not assigned here -- presumably parse_json sets
+ * it before anything reads it; confirm.
+ */
+void init_ubjson(struct ubjson *ubjson, unsigned char *buf, size_t bufsize)
+{
+	unsigned char *buf_end = buf + bufsize;
+
+	init_errors(&ubjson->errs);
+	make_cursor(buf, buf_end, &ubjson->cur);
+}
+
+#define max(a,b) ((a) > (b) ? (a) : (b))
+/*
+ * Debug helper: print up to `range` bytes on either side of cur->p to
+ * stdout, then a second line with a '^' under the byte at cur->p.
+ * Bytes below 0x20 are shown as two hex digits.
+ */
+static void print_around(struct cursor *cur, int range)
+{
+	unsigned char *c, *lo, *hi;
+
+	if (range < 0) {
+		range = 0;
+	}
+
+	/* clamp by distance so no pointer outside [start, end] is ever formed */
+	lo = (cur->p - cur->start > range) ? cur->p - range : cur->start;
+	hi = (cur->end - cur->p > range) ? cur->p + range : cur->end;
+
+	for (c = lo; c < hi; c++) {
+		if (*c < 32)
+			printf("%02x", *c);
+		else
+			printf("%c", *c);
+	}
+	printf("\n");
+
+	for (c = lo; c < hi; c++) {
+		if (c == cur->p) {
+			printf("^");
+			break;
+		}
+		/* control bytes took two columns on the line above */
+		printf("%s", *c < 32 ? "  " : " ");
+	}
+	printf("\n");
+}
+
static void consume_whitespace(struct cursor *cur)
{
for (; cur->p < cur->end; cur->p++) {
@@ -86,7 +121,7 @@ static int parse_utf8_char(struct cursor *cur, unsigned int *chr)
return 0;
}
-static int push_ubjson_len(struct cursor *ubjson, unsigned int len)
+static int push_ubjson_num(struct cursor *ubjson, unsigned int len)
{
if (len <= 0xFF) {
if (!push_byte(ubjson, 'U')) {
@@ -123,7 +158,7 @@ static int push_ubjson_str(struct cursor *ubjson, unsigned char *text,
return 0;
}
- if (!push_ubjson_len(ubjson, size)) {
+ if (!push_ubjson_num(ubjson, size)) {
return 0;
}
@@ -178,13 +213,96 @@ static int parse_string(struct json_parser *p)
return 0;
}
-static int parse_number(struct json_parser *p)
+/*
+ * Parse a run of decimal digits that starts with 1-9 into *out, leaving the
+ * cursor on the first non-digit byte.
+ *
+ * Returns 1 on success. Returns 0 after noting an error when no byte can be
+ * peeked, the first byte is not 1-9, the value does not fit in an unsigned
+ * int, or the buffer ends before a non-digit byte is seen.
+ */
+static int parse_digits(struct json_parser *p, unsigned int *out)
{
- (void)p;
- note_error(&p->errs, "not implemented");
+	const unsigned int limit = ~0U;
+	unsigned int digit;
+	char c;
+
+	if (!peek_char(&p->cur, &c)) {
+		note_error(&p->errs, "oob");
+		return 0;
+	}
+
+	if (c < '1' || c > '9') {
+		note_error(&p->errs, "expected 1-9, got '%c'", c);
+		return 0;
+	}
+
+	for (*out = 0; p->cur.p < p->cur.end; p->cur.p++) {
+		c = *p->cur.p;
+
+		if (c < '0' || c > '9') {
+			return 1;
+		}
+
+		digit = (unsigned int)(c - '0');
+
+		/* refuse to wrap silently on very long digit runs */
+		if (*out > (limit - digit) / 10) {
+			note_error(&p->errs, "number too large");
+			return 0;
+		}
+
+		*out = 10*(*out) + digit;
+	}
+
+	note_error(&p->errs, "oob");
return 0;
}
+/*
+ * Placeholder for the fractional part of a JSON number. Floating point
+ * values are not implemented, so this always notes an error and returns 0.
+ *
+ * TODO: consume '.' followed by digits once floats are supported.
+ */
+static int parse_fraction(struct json_parser *p)
+{
+	note_error(&p->errs, "floating point numbers not supported");
+	return 0;
+}
+
+
+/*
+ * Parse a JSON integer at the cursor and append it to the UBJSON output via
+ * push_ubjson_num.
+ *
+ * Accepts an optional '-', then either a lone 0 or a digit run starting with
+ * 1-9. A 0 followed by '.' is passed to parse_fraction (which currently
+ * always fails); a 0 followed by another digit is rejected.
+ *
+ * Returns 1 on success, 0 after noting an error.
+ *
+ * NOTE(review): a '.' or exponent after a non-zero digit run is not examined
+ * here; the integer part is pushed and the cursor is left on that byte.
+ */
+static int parse_number(struct json_parser *p)
+{
+	char c;
+	int negative = 0;
+	unsigned int digits;
+
+	if (!peek_char(&p->cur, &c)) {
+		note_error(&p->errs, "oob");
+		return 0;
+	}
+
+	if (c == '-') {
+		negative = 1;
+		p->cur.p++;
+		if (!peek_char(&p->cur, &c)) {
+			note_error(&p->errs, "oob");
+			return 0;
+		}
+	}
+
+	if (c == '0') {
+		p->cur.p++;
+		if (!peek_char(&p->cur, &c)) {
+			note_error(&p->errs, "oob");
+			return 0;
+		}
+		if (c == '.') {
+			return parse_fraction(p);
+		}
+		if (c >= '0' && c <= '9') {
+			note_error(&p->errs, "leading zeros are not allowed");
+			return 0;
+		}
+		/* just a 0 -- propagate a failed push instead of reporting success */
+		return push_ubjson_num(&p->ubjson, 0);
+	}
+
+	if (c < '1' || c > '9') {
+		note_error(&p->errs, "expected '-', '0' or '1-9', got '%c'", c);
+		return 0;
+	}
+
+	if (!parse_digits(p, &digits)) {
+		note_error(&p->errs, "digits");
+		return 0;
+	}
+
+	/*
+	 * push_ubjson_num takes an unsigned int, so a negative value is passed
+	 * as its unsigned wrap-around -- the same value the former implicit
+	 * unsigned multiply by -1 produced, now spelled out.
+	 */
+	return push_ubjson_num(&p->ubjson, negative ? 0U - digits : digits);
+}
+
static int parse_value(struct json_parser *p);
static int parse_kv(struct json_parser *p)
@@ -311,7 +429,6 @@ static inline int push_ubjson_null(struct cursor *ubjson)
static int parse_null(struct json_parser *p)
{
if (!parse_str(&p->cur, "null")) {
- fprintf(stderr, "got %.*s instead of null\n", 4, p->cur.p);
note_error(&p->errs, "not null");
return 0;
}
@@ -369,7 +486,7 @@ static int parse_array_or_object(struct json_parser *p)
return 0;
}
-static int parse_ubjson_size(struct ubjson *ubjson, unsigned int *len)
+static int parse_ubjson_sized_len(struct ubjson *ubjson, unsigned int *len, unsigned int *size)
{
unsigned char byte;
unsigned short u16;
@@ -379,10 +496,13 @@ static int parse_ubjson_size(struct ubjson *ubjson, unsigned int *len)
if (!pull_byte(&ubjson->cur, &byte)) {
note_error(&ubjson->errs, "oob");
+ return 0;
} else if (byte == 'U' || byte == 'i') {
if (!pull_byte(&ubjson->cur, &byte)) {
note_error(&ubjson->errs, "pull byte after u8 int");
} else {
+ if (size)
+ *size = 1;
*len = byte;
return 1;
}
@@ -390,6 +510,8 @@ static int parse_ubjson_size(struct ubjson *ubjson, unsigned int *len)
if (!pull_data(&ubjson->cur, (unsigned char*)&u16, sizeof(u16))) {
note_error(&ubjson->errs, "pull byte after u16 int");
} else {
+ if (size)
+ *size = 2;
*len = u16;
return 1;
}
@@ -397,6 +519,8 @@ static int parse_ubjson_size(struct ubjson *ubjson, unsigned int *len)
if (!pull_int(&ubjson->cur, (int*)len)) {
note_error(&ubjson->errs, "pull byte after u16 int");
} else {
+ if (size)
+ *size = 4;
return 1;
}
} else {
@@ -407,23 +531,32 @@ static int parse_ubjson_size(struct ubjson *ubjson, unsigned int *len)
return 0;
}
+/* Read a length at the cursor into *len, discarding the encoded width. */
+static inline int parse_ubjson_len(struct ubjson *ubjson, unsigned int *len)
+{
+	unsigned int *ignored_width = NULL;
+
+	return parse_ubjson_sized_len(ubjson, len, ignored_width);
+}
+
+/*
+ * Parse an 'S'-tagged string at the cursor. On success val->type is
+ * JSON_STRING, val->string points directly into the ubjson buffer (no
+ * terminator is written by this function), val->len is the byte length and
+ * the cursor is left just past the string bytes. Returns 1 on success, 0
+ * after noting an error.
+ */
static int parse_ubjson_string(struct ubjson *ubjson, struct json *val)
{
char byte;
byte = 0;
if (!parse_char(&ubjson->cur, &byte, 'S')) {
+	print_around(&ubjson->cur, 10);
note_error(&ubjson->errs, "expected S tag, got '%c'", byte);
return 0;
}
- if (!parse_ubjson_size(ubjson, &val->len)) {
+	if (!parse_ubjson_len(ubjson, &val->len)) {
note_error(&ubjson->errs, "size");
return 0;
}
+
+	/* the declared length must fit in what is left of the buffer */
+	if (val->len > (size_t)(ubjson->cur.end - ubjson->cur.p)) {
+		note_error(&ubjson->errs, "string len oob");
+		return 0;
+	}
+
val->type = JSON_STRING;
val->string = (char*)ubjson->cur.p;
+
+	ubjson->cur.p += val->len;
+
return 1;
}
@@ -432,43 +565,13 @@ static inline int valid_ubjson_tag(char c)
return c == 'S' ||
c == '{' ||
c == '[' ||
+ c == 'U' ||
c == 'Z';
}
-static int consume_ubjson_value(struct ubjson *u)
-{
- unsigned char c;
- unsigned int len;
-
- c = 0;
-
- if (!pull_byte(&u->cur, &c)) {
- note_error(&u->errs, "oob");
- return 0;
- }
- if (!valid_ubjson_tag(c)) {
- note_error(&u->errs, "invalid value tag '%c'", c);
- return 0;
- }
-
- if (c == 'Z') {
- len = 0;
- } else if (!parse_ubjson_size(u, &len)) {
- note_error(&u->errs, "value size for tag '%c'", c);
- return 0;
- }
-
- if (u->cur.p + len > u->cur.end) {
- note_error(&u->errs, "value size oob");
- return 0;
- }
-
- u->cur.p += len;
- return 1;
-}
-
+/* Copy the cursor, the error-recording flag and data_end from `src` to `dst`. */
static inline void copy_ubjson(struct ubjson *src, struct ubjson *dst) {
copy_cursor(&src->cur, &dst->cur);
+ copy_errors(&src->errs, &dst->errs);
dst->data_end = src->data_end;
}
@@ -478,6 +581,7 @@ static int parse_ubjson_object(struct ubjson *ubjson, struct json *val)
c = 0;
val->type = JSON_OBJECT;
+ assert(ubjson != &val->container);
copy_ubjson(ubjson, &val->container);
if (!parse_char(&ubjson->cur, &c, '{')) {
@@ -485,7 +589,7 @@ static int parse_ubjson_object(struct ubjson *ubjson, struct json *val)
return 0;
}
- if (!parse_ubjson_size(ubjson, &val->len)) {
+ if (!parse_ubjson_len(ubjson, &val->len)) {
/* reset */
ubjson->cur.p = val->container.cur.p;
note_error(&ubjson->errs, "object len");
@@ -504,9 +608,45 @@ static int parse_ubjson_object(struct ubjson *ubjson, struct json *val)
return 1;
}
+/*
+ * Parse an integer value at the cursor. On success val->type is
+ * JSON_NUMBER_INT, val->number_int holds the value and val->len holds the
+ * width in bytes reported by parse_ubjson_sized_len (1, 2 or 4).
+ * Returns 1 on success, 0 after noting an error.
+ */
+static int parse_ubjson_number(struct ubjson *ubjson, struct json *val)
+{
+	if (!parse_ubjson_sized_len(ubjson, &val->number_int, &val->len)) {
+		/* was an empty message, which left no trace of where parsing failed */
+		note_error(&ubjson->errs, "number");
+		return 0;
+	}
+
+	val->type = JSON_NUMBER_INT;
+	return 1;
+}
+
+/* Return 1 when `tag` is one of the integer markers U, i, I, l or L, else 0. */
+static inline int is_number_tag(unsigned char tag)
+{
+	switch (tag) {
+	case 'U':
+	case 'i':
+	case 'I':
+	case 'l':
+	case 'L':
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Consume one byte and interpret it as a boolean marker: 'T' is true, 'F' is
+ * false. On success val->type is JSON_BOOL, val->len is 0 and val->boolean
+ * is 1 or 0. Returns 1 on success, 0 after noting an error.
+ */
+static int parse_ubjson_bool(struct ubjson *ubjson, struct json *val)
+{
+	unsigned char tag;
+
+	if (!pull_byte(&ubjson->cur, &tag)) {
+		note_error(&ubjson->errs, "pull byte oob");
+		return 0;
+	}
+
+	if (tag == 'T' || tag == 'F') {
+		val->type = JSON_BOOL;
+		val->len = 0;
+		val->boolean = (tag == 'T') ? 1 : 0;
+		return 1;
+	}
+
+	note_error(&ubjson->errs, "invalid bool tag: '%c'", tag);
+	return 0;
+}
+
int parse_ubjson_value(struct ubjson *ubjson, struct json *val)
{
char tag;
+ assert(&ubjson->cur != &val->container.cur);
if (!peek_char(&ubjson->cur, &tag)) {
note_error(&ubjson->errs, "peek value tag oob");
return 0;
@@ -520,6 +660,10 @@ int parse_ubjson_value(struct ubjson *ubjson, struct json *val)
return parse_ubjson_string(ubjson, val);
} else if (tag == '{') {
return parse_ubjson_object(ubjson, val);
+ } else if (is_number_tag(tag)) {
+ return parse_ubjson_number(ubjson, val);
+ } else if (tag == 'F' || tag == 'T') {
+ return parse_ubjson_bool(ubjson, val);
}
note_error(&ubjson->errs, "unhandled type '%c'", tag);
@@ -530,14 +674,16 @@ static int ubjson_obj_lookup(struct ubjson *ubjson, const char *path, struct jso
{
char byte;
unsigned int len;
+ struct json blackhole;
byte = 0;
+ assert(&ubjson->cur != &val->container.cur);
if (!parse_char(&ubjson->cur, &byte, '{')) {
note_error(&ubjson->errs, "no object tag, got '%c'", byte);
return 0;
}
- if (!parse_ubjson_size(ubjson, &len)) {
+ if (!parse_ubjson_len(ubjson, &len)) {
note_error(&ubjson->errs, "object size");
return 0;
}
@@ -549,6 +695,7 @@ static int ubjson_obj_lookup(struct ubjson *ubjson, const char *path, struct jso
}
if (byte == '}') {
+ note_error(&ubjson->errs, "not found")
break;
}
@@ -557,21 +704,21 @@ static int ubjson_obj_lookup(struct ubjson *ubjson, const char *path, struct jso
break;
}
- ubjson->cur.p += val->len;
-
if (strlen(path) != val->len ||
memcmp(path, val->string, val->len)) {
/* skip over value */
- if (!consume_ubjson_value(ubjson)) {
+ if (!parse_ubjson_value(ubjson, &blackhole)) {
note_error(&ubjson->errs, "skip value");
return 0;
}
+
continue;
}
return parse_ubjson_value(ubjson, val);
}
+ note_error(&ubjson->errs, "not found");
ubjson->cur.p = ubjson->cur.start;
return 0;
}
@@ -581,26 +728,27 @@ int ubjson_lookup(struct ubjson *ubjson, const char **path, int path_len, struct
int i;
const char *seg;
char byte;
- struct ubjson *next;
+ struct ubjson next;
byte = 0;
- next = ubjson;
+ copy_ubjson(ubjson, &next);
for (i = 0; i < path_len; i++) {
seg = path[i];
- if (!ubjson_obj_lookup(next, seg, val)) {
+ if (!ubjson_obj_lookup(&next, seg, val)) {
+ print_around(&ubjson->cur, 10);
note_error(&ubjson->errs, "lookup path segment: '%s'", seg);
return 0;
}
/* not at the last segment and don't have an object or array */
if (i != path_len-1 && val->type != JSON_OBJECT) {
- note_error(&ubjson->errs,
+ note_error(&ubjson->errs,
"segment '%s' not an object, got '%c'", seg, byte);
return 0;
} else if (val->type == JSON_OBJECT) {
- next = &val->container;
+ copy_ubjson(&val->container, &next);
}
}
@@ -616,6 +764,15 @@ void print_value(struct json *val)
case JSON_NULL:
printf("null");
return;
+ case JSON_NUMBER_INT:
+ printf("%d", val->number_int);
+ return;
+ case JSON_NUMBER:
+ printf("%f", val->number);
+ return;
+ case JSON_BOOL:
+ printf("%s", val->boolean? "true" : "false");
+ return;
default:
printf("implement print %d", val->type);
}
diff --git a/src/json.h b/src/json.h
@@ -14,6 +14,7 @@ struct ubjson {
enum json_value_type {
JSON_NUMBER,
+ JSON_NUMBER_INT,
JSON_STRING,
JSON_NULL,
JSON_BOOL,
@@ -27,11 +28,13 @@ struct json {
union {
struct ubjson container;
double number;
+ unsigned int number_int;
const char *string;
int boolean;
};
};
+void init_ubjson(struct ubjson *ubjson, unsigned char *buf, size_t bufsize);
int print_ubjson(struct ubjson *json);
int parse_json(unsigned char *buf, size_t buf_size, struct ubjson *out);
int ubjson_lookup(struct ubjson *ubjson, const char **path, int path_len, struct json *val);
diff --git a/src/test_json.c b/src/test_json.c
@@ -4,6 +4,26 @@
#include <assert.h>
#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+/*
+ * Map `filename` read-only into memory.
+ *
+ * On success returns 1 with *p pointing at the mapping and *flen set to the
+ * file size; the caller releases it with munmap(*p, *flen). On any failure
+ * (open, fstat or mmap) returns 0 with *p = NULL and *flen = 0.
+ */
+int map_file(const char *filename, unsigned char **p, size_t *flen)
+{
+	struct stat st;
+	void *map;
+	int des;
+
+	*p = NULL;
+	*flen = 0;
+
+	des = open(filename, O_RDONLY);
+	if (des == -1) {
+		return 0;
+	}
+
+	/* size the descriptor we actually opened, not a second path lookup */
+	if (fstat(des, &st) == -1) {
+		close(des);
+		return 0;
+	}
+
+	map = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_PRIVATE, des, 0);
+	/* the mapping stays valid after the descriptor is closed */
+	close(des);
+
+	if (map == MAP_FAILED) {
+		return 0;
+	}
+
+	*p = map;
+	*flen = (size_t)st.st_size;
+	return 1;
+}
static void write_data(unsigned char *data, int data_size)
{
@@ -17,10 +37,12 @@ static void write_data(unsigned char *data, int data_size)
int main(int argc, char *argv[])
{
static unsigned char out[1024] = {0};
+ unsigned char *p;
struct ubjson ubjson;
struct json val;
+ size_t flen;
- make_cursor(out, out + sizeof(out), &ubjson.cur);
+ init_ubjson(&ubjson, out, sizeof(out));
//unsigned char bad[] = "{a}";
//assert(!parse_json(bad, sizeof(bad), &ubjson));
@@ -38,8 +60,19 @@ int main(int argc, char *argv[])
printf("found val: ");
print_value(&val);
- printf("\n");
+ printf("\n---\n");
+
+ ubjson.cur.p = ubjson.cur.start;
+ assert(map_file("corpus/math.json", &p, &flen));
+ assert(ubjson.errs.record);
+ parse_json(p, flen, &ubjson);
+ printf("\n---\n");
write_data(ubjson.cur.start, ubjson.data_end - ubjson.cur.start);
+ const char *path2[] = {"meta", "view", "oid"};
+ assert(ubjson_lookup(&ubjson, path2, 3, &val));
+
+ print_value(&val);
+ printf("\n");
}