chibipub

experimental activitypub node in C
git clone git://jb55.com/chibipub
Log | Files | Refs | README | LICENSE

ubjson.c (11006B)


      1 
#include "ubjson.h"
#include "json.h"
#include "parse.h"

#include <assert.h>
#include <string.h>
      7 
      8 static inline int push_ubjson_null(struct cursor *ubjson)
      9 {
     10 	return push_byte(ubjson, 'Z');
     11 }
     12 
     13 static inline int is_number_tag(unsigned char tag)
     14 {
     15 	return tag == 'U' || tag == 'l' || tag == 'I' || tag == 'L' || tag == 'i';
     16 }
     17 
     18 static int parse_ubjson_sized_len(struct ubjson *ubjson, unsigned int *len, unsigned int *size)
     19 {
     20 	unsigned char byte;
     21 	unsigned short u16;
     22 	unsigned char *start;
     23 
     24 	start = ubjson->cur.p;
     25 
     26 	if (!pull_byte(&ubjson->cur, &byte)) {
     27 		note_error(&ubjson->errs, "oob");
     28 		return 0;
     29 	} else if (byte == 'U' || byte == 'i') {
     30 		if (!pull_byte(&ubjson->cur, &byte)) {
     31 			note_error(&ubjson->errs, "pull byte after u8 int");
     32 		} else {
     33 			if (size)
     34 				*size = 1;
     35 			*len = byte;
     36 			return 1;
     37 		}
     38 	} else if (byte == 'I') {
     39 		if (!pull_data(&ubjson->cur, (unsigned char*)&u16, sizeof(u16))) {
     40 			note_error(&ubjson->errs, "pull byte after u16 int");
     41 		} else {
     42 			if (size)
     43 				*size = 2;
     44 			*len = u16;
     45 			return 1;
     46 		}
     47 	} else if (byte == 'l' || byte == 'L') {
     48 		if (!pull_int(&ubjson->cur, (int*)len)) {
     49 			note_error(&ubjson->errs, "pull byte after u16 int");
     50 		} else {
     51 			if (size)
     52 				*size = 4;
     53 			return 1;
     54 		}
     55 	} else {
     56 		note_error(&ubjson->errs, "unhandled number tag '%c'", byte);
     57 	}
     58 
     59 	ubjson->cur.p = start;
     60 	return 0;
     61 }
     62 
     63 static inline int parse_ubjson_len(struct ubjson *ubjson, unsigned int *len)
     64 {
     65 	return parse_ubjson_sized_len(ubjson, len, NULL);
     66 }
     67 
     68 static int parse_ubjson_string(struct ubjson *ubjson, struct json *val)
     69 {
     70 	char byte;
     71 	byte = 0;
     72 
     73 	if (!parse_char(&ubjson->cur, &byte, 'S')) {
     74 		note_error(&ubjson->errs, "expected S tag, got '%c'", byte);
     75 		return 0;
     76 	}
     77 
     78 	if (!parse_ubjson_len(ubjson, &val->len)) {
     79 		note_error(&ubjson->errs, "size");
     80 		return 0;
     81 	}
     82 
     83 	val->type = JSON_STRING;
     84 	val->string = (char*)ubjson->cur.p;
     85 
     86 	ubjson->cur.p += val->len;
     87 
     88 	return 1;
     89 }
     90 
     91 static inline int push_ubjson_close(struct cursor *ubjson, unsigned int *len, char end)
     92 {
     93 	*len = ubjson->p + 1 - ((unsigned char*)len)-4;
     94 	return push_byte(ubjson, end);
     95 }
     96 
     97 static int push_ubjson_open(struct cursor *ubjson, char typ, unsigned int **len)
     98 {
     99 	if (!push_byte(ubjson, typ))
    100 		return 0;
    101 
    102 	if (!push_byte(ubjson, 'l'))
    103 		return 0;
    104 
    105 	/* fill this in later */
    106 	*len = (unsigned int *)ubjson->p;
    107 
    108 	if (!push_int(ubjson, 0))
    109 		return 0;
    110 
    111 	return 1;
    112 }
    113 
    114 static int push_ubjson_num(struct cursor *ubjson, unsigned int len)
    115 {
    116 	if (len <= 0xFF) {
    117 		if (!push_byte(ubjson, 'U')) {
    118 			return 0;
    119 		}
    120 		if (!push_byte(ubjson, (unsigned char)len)) {
    121 			return 0;
    122 		}
    123 	} else if (len <= 0xFFFF) {
    124 		if (!push_byte(ubjson, 'I')) {
    125 			return 0;
    126 		}
    127 		/* TODO: big-endian push */
    128 		if (!push_u16(ubjson, (unsigned int)len)) {
    129 			return 0;
    130 		}
    131 	} else {
    132 		if (!push_byte(ubjson, 'l')) {
    133 			return 0;
    134 		}
    135 		/* TODO: big-endian push */
    136 		if (!push_int(ubjson, len)) {
    137 			return 0;
    138 		}
    139 	}
    140 	return 1;
    141 }
    142 
    143 static inline int is_escape_char(char c)
    144 {
    145 	return c == '"' || c == '\\' || c == '/' || c == 'b' || c == 'f' ||
    146 		c == 'n' || c == 'r' || c == 't';
    147 }
    148 
    149 static int count_skipped(unsigned char *text, int size)
    150 {
    151 	unsigned char *p;
    152 	int skipped = 0;
    153 
    154 	for (p = text; p < text+size; p++) {
    155 		if (*p == '\\' && (p+1 < text+size) && is_escape_char(*(p+1))) {
    156 			p++;
    157 			skipped++;
    158 		}
    159 	}
    160 
    161 	return skipped;
    162 }
    163 
    164 static char escaped_char(char c)
    165 {
    166 	switch (c) {
    167 		case 'n': return '\n';
    168 		case 't': return '\t';
    169 		case 'r': return '\r';
    170 		case 'b': return '\b';
    171 		case 'f': return '\f';
    172 	}
    173 	return c;
    174 }
    175 
    176 static int push_unescaped(struct cursor *cur, unsigned char *text, int size)
    177 {
    178 	unsigned char *p;
    179 	unsigned char c;
    180 
    181 	for (p = text; p < text+size; p++) {
    182 		if (*p == '\\' && (p+1 < text+size) && is_escape_char(*(p+1))) {
    183 			p++;
    184 			c = (unsigned char)escaped_char((char)*p);
    185 		} else {
    186 			c = *p;
    187 		}
    188 
    189 		if (!push_byte(cur, c)) {
    190 			return 0;
    191 		}
    192 	}
    193 
    194 	return 1;
    195 }
    196 
    197 
    198 static int push_ubjson_str(struct cursor *ubjson, unsigned char *text,
    199 		unsigned int size)
    200 {
    201 	int skipped;
    202 
    203 	if (!push_byte(ubjson, 'S')) {
    204 		return 0;
    205 	}
    206 
    207 	skipped = count_skipped(text, size);
    208 
    209 	if (!push_ubjson_num(ubjson, size-skipped)) {
    210 		return 0;
    211 	}
    212 
    213 	if (!push_unescaped(ubjson, text, size)) {
    214 		return 0;
    215 	}
    216 
    217 	return 1;
    218 }
    219 
    220 static inline int valid_ubjson_tag(char c)
    221 {
    222 	return c == 'S' ||
    223 	       c == '{' ||
    224 	       c == '[' ||
    225 	       c == 'T' ||
    226 	       c == 'F' ||
    227 	       c == 'Z' || is_number_tag(c);
    228 }
    229 
    230 static inline void copy_ubjson(struct ubjson *src, struct ubjson *dst) {
    231 	copy_cursor(&src->cur, &dst->cur);
    232 	copy_errors(&src->errs, &dst->errs);
    233 	dst->data_end = src->data_end;
    234 }
    235 
    236 static int parse_ubjson_container(struct ubjson *ubjson, struct json *val, char token)
    237 {
    238 	char c;
    239 	c = 0;
    240 
    241 	val->type = JSON_OBJECT;
    242 	assert(ubjson != &val->container);
    243 	copy_ubjson(ubjson, &val->container);
    244 
    245 	if (!parse_char(&ubjson->cur, &c, token)) {
    246 		note_error(&ubjson->errs, "expected '{' tag, got '%c'", c);
    247 		return 0;
    248 	}
    249 
    250 	if (!parse_ubjson_len(ubjson, &val->len)) {
    251 		/* reset */
    252 		ubjson->cur.p = val->container.cur.p;
    253 		note_error(&ubjson->errs, "object len");
    254 		return 0;
    255 	}
    256 
    257 	if (ubjson->cur.p + val->len >= ubjson->data_end) {
    258 		/* reset */
    259 		ubjson->cur.p = val->container.cur.p;
    260 		note_error(&ubjson->errs, "invalid len %d", val->len);
    261 		return 0;
    262 	}
    263 
    264 	ubjson->cur.p += val->len;
    265 
    266 	return 1;
    267 }
    268 
    269 static inline int parse_ubjson_object(struct ubjson *ubjson, struct json *val)
    270 {
    271 	return parse_ubjson_container(ubjson, val, '{');
    272 }
    273 
    274 static inline int parse_ubjson_array(struct ubjson *ubjson, struct json *val)
    275 {
    276 	return parse_ubjson_container(ubjson, val, '[');
    277 }
    278 
    279 static int parse_ubjson_number(struct ubjson *ubjson, struct json *val)
    280 {
    281 	if (!parse_ubjson_sized_len(ubjson, &val->number_int, &val->len)) {
    282 		note_error(&ubjson->errs, "");
    283 		return 0;
    284 	}
    285 
    286 	val->type = JSON_NUMBER_INT;
    287 	return 1;
    288 }
    289 
    290 static int parse_ubjson_bool(struct ubjson *ubjson, struct json *val)
    291 {
    292 	unsigned char byte;
    293 	if (!pull_byte(&ubjson->cur, &byte)) {
    294 		note_error(&ubjson->errs, "pull byte oob");
    295 		return 0;
    296 	}
    297 
    298 	if (byte != 'T' && byte != 'F') {
    299 		note_error(&ubjson->errs, "invalid bool tag: '%c'", byte);
    300 		return 0;
    301 	}
    302 
    303 	val->type = JSON_BOOL;
    304 	val->len = 0;
    305 	val->boolean = byte == 'T';
    306 	return 1;
    307 }
    308 
    309 int parse_ubjson_value(struct ubjson *ubjson, struct json *val)
    310 {
    311 	char tag;
    312 	assert(&ubjson->cur != &val->container.cur);
    313 	if (!peek_char(&ubjson->cur, &tag)) {
    314 		note_error(&ubjson->errs, "peek value tag oob");
    315 		return 0;
    316 	}
    317 
    318 	if (tag == 'Z') {
    319 		val->type = JSON_NULL;
    320 		ubjson->cur.p++;
    321 		return 1;
    322 	} else if (tag == 'S') {
    323 		return parse_ubjson_string(ubjson, val);
    324 	} else if (tag == '{') {
    325 		return parse_ubjson_object(ubjson, val);
    326 	} else if (tag == '[') {
    327 		return parse_ubjson_array(ubjson, val);
    328 	} else if (is_number_tag(tag)) {
    329 		return parse_ubjson_number(ubjson, val);
    330 	} else if (tag == 'F' || tag == 'T') {
    331 		return parse_ubjson_bool(ubjson, val);
    332 	}
    333 
    334 	note_error(&ubjson->errs, "unhandled type '%c'", tag);
    335 	return 0;
    336 }
    337 
    338 static int ubjson_obj_lookup(struct ubjson *ubjson, const char *path, struct json *val)
    339 {
    340 	char byte;
    341 	unsigned int len;
    342 	struct json blackhole;
    343 	byte = 0;
    344 	assert(&ubjson->cur != &val->container.cur);
    345 
    346 	if (!parse_char(&ubjson->cur, &byte, '{')) {
    347 		note_error(&ubjson->errs, "no object tag, got '%c'", byte);
    348 		return 0;
    349 	}
    350 
    351 	if (!parse_ubjson_len(ubjson, &len)) {
    352 		note_error(&ubjson->errs, "object size");
    353 		return 0;
    354 	}
    355 
    356 	while (1) {
    357 		if (!peek_char(&ubjson->cur, &byte)) {
    358 			note_error(&ubjson->errs, "oob");
    359 			break;
    360 		}
    361 
    362 		if (byte == '}') {
    363 			note_error(&ubjson->errs, "not found")
    364 			break;
    365 		}
    366 
    367 		if (!parse_ubjson_string(ubjson, val)) {
    368 			note_error(&ubjson->errs, "string");
    369 			break;
    370 		}
    371 
    372 		if (strlen(path) != val->len ||
    373 		    memcmp(path, val->string, val->len)) {
    374 			/* skip over value */
    375 			if (!parse_ubjson_value(ubjson, &blackhole)) {
    376 				note_error(&ubjson->errs, "skip value");
    377 				return 0;
    378 			}
    379 
    380 			continue;
    381 		}
    382 
    383 		return parse_ubjson_value(ubjson, val);
    384 	}
    385 
    386 	note_error(&ubjson->errs, "not found");
    387 	ubjson->cur.p = ubjson->cur.start;
    388 	return 0;
    389 }
    390 
    391 int ubjson_lookup(struct ubjson *ubjson, const char **path, int path_len, struct json *val)
    392 {
    393 	int i;
    394 	const char *seg;
    395 	char byte;
    396 	struct ubjson next;
    397 
    398 	byte = 0;
    399 	copy_ubjson(ubjson, &next);
    400 
    401 	for (i = 0; i < path_len; i++) {
    402 		seg = path[i];
    403 
    404 		if (!ubjson_obj_lookup(&next, seg, val)) {
    405 			note_error(&ubjson->errs, "lookup path segment: '%s'", seg);
    406 			return 0;
    407 		}
    408 
    409 		/* not at the last segment and don't have an object or array */
    410 		if (i != path_len-1 && val->type != JSON_OBJECT) {
    411 			note_error(&ubjson->errs,
    412 			    "segment '%s' not an object, got '%c'", seg, byte);
    413 			return 0;
    414 		} else if (val->type == JSON_OBJECT) {
    415 			copy_ubjson(&val->container, &next);
    416 		}
    417 	}
    418 
    419 	return 1;
    420 }
    421 
    422 
    423 static inline int handle_ubjson_str(struct json_handlers *h, struct json_strtok *str)
    424 {
    425 	return push_ubjson_str((struct cursor*)h->data, str->text, str->size);
    426 }
    427 
    428 static inline int handle_ubjson_bool(struct json_handlers *h, int val)
    429 {
    430 	return push_byte((struct cursor*)h->data, val ? 'T' : 'F');
    431 }
    432 
    433 static inline int handle_ubjson_null(struct json_handlers *h)
    434 {
    435 	return push_ubjson_null((struct cursor*)h->data);
    436 }
    437 
    438 static inline int handle_ubjson_number(struct json_handlers *h, unsigned int num)
    439 {
    440 	return push_ubjson_num((struct cursor*)h->data, num);
    441 }
    442 
    443 static inline int handle_ubjson_open(struct json_handlers *h, char type, unsigned int **len)
    444 {
    445 	return push_ubjson_open((struct cursor*)h->data, type, len);
    446 }
    447 
    448 static inline int handle_ubjson_close(struct json_handlers *h, char type, unsigned int *len)
    449 {
    450 	return push_ubjson_close((struct cursor*)h->data, len, type);
    451 }
    452 
    453 static int handle_ubjson_token(struct json_handlers *h, char token, unsigned int **len)
    454 {
    455 	if (token == '{' || token == '[')
    456 		return handle_ubjson_open(h, token, len);
    457 	else if (token == '}' || token == ']')
    458 		return handle_ubjson_close(h, token, *len);
    459 	else
    460 		return 1;
    461 }
    462 
    463 void make_ubjson_handlers(struct json_handlers *h, struct cursor *ubjson)
    464 {
    465 	h->data = (void*)ubjson;
    466 
    467 	h->string = handle_ubjson_str;
    468 	h->boolean = handle_ubjson_bool;
    469 	h->null = handle_ubjson_null;
    470 	h->number = handle_ubjson_number;
    471 	h->token = handle_ubjson_token;
    472 }
    473 
    474 int parse_ubjson(unsigned char *buf, size_t buf_size, struct ubjson *out)
    475 {
    476 	struct json_parser p;
    477 	struct json_handlers h;
    478 	int ok;
    479 
    480 	make_ubjson_handlers(&h, &out->cur);
    481 
    482 	init_json_parser(&p, buf, buf_size, &h);
    483 	ok = parse_json(&p);
    484 
    485 	out->data_end = out->cur.p;
    486 	out->cur.p = out->cur.start;
    487 
    488 	return ok;
    489 }
    490 
    491 void init_ubjson(struct ubjson *ubjson, unsigned char *buf, size_t bufsize)
    492 {
    493 	make_cursor(buf, buf + bufsize, &ubjson->cur);
    494 	init_errors(&ubjson->errs);
    495 }
    496 
    497 void print_value(struct json *val)
    498 {
    499 	switch (val->type) {
    500 	case JSON_STRING:
    501 		printf("%.*s", val->len, val->string);
    502 		return;
    503 	case JSON_NULL:
    504 		printf("null");
    505 		return;
    506 	case JSON_NUMBER_INT:
    507 		printf("%d", val->number_int);
    508 		return;
    509 	case JSON_NUMBER:
    510 		printf("%f", val->number);
    511 		return;
    512 	case JSON_BOOL:
    513 		printf("%s", val->boolean? "true" : "false");
    514 		return;
    515 	default:
    516 		printf("implement print %d", val->type);
    517 	}
    518 }