chibipub

experimental activitypub node in C
git clone git://jb55.com/chibipub
Log | Files | Refs | README | LICENSE

json.c (11130B)


      1 
#include "cursor.h"
#include "json.h"
#include "parse.h"
#include "util.h"

#include <assert.h>
#include <ctype.h>
#include <limits.h>
      9 
     10 void init_json_strtok(struct json_strtok *s)
     11 {
     12 	memset(s, 0, sizeof(*s));
     13 }
     14 
     15 void init_json_pusher_with(struct json_pusher *p, struct cursor *out)
     16 {
     17 	memset(p, 0, sizeof(*p));
     18 	init_errors(&p->errs);
     19 	copy_cursor(out, &p->cur);
     20 }
     21 
     22 void init_json_pusher(struct json_pusher *p, unsigned char *buf, size_t size)
     23 {
     24 	memset(p, 0, sizeof(*p));
     25 	init_errors(&p->errs);
     26 	make_cursor(buf, buf+size, &p->cur);
     27 }
     28 
     29 void init_json_handlers(struct json_handlers *h)
     30 {
     31 	memset(h, 0, sizeof(*h));
     32 }
     33 
     34 static void consume_whitespace(struct cursor *cur)
     35 {
     36 	for (; cur->p < cur->end; cur->p++) {
     37 		if (!isspace(*cur->p))
     38 			return;
     39 	}
     40 }
     41 
     42 static int pull_utf8_cont(struct cursor *cur, unsigned char *c)
     43 {
     44 	if (!pull_byte(cur, c)) {
     45 		return 0;
     46 	}
     47 
     48 	if ((*c & 0xC0) != 0x80) {
     49 		return 0;
     50 	}
     51 
     52 	*c = *c & 0x3F;
     53 
     54 	return 1;
     55 }
     56 
     57 static int parse_utf8_char(struct cursor *cur, unsigned int *chr)
     58 {
     59 	unsigned char c[4];
     60 
     61 	if (!pull_byte(cur, &c[0])) {
     62 		return 0;
     63 	}
     64 
     65 	if ((c[0] & 0x80) == 0) {
     66 		*chr = c[0];
     67 		return 1;
     68 	}
     69 
     70 	if ((c[0] & 0xE0) == 0xC0) {
     71 		if (!pull_utf8_cont(cur, &c[1])) {
     72 			return 0;
     73 		}
     74 		if (c[1] >= 0) {
     75 			*chr = ((c[0] & 0x1F) << 6) | c[1];
     76 			if (*chr >= 128) {
     77 				return 1;
     78 			}
     79 		}
     80 	} else if ((c[0] & 0xF0) == 0xE0) {
     81 		if (!(pull_utf8_cont(cur, &c[1]) &&
     82                       pull_utf8_cont(cur, &c[2]))) {
     83 			return 0;
     84 		}
     85 		if ((c[1] | c[2]) >= 0) {
     86 			*chr = ((c[0] & 0x0F) << 12) | (c[1] << 6) | c[2];
     87 			if (*chr >= 2048 && (*chr < 55296 || *chr > 57343)) {
     88 				return 1;
     89 			}
     90 		}
     91 	} else if ((c[0] & 0xF8) == 0xF0) {
     92 		if (!(pull_utf8_cont(cur, &c[1]) &&
     93                       pull_utf8_cont(cur, &c[2]) &&
     94 		      pull_utf8_cont(cur, &c[3]))) {
     95 			return 0;
     96 		}
     97 		if ((c[1] | c[2] | c[3]) >= 0) {
     98 			*chr = ((c[0] & 0x08) << 18) | (c[1] << 12)
     99 			                             | (c[2] << 6) | c[3];
    100 			if (*chr >= 65536 && *chr <= 1114111) {
    101 				return 1;
    102 			}
    103 		}
    104 	}
    105 
    106 	return 0;
    107 }
    108 
    109 static inline int parse_escape(struct json_parser *p)
    110 {
    111 	p->cur.p++;
    112 	return 1;
    113 }
    114 
    115 static int parse_string_with(struct json_parser *p, int is_key)
    116 {
    117 	char c;
    118 	unsigned int chr;
    119 	unsigned char *start;
    120 	struct json_strtok str;
    121 	c = 0;
    122 	chr = 0;
    123 
    124 	init_json_strtok(&str);
    125 
    126 	start = p->cur.p;
    127 
    128 	if (!parse_char(&p->cur, &c, '"')) {
    129 		note_error(&p->errs, "expected '\"', got '%c'", c);
    130 		return 0;
    131 	}
    132 
    133 	while (1) {
    134 		if (!parse_utf8_char(&p->cur, &chr)) {
    135 			note_error(&p->errs, "invalid utf-8 codepoint");
    136 			break;
    137 		}
    138 
    139 		if (chr == '"') {
    140 			str.text = start+1;
    141 			str.size = p->cur.p - start - 2;
    142 			str.is_key = is_key;
    143 
    144 			return (*p->handlers.string)(&p->handlers, &str);
    145 		}
    146 
    147 		if (chr == '\\' && !parse_escape(p)) {
    148 			note_error(&p->errs, "invalid escape char '%c'", c);
    149 			break;
    150 		}
    151 	}
    152 
    153 	p->cur.p = start;
    154 	return 0;
    155 }
    156 
    157 static int inline parse_string(struct json_parser *p)
    158 {
    159 	return parse_string_with(p, 0);
    160 }
    161 
    162 static int inline parse_key(struct json_parser *p)
    163 {
    164 	return parse_string_with(p, 1);
    165 }
    166 
    167 static int parse_digits(struct json_parser *p, unsigned int *out)
    168 {
    169 	char c;
    170 
    171 	if (!peek_char(&p->cur, &c)) {
    172 		note_error(&p->errs, "oob");
    173 		return 0;
    174 	}
    175 
    176 	if (c < '1' || c > '9') {
    177 		note_error(&p->errs, "expected 1-9, got '%c'", c);
    178 		return 0;
    179 	}
    180 
    181 	for (*out = 0; p->cur.p < p->cur.end; p->cur.p++) {
    182 		c = *p->cur.p;
    183 
    184 		if (c < '0' || c > '9') {
    185 			return 1;
    186 		}
    187 
    188 		*out = 10*(*out) + (c-'0');
    189 	}
    190 
    191 	note_error(&p->errs, "oob");
    192 	return 0;
    193 }
    194 
    195 static int parse_fraction(struct json_parser *p)
    196 {
    197 	note_error(&p->errs, "floating point numbers not supported");
    198 	return 0;
    199 	/*
    200 	if (!peek_char(&p->cur, &c)) {
    201 		note_error(&p->errs, "oob");
    202 		return 0;
    203 	}
    204 
    205 	if (c !== '.') {
    206 		p->cur.p++;
    207 	} else (parse_digits(p)) {
    208 	}
    209 	*/
    210 }
    211 
    212 static int parse_number(struct json_parser *p)
    213 {
    214 	char c;
    215 	int sign = 1;
    216 	unsigned int digits;
    217 
    218 	if (!peek_char(&p->cur, &c)) {
    219 		note_error(&p->errs, "oob");
    220 		return 0;
    221 	}
    222 
    223 	if (c == '-') {
    224 		sign = -1;
    225 		p->cur.p++;
    226 		if (!peek_char(&p->cur, &c)) {
    227 			note_error(&p->errs, "oob");
    228 			return 0;
    229 		}
    230 	}
    231 
    232 	if (c == '0') {
    233 		p->cur.p++;
    234 		if (!peek_char(&p->cur, &c)) {
    235 			note_error(&p->errs, "oob");
    236 			return 0;
    237 		}
    238 		if (c != '.' && (c < '0' || c > '9')) {
    239 			/* just a 0 */
    240 			return (*p->handlers.number)(&p->handlers, 0);
    241 		}
    242 		return parse_fraction(p);
    243 	} else if (c >= '1' && c <= '9') {
    244 		if (!parse_digits(p, &digits)) {
    245 			note_error(&p->errs, "digits");
    246 			return 0;
    247 		}
    248 
    249 		/* @generalize */
    250 		return (*p->handlers.number)(&p->handlers, digits * sign);
    251 	} else {
    252 		note_error(&p->errs, "expected '-', '0' or '1-9', got '%c'", c)
    253 		return 0;
    254 	}
    255 
    256 	note_error(&p->errs, "impossibru");
    257 	return 0;
    258 }
    259 
    260 static int parse_value(struct json_parser *p);
    261 
    262 static int parse_kv(struct json_parser *p)
    263 {
    264 	char c;
    265 	unsigned int *len;
    266 	c = 0;
    267 
    268 	if (!parse_key(p)) {
    269 		note_error(&p->errs, "key");
    270 		return 0;
    271 	}
    272 
    273 	consume_whitespace(&p->cur);
    274 
    275 	if (!parse_char(&p->cur, &c, ':')) {
    276 		note_error(&p->errs, "expected ':', got %c", c);
    277 		return 0;
    278 	}
    279 
    280 	if (!(*p->handlers.token)(&p->handlers, ':', &len)) {
    281 		note_error(&p->errs, "':' handler");
    282 		return 0;
    283 	}
    284 
    285 	return parse_value(p);
    286 }
    287 
    288 
    289 static int parse_object(struct json_parser *p)
    290 {
    291 	char c;
    292 	unsigned int *len;
    293 	c = 0;
    294 
    295 	if (!parse_char(&p->cur, &c, '{')) {
    296 		note_error(&p->errs, "expected '{' got '%c'", c);
    297 		return 0;
    298 	}
    299 
    300 	if (!(*p->handlers.token)(&p->handlers, '{', &len)) {
    301 		note_error(&p->errs, "obj handle open", c);
    302 		return 0;
    303 	}
    304 
    305 	while(1) {
    306 		consume_whitespace(&p->cur);
    307 
    308 		if (!peek_char(&p->cur, &c)) {
    309 			note_error(&p->errs, "oob");
    310 			return 0;
    311 		}
    312 
    313 		if (c == '}') {
    314 			p->cur.p++;
    315 			if (!(*p->handlers.token)(&p->handlers, '}', &len)) {
    316 				note_error(&p->errs, "'}' handler 1");
    317 				return 0;
    318 			}
    319 			return 1;
    320 		}
    321 
    322 		if (c != '"') {
    323 			note_error(&p->errs, "expected '\"' or '}', got '%c'", c);
    324 			return 0;
    325 		}
    326 
    327 		if (!parse_kv(p)) {
    328 			note_error(&p->errs, "expected key value pair");
    329 			return 0;
    330 		}
    331 
    332 		if (!peek_char(&p->cur, &c)) {
    333 			note_error(&p->errs, "oob");
    334 			return 0;
    335 		}
    336 
    337 		if (c == ',') {
    338 			if (!(*p->handlers.token)(&p->handlers, ',', &len)) {
    339 				note_error(&p->errs, "',' handler");
    340 				return 0;
    341 			}
    342 			p->cur.p++;
    343 			continue;
    344 		}
    345 
    346 		if (c == '}') {
    347 			p->cur.p++;
    348 			if (!(*p->handlers.token)(&p->handlers, '}', &len)) {
    349 				note_error(&p->errs, "'}' handler 2");
    350 				return 0;
    351 			}
    352 			return 1;
    353 		}
    354 
    355 		note_error(&p->errs, "expected ',' or '}', got '%c'", c);
    356 		return 0;
    357 	}
    358 
    359 	note_error(&p->errs, "impossibru");
    360 	return 0;
    361 }
    362 
    363 static int parse_array(struct json_parser *p)
    364 {
    365 	char c;
    366 	unsigned int *len;
    367 
    368 	c = 0;
    369 
    370 	if (!parse_char(&p->cur, &c, '[')) {
    371 		note_error(&p->errs, "expected '[', got '%c'", c);
    372 		return 0;
    373 	}
    374 
    375 	if (!(*p->handlers.token)(&p->handlers, '[', &len)) {
    376 		note_error(&p->errs, "array handle open", c);
    377 		return 0;
    378 	}
    379 
    380 	consume_whitespace(&p->cur);
    381 
    382 	/* empty array case */
    383 	if (!peek_char(&p->cur, &c)) {
    384 		note_error(&p->errs, "empty array peek oob");
    385 		return 0;
    386 	}
    387 
    388 	if (c == ']') {
    389 		p->cur.p++;
    390 		return (*p->handlers.token)(&p->handlers, ']', &len);
    391 	}
    392 
    393 	while(1) {
    394 		if (!parse_value(p)) {
    395 			note_error(&p->errs, "expected json value");
    396 			return 0;
    397 		}
    398 
    399 		if (!peek_char(&p->cur, &c)) {
    400 			note_error(&p->errs, "oob");
    401 			return 0;
    402 		}
    403 
    404 		if (c == ']') {
    405 			p->cur.p++;
    406 			return (*p->handlers.token)(&p->handlers, ']', &len);
    407 		}
    408 
    409 		if (c != ',') {
    410 			note_error(&p->errs, "expected ',', got '%c'", c);
    411 			return 0;
    412 		}
    413 
    414 		if (!(*p->handlers.token)(&p->handlers, ',', &len)) {
    415 			note_error(&p->errs, "']' handler");
    416 			return 0;
    417 		}
    418 
    419 		p->cur.p++;
    420 	}
    421 
    422 	return 0;
    423 }
    424 
    425 static inline int parse_bool(struct json_parser *p)
    426 {
    427 	if (parse_str(&p->cur, "true")) {
    428 		return (*p->handlers.boolean)(&p->handlers, 1);
    429 	}
    430 
    431 	if (parse_str(&p->cur, "false")) {
    432 		return (*p->handlers.boolean)(&p->handlers, 0);
    433 	}
    434 
    435 	return 0;
    436 }
    437 
    438 static int parse_null(struct json_parser *p)
    439 {
    440 	if (!parse_str(&p->cur, "null")) {
    441 		note_error(&p->errs, "not null");
    442 		return 0;
    443 	}
    444 
    445 	return (*p->handlers.null)(&p->handlers);
    446 }
    447 
    448 static int parse_value(struct json_parser *p)
    449 {
    450 	int res;
    451 
    452 	consume_whitespace(&p->cur);
    453 
    454 	p->errs.record = 0;
    455 
    456 	res = parse_string(p) ||
    457 	      parse_object(p) ||
    458 	      parse_number(p) ||
    459 	      parse_array(p)  ||
    460 	      parse_bool(p)   ||
    461 	      parse_null(p);
    462 
    463 	p->errs.record = 1;
    464 
    465 	if (!res) {
    466 		print_around(&p->cur, 20);
    467 		note_error(&p->errs, "couldn't parse json value");
    468 		return 0;
    469 	}
    470 
    471 	consume_whitespace(&p->cur);
    472 
    473 	return res;
    474 }
    475 
    476 
    477 static int parse_array_or_object(struct json_parser *p)
    478 {
    479 	char c;
    480 	consume_whitespace(&p->cur);
    481 
    482 	if (!peek_char(&p->cur, &c)) {
    483 		note_error(&p->errs, "oob");
    484 		return 0;
    485 	}
    486 
    487 	if (c == '{') {
    488 		return parse_object(p);
    489 	}
    490 
    491 	if (c == '[') {
    492 		return parse_array(p);
    493 	}
    494 
    495 	note_error(&p->errs, "expected '{' or '[', got '%c'", c);
    496 	return 0;
    497 }
    498 
    499 
    500 void copy_json_handlers(struct json_handlers *src, struct json_handlers *dst)
    501 {
    502 	dst->data = src->data;
    503 	dst->string = src->string;
    504 	dst->boolean = src->boolean;
    505 	dst->null = src->null;
    506 	dst->number = src->number;
    507 	dst->token = src->token;
    508 }
    509 
    510 int push_escaped(struct cursor *cur, unsigned char *text, int size)
    511 {
    512 	unsigned char *p;
    513 	for (p = text; p < text+size; p++) {
    514 		if (*p == '"') {
    515 			if (!push_str(cur, "\\\""))
    516 				return 0;
    517 			continue;
    518 		}
    519 
    520 		if (!push_byte(cur, *p)) {
    521 			return 0;
    522 		}
    523 	}
    524 
    525 	return 1;
    526 }
    527 
    528 int handle_string(struct json_handlers *h, struct json_strtok *str)
    529 {
    530 	struct json_pusher *p = (struct json_pusher*)h->data;
    531 
    532 	if (!push_byte(&p->cur, '"')) {
    533 		note_error(&p->errs, "oob");
    534 		return 0;
    535 	}
    536 
    537 	if (str->needs_escape) {
    538 		if (!push_escaped(&p->cur, str->text, str->size)) {
    539 			note_error(&p->errs, "oob");
    540 			return 0;
    541 		}
    542 	} else {
    543 		if (!push_data(&p->cur, str->text, str->size)) {
    544 			note_error(&p->errs, "oob");
    545 			return 0;
    546 		}
    547 	}
    548 
    549 	if (!push_byte(&p->cur, '"')) {
    550 		note_error(&p->errs, "oob");
    551 		return 0;
    552 	}
    553 
    554 	return 1;
    555 }
    556 
    557 int handle_bool(struct json_handlers *h, int val)
    558 {
    559 	struct json_pusher *p = (struct json_pusher*)h->data;
    560 	return push_str(&p->cur, val? "true" : "false");
    561 }
    562 
    563 int handle_null(struct json_handlers *h)
    564 {
    565 	struct json_pusher *p = (struct json_pusher*)h->data;
    566 	return push_data(&p->cur, (unsigned char*)"null", 4);
    567 }
    568 
    569 int handle_number(struct json_handlers *h, unsigned int number)
    570 {
    571 	struct json_pusher *p = (struct json_pusher*)h->data;
    572 	static char digits[16];
    573 	int c;
    574 
    575 	c = snprintf(digits, sizeof(digits), "%d", number);
    576 	return push_data(&p->cur, (unsigned char*)digits, c);
    577 }
    578 
    579 int handle_token(struct json_handlers *h, char token, unsigned int **len)
    580 {
    581 	struct json_pusher *p = (struct json_pusher*)h->data;
    582 	return push_byte(&p->cur, token);
    583 }
    584 
    585 void make_compact_handlers(struct json_handlers *h, struct json_pusher *out)
    586 {
    587 	h->data = out;
    588 	h->string = handle_string;
    589 	h->boolean = handle_bool;
    590 	h->null = handle_null;
    591 	h->number = handle_number;
    592 	h->token = handle_token;
    593 }
    594 
    595 void init_json_parser(struct json_parser *p, unsigned char *buf, size_t buf_size,
    596 		struct json_handlers *h)
    597 {
    598 	memset(p, 0, sizeof(*p));
    599 	make_cursor(buf, buf + buf_size, &p->cur);
    600 	init_errors(&p->errs);
    601 	copy_json_handlers(h, &p->handlers);
    602 }
    603 
    604 int parse_json(struct json_parser *p)
    605 {
    606 	return parse_array_or_object(p);
    607 }
    608