polyadvent

A game engine from scratch in C
git clone git://jb55.com/polyadvent
Log | Files | Refs | README

xml.c (10835B)


      1 #include <sys/types.h>
      2 
      3 #include <ctype.h>
      4 #include <errno.h>
      5 #include <limits.h>
      6 #include <stdio.h>
      7 #include <stdlib.h>
      8 #include <string.h>
      9 
     10 #include "xml.h"
     11 
     12 static void
     13 xml_parseattrs(struct xmlparser *x)
     14 {
     15 	size_t namelen = 0, valuelen;
     16 	int c, endsep, endname = 0, valuestart = 0;
     17 
     18 	while ((c = GETNEXT) != EOF) {
     19 		if (isspace(c)) {
     20 			if (namelen)
     21 				endname = 1;
     22 			continue;
     23 		} else if (c == '?')
     24 			; /* ignore */
     25 		else if (c == '=') {
     26 			x->name[namelen] = '\0';
     27 			valuestart = 1;
     28 			endname = 1;
     29 		} else if (namelen && ((endname && !valuestart && isalpha(c)) || (c == '>' || c == '/'))) {
     30 			/* attribute without value */
     31 			x->name[namelen] = '\0';
     32 			if (x->xmlattrstart)
     33 				x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
     34 			if (x->xmlattr)
     35 				x->xmlattr(x, x->tag, x->taglen, x->name, namelen, "", 0);
     36 			if (x->xmlattrend)
     37 				x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
     38 			endname = 0;
     39 			x->name[0] = c;
     40 			namelen = 1;
     41 		} else if (namelen && valuestart) {
     42 			/* attribute with value */
     43 			if (x->xmlattrstart)
     44 				x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
     45 
     46 			valuelen = 0;
     47 			if (c == '\'' || c == '"') {
     48 				endsep = c;
     49 			} else {
     50 				endsep = ' '; /* isspace() */
     51 				goto startvalue;
     52 			}
     53 
     54 			while ((c = GETNEXT) != EOF) {
     55 startvalue:
     56 				if (c == '&') { /* entities */
     57 					x->data[valuelen] = '\0';
     58 					/* call data function with data before entity if there is data */
     59 					if (valuelen && x->xmlattr)
     60 						x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
     61 					x->data[0] = c;
     62 					valuelen = 1;
     63 					while ((c = GETNEXT) != EOF) {
     64 						if (c == endsep || (endsep == ' ' && (c == '>' || isspace(c))))
     65 							break;
     66 						if (valuelen < sizeof(x->data) - 1)
     67 							x->data[valuelen++] = c;
     68 						else {
     69 							/* entity too long for buffer, handle as normal data */
     70 							x->data[valuelen] = '\0';
     71 							if (x->xmlattr)
     72 								x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
     73 							x->data[0] = c;
     74 							valuelen = 1;
     75 							break;
     76 						}
     77 						if (c == ';') {
     78 							x->data[valuelen] = '\0';
     79 							if (x->xmlattrentity)
     80 								x->xmlattrentity(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
     81 							valuelen = 0;
     82 							break;
     83 						}
     84 					}
     85 				} else if (c != endsep && !(endsep == ' ' && (c == '>' || isspace(c)))) {
     86 					if (valuelen < sizeof(x->data) - 1) {
     87 						x->data[valuelen++] = c;
     88 					} else {
     89 						x->data[valuelen] = '\0';
     90 						if (x->xmlattr)
     91 							x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
     92 						x->data[0] = c;
     93 						valuelen = 1;
     94 					}
     95 				}
     96 				if (c == endsep || (endsep == ' ' && (c == '>' || isspace(c)))) {
     97 					x->data[valuelen] = '\0';
     98 					if (x->xmlattr)
     99 						x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
    100 					if (x->xmlattrend)
    101 						x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
    102 					break;
    103 				}
    104 			}
    105 			namelen = endname = valuestart = 0;
    106 		} else if (namelen < sizeof(x->name) - 1) {
    107 			x->name[namelen++] = c;
    108 		}
    109 		if (c == '>') {
    110 			break;
    111 		} else if (c == '/') {
    112 			x->isshorttag = 1;
    113 			x->name[0] = '\0';
    114 			namelen = 0;
    115 		}
    116 	}
    117 }
    118 
    119 static void
    120 xml_parsecomment(struct xmlparser *x)
    121 {
    122 	size_t datalen = 0, i = 0;
    123 	int c;
    124 
    125 	if (x->xmlcommentstart)
    126 		x->xmlcommentstart(x);
    127 	while ((c = GETNEXT) != EOF) {
    128 		if (c == '-' || c == '>') {
    129 			if (x->xmlcomment && datalen) {
    130 				x->data[datalen] = '\0';
    131 				x->xmlcomment(x, x->data, datalen);
    132 				datalen = 0;
    133 			}
    134 		}
    135 
    136 		if (c == '-') {
    137 			if (++i > 2) {
    138 				if (x->xmlcomment)
    139 					for (; i > 2; i--)
    140 						x->xmlcomment(x, "-", 1);
    141 				i = 2;
    142 			}
    143 			continue;
    144 		} else if (c == '>' && i == 2) {
    145 			if (x->xmlcommentend)
    146 				x->xmlcommentend(x);
    147 			return;
    148 		} else if (i) {
    149 			if (x->xmlcomment) {
    150 				for (; i > 0; i--)
    151 					x->xmlcomment(x, "-", 1);
    152 			}
    153 			i = 0;
    154 		}
    155 
    156 		if (datalen < sizeof(x->data) - 1) {
    157 			x->data[datalen++] = c;
    158 		} else {
    159 			x->data[datalen] = '\0';
    160 			if (x->xmlcomment)
    161 				x->xmlcomment(x, x->data, datalen);
    162 			x->data[0] = c;
    163 			datalen = 1;
    164 		}
    165 	}
    166 }
    167 
    168 static void
    169 xml_parsecdata(struct xmlparser *x)
    170 {
    171 	size_t datalen = 0, i = 0;
    172 	int c;
    173 
    174 	if (x->xmlcdatastart)
    175 		x->xmlcdatastart(x);
    176 	while ((c = GETNEXT) != EOF) {
    177 		if (c == ']' || c == '>') {
    178 			if (x->xmlcdata && datalen) {
    179 				x->data[datalen] = '\0';
    180 				x->xmlcdata(x, x->data, datalen);
    181 				datalen = 0;
    182 			}
    183 		}
    184 
    185 		if (c == ']') {
    186 			if (++i > 2) {
    187 				if (x->xmlcdata)
    188 					for (; i > 2; i--)
    189 						x->xmlcdata(x, "]", 1);
    190 				i = 2;
    191 			}
    192 			continue;
    193 		} else if (c == '>' && i == 2) {
    194 			if (x->xmlcdataend)
    195 				x->xmlcdataend(x);
    196 			return;
    197 		} else if (i) {
    198 			if (x->xmlcdata)
    199 				for (; i > 0; i--)
    200 					x->xmlcdata(x, "]", 1);
    201 			i = 0;
    202 		}
    203 
    204 		if (datalen < sizeof(x->data) - 1) {
    205 			x->data[datalen++] = c;
    206 		} else {
    207 			x->data[datalen] = '\0';
    208 			if (x->xmlcdata)
    209 				x->xmlcdata(x, x->data, datalen);
    210 			x->data[0] = c;
    211 			datalen = 1;
    212 		}
    213 	}
    214 }
    215 
    216 static int
    217 codepointtoutf8(long r, char *s)
    218 {
    219 	if (r == 0) {
    220 		return 0; /* NUL byte */
    221 	} else if (r <= 0x7F) {
    222 		/* 1 byte: 0aaaaaaa */
    223 		s[0] = r;
    224 		return 1;
    225 	} else if (r <= 0x07FF) {
    226 		/* 2 bytes: 00000aaa aabbbbbb */
    227 		s[0] = 0xC0 | ((r & 0x0007C0) >>  6); /* 110aaaaa */
    228 		s[1] = 0x80 |  (r & 0x00003F);        /* 10bbbbbb */
    229 		return 2;
    230 	} else if (r <= 0xFFFF) {
    231 		/* 3 bytes: aaaabbbb bbcccccc */
    232 		s[0] = 0xE0 | ((r & 0x00F000) >> 12); /* 1110aaaa */
    233 		s[1] = 0x80 | ((r & 0x000FC0) >>  6); /* 10bbbbbb */
    234 		s[2] = 0x80 |  (r & 0x00003F);        /* 10cccccc */
    235 		return 3;
    236 	} else {
    237 		/* 4 bytes: 000aaabb bbbbcccc ccdddddd */
    238 		s[0] = 0xF0 | ((r & 0x1C0000) >> 18); /* 11110aaa */
    239 		s[1] = 0x80 | ((r & 0x03F000) >> 12); /* 10bbbbbb */
    240 		s[2] = 0x80 | ((r & 0x000FC0) >>  6); /* 10cccccc */
    241 		s[3] = 0x80 |  (r & 0x00003F);        /* 10dddddd */
    242 		return 4;
    243 	}
    244 }
    245 
    246 static int
    247 namedentitytostr(const char *e, char *buf, size_t bufsiz)
    248 {
    249 	static const struct {
    250 		const char *entity;
    251 		int c;
    252 	} entities[] = {
    253 		{ "amp;",  '&'  },
    254 		{ "lt;",   '<'  },
    255 		{ "gt;",   '>'  },
    256 		{ "apos;", '\'' },
    257 		{ "quot;", '"'  },
    258 		{ "AMP;",  '&'  },
    259 		{ "LT;",   '<'  },
    260 		{ "GT;",   '>'  },
    261 		{ "APOS;", '\'' },
    262 		{ "QUOT;", '"'  }
    263 	};
    264 	size_t i;
    265 
    266 	/* buffer is too small */
    267 	if (bufsiz < 2)
    268 		return -1;
    269 
    270 	for (i = 0; i < sizeof(entities) / sizeof(*entities); i++) {
    271 		if (!strcmp(e, entities[i].entity)) {
    272 			buf[0] = entities[i].c;
    273 			buf[1] = '\0';
    274 			return 1;
    275 		}
    276 	}
    277 	return 0;
    278 }
    279 
    280 static int
    281 numericentitytostr(const char *e, char *buf, size_t bufsiz)
    282 {
    283 	long l;
    284 	int len;
    285 	char *end;
    286 
    287 	/* buffer is too small */
    288 	if (bufsiz < 5)
    289 		return -1;
    290 
    291 	errno = 0;
    292 	/* hex (16) or decimal (10) */
    293 	if (*e == 'x')
    294 		l = strtoul(e + 1, &end, 16);
    295 	else
    296 		l = strtoul(e, &end, 10);
    297 	/* invalid value or not a well-formed entity or too high codepoint */
    298 	if (errno || *end != ';' || l > 0x10FFFF)
    299 		return 0;
    300 	len = codepointtoutf8(l, buf);
    301 	buf[len] = '\0';
    302 
    303 	return len;
    304 }
    305 
    306 /* convert named- or numeric entity string to buffer string
    307  * returns byte-length of string. */
    308 int
    309 xml_entitytostr(const char *e, char *buf, size_t bufsiz)
    310 {
    311 	/* doesn't start with & */
    312 	if (e[0] != '&')
    313 		return 0;
    314 	/* numeric entity */
    315 	if (e[1] == '#')
    316 		return numericentitytostr(e + 2, buf, bufsiz);
    317 	else /* named entity */
    318 		return namedentitytostr(e + 1, buf, bufsiz);
    319 }
    320 
    321 void
    322 xml_parse(struct xmlparser *x)
    323 {
    324 	size_t datalen, tagdatalen;
    325 	int c, isend;
    326 
    327 	while ((c = GETNEXT) != EOF && c != '<')
    328 		; /* skip until < */
    329 
    330 	while (c != EOF) {
    331 		if (c == '<') { /* parse tag */
    332 			if ((c = GETNEXT) == EOF)
    333 				return;
    334 
    335 			if (c == '!') { /* cdata and comments */
    336 				for (tagdatalen = 0; (c = GETNEXT) != EOF;) {
    337 					/* NOTE: sizeof(x->data) must be atleast sizeof("[CDATA[") */
    338 					if (tagdatalen <= sizeof("[CDATA[") - 1)
    339 						x->data[tagdatalen++] = c;
    340 					if (c == '>')
    341 						break;
    342 					else if (c == '-' && tagdatalen == sizeof("--") - 1 &&
    343 							(x->data[0] == '-')) {
    344 						xml_parsecomment(x);
    345 						break;
    346 					} else if (c == '[') {
    347 						if (tagdatalen == sizeof("[CDATA[") - 1 &&
    348 						    !strncmp(x->data, "[CDATA[", tagdatalen)) {
    349 							xml_parsecdata(x);
    350 							break;
    351 						}
    352 					}
    353 				}
    354 			} else {
    355 				/* normal tag (open, short open, close), processing instruction. */
    356 				x->tag[0] = c;
    357 				x->taglen = 1;
    358 				x->isshorttag = isend = 0;
    359 
    360 				/* treat processing instruction as shorttag, don't strip "?" prefix. */
    361 				if (c == '?') {
    362 					x->isshorttag = 1;
    363 				} else if (c == '/') {
    364 					if ((c = GETNEXT) == EOF)
    365 						return;
    366 					x->tag[0] = c;
    367 					isend = 1;
    368 				}
    369 
    370 				while ((c = GETNEXT) != EOF) {
    371 					if (c == '/')
    372 						x->isshorttag = 1; /* short tag */
    373 					else if (c == '>' || isspace(c)) {
    374 						x->tag[x->taglen] = '\0';
    375 						if (isend) { /* end tag, starts with </ */
    376 							if (x->xmltagend)
    377 								x->xmltagend(x, x->tag, x->taglen, x->isshorttag);
    378 							x->tag[0] = '\0';
    379 							x->taglen = 0;
    380 						} else {
    381 							/* start tag */
    382 							if (x->xmltagstart)
    383 								x->xmltagstart(x, x->tag, x->taglen);
    384 							if (isspace(c))
    385 								xml_parseattrs(x);
    386 							if (x->xmltagstartparsed)
    387 								x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag);
    388 						}
    389 						/* call tagend for shortform or processing instruction */
    390 						if (x->isshorttag) {
    391 							if (x->xmltagend)
    392 								x->xmltagend(x, x->tag, x->taglen, x->isshorttag);
    393 							x->tag[0] = '\0';
    394 							x->taglen = 0;
    395 						}
    396 						break;
    397 					} else if (x->taglen < sizeof(x->tag) - 1)
    398 						x->tag[x->taglen++] = c; /* NOTE: tag name truncation */
    399 				}
    400 			}
    401 		} else {
    402 			/* parse tag data */
    403 			datalen = 0;
    404 			if (x->xmldatastart)
    405 				x->xmldatastart(x);
    406 			while ((c = GETNEXT) != EOF) {
    407 				if (c == '&') {
    408 					if (datalen) {
    409 						x->data[datalen] = '\0';
    410 						if (x->xmldata)
    411 							x->xmldata(x, x->data, datalen);
    412 					}
    413 					x->data[0] = c;
    414 					datalen = 1;
    415 					while ((c = GETNEXT) != EOF) {
    416 						if (c == '<')
    417 							break;
    418 						if (datalen < sizeof(x->data) - 1)
    419 							x->data[datalen++] = c;
    420 						else {
    421 							/* entity too long for buffer, handle as normal data */
    422 							x->data[datalen] = '\0';
    423 							if (x->xmldata)
    424 								x->xmldata(x, x->data, datalen);
    425 							x->data[0] = c;
    426 							datalen = 1;
    427 							break;
    428 						}
    429 						if (c == ';') {
    430 							x->data[datalen] = '\0';
    431 							if (x->xmldataentity)
    432 								x->xmldataentity(x, x->data, datalen);
    433 							datalen = 0;
    434 							break;
    435 						}
    436 					}
    437 				} else if (c != '<') {
    438 					if (datalen < sizeof(x->data) - 1) {
    439 						x->data[datalen++] = c;
    440 					} else {
    441 						x->data[datalen] = '\0';
    442 						if (x->xmldata)
    443 							x->xmldata(x, x->data, datalen);
    444 						x->data[0] = c;
    445 						datalen = 1;
    446 					}
    447 				}
    448 				if (c == '<') {
    449 					x->data[datalen] = '\0';
    450 					if (x->xmldata && datalen)
    451 						x->xmldata(x, x->data, datalen);
    452 					if (x->xmldataend)
    453 						x->xmldataend(x);
    454 					break;
    455 				}
    456 			}
    457 		}
    458 	}
    459 }