util.c (1683B)
1 /* See LICENSE file for copyright and license details. */ 2 #include <stdarg.h> 3 #include <stdio.h> 4 #include <stdlib.h> 5 #include <string.h> 6 7 #include "util.h" 8 9 static const unsigned char utfbyte[UTF_SIZ + 1] = {0x80, 0, 0xC0, 0xE0, 0xF0}; 10 static const unsigned char utfmask[UTF_SIZ + 1] = {0xC0, 0x80, 0xE0, 0xF0, 0xF8}; 11 static const long utfmin[UTF_SIZ + 1] = { 0, 0, 0x80, 0x800, 0x10000}; 12 static const long utfmax[UTF_SIZ + 1] = {0x10FFFF, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF}; 13 14 void * 15 ecalloc(size_t nmemb, size_t size) 16 { 17 void *p; 18 19 if (!(p = calloc(nmemb, size))) 20 die("calloc:"); 21 return p; 22 } 23 24 void 25 die(const char *fmt, ...) { 26 va_list ap; 27 28 va_start(ap, fmt); 29 vfprintf(stderr, fmt, ap); 30 va_end(ap); 31 32 if (fmt[0] && fmt[strlen(fmt)-1] == ':') { 33 fputc(' ', stderr); 34 perror(NULL); 35 } else { 36 fputc('\n', stderr); 37 } 38 39 exit(1); 40 } 41 42 long 43 utf8decodebyte(const char c, size_t *i) 44 { 45 for (*i = 0; *i < (UTF_SIZ + 1); ++(*i)) 46 if (((unsigned char)c & utfmask[*i]) == utfbyte[*i]) 47 return (unsigned char)c & ~utfmask[*i]; 48 return 0; 49 } 50 51 size_t 52 utf8validate(long *u, size_t i) 53 { 54 if (!BETWEEN(*u, utfmin[i], utfmax[i]) || BETWEEN(*u, 0xD800, 0xDFFF)) 55 *u = UTF_INVALID; 56 for (i = 1; *u > utfmax[i]; ++i) 57 ; 58 return i; 59 } 60 61 size_t 62 utf8decode(const char *c, long *u, size_t clen) 63 { 64 size_t i, j, len, type; 65 long udecoded; 66 67 *u = UTF_INVALID; 68 if (!clen) 69 return 0; 70 udecoded = utf8decodebyte(c[0], &len); 71 if (!BETWEEN(len, 1, UTF_SIZ)) 72 return 1; 73 for (i = 1, j = 1; i < clen && j < len; ++i, ++j) { 74 udecoded = (udecoded << 6) | utf8decodebyte(c[i], &type); 75 if (type) 76 return j; 77 } 78 if (j < len) 79 return 0; 80 *u = udecoded; 81 utf8validate(u, len); 82 83 return len; 84 }