nostrdb

an unfairly fast embedded nostr database backed by lmdb
git clone git://jb55.com/nostrdb
Log | Files | Refs | Submodules | README | LICENSE

utf8.h (1667B)


      1 /* MIT (BSD) license - see LICENSE file for details */
      2 #ifndef CCAN_UTF8_H
      3 #define CCAN_UTF8_H
      4 #include <inttypes.h>
      5 #include <stdbool.h>
      6 #include <string.h>
      7 
      8 /* Unicode is limited to 21 bits, so one code point needs at most 4 UTF-8 chars. */
      9 #define UTF8_MAX_LEN    4
     10 
     11 struct utf8_state {
     12     /* Total number of chars (bytes) expected for the Unicode point in progress. */
     13     uint16_t total_len;
     14     /* How many of those chars have been consumed so far. */
     15     uint16_t used_len;
     16     /* The compound character (Unicode point); callers read the result here. */
     17     uint32_t c;
     18 };
     19 
     20 #define UTF8_STATE_INIT { 0, 0, 0 } /* total_len, used_len and c all zero */
     21 
     22 static inline void utf8_state_init(struct utf8_state *utf8_state)
     23 {
     24     *utf8_state = (struct utf8_state){ 0, 0, 0 }; /* zero total_len, used_len and c */
     25 }
     26 
     27 /**
     28  * utf8_decode - continue UTF8 decoding with this character.
     29  * @utf8_state - initialized UTF8 state.
     30  * @c - the next input char of the UTF8 sequence.
     31  *
     32  * Returns false if it needs another character to give results.
     33  * Otherwise returns true, @utf8_state can be reused without reinitialization,
     34  * and sets errno (from <errno.h>, which this header does not include itself):
     35  * 0: success
     36  * EINVAL: bad encoding (including a NUL character).
     37  * EFBIG: not a minimal encoding.
     38  * ERANGE: encoding of invalid character.
     39  *
     40  * You can extract the character from @utf8_state->c; @utf8_state->used_len
     41  * indicates how many characters have been consumed.
     42  */
     43 bool utf8_decode(struct utf8_state *utf8_state, char c);
     44 
     45 /**
     46  * utf8_encode - encode a point into UTF8.
     47  * @point - Unicode point to encode.
     48  * @dest - buffer to fill; declared to hold UTF8_MAX_LEN chars.
     49  *
     50  * Returns 0 if point was invalid, otherwise bytes of dest used.
     51  * Sets errno to ERANGE if point was invalid.
     52  */
     53 size_t utf8_encode(uint32_t point, char dest[UTF8_MAX_LEN]);
     54 
     55 /* Check for valid UTF-8 in the buffer @vbuf (@buflen is presumably its length in bytes -- confirm in the implementation). */
     56 bool utf8_check(const void *vbuf, size_t buflen);
     57 #endif /* CCAN_UTF8_H */