nostrdb

an unfairly fast embedded nostr database backed by lmdb
git clone git://jb55.com/nostrdb
Log | Files | Refs | Submodules | README | LICENSE

commit 86ae238edc913f825359bd9f9da0b0682c114337
parent 9c2c4b9cc90c0d417f026d72b5700e630445327f
Author: William Casarin <jb55@jb55.com>
Date:   Fri, 24 Oct 2025 11:15:23 -0700

binmoji: add to project

this is needed for the metadata table

Signed-off-by: William Casarin <jb55@jb55.com>

Diffstat:
M Makefile | 2 +-
A src/binmoji.c | 278 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/binmoji.h | 34 ++++++++++++++++++++++++++++++++++
A src/binmoji_table.h | 169 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 482 insertions(+), 1 deletion(-)

/*
 * binmoji: pack an emoji sequence into a single 64-bit identifier.
 *
 * This listing is the payload of the patch, laid out in inclusion order so
 * that it reads (and compiles) as one unit: src/binmoji.h, the preamble of
 * src/binmoji.c, src/binmoji_table.h, then the rest of src/binmoji.c.
 *
 * The patch also changes the Makefile so that src/binmoji.c is built:
 *
 * diff --git a/Makefile b/Makefile
 * @@ -5,7 +5,7 @@ CCAN_HDRS := ccan/ccan/utf8/utf8.h ccan/ccan/container_of/container_of.h ccan/cc
 *  HEADERS = deps/lmdb/lmdb.h deps/secp256k1/include/secp256k1.h src/nostrdb.h src/cursor.h src/hex.h src/jsmn.h src/config.h src/random.h src/memchr.h src/cpu.h src/nostr_bech32.h src/block.h src/str_block.h src/print_util.h $(C_BINDINGS) $(CCAN_HDRS) $(BOLT11_HDRS)
 *  FLATCC_SRCS=deps/flatcc/src/runtime/json_parser.c deps/flatcc/src/runtime/verifier.c deps/flatcc/src/runtime/builder.c deps/flatcc/src/runtime/emitter.c deps/flatcc/src/runtime/refmap.c
 *  BOLT11_SRCS = src/bolt11/bolt11.c src/bolt11/bech32.c src/bolt11/amount.c src/bolt11/hash_u5.c
 * -SRCS = src/nostrdb.c src/invoice.c src/nostr_bech32.c src/content_parser.c src/block.c $(BOLT11_SRCS) $(FLATCC_SRCS) $(CCAN_SRCS)
 * +SRCS = src/nostrdb.c src/invoice.c src/nostr_bech32.c src/content_parser.c src/block.c src/binmoji.c $(BOLT11_SRCS) $(FLATCC_SRCS) $(CCAN_SRCS)
 *  LDS = $(OBJS) $(ARS)
 *  OBJS = $(SRCS:.c=.o)
 *  DEPS = $(OBJS) $(HEADERS) $(ARS)
 */

/* ------------------------------------------------------------------ */
/* src/binmoji.h                                                      */
/* ------------------------------------------------------------------ */

#ifndef BINMOJI_H
#define BINMOJI_H

#include <stdint.h>
#include <stdlib.h>

/**
 * @brief Decomposed form of an emoji sequence.
 *
 * primary_codepoint  first code point that is neither a skin tone modifier
 *                    (U+1F3FB..U+1F3FF) nor a zero width joiner (U+200D)
 * component_list     the following such code points, in order (at most 16)
 * component_count    number of valid entries in component_list
 * component_hash     CRC of component_list[0..component_count), 0 if empty
 * skin_tone1/2       0 for none, otherwise 1..5 for U+1F3FB..U+1F3FF
 * flags              low 4 bits are stored in the encoded identifier
 */
struct binmoji {
	uint32_t primary_codepoint;
	uint32_t component_list[16];
	size_t component_count;
	uint32_t component_hash;
	uint8_t skin_tone1;
	uint8_t skin_tone2;
	uint8_t flags;
};

/* Bit 3 of the encoded identifier, i.e. the top bit of the 4-bit flags field. */
static const uint64_t USER_FLAG_MASK = 1 << 3;

void binmoji_to_string(const struct binmoji *binmoji, char *out_str, size_t out_str_size);
void binmoji_decode(uint64_t id, struct binmoji *binmoji);
void binmoji_parse(const char *emoji, struct binmoji *binmoji);
uint64_t binmoji_encode(const struct binmoji *binmoji);

/* some user flag helpers */

/** @brief Return @p binmoji with the user flag bit set (enable != 0) or cleared. */
static __inline uint64_t binmoji_set_user_flag(uint64_t binmoji, uint8_t enable) {
	return enable ? (binmoji | USER_FLAG_MASK) : (binmoji & ~USER_FLAG_MASK);
}

/** @brief Return 1 if the user flag bit is set in @p binmoji, otherwise 0. */
static __inline uint8_t binmoji_get_user_flag(uint64_t binmoji) {
	return (binmoji & USER_FLAG_MASK) == USER_FLAG_MASK;
}

#endif /* BINMOJI_H */

/* ------------------------------------------------------------------ */
/* src/binmoji.c (preamble)                                           */
/* ------------------------------------------------------------------ */

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* In the source tree this is where `#include "binmoji.h"` appears. */

/*
 * Layout of the 64-bit identifier, most significant bits first:
 *   [63..42] primary code point (22 bits)
 *   [41..10] component hash     (32 bits)
 *   [ 9.. 7] skin tone 1        (3 bits)
 *   [ 6.. 4] skin tone 2        (3 bits)
 *   [ 3.. 0] flags              (4 bits)
 */
#define PRIMARY_CP_SHIFT 42
#define HASH_SHIFT 10
#define TONE1_SHIFT 7
#define TONE2_SHIFT 4
#define FLAGS_SHIFT 0

#define PRIMARY_CP_MASK 0x3FFFFF
#define HASH_MASK 0xFFFFFFFF
#define TONE_MASK 0x7
#define FLAGS_MASK 0xF

/* Skin tone modifiers occupy U+1F3FB..U+1F3FF and are stored as 1..5. */
#define SKIN_TONE_FIRST 0x1F3FB
#define SKIN_TONE_LAST 0x1F3FF
#define SKIN_TONE_COUNT (SKIN_TONE_LAST - SKIN_TONE_FIRST + 1)

/* One row of the reverse lookup table: component hash -> component list. */
typedef struct {
	uint32_t hash;
	size_t count;
	uint32_t components[16];
} EmojiHashEntry;

/* ------------------------------------------------------------------ */
/* src/binmoji_table.h                                                */
/* (in the source tree: `#include "binmoji_table.h"`)                 */
/* ------------------------------------------------------------------ */

#ifndef EMOJI_HASH_TABLE_H
#define EMOJI_HASH_TABLE_H

#include <stdint.h>

/* This file is auto-generated by generate_hash_table.py */

/* Rows are in ascending hash order; the lookup below binary-searches them. */
static const EmojiHashEntry binmoji_table[] = {
	{0x009D7FB7, 1, {0x1F1E9}},
	{0x03827B8D, 2, {0xFE0F, 0x2642}},
	{0x03A767C3, 1, {0x2194}},
	{0x045C6200, 1, {0x1F1E8}},
	{0x07667A74, 1, {0x2195}},
	{0x08C7368D, 2, {0x1F9BD, 0x27A1}},
	{0x091F44D9, 1, {0x1F1EB}},
	{0x0A0040E3, 2, {0xFE0F, 0x2640}},
	{0x0A4D693F, 2, {0x26A7, 0xFE0F}},
	{0x0DDE596E, 1, {0x1F1EA}},
	{0x0FF6A5EC, 1, {0x1F52C}},
	{0x1035EC63, 2, {0x2194, 0xFE0F}},
	{0x10E7495E, 3, {0x1F469, 0x1F467, 0x1F466}},
	{0x11C8318C, 2, {0xFE0F, 0x1F525}},
	{0x11CE3159, 3, {0x1F9D1, 0x1F9D2, 0x1F9D2}},
	{0x12E2D1B2, 3, {0x2642, 0x27A1, 0xFE0F}},
	{0x1399096B, 1, {0x1F1ED}},
	{0x13BA8F92, 2, {0xFE0F, 0x20E3}},
	{0x142654E9, 3, {0x1F469, 0x1F467, 0x1F467}},
	{0x175814DC, 1, {0x1F1EC}},
	{0x17ADDABC, 2, {0x1F5E8, 0xFE0F}},
	{0x18E0A986, 2, {0x2695, 0xFE0F}},
	{0x190FF79B, 2, {0x1F9AF, 0x27A1}},
	{0x1A1B3205, 1, {0x1F1EF}},
	{0x1AD4A79F, 2, {0x1F467, 0x1F467}},
	{0x1B706904, 4, {0x2764, 0xFE0F, 0x1F48B, 0x1F9D1}},
	{0x1E15BA28, 2, {0x1F467, 0x1F466}},
	{0x1EDA2FB2, 1, {0x1F1EE}},
	{0x24BD6E8D, 1, {0x1F527}},
	{0x253E99D4, 4, {0x2642, 0xFE0F, 0x27A1, 0xFE0F}},
	{0x2586DE9E, 1, {0x27A1}},
	{0x2D3F55E3, 1, {0x1F525}},
	{0x2DE4D231, 2, {0x2642, 0x27A1}},
	{0x2F4DEBFD, 1, {0x20E3}},
	{0x30A9040E, 1, {0x2695}},
	{0x36F7248C, 1, {0x2640}},
	{0x38D2C20A, 1, {0x1F1E6}},
	{0x3C13DFBD, 1, {0x1F1E7}},
	{0x3C5FF473, 2, {0x1F9D1, 0x1F9D2}},
	{0x3D07A1FC, 1, {0x1F384}},
	{0x3DEA22D7, 1, {0x2696}},
	{0x3F751FE2, 1, {0x2642}},
	{0x417843A9, 1, {0x1F5E8}},
	{0x41CA5100, 2, {0x1F9BC, 0x27A1}},
	{0x41CF821E, 1, {0x1F1FA}},
	{0x450E9FA9, 1, {0x1F1FB}},
	{0x484DB970, 1, {0x1F1F8}},
	{0x48D80AE6, 3, {0x2640, 0xFE0F, 0x27A1}},
	{0x4C8CA4C7, 1, {0x1F1F9}},
	{0x4F9091D9, 3, {0x1F9BD, 0x27A1, 0xFE0F}},
	{0x52CBF4C2, 1, {0x1F1FE}},
	{0x560AE975, 1, {0x1F1FF}},
	{0x5718DDA5, 2, {0x1F466, 0x1F466}},
	{0x58A121D3, 2, {0x1F430, 0x1F469}},
	{0x58AC2E7A, 2, {0x2764, 0x1F468}},
	{0x59388BEE, 2, {0x2195, 0xFE0F}},
	{0x59EA2ED3, 3, {0x1F469, 0x1F466, 0x1F466}},
	{0x5B49CFAC, 1, {0x1F1FC}},
	{0x5B5F53CD, 2, {0x1F91D, 0x1F9D1}},
	{0x5C603C64, 2, {0x1F430, 0x1F468}},
	{0x5C6D33CD, 2, {0x2764, 0x1F469}},
	{0x5DF0BE19, 2, {0xFE0F, 0x1F308}},
	{0x5F88D21B, 1, {0x1F1FD}},
	{0x60EA7029, 3, {0x2764, 0xFE0F, 0x1F9D1}},
	{0x6107DA76, 1, {0x1F308}},
	{0x63067211, 1, {0x1F1F3}},
	{0x63ED85D3, 1, {0xFE0F}},
	{0x67C76FA6, 1, {0x1F1F2}},
	{0x68B58497, 3, {0xFE0F, 0x2640, 0xFE0F}},
	{0x6A84497F, 1, {0x1F1F1}},
	{0x6B8E2FD6, 3, {0x2764, 0x1F48B, 0x1F469}},
	{0x6B8F897C, 2, {0x2642, 0xFE0F}},
	{0x6E4554C8, 1, {0x1F1F0}},
	{0x6F4F3261, 3, {0x2764, 0x1F48B, 0x1F468}},
	{0x6F512A89, 1, {0x1F393}},
	{0x6FD4811E, 2, {0x1FAEF, 0x1F9D1}},
	{0x700204CD, 1, {0x1F1F7}},
	{0x74C3197A, 1, {0x1F1F6}},
	{0x75AC9A14, 1, {0x1F9D2}},
	{0x785EE7CD, 1, {0x2B1B}},
	{0x79803FA3, 1, {0x1F1F5}},
	{0x7D412214, 1, {0x1F1F4}},
	{0x7D8F27C6, 2, {0xFE0F, 0x1F5E8}},
	{0x868D184D, 3, {0xFE0F, 0x1F5E8, 0xFE0F}},
	{0x90288C78, 1, {0x1F3A8}},
	{0x92B7B14D, 1, {0x1F37C}},
	{0x9499EDEC, 6, {0xE0067, 0xE0062, 0xE0073, 0xE0063, 0xE0074, 0xE007F}},
	{0x962A74D1, 1, {0x1F4A5}},
	{0x996BD75C, 6, {0xE0067, 0xE0062, 0xE0077, 0xE006C, 0xE0073, 0xE007F}},
	{0x99F2B752, 4, {0x2764, 0xFE0F, 0x1F48B, 0x1F468}},
	{0x9A52E21B, 1, {0x2620}},
	{0x9B6DABCE, 3, {0xFE0F, 0x26A7, 0xFE0F}},
	{0x9D1458D9, 2, {0x1F32B, 0xFE0F}},
	{0x9D33AAE5, 4, {0x2764, 0xFE0F, 0x1F48B, 0x1F469}},
	{0x9F647327, 1, {0x1FAF2}},
	{0xA0DE8B8D, 2, {0x1F468, 0x1F467}},
	{0xA38B28DE, 2, {0x27A1, 0xFE0F}},
	{0xA41F963A, 2, {0x1F468, 0x1F466}},
	{0xA4D50120, 1, {0x1F9AF}},
	{0xA524171C, 1, {0x1F3A4}},
	{0xA59E4F46, 2, {0x2708, 0xFE0F}},
	{0xA7E7F202, 1, {0x1F4A8}},
	{0xA818439D, 3, {0x2642, 0xFE0F, 0x27A1}},
	{0xA9EDDAD6, 1, {0x1F3EB}},
	{0xAAA4D4DB, 1, {0x1F4AB}},
	{0xAADD10BE, 2, {0xFE0F, 0x1F4A5}},
	{0xAAF80CF0, 1, {0x1F373}},
	{0xABEA84B5, 3, {0x1F468, 0x1F466, 0x1F466}},
	{0xAFB3FA54, 1, {0x1F33E}},
	{0xB36B9764, 1, {0x1F3ED}},
	{0xB715C37F, 3, {0x1F9AF, 0x27A1, 0xFE0F}},
	{0xB93E3755, 1, {0x1F680}},
	{0xB9CF624F, 6, {0xE0067, 0xE0062, 0xE0065, 0xE006E, 0xE0067, 0xE007F}},
	{0xBD903BBF, 3, {0x1F9BC, 0x27A1, 0xFE0F}},
	{0xBFFE1D2B, 2, {0x2640, 0x27A1}},
	{0xC222DF26, 1, {0x1F467}},
	{0xC249C89D, 2, {0x2744, 0xFE0F}},
	{0xC3F70111, 2, {0x2696, 0xFE0F}},
	{0xC6E3C291, 1, {0x1F466}},
	{0xD08B67ED, 1, {0x1F9B0}},
	{0xD1EE369F, 2, {0xFE0F, 0x26A7}},
	{0xD44A7A5A, 1, {0x1F9B1}},
	{0xD9095C83, 1, {0x1F9B2}},
	{0xD9DD8D9B, 2, {0x1F91D, 0x1F468}},
	{0xDA2EF02C, 2, {0x2764, 0x1F9D1}},
	{0xDD1C902C, 2, {0x1F91D, 0x1F469}},
	{0xDD2358A8, 1, {0x1FA79}},
	{0xDDC84134, 1, {0x1F9B3}},
	{0xDEE2E232, 2, {0x1F430, 0x1F9D1}},
	{0xE146E13E, 1, {0x1F9BD}},
	{0xE1D43CC7, 2, {0xFE0F, 0x1FA79}},
	{0xE268AE7F, 3, {0x2764, 0xFE0F, 0x1F468}},
	{0xE2E7E338, 3, {0x1F468, 0x1F467, 0x1F466}},
	{0xE54BC011, 1, {0x1F7E9}},
	{0xE587FC89, 1, {0x1F9BC}},
	{0xE626FE8F, 3, {0x1F468, 0x1F467, 0x1F467}},
	{0xE6A9B3C8, 3, {0x2764, 0xFE0F, 0x1F469}},
	{0xE6B50FAB, 1, {0x1F4BB}},
	{0xE7485CE4, 2, {0x1F9D2, 0x1F9D2}},
	{0xE99742FF, 2, {0x1FAEF, 0x1F469}},
	{0xE9D3EC00, 2, {0x1F469, 0x1F467}},
	{0xECC9FB7F, 1, {0x1F7EB}},
	{0xED12F1B7, 2, {0x1F469, 0x1F466}},
	{0xED1952F0, 1, {0x26A7}},
	{0xED565F48, 2, {0x1FAEF, 0x1F468}},
	{0xEDCDEC37, 3, {0x2764, 0x1F48B, 0x1F9D1}},
	{0xF0B72869, 4, {0x2640, 0xFE0F, 0x27A1, 0xFE0F}},
	{0xF22298C9, 3, {0x2640, 0x27A1, 0xFE0F}},
	{0xF4674A4F, 1, {0x1F32B}},
	{0xF660789F, 1, {0x2708}},
	{0xF7EA938C, 1, {0x2744}},
	{0xF8F25FAE, 1, {0x1F4BC}},
	{0xF9954666, 2, {0x2640, 0xFE0F}},
	{0xFAAF4B8D, 3, {0xFE0F, 0x2642, 0xFE0F}},
	{0xFC3A4497, 2, {0x2620, 0xFE0F}},
	{0xFCADD74B, 1, {0x1F692}},
	{0xFF01B13B, 1, {0x1F9BA}},
	{0xFFEAA8A7, 1, {0x1FA70}},
};

#endif /* EMOJI_HASH_TABLE_H */

/* ------------------------------------------------------------------ */
/* src/binmoji.c (continued)                                          */
/* ------------------------------------------------------------------ */

const size_t num_hash_entries =
	sizeof(binmoji_table) / sizeof(binmoji_table[0]);

/**
 * @brief Bitwise CRC over an array of 32-bit words.
 *
 * Each word is fed most significant bit first through the polynomial
 * 0x04C11DB7, starting from 0xFFFFFFFF, with no final XOR. The values in
 * binmoji_table were produced with the same function, so the algorithm must
 * not change independently of the table.
 *
 * @param data   Words to hash.
 * @param length Number of words in @p data.
 * @return The CRC, or 0 when @p data is NULL or @p length is 0.
 */
static uint32_t crc32(const uint32_t *data, size_t length)
{
	uint32_t item, bit, crc = 0xFFFFFFFF;
	size_t i;
	int j;

	if (data == NULL || length == 0)
		return 0;
	for (i = 0; i < length; ++i) {
		item = data[i];
		for (j = 0; j < 32; ++j) {
			bit = (item >> (31 - j)) & 1;
			if ((crc >> 31) ^ bit) {
				crc = (crc << 1) ^ 0x04C11DB7;
			} else {
				crc = (crc << 1);
			}
		}
	}
	return crc;
}

/**
 * @brief Return 1 unless @p codepoint is a skin tone modifier or a ZWJ.
 */
static int is_base_emoji(uint32_t codepoint)
{
	if (codepoint >= SKIN_TONE_FIRST && codepoint <= SKIN_TONE_LAST)
		return 0;
	if (codepoint == 0x200D) /* Zero Width Joiner */
		return 0;
	return 1;
}

/**
 * @brief Decode one UTF-8 sequence from a NUL-terminated string.
 *
 * Every continuation byte is checked for the 10xxxxxx pattern before it is
 * consumed. A NUL terminator never matches that pattern, so a sequence cut
 * short by the end of the string is rejected instead of being read past.
 *
 * @param s   Points at the first byte of the sequence; *s must not be NUL.
 * @param out Receives the decoded code point on success.
 * @return Number of bytes consumed (1..4), or 0 if the lead byte is not a
 *         valid lead or a continuation byte is missing.
 */
static int decode_utf8(const unsigned char *s, uint32_t *out)
{
	uint32_t cp;
	int len, i;

	if (s[0] < 0x80) {
		*out = s[0];
		return 1;
	} else if ((s[0] & 0xE0) == 0xC0) {
		len = 2;
		cp = s[0] & 0x1F;
	} else if ((s[0] & 0xF0) == 0xE0) {
		len = 3;
		cp = s[0] & 0x0F;
	} else if ((s[0] & 0xF8) == 0xF0) {
		len = 4;
		cp = s[0] & 0x07;
	} else {
		return 0;
	}

	for (i = 1; i < len; i++) {
		if ((s[i] & 0xC0) != 0x80)
			return 0;
		cp = (cp << 6) | (s[i] & 0x3F);
	}

	*out = cp;
	return len;
}

/**
 * @brief Split a UTF-8 emoji string into its binmoji parts.
 *
 * Skin tone modifiers fill skin_tone1 then skin_tone2 (further ones are
 * dropped), zero width joiners are discarded, the first remaining code point
 * becomes primary_codepoint and the next ones (up to 16) go to
 * component_list. component_hash is the CRC of the component list.
 *
 * Bytes that do not start a well-formed UTF-8 sequence are skipped one at a
 * time.
 *
 * @param emoji_str NUL-terminated UTF-8 input. NULL yields a zeroed result.
 * @param binmoji   Output; always reset first. NULL makes the call a no-op.
 */
void binmoji_parse(const char *emoji_str, struct binmoji *binmoji)
{
	const unsigned char *s;

	if (!binmoji)
		return;
	memset(binmoji, 0, sizeof(struct binmoji));
	if (!emoji_str)
		return;
	s = (const unsigned char *)emoji_str;

	while (*s) {
		uint32_t codepoint = 0;
		int len = decode_utf8(s, &codepoint);

		if (len == 0) {
			s++;
			continue;
		}
		s += len;

		if (codepoint >= SKIN_TONE_FIRST &&
		    codepoint <= SKIN_TONE_LAST) {
			uint8_t tone_val = (codepoint - SKIN_TONE_FIRST) + 1;
			if (binmoji->skin_tone1 == 0)
				binmoji->skin_tone1 = tone_val;
			else if (binmoji->skin_tone2 == 0)
				binmoji->skin_tone2 = tone_val;
		} else if (is_base_emoji(codepoint)) {
			if (binmoji->primary_codepoint == 0) {
				binmoji->primary_codepoint = codepoint;
			} else if (binmoji->component_count < 16) {
				binmoji->component_list
					[binmoji->component_count++] = codepoint;
			}
		}
	}
	binmoji->component_hash =
		crc32(binmoji->component_list, binmoji->component_count);
}

/**
 * @brief Pack a parsed emoji into its 64-bit identifier.
 *
 * Each field is masked to its width before being shifted into place, so
 * out-of-range values are truncated rather than spilling into neighbours.
 * The component list itself is not stored, only its hash.
 */
uint64_t binmoji_encode(const struct binmoji *binmoji)
{
	uint64_t id = 0;
	id |= ((uint64_t)(binmoji->primary_codepoint & PRIMARY_CP_MASK)
	       << PRIMARY_CP_SHIFT);
	id |= ((uint64_t)(binmoji->component_hash & HASH_MASK) << HASH_SHIFT);
	id |= ((uint64_t)(binmoji->skin_tone1 & TONE_MASK) << TONE1_SHIFT);
	id |= ((uint64_t)(binmoji->skin_tone2 & TONE_MASK) << TONE2_SHIFT);
	id |= ((uint64_t)(binmoji->flags & FLAGS_MASK) << FLAGS_SHIFT);
	return id;
}

/**
 * @brief Comparison function for bsearch.
 *
 * Compares a target hash key against an EmojiHashEntry's hash.
 * @param key Pointer to the target uint32_t hash.
 * @param element Pointer to the EmojiHashEntry from the array.
 * @return <0 if key is less than element's hash, 0 if equal, >0 if greater.
 */
static int compare_emoji_hash(const void *key, const void *element)
{
	const uint32_t hash_key = *(const uint32_t *)key;
	const EmojiHashEntry *entry = (const EmojiHashEntry *)element;

	if (hash_key < entry->hash) {
		return -1;
	} else if (hash_key > entry->hash) {
		return 1;
	} else {
		return 0;
	}
}

/**
 * @brief Look up the component list for a hash using binary search.
 *
 * @param hash        Component hash to find.
 * @param out_binmoji Receives the components; must have room for 16 entries.
 * @param out_count   Receives the number of components, 0 if not found.
 * @return 1 if the hash is in binmoji_table, otherwise 0.
 */
static int lookup_binmoji_by_hash(uint32_t hash, uint32_t *out_binmoji,
				  size_t *out_count)
{
	const EmojiHashEntry *result =
		bsearch(&hash, binmoji_table, num_hash_entries,
			sizeof(EmojiHashEntry), compare_emoji_hash);

	if (result != NULL) {
		*out_count = result->count;
		memcpy(out_binmoji, result->components,
		       (*out_count) * sizeof(uint32_t));
		return 1; /* Found */
	}

	*out_count = 0;
	return 0; /* Not found */
}

/**
 * @brief Unpack a 64-bit identifier.
 *
 * The scalar fields are read straight from the identifier. The component
 * list is recovered by looking the hash up in binmoji_table; if the hash is
 * 0 or unknown, component_count is left at 0.
 */
void binmoji_decode(uint64_t id, struct binmoji *binmoji)
{
	memset(binmoji, 0, sizeof(struct binmoji));
	binmoji->primary_codepoint = (id >> PRIMARY_CP_SHIFT) & PRIMARY_CP_MASK;
	binmoji->component_hash = (id >> HASH_SHIFT) & HASH_MASK;
	binmoji->skin_tone1 = (id >> TONE1_SHIFT) & TONE_MASK;
	binmoji->skin_tone2 = (id >> TONE2_SHIFT) & TONE_MASK;
	binmoji->flags = (id >> FLAGS_SHIFT) & FLAGS_MASK;
	if (binmoji->component_hash != 0) {
		lookup_binmoji_by_hash(binmoji->component_hash,
				       binmoji->component_list,
				       &binmoji->component_count);
	}
}

/**
 * @brief Append one code point to @p buf as UTF-8.
 *
 * One byte is always kept free for the caller's NUL terminator, so on
 * success *offset stays strictly below @p buf_size.
 *
 * @return Number of bytes written, or 0 if @p buf is NULL, the code point is
 *         above U+10FFFF, or it does not fit. Nothing is written on failure.
 */
static int append_utf8(char *buf, size_t buf_size, size_t *offset,
		       uint32_t codepoint)
{
	char *p;
	int bytes_to_write = 0;

	if (!buf)
		return 0;
	if (codepoint < 0x80)
		bytes_to_write = 1;
	else if (codepoint < 0x800)
		bytes_to_write = 2;
	else if (codepoint < 0x10000)
		bytes_to_write = 3;
	else if (codepoint < 0x110000)
		bytes_to_write = 4;
	else
		return 0;
	if (*offset + bytes_to_write >= buf_size)
		return 0;

	p = buf + *offset;
	if (bytes_to_write == 1) {
		*p = (char)codepoint;
	} else if (bytes_to_write == 2) {
		p[0] = (char)(0xC0 | (codepoint >> 6));
		p[1] = (char)(0x80 | (codepoint & 0x3F));
	} else if (bytes_to_write == 3) {
		p[0] = (char)(0xE0 | (codepoint >> 12));
		p[1] = (char)(0x80 | ((codepoint >> 6) & 0x3F));
		p[2] = (char)(0x80 | (codepoint & 0x3F));
	} else {
		p[0] = (char)(0xF0 | (codepoint >> 18));
		p[1] = (char)(0x80 | ((codepoint >> 12) & 0x3F));
		p[2] = (char)(0x80 | ((codepoint >> 6) & 0x3F));
		p[3] = (char)(0x80 | (codepoint & 0x3F));
	}
	*offset += bytes_to_write;
	return bytes_to_write;
}

/* Return 1 if @p tone is a stored skin tone value (1..5), otherwise 0. */
static int is_valid_tone(uint8_t tone)
{
	return tone >= 1 && tone <= SKIN_TONE_COUNT;
}

/**
 * @brief Render a binmoji back into a NUL-terminated UTF-8 string.
 *
 * Output order: primary code point, first skin tone, then each component.
 * A zero width joiner is written before a component unless the component is
 * U+FE0F or U+20E3, or the sequence is a country flag (primary code point in
 * U+1F1E6..U+1F1FF) or a subdivision flag (primary U+1F3F4 followed by tag
 * characters). The second skin tone follows the last component.
 *
 * If the buffer is too small the string is cut at the last element that fit
 * completely; nothing after the first element that does not fit is written,
 * and a joiner is never left dangling without its component. Tone values
 * outside 1..5 are ignored.
 *
 * @param binmoji      Emoji to render.
 * @param out_str      Destination buffer.
 * @param out_str_size Size of @p out_str in bytes, including the terminator.
 */
void binmoji_to_string(const struct binmoji *binmoji, char *out_str,
		       size_t out_str_size)
{
	size_t i, offset, mark;
	uint32_t comp;
	int needs_zwj, is_country_flag, is_subdivision_flag, no_zwj_sequence;

	if (!binmoji || !out_str || out_str_size == 0)
		return;

	offset = 0;
	out_str[0] = '\0';

	is_country_flag = (binmoji->primary_codepoint >= 0x1F1E6 &&
			   binmoji->primary_codepoint <= 0x1F1FF);

	is_subdivision_flag = (binmoji->primary_codepoint == 0x1F3F4 &&
			       binmoji->component_count > 0 &&
			       binmoji->component_list[0] >= 0xE0020 &&
			       binmoji->component_list[0] <= 0xE007F);

	no_zwj_sequence = is_country_flag || is_subdivision_flag;

	if (binmoji->primary_codepoint > 0 &&
	    !append_utf8(out_str, out_str_size, &offset,
			 binmoji->primary_codepoint))
		goto done;

	if (is_valid_tone(binmoji->skin_tone1) &&
	    !append_utf8(out_str, out_str_size, &offset,
			 SKIN_TONE_FIRST + binmoji->skin_tone1 - 1))
		goto done;

	for (i = 0; i < binmoji->component_count; i++) {
		comp = binmoji->component_list[i];
		needs_zwj =
			(comp != 0xFE0F && comp != 0x20E3 && !no_zwj_sequence);

		/* Remember where this element starts so a joiner can be
		 * undone if its component does not fit. */
		mark = offset;

		if (needs_zwj &&
		    !append_utf8(out_str, out_str_size, &offset,
				 0x200D)) /* ZWJ */
			break;

		if (!append_utf8(out_str, out_str_size, &offset, comp)) {
			offset = mark;
			break;
		}

		if (i == binmoji->component_count - 1 &&
		    is_valid_tone(binmoji->skin_tone2) &&
		    !append_utf8(out_str, out_str_size, &offset,
				 SKIN_TONE_FIRST + binmoji->skin_tone2 - 1))
			break;
	}

done:
	/* append_utf8 keeps offset < out_str_size, so this stays in bounds. */
	out_str[offset] = '\0';
}