nostrdb

an unfairly fast embedded nostr database backed by lmdb
git clone git://jb55.com/nostrdb
Log | Files | Refs | Submodules | README | LICENSE

commit 53313c747a9399bb9ef2aa03f1bbe13f28a4fa58
parent 86ae238edc913f825359bd9f9da0b0682c114337
Author: William Casarin <jb55@jb55.com>
Date:   Fri, 24 Oct 2025 11:16:04 -0700

metadata: add initial metadata table api

Signed-off-by: William Casarin <jb55@jb55.com>

Diffstat:
M Makefile | 2 +-
A src/metadata.c | 296 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/nostrdb.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
M src/nostrdb.h | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
4 files changed, 423 insertions(+), 9 deletions(-)

diff --git a/Makefile b/Makefile @@ -5,7 +5,7 @@ CCAN_HDRS := ccan/ccan/utf8/utf8.h ccan/ccan/container_of/container_of.h ccan/cc HEADERS = deps/lmdb/lmdb.h deps/secp256k1/include/secp256k1.h src/nostrdb.h src/cursor.h src/hex.h src/jsmn.h src/config.h src/random.h src/memchr.h src/cpu.h src/nostr_bech32.h src/block.h src/str_block.h src/print_util.h $(C_BINDINGS) $(CCAN_HDRS) $(BOLT11_HDRS) FLATCC_SRCS=deps/flatcc/src/runtime/json_parser.c deps/flatcc/src/runtime/verifier.c deps/flatcc/src/runtime/builder.c deps/flatcc/src/runtime/emitter.c deps/flatcc/src/runtime/refmap.c BOLT11_SRCS = src/bolt11/bolt11.c src/bolt11/bech32.c src/bolt11/amount.c src/bolt11/hash_u5.c -SRCS = src/nostrdb.c src/invoice.c src/nostr_bech32.c src/content_parser.c src/block.c src/binmoji.c $(BOLT11_SRCS) $(FLATCC_SRCS) $(CCAN_SRCS) +SRCS = src/nostrdb.c src/invoice.c src/nostr_bech32.c src/content_parser.c src/block.c src/binmoji.c src/metadata.c $(BOLT11_SRCS) $(FLATCC_SRCS) $(CCAN_SRCS) LDS = $(OBJS) $(ARS) OBJS = $(SRCS:.c=.o) DEPS = $(OBJS) $(HEADERS) $(ARS) diff --git a/src/metadata.c b/src/metadata.c @@ -0,0 +1,296 @@ + +#include "nostrdb.h" +#include "binmoji.h" + +// these must be byte-aligned, they are directly accessing the serialized data +// representation +#pragma pack(push, 1) + +// 16 bytes +struct ndb_note_meta_entry { + // 4 byte entry header + uint16_t type; + uint16_t flags; + + // additional 4 bytes of aux storage for payloads that are >8 bytes + // + // for reactions types, this is used for counts + // normally this would have been padding but we make use of it + // in our manually packed structure + uint32_t aux; + + // 8 byte metadata payload + union { + uint64_t value; + + struct { + uint32_t offset; + uint32_t padding; + } offset; + + // the reaction binmoji[1] for reaction, count is stored in aux + union ndb_reaction_str reaction_str; + } payload; +}; +STATIC_ASSERT(sizeof(struct ndb_note_meta_entry) == 16, note_meta_entry_should_be_16_bytes); + +/* newtype 
wrapper around the header entry */ +struct ndb_note_meta { + // 4 bytes + uint8_t version; + uint8_t padding; + uint16_t count; + + // 4 bytes + uint32_t data_table_size; + + // 8 bytes + uint64_t flags; +}; +STATIC_ASSERT(sizeof(struct ndb_note_meta) == 16, note_meta_entry_should_be_16_bytes); + +#pragma pack(pop) + +int ndb_reaction_str_is_emoji(union ndb_reaction_str str) +{ + return binmoji_get_user_flag(str.binmoji) == 0; +} + +uint16_t ndb_note_meta_entries_count(struct ndb_note_meta *meta) +{ + return meta->count; +} + +static int ndb_reaction_set_emoji(union ndb_reaction_str *str, const char *emoji) +{ + struct binmoji binmoji; + /* TODO: parse failures? */ + binmoji_parse(emoji, &binmoji); + str->binmoji = binmoji_encode(&binmoji); + return 1; +} + +static int ndb_reaction_set_str(union ndb_reaction_str *reaction, const char *str) +{ + int i; + char c; + + /* this is like memset'ing the packed string to all 0s as well */ + reaction->binmoji = 0; + + /* set the binmoji user flag so we can catch corrupt binmojis */ + /* this is in the LSB so it will only touch reaction->packed.flag */ + reaction->binmoji = binmoji_set_user_flag(reaction->binmoji, 1); + assert(reaction->packed.flag != 0); + + for (i = 0; i < 7; i++) { + c = str[i]; + /* string is too big */ + if (i == 6 && c != '\0') + return 0; + reaction->packed.str[i] = c; + if (c == '\0') + return 1; + } + + return 0; +} + +/* set the value of an ndb_reaction_str to an emoji or small string */ +int ndb_reaction_set(union ndb_reaction_str *reaction, const char *str) +{ + struct binmoji binmoji; + char output_emoji[136]; + + /* our variant of emoji detection is to simply try to create + * a binmoji and parse it again. 
if we round-trip successfully + * then we know its an emoji, or at least a simple string + */ + binmoji_parse(str, &binmoji); + reaction->binmoji = binmoji_encode(&binmoji); + binmoji_to_string(&binmoji, output_emoji, sizeof(output_emoji)); + + /* round trip is successful, let's just use binmojis for this encoding */ + if (!strcmp(output_emoji, str)) + return 1; + + /* no round trip? let's just set a non-emoji string */ + return ndb_reaction_set_str(reaction, str); +} + +static void ndb_note_meta_header_init(struct ndb_note_meta *meta) +{ + meta->version = 1; + meta->flags = 0; + meta->count = 0; + meta->data_table_size = 0; +} + +static inline size_t ndb_note_meta_entries_size(struct ndb_note_meta *meta) +{ + return (sizeof(struct ndb_note_meta_entry) * meta->count); +} + +void *ndb_note_meta_data_table(struct ndb_note_meta *meta, size_t *size) +{ + return meta + ndb_note_meta_entries_size(meta); +} + +size_t ndb_note_meta_total_size(struct ndb_note_meta *header) +{ + size_t total_size = sizeof(*header) + header->data_table_size + ndb_note_meta_entries_size(header); + assert((total_size % 8) == 0); + return total_size; +} + +struct ndb_note_meta_entry *ndb_note_meta_add_entry(struct ndb_note_meta_builder *builder) +{ + struct ndb_note_meta *header = (struct ndb_note_meta *)builder->cursor.start; + struct ndb_note_meta_entry *entry = NULL; + + assert(builder->cursor.p != builder->cursor.start); + + if (!(entry = cursor_malloc(&builder->cursor, sizeof(*entry)))) + return NULL; + + /* increase count entry count */ + header->count++; + + return entry; +} + +int ndb_note_meta_builder_init(struct ndb_note_meta_builder *builder, unsigned char *buf, size_t bufsize) +{ + make_cursor(buf, buf + bufsize, &builder->cursor); + + /* allocate some space for the header */ + if (!cursor_malloc(&builder->cursor, sizeof(struct ndb_note_meta))) + return 0; + + ndb_note_meta_header_init((struct ndb_note_meta*)builder->cursor.start); + + return 1; +} + +/* note flags are stored in the 
header entry */ +uint32_t ndb_note_meta_flags(struct ndb_note_meta *meta) +{ + return meta->flags; +} + +/* note flags are stored in the header entry */ +void ndb_note_meta_set_flags(struct ndb_note_meta *meta, uint32_t flags) +{ + meta->flags = flags; +} + +static int compare_entries(const void *a, const void *b) +{ + struct ndb_note_meta_entry *entry_a, *entry_b; + uint64_t binmoji_a, binmoji_b; + int res; + + entry_a = (struct ndb_note_meta_entry *)a; + entry_b = (struct ndb_note_meta_entry *)b; + + res = entry_a->type - entry_b->type; + + if (res == 0 && entry_a->type == NDB_NOTE_META_REACTION) { + /* we sort by reaction string for stability */ + binmoji_a = entry_a->payload.reaction_str.binmoji; + binmoji_b = entry_b->payload.reaction_str.binmoji; + + if (binmoji_a < binmoji_b) { + return -1; + } else if (binmoji_a > binmoji_b) { + return 1; + } else { + return 0; + } + } else { + return res; + } +} + +struct ndb_note_meta_entry *ndb_note_meta_entries(struct ndb_note_meta *meta) +{ + /* entries start at the end of the header record */ + return (struct ndb_note_meta_entry *)((uint8_t*)meta + sizeof(*meta)); +} + +void ndb_note_meta_build(struct ndb_note_meta_builder *builder, struct ndb_note_meta **meta) +{ + /* sort entries */ + struct ndb_note_meta_entry *entries; + struct ndb_note_meta *header = (struct ndb_note_meta*)builder->cursor.start; + + /* not initialized */ + assert(builder->cursor.start != builder->cursor.p); + + if (header->count > 0) { + entries = ndb_note_meta_entries(header); + + /* ensure entries are always sorted so bsearch is possible for large metadata + * entries. 
probably won't need that for awhile though */ + qsort(entries, header->count, sizeof(struct ndb_note_meta_entry), compare_entries); + } + + *meta = header; + return; +} + +/* find a metadata entry, optionally matching a payload */ +struct ndb_note_meta_entry *ndb_note_meta_find_entry(struct ndb_note_meta *meta, uint16_t type, uint64_t *payload) +{ + struct ndb_note_meta_entry *entries, *entry; + int i; + + if (meta->count == 0) + return NULL; + + entries = ndb_note_meta_entries(meta); + + for (i = 0; i < meta->count; i++) { + entry = &entries[i]; + if (entry->type != type) + continue; + if (payload && *payload != entry->payload.value) + continue; + return entry; + } + + return NULL; +} + +void ndb_note_meta_reaction_set(struct ndb_note_meta_entry *entry, uint32_t count, union ndb_reaction_str str) +{ + entry->type = NDB_NOTE_META_REACTION; + entry->flags = 0; + entry->aux = count; + entry->payload.reaction_str = str; +} + +/* sets the quote repost count for this note */ +void ndb_note_meta_quotes_set(struct ndb_note_meta_entry *entry, uint32_t count) +{ + entry->type = NDB_NOTE_META_QUOTES; + entry->flags = 0; + entry->aux = count; + /* unused */ + entry->payload.value = 0; +} + +uint32_t ndb_note_meta_reaction_count(struct ndb_note_meta_entry *entry) +{ + return entry->aux; +} + +void ndb_note_meta_reaction_set_count(struct ndb_note_meta_entry *entry, uint32_t count) +{ + entry->aux = count; +} + +union ndb_reaction_str ndb_note_meta_reaction_str(struct ndb_note_meta_entry *entry) +{ + return entry->payload.reaction_str; +} diff --git a/src/nostrdb.c b/src/nostrdb.c @@ -142,6 +142,7 @@ enum ndb_writer_msgtype { NDB_WRITER_BLOCKS, // write parsed note blocks NDB_WRITER_MIGRATE, // migrate the database NDB_WRITER_NOTE_RELAY, // we already have the note, but we have more relays to write + NDB_WRITER_NOTE_META, // write note metadata to the db }; // keys used for storing data in the NDB metadata database (NDB_DB_NDB_META) @@ -2213,6 +2214,11 @@ struct 
ndb_writer_ndb_meta { uint64_t version; }; +struct ndb_writer_note_meta { + unsigned char note_id[32]; + struct ndb_note_meta *metadata; +}; + // Used in the writer thread when writing ndb_profile_fetch_record's // kv = pubkey: recor struct ndb_writer_last_fetch { @@ -2237,6 +2243,7 @@ struct ndb_writer_msg { struct ndb_writer_ndb_meta ndb_meta; struct ndb_writer_last_fetch last_fetch; struct ndb_writer_blocks blocks; + struct ndb_writer_note_meta note_meta; }; }; @@ -3334,6 +3341,42 @@ static unsigned char *ndb_note_last_id_tag(struct ndb_note *note, char type) return last; } +int ndb_set_note_meta(struct ndb *ndb, const unsigned char *id, struct ndb_note_meta *meta) +{ + struct ndb_writer_msg msg; + struct ndb_writer_note_meta *meta_msg = &msg.note_meta; + + msg.type = NDB_WRITER_NOTE_META; + + memcpy(meta_msg->note_id, id, 32); + meta_msg->metadata = meta; + + return ndb_writer_queue_msg(&ndb->writer, &msg); +} + +int ndb_writer_set_note_meta(struct ndb_txn *txn, const unsigned char *id, struct ndb_note_meta *meta) +{ + int rc; + MDB_val k, v; + MDB_dbi note_meta_db; + + // get dbs + note_meta_db = txn->lmdb->dbs[NDB_DB_META]; + + k.mv_data = (unsigned char *)id; + k.mv_size = 32; + + v.mv_data = (unsigned char *)meta; + v.mv_size = ndb_note_meta_total_size(meta); + + if ((rc = mdb_put(txn->mdb_txn, note_meta_db, &k, &v, 0))) { + ndb_debug("ndb_set_note_meta: write note metadata to db failed: %s\n", mdb_strerror(rc)); + return 0; + } + + return 1; +} + void *ndb_get_note_meta(struct ndb_txn *txn, const unsigned char *id, size_t *len) { MDB_val k, v; @@ -5514,13 +5557,16 @@ static void *ndb_writer_thread(void *data) for (i = 0 ; i < popped; i++) { msg = &msgs[i]; switch (msg->type) { - case NDB_WRITER_NOTE: needs_commit = 1; break; - case NDB_WRITER_PROFILE: needs_commit = 1; break; - case NDB_WRITER_DBMETA: needs_commit = 1; break; - case NDB_WRITER_PROFILE_LAST_FETCH: needs_commit = 1; break; - case NDB_WRITER_BLOCKS: needs_commit = 1; break; - case 
NDB_WRITER_MIGRATE: needs_commit = 1; break; - case NDB_WRITER_NOTE_RELAY: needs_commit = 1; break; + case NDB_WRITER_NOTE: + case NDB_WRITER_NOTE_META: + case NDB_WRITER_PROFILE: + case NDB_WRITER_DBMETA: + case NDB_WRITER_PROFILE_LAST_FETCH: + case NDB_WRITER_BLOCKS: + case NDB_WRITER_MIGRATE: + case NDB_WRITER_NOTE_RELAY: + needs_commit = 1; + break; case NDB_WRITER_QUIT: break; } } @@ -5561,6 +5607,10 @@ static void *ndb_writer_thread(void *data) ndb_debug("failed to write note\n"); } break; + case NDB_WRITER_NOTE_META: + ndb_writer_set_note_meta(&txn, msg->note_meta.note_id, msg->note_meta.metadata); + break; + case NDB_WRITER_NOTE: note_nkey = ndb_write_note(&txn, &msg->note, scratch, diff --git a/src/nostrdb.h b/src/nostrdb.h @@ -6,6 +6,10 @@ #include "win.h" #include "cursor.h" +/* static assert helper */ +#define STATIC_ASSERT(cond, msg) typedef char static_assert_##msg[(cond) ? 1 : -1] +/* #define STATIC_ASSERT(cond, msg) */ + // maximum number of filters allowed in a filter group #define NDB_PACKED_STR 0x1 #define NDB_PACKED_ID 0x2 @@ -34,9 +38,21 @@ struct ndb_note; struct ndb_tag; struct ndb_tags; struct ndb_lmdb; +struct ndb_note_meta; +struct ndb_note_meta_entry; +struct ndb_note_meta_builder; union ndb_packed_str; struct bolt11; +/* Types, standard types are multiplied by 2, since odd types are user defined. + * We explicitly multiply by two in the enum to be unambiguous + */ +enum ndb_metadata_type { + NDB_NOTE_META_RESERVED = 0, /* not used */ + NDB_NOTE_META_REACTION = 2, /* count of all the reactions on a post, grouped by different reaction strings */ + NDB_NOTE_META_QUOTES = 4, /* count of all the all the notes with q tag references to this note */ +}; + // some bindings like swift needs help with forward declared pointers struct ndb_tag_ptr { struct ndb_tag *ptr; }; struct ndb_tags_ptr { struct ndb_tags *ptr; }; @@ -48,6 +64,33 @@ struct ndb_t { struct ndb *ndb; }; +/* Compact reaction strings for the metadata table. 
+ * + * We compact all emojis into 64bits using binmojis (github.com/jb55/binmoji) + * + * 6-byte non-emoji strings are also supported in the same memory layout. This + * is achieved by reserving the LSB byte of the compact string, which overlaps + * with the binmojis user flag bit. If this flag is set, then we know its not + * really a bitmoji, its a compact string. + */ +union ndb_reaction_str { + uint64_t binmoji; + struct { + /* flag is at LSB, which aligns with our binmoji user flag */ + uint8_t flag; + char str[7]; + } packed; +}; +STATIC_ASSERT(sizeof(union ndb_reaction_str) == 8, reaction_string_must_be_8_bytes); + +/* An ndb_note_meta builder. Maintains a cursor of a fixed sized buffer while adding + * metadata entries. + * + */ +struct ndb_note_meta_builder { + struct cursor cursor; +}; + struct ndb_str { // NDB_PACKED_STR, NDB_PACKED_ID unsigned char flag; @@ -241,6 +284,14 @@ struct ndb_note_relay_iterator { void *mdb_cur; }; +struct ndb_note_meta_iterator { + struct ndb_note_meta *header; + struct ndb_note_meta *cur; + + // current outer index + int index; +}; + struct ndb_iterator { struct ndb_note *note; struct ndb_tag *tag; @@ -527,7 +578,6 @@ uint64_t ndb_get_notekey_by_id(struct ndb_txn *txn, const unsigned char *id); uint64_t ndb_get_profilekey_by_pubkey(struct ndb_txn *txn, const unsigned char *id); struct ndb_note *ndb_get_note_by_id(struct ndb_txn *txn, const unsigned char *id, size_t *len, uint64_t *primkey); struct ndb_note *ndb_get_note_by_key(struct ndb_txn *txn, uint64_t key, size_t *len); -void *ndb_get_note_meta(struct ndb_txn *txn, const unsigned char *id, size_t *len); int ndb_note_seen_on_relay(struct ndb_txn *txn, uint64_t note_key, const char *relay); void ndb_destroy(struct ndb *); @@ -603,6 +653,24 @@ void ndb_text_search_config_set_limit(struct ndb_text_search_config *, int limit // QUERY int ndb_query(struct ndb_txn *txn, struct ndb_filter *filters, int num_filters, struct ndb_query_result *results, int result_capacity, int 
*count); +// NOTE METADATA +void *ndb_get_note_meta(struct ndb_txn *txn, const unsigned char *id, size_t *len); +int ndb_set_note_meta(struct ndb *ndb, const unsigned char *id, struct ndb_note_meta *meta); +size_t ndb_note_meta_total_size(struct ndb_note_meta *header); +int ndb_note_meta_builder_init(struct ndb_note_meta_builder *builder, unsigned char *, size_t); +void ndb_note_meta_build(struct ndb_note_meta_builder *builder, struct ndb_note_meta **meta); +uint16_t ndb_note_meta_entries_count(struct ndb_note_meta *meta); +struct ndb_note_meta_entry *ndb_note_meta_entries(struct ndb_note_meta *meta); +struct ndb_note_meta_entry *ndb_note_meta_add_entry(struct ndb_note_meta_builder *builder); +size_t ndb_note_meta_total_size(struct ndb_note_meta *meta); +void ndb_note_meta_reaction_set(struct ndb_note_meta_entry *entry, uint32_t count, union ndb_reaction_str str); +void ndb_note_meta_quotes_set(struct ndb_note_meta_entry *entry, uint32_t count); +uint32_t ndb_note_meta_reaction_count(struct ndb_note_meta_entry *entry); + +// META STRINGS +int ndb_reaction_set(union ndb_reaction_str *reaction, const char *str); +int ndb_reaction_str_is_emoji(union ndb_reaction_str); + // STATS int ndb_stat(struct ndb *ndb, struct ndb_stat *stat); void ndb_stat_counts_init(struct ndb_stat_counts *counts);