nostrdb

an unfairly fast embedded nostr database backed by lmdb
git clone git://jb55.com/nostrdb
Log | Files | Refs | README | LICENSE

commit 281b363654a4ca5be6332b446e66795015118dab
Author: William Casarin <jb55@jb55.com>
Date:   Wed, 19 Jul 2023 12:10:28 -0700

nostrdb: the unfairly fast nostr database

Diffstat:
A.gitignore | 4++++
ACOPYING | 1+
ALICENSE | 1+
AMakefile | 16++++++++++++++++
AREADME.md | 11+++++++++++
Acursor.h | 125+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anostr_index.fbs | 32++++++++++++++++++++++++++++++++
Anostrdb.c | 144+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anostrdb.h | 172+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest.c | 68++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 files changed, 574 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1,4 @@ +tags +test +.buildcmd +.build-result diff --git a/COPYING b/COPYING @@ -0,0 +1 @@ +MIT diff --git a/LICENSE b/LICENSE @@ -0,0 +1 @@ +MIT diff --git a/Makefile b/Makefile @@ -0,0 +1,16 @@ + +CFLAGS = -Wall + +check: test + ./test + +tags: + ctags *.c *.h + +test: test.c nostrdb.c nostrdb.h + $(CC) test.c nostrdb.c -o $@ + +%.o: %.c + $(CC) $(CFLAGS) + +.PHONY: tags diff --git a/README.md b/README.md @@ -0,0 +1,11 @@ + +# nostrdb + +The unfairly fast nostr database backed by lmdb. + +nostrdb stores nostr events as a custom in-memory representation that enables +zero-copy and O(1) access to all note fields. This is similar to flatbuffers +but it is custom built for nostr events. + +These events are then memory-mapped inside lmdb, enabling insanely fast, +zero-copy access and querying. diff --git a/cursor.h b/cursor.h @@ -0,0 +1,125 @@ + +#ifndef JB55_CURSOR_H +#define JB55_CURSOR_H + +//#include <ctype.h> +//#include <assert.h> + +#include <string.h> +#include <ctype.h> + +#define unlikely(x) __builtin_expect((x),0) +#define likely(x) __builtin_expect((x),1) + +struct cursor { + unsigned char *start; + unsigned char *p; + unsigned char *end; +}; + +static inline void make_cursor(unsigned char *start, unsigned char *end, struct cursor *cursor) +{ + cursor->start = start; + cursor->p = start; + cursor->end = end; +} + +static inline int cursor_push_byte(struct cursor *cursor, unsigned char c) +{ + if (unlikely(cursor->p + 1 > cursor->end)) { + return 0; + } + + *cursor->p = c; + cursor->p++; + + return 1; +} + +static inline int cursor_push(struct cursor *cursor, unsigned char *data, int len) +{ + if (unlikely(cursor->p + len >= cursor->end)) { + return 0; + } + + if (cursor->p != data) + memcpy(cursor->p, data, len); + + cursor->p += len; + + return 1; +} + +static inline int cursor_push_str(struct cursor *cursor, const char *str) +{ + return cursor_push(cursor, (unsigned char*)str, (int)strlen(str)); +} + +static inline int cursor_push_c_str(struct cursor *cursor, const char *str) +{ + return cursor_push_str(cursor, str) && cursor_push_byte(cursor, 0); +} + +static inline void *cursor_malloc(struct cursor *mem, unsigned long size) +{ + void *ret; + + if (mem->p + size > mem->end) { + return 0; + } + + ret = mem->p; + mem->p += size; + + return ret; +} + +static inline int cursor_slice(struct cursor *mem, struct cursor *slice, size_t size) +{ + unsigned char *p; + if (!(p = cursor_malloc(mem, size))) { + return 0; + } + make_cursor(p, mem->p, slice); + return 1; +} + +static inline size_t cursor_count(struct cursor *cursor, size_t elem_size) { + return (cursor->p - cursor->start)/elem_size; +} + +static inline int cursor_push_u32(struct cursor *cursor, uint32_t i) { + return cursor_push(cursor, (unsigned char*)&i, sizeof(i)); +} + +static inline int cursor_push_u16(struct cursor *cursor, uint16_t i) { + return cursor_push(cursor, (unsigned char*)&i, sizeof(i)); +} + +#define max(a,b) ((a) > (b) ? (a) : (b)) +#include <stdio.h> +static inline void cursor_print_around(struct cursor *cur, int range) +{ + unsigned char *c; + + printf("[%ld/%ld]\n", cur->p - cur->start, cur->end - cur->start); + + c = max(cur->p - range, cur->start); + for (; c < cur->end && c < (cur->p + range); c++) { + printf("%02x", *c); + } + printf("\n"); + + c = max(cur->p - range, cur->start); + for (; c < cur->end && c < (cur->p + range); c++) { + if (c == cur->p) { + printf("^"); + continue; + } + printf(" "); + } + printf("\n"); +} +#undef max + +#endif diff --git a/nostr_index.fbs b/nostr_index.fbs @@ -0,0 +1,32 @@ +namespace NostrIndex; + +struct Fixed32Bytes { + val: [ubyte:32]; +} + +struct Fixed64Bytes { + val: [ubyte:64]; +} + +table TagGeneral { + key: uint8; + val: [ubyte]; +} + +table TagFixed32 { + key: uint8; + val: Fixed32Bytes; +} + +table Event { + id: Fixed32Bytes; + pubkey: Fixed32Bytes; + created_at: uint64; + kind: uint64; + tagsGeneral: [TagGeneral]; + tagsFixed32: [TagFixed32]; + signature: Fixed64Bytes; +} + +table Empty {} +root_type Empty; diff --git a/nostrdb.c b/nostrdb.c @@ -0,0 +1,144 @@ + +#include "nostrdb.h" +#include <stdlib.h> + +int ndb_builder_new(struct ndb_builder *builder, int *bufsize) { + struct ndb_note *note; + struct cursor mem; + // 1MB + 0.5MB + builder->size = 1024 * 1024; + if (bufsize) + builder->size = *bufsize; + + int str_indices_size = sizeof(uint32_t) * (2<<14); + unsigned char *bytes = malloc(builder->size + str_indices_size); + if (!bytes) return 0; + + make_cursor(bytes, bytes + builder->size + str_indices_size, &mem); + + note = builder->note = (struct ndb_note *)bytes; + size_t half = builder->size >> 1; + + if (!cursor_slice(&mem, &builder->note_cur, half)) return 0; + if (!cursor_slice(&mem, &builder->strings, half)) return 0; + if (!cursor_slice(&mem, &builder->str_indices, str_indices_size)) return 0; + + memset(note, 0, sizeof(*note)); + builder->note_cur.p += sizeof(*note); + + note->version = 1; + + return 1; +} + +int ndb_builder_finalize(struct ndb_builder *builder, struct ndb_note **note) { + int strings_len = builder->strings.p - builder->strings.start; + unsigned char *end = builder->note_cur.p + strings_len; + int total_size = end - builder->note_cur.start; + + // move the strings buffer next to the end of our ndb_note + memmove(builder->note_cur.p, builder->strings.start, strings_len); + + // set the strings location + builder->note->strings = builder->note_cur.p - builder->note_cur.start; + + // remove any extra data + *note = realloc(builder->note_cur.start, total_size); + + return total_size; +} + +struct ndb_note *ndb_builder_note(struct ndb_builder *builder) { + return builder->note; +} + +int ndb_builder_make_string(struct ndb_builder *builder, const char *str, union packed_str *pstr) { + int len = strlen(str); + uint32_t loc; + + if (len == 0) { + *pstr = ndb_char_to_packed_str(0); + return 1; + } else if (len == 1) { + *pstr = ndb_char_to_packed_str(str[0]); + return 1; + } else if (len == 2) { + *pstr = ndb_chars_to_packed_str(str[0], str[1]); + return 1; + } + + // find existing matching string to avoid duplicate strings + int indices = cursor_count(&builder->str_indices, sizeof(uint32_t)); + for (int i = 0; i < indices; i++) { + uint32_t index = ((uint32_t*)builder->str_indices.start)[i]; + const char *some_str = (const char*)builder->strings.start + index; + + if (!strcmp(some_str, str)) { + // found an existing matching str, use that index + *pstr = ndb_offset_str(index); + return 1; + } + } + + // no string found, push a new one + loc = builder->strings.p - builder->strings.start; + if (!(cursor_push(&builder->strings, (unsigned char*)str, len) && + cursor_push_byte(&builder->strings, '\0'))) { + return 0; + } + *pstr = ndb_offset_str(loc); + + // record in builder indices. ignore return value, if we can't cache it + // then whatever + cursor_push_u32(&builder->str_indices, loc); + + return 1; +} + +int ndb_builder_set_content(struct ndb_builder *builder, const char *content) { + return ndb_builder_make_string(builder, content, &builder->note->content); +} + +void ndb_builder_set_pubkey(struct ndb_builder *builder, unsigned char *pubkey) { + memcpy(builder->note->pubkey, pubkey, 32); +} + +void ndb_builder_set_id(struct ndb_builder *builder, unsigned char *id) { + memcpy(builder->note->id, id, 32); +} + +void ndb_builder_set_signature(struct ndb_builder *builder, unsigned char *signature) { + memcpy(builder->note->signature, signature, 64); +} + +void ndb_builder_set_kind(struct ndb_builder *builder, uint32_t kind) { + builder->note->kind = kind; +} + +static inline int cursor_push_tag(struct cursor *cur, struct ndb_tag *tag) { + return cursor_push_u16(cur, tag->count); +} + +int ndb_builder_add_tag(struct ndb_builder *builder, const char **strs, uint16_t num_strs) { + int i; + union packed_str pstr; + const char *str; + struct ndb_tag tag; + + builder->note->tags.count++; + tag.count = num_strs; + + if (!cursor_push_tag(&builder->note_cur, &tag)) + return 0; + + for (i = 0; i < num_strs; i++) { + str = strs[i]; + if (!ndb_builder_make_string(builder, str, &pstr)) + return 0; + if (!cursor_push_u32(&builder->note_cur, pstr.offset)) + return 0; + } + + return 1; +} + diff --git a/nostrdb.h b/nostrdb.h @@ -0,0 +1,172 @@ +#ifndef NOSTRDB_H +#define NOSTRDB_H + +#include <inttypes.h> +#include "cursor.h" + +// these must be byte-aligned, they are directly accessing the serialized data +// representation +#pragma pack(push, 1) + +// we assume little endian everywhere. sorry not sorry. +union packed_str { + uint32_t offset; + + struct { + char str[3]; + unsigned char flag; + } packed; + + unsigned char bytes[4]; +}; + +struct ndb_tag { + uint16_t count; + union packed_str strs[0]; +}; + +struct ndb_tags { + uint16_t count; + struct ndb_tag tag[0]; +}; + +// v1 +struct ndb_note { + unsigned char version; // v=1 + unsigned char padding[3]; // keep things aligned + unsigned char id[32]; + unsigned char pubkey[32]; + unsigned char signature[64]; + + uint32_t created_at; + uint32_t kind; + union packed_str content; + uint32_t strings; + uint32_t json; + + // nothing can come after tags since it contains variadic data + struct ndb_tags tags; +}; + +#pragma pack(pop) + +struct ndb_builder { + struct cursor note_cur; + struct cursor strings; + struct cursor str_indices; + struct ndb_note *note; + int size; +}; + +struct ndb_iterator { + struct ndb_note *note; + struct ndb_tag *tag; + + // current outer index + int index; +}; + +int ndb_builder_new(struct ndb_builder *builder, int *bufsize); +int ndb_builder_finalize(struct ndb_builder *builder, struct ndb_note **note); +int ndb_builder_set_content(struct ndb_builder *builder, const char *content); +void ndb_builder_set_signature(struct ndb_builder *builder, unsigned char *signature); +void ndb_builder_set_pubkey(struct ndb_builder *builder, unsigned char *pubkey); +void ndb_builder_set_id(struct ndb_builder *builder, unsigned char *id); +void ndb_builder_set_kind(struct ndb_builder *builder, uint32_t kind); +int ndb_builder_add_tag(struct ndb_builder *builder, const char **strs, uint16_t num_strs); + +static inline unsigned char *ndb_note_id(struct ndb_note *note) { + return note->id; +} + +static inline unsigned char *ndb_note_pubkey(struct ndb_note *note) { + return note->pubkey; +} + +static inline unsigned char *ndb_note_signature(struct ndb_note *note) { + return note->signature; +} + +static inline uint32_t ndb_note_created_at(struct ndb_note *note) { + return note->created_at; +} + +static inline int ndb_str_is_packed(union packed_str str) { + return (str.offset >> 31) & 0x1; +} + +static inline struct ndb_note *ndb_note_from_bytes(unsigned char *bytes) { + struct ndb_note *note = (struct ndb_note *)bytes; + if (note->version != 1) + return 0; + return note; +} + +static inline union packed_str ndb_offset_str(uint32_t offset) { + // ensure accidents like -1 don't corrupt our packed_str + union packed_str str; + str.offset = offset & 0x7FFFFFFF; + return str; +} + +static inline union packed_str +ndb_char_to_packed_str(char c) { + union packed_str str; + str.packed.flag = 0xFF; + str.packed.str[0] = c; + str.packed.str[1] = '\0'; + return str; +} + +static inline union packed_str +ndb_chars_to_packed_str(char c1, char c2) { + union packed_str str; + str.packed.flag = 0xFF; + str.packed.str[0] = c1; + str.packed.str[1] = c2; + str.packed.str[2] = '\0'; + return str; +} + +static inline const char * +ndb_note_string(struct ndb_note *note, union packed_str *str) { + if (ndb_str_is_packed(*str)) + return str->packed.str; + + return ((const char *)note) + note->strings + str->offset; +} + +static inline const char * +ndb_note_tag_index(struct ndb_note *note, struct ndb_tag *tag, int index) { + if (index >= tag->count) { + return 0; + } + + return ndb_note_string(note, &tag->strs[index]); +} + +static inline int +ndb_tags_iterate_start(struct ndb_note *note, struct ndb_iterator *iter) { + uint16_t count = note->tags.count; + + iter->note = note; + iter->tag = note->tags.tag; + iter->index = 0; + + return iter->tag->count != 0; +} + +static inline int +ndb_tags_iterate_next(struct ndb_iterator *iter) { + struct ndb_tags *tags = &iter->note->tags; + + if (++iter->index < tags->count) { + uint32_t tag_data_size = iter->tag->count * sizeof(iter->tag->strs[0]); + iter->tag = (struct ndb_tag *)(iter->tag->strs[0].bytes + tag_data_size); + return 1; + } + + return 0; +} + +#endif diff --git a/test.c b/test.c @@ -0,0 +1,68 @@ + +#include "nostrdb.h" + +#include <stdio.h> +#include <assert.h> + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) + +int main(int argc, const char *argv[]) { + struct ndb_builder builder, *b = &builder; + struct ndb_note *note; + int len, ok; + + unsigned char id[32]; + memset(id, 1, 32); + + unsigned char pubkey[32]; + memset(pubkey, 2, 32); + + unsigned char sig[64]; + memset(sig, 3, 64); + + + const char *hex_pk = "5d9b81b2d4d5609c5565286fc3b511dc6b9a1b3d7d1174310c624d61d1f82bb9"; + const char *tag[] = { "p", hex_pk }; + const char *word_tag[] = { "word", "words", "w" }; + + ndb_builder_new(b, 0); + note = builder.note; + + memset(note->padding, 3, sizeof(note->padding)); + + ndb_builder_set_content(b, "hello, world!"); + ndb_builder_set_id(b, id); + ndb_builder_set_pubkey(b, pubkey); + ndb_builder_set_signature(b, sig); + ndb_builder_add_tag(b, tag, ARRAY_SIZE(tag)); + ndb_builder_add_tag(b, word_tag, ARRAY_SIZE(word_tag)); + + len = ndb_builder_finalize(b, &note); + + assert(note->tags.count == 2); + + // test iterator + struct ndb_iterator iter, *it = &iter; + + ok = ndb_tags_iterate_start(note, it); + assert(ok); + assert(it->tag->count == 2); + const char *p = ndb_note_string(note, &it->tag->strs[0]); + const char *hpk = ndb_note_string(note, &it->tag->strs[1]); + assert(hpk); + assert(!ndb_str_is_packed(it->tag->strs[1])); + assert(!strcmp(hpk, hex_pk)); + assert(!strcmp(p, "p")); + + ok = ndb_tags_iterate_next(it); + assert(ok); + assert(it->tag->count == 3); + assert(!strcmp(ndb_note_string(note, &it->tag->strs[0]), "word")); + assert(!strcmp(ndb_note_string(note, &it->tag->strs[1]), "words")); + assert(!strcmp(ndb_note_string(note, &it->tag->strs[2]), "w")); + + ok = ndb_tags_iterate_next(it); + assert(!ok); + + fwrite(note, len, 1, stdout); +}