commit 281b363654a4ca5be6332b446e66795015118dab
Author: William Casarin <jb55@jb55.com>
Date: Wed, 19 Jul 2023 12:10:28 -0700
nostrdb: the unfairly fast nostr database
Diffstat:
A | .gitignore | | | 4 | ++++ |
A | COPYING | | | 1 | + |
A | LICENSE | | | 1 | + |
A | Makefile | | | 16 | ++++++++++++++++ |
A | README.md | | | 11 | +++++++++++ |
A | cursor.h | | | 125 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | nostr_index.fbs | | | 32 | ++++++++++++++++++++++++++++++++ |
A | nostrdb.c | | | 144 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | nostrdb.h | | | 172 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | test.c | | | 68 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
10 files changed, 574 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,4 @@
+tags
+test
+.buildcmd
+.build-result
diff --git a/COPYING b/COPYING
@@ -0,0 +1 @@
+MIT
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1 @@
+MIT
diff --git a/Makefile b/Makefile
@@ -0,0 +1,16 @@
+
+# Warning flags shared by every compile, including the test binary.
+CFLAGS = -Wall
+
+# Build the test program and run it.
+check: test
+	./test
+
+tags:
+	ctags *.c *.h
+
+# nostrdb.h includes cursor.h, so the test binary depends on it as well.
+test: test.c nostrdb.c nostrdb.h cursor.h
+	$(CC) $(CFLAGS) test.c nostrdb.c -o $@
+
+# Compile a single translation unit to an object file.
+%.o: %.c
+	$(CC) $(CFLAGS) -c $< -o $@
+
+# "test" is a real output file, so it is deliberately not listed here.
+.PHONY: check tags
diff --git a/README.md b/README.md
@@ -0,0 +1,11 @@
+
+# nostrdb
+
+The unfairly fast nostr database backed by lmdb.
+
+nostrdb stores nostr events as a custom in-memory representation that enables
+zero-copy and O(1) access to all note fields. This is similar to flatbuffers
+but it is custom built for nostr events.
+
+These events are then memory-mapped inside lmdb, enabling insanely fast,
+zero-copy access and querying.
diff --git a/cursor.h b/cursor.h
@@ -0,0 +1,125 @@
+
+#ifndef JB55_CURSOR_H
+#define JB55_CURSOR_H
+
+//#include <ctype.h>
+//#include <assert.h>
+
+#include <ctype.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+// Branch-prediction hints built on the compiler's __builtin_expect.
+// NOTE(review): the argument is forwarded as-is (no !! normalization), so
+// likely() is only a useful hint for conditions that evaluate to exactly 1.
+#define unlikely(x) __builtin_expect((x),0)
+#define likely(x) __builtin_expect((x),1)
+
+// A bump pointer over a caller-owned byte range. The cursor never allocates
+// or frees memory itself.
+struct cursor {
+ unsigned char *start; // first byte of the range
+ unsigned char *p; // current position; advanced by the push/malloc helpers
+ unsigned char *end; // one past the last usable byte
+};
+
+// Point `cursor` at the range [start, end) with the position reset to `start`.
+static inline void make_cursor(unsigned char *start, unsigned char *end, struct cursor *cursor)
+{
+	*cursor = (struct cursor){ .start = start, .p = start, .end = end };
+}
+
+// Append a single byte. Returns 1 on success, 0 when the cursor is full.
+static inline int cursor_push_byte(struct cursor *cursor, unsigned char c)
+{
+	// no room left for even one more byte
+	if (unlikely(cursor->p >= cursor->end))
+		return 0;
+
+	*cursor->p++ = c;
+	return 1;
+}
+
+// Append `len` bytes from `data` at the cursor position.
+//
+// Returns 1 on success and advances the position by `len`. Returns 0, leaving
+// the cursor untouched, when `len` is negative or does not fit in the
+// remaining space. A write that exactly fills the range is accepted, matching
+// cursor_push_byte and cursor_malloc.
+//
+// If `data` already equals the current position the copy is skipped and only
+// the position is advanced.
+static inline int cursor_push(struct cursor *cursor, unsigned char *data, int len)
+{
+	// measure the space that is left instead of forming p + len, which could
+	// point past the end of the buffer before it is ever compared
+	ptrdiff_t avail = cursor->end - cursor->p;
+
+	if (unlikely(len < 0 || len > avail))
+		return 0;
+
+	if (len > 0 && cursor->p != data)
+		memcpy(cursor->p, data, (size_t)len);
+
+	cursor->p += len;
+
+	return 1;
+}
+
+// Append the characters of `str` without its NUL terminator.
+// Returns whatever cursor_push returns.
+static inline int cursor_push_str(struct cursor *cursor, const char *str)
+{
+	int n = (int)strlen(str);
+	unsigned char *bytes = (unsigned char*)str;
+
+	return cursor_push(cursor, bytes, n);
+}
+
+// Append `str` followed by a NUL terminator.
+// Returns 1 only if both the characters and the terminator were written.
+static inline int cursor_push_c_str(struct cursor *cursor, const char *str)
+{
+	if (!cursor_push_str(cursor, str))
+		return 0;
+
+	return cursor_push_byte(cursor, 0);
+}
+
+// Reserve `size` bytes at the cursor position and return a pointer to them.
+//
+// Returns NULL, leaving the cursor untouched, when fewer than `size` bytes
+// remain. The returned memory belongs to the cursor's underlying buffer and is
+// not initialized here.
+static inline void *cursor_malloc(struct cursor *mem, unsigned long size)
+{
+	unsigned char *ret = mem->p;
+
+	// compare against the remaining space so a huge `size` cannot push
+	// p + size past the end of the address range before the check
+	if ((size_t)size > (size_t)(mem->end - mem->p))
+		return NULL;
+
+	mem->p = ret + size;
+
+	return ret;
+}
+
+// Carve the next `size` bytes out of `mem` and expose them as `slice`.
+// Returns 1 on success, 0 if `mem` does not have `size` bytes left.
+static inline int cursor_slice(struct cursor *mem, struct cursor *slice, size_t size)
+{
+	unsigned char *base = cursor_malloc(mem, size);
+
+	if (!base)
+		return 0;
+
+	// after the reservation mem->p sits at the end of the carved region
+	make_cursor(base, mem->p, slice);
+	return 1;
+}
+
+// Number of whole `elem_size`-byte elements written so far (start up to p).
+static inline size_t cursor_count(struct cursor *cursor, size_t elem_size) {
+	size_t used = (size_t)(cursor->p - cursor->start);
+
+	return used / elem_size;
+}
+
+// Append the in-memory bytes of a 32-bit value (host byte order).
+static inline int cursor_push_u32(struct cursor *cursor, uint32_t i) {
+	unsigned char *raw = (unsigned char*)&i;
+
+	return cursor_push(cursor, raw, sizeof i);
+}
+
+// Append the in-memory bytes of a 16-bit value (host byte order).
+static inline int cursor_push_u16(struct cursor *cursor, uint16_t i) {
+	unsigned char *raw = (unsigned char*)&i;
+
+	return cursor_push(cursor, raw, sizeof i);
+}
+
+#define max(a,b) ((a) > (b) ? (a) : (b))
+#include <stdio.h>
+// Debug helper: print the cursor position as [offset/capacity], then a hex
+// dump of up to `range` bytes on each side of the position, then a marker
+// line with '^' under the byte at the current position.
+static inline void cursor_print_around(struct cursor *cur, int range)
+{
+	unsigned char *lo, *hi, *c;
+
+	// pointer differences are ptrdiff_t, which printf formats with %td
+	printf("[%td/%td]\n", cur->p - cur->start, cur->end - cur->start);
+
+	// clamp the window to the buffer without stepping a pointer outside it
+	lo = (cur->p - cur->start > range) ? cur->p - range : cur->start;
+	hi = (cur->end - cur->p > range) ? cur->p + range : cur->end;
+
+	for (c = lo; c < hi; c++) {
+		printf("%02x", *c);
+	}
+	printf("\n");
+
+	for (c = lo; c < hi; c++) {
+		if (c == cur->p) {
+			printf("^");
+			continue;
+		}
+		printf("  ");
+	}
+	printf("\n");
+}
+#undef max
+
+#endif
diff --git a/nostr_index.fbs b/nostr_index.fbs
@@ -0,0 +1,32 @@
+// FlatBuffers schema describing an indexed nostr event.
+// NOTE(review): nothing in nostrdb.c, nostrdb.h or test.c from this commit
+// references this schema — confirm how it is meant to be used.
+namespace NostrIndex;
+
+// Fixed-size 32-byte value; used for the event id and pubkey below.
+struct Fixed32Bytes {
+ val: [ubyte:32];
+}
+
+// Fixed-size 64-byte value; used for the event signature below.
+struct Fixed64Bytes {
+ val: [ubyte:64];
+}
+
+// Tag whose value is a variable-length byte vector.
+table TagGeneral {
+ key: uint8;
+ val: [ubyte];
+}
+
+// Tag whose value is exactly 32 bytes.
+table TagFixed32 {
+ key: uint8;
+ val: Fixed32Bytes;
+}
+
+// NOTE(review): created_at and kind are uint64 here but uint32_t in
+// struct ndb_note (nostrdb.h) — confirm which width is intended.
+table Event {
+ id: Fixed32Bytes;
+ pubkey: Fixed32Bytes;
+ created_at: uint64;
+ kind: uint64;
+ tagsGeneral: [TagGeneral];
+ tagsFixed32: [TagFixed32];
+ signature: Fixed64Bytes;
+}
+
+// The declared root type is an empty table, not Event.
+table Empty {}
+root_type Empty;
diff --git a/nostrdb.c b/nostrdb.c
@@ -0,0 +1,144 @@
+
+#include "nostrdb.h"
+#include <stdlib.h>
+
+// Allocate the working buffer for a new note and initialize `builder`.
+//
+// bufsize: optional pointer to the combined size in bytes of the note area
+//          and the string area (each gets half). NULL selects 1 MiB.
+//
+// A further sizeof(uint32_t) * (2<<14) bytes (128 KiB) are allocated after
+// those two areas for the string-offset cache used to de-duplicate strings.
+//
+// Returns 1 on success. Returns 0 if the size is not positive, if half of it
+// cannot hold the fixed struct ndb_note header, or if allocation fails; no
+// memory is left allocated in that case and builder->note is set to NULL.
+int ndb_builder_new(struct ndb_builder *builder, int *bufsize) {
+	struct ndb_note *note;
+	struct cursor mem;
+	unsigned char *bytes;
+	size_t total, half, str_indices_size;
+
+	builder->note = NULL;
+	builder->size = 1024 * 1024;
+	if (bufsize)
+		builder->size = *bufsize;
+
+	if (builder->size <= 0)
+		return 0;
+
+	// the note header is written into the first half right away, so that
+	// half has to be large enough to hold it
+	half = (size_t)builder->size >> 1;
+	if (half < sizeof(*note))
+		return 0;
+
+	str_indices_size = sizeof(uint32_t) * (2<<14);
+	total = (size_t)builder->size + str_indices_size;
+
+	bytes = malloc(total);
+	if (!bytes)
+		return 0;
+
+	make_cursor(bytes, bytes + total, &mem);
+
+	if (!cursor_slice(&mem, &builder->note_cur, half) ||
+	    !cursor_slice(&mem, &builder->strings, half) ||
+	    !cursor_slice(&mem, &builder->str_indices, str_indices_size)) {
+		// do not leak the buffer when carving it up fails
+		free(bytes);
+		return 0;
+	}
+
+	// the note header lives at the very start of the allocation
+	note = builder->note = (struct ndb_note *)bytes;
+	memset(note, 0, sizeof(*note));
+	builder->note_cur.p += sizeof(*note);
+
+	note->version = 1;
+
+	return 1;
+}
+
+// Pack the note and its string table into one contiguous buffer.
+//
+// The string area is moved so that it starts directly after the last byte
+// written to the note area, note->strings is set to that offset from the
+// start of the note, and the allocation is shrunk to the packed size.
+//
+// *note receives the finished note and builder->note is updated to match.
+// If shrinking fails, the original (larger) allocation is handed back
+// instead; its first `total_size` bytes hold the same packed data.
+//
+// The builder's cursors must not be used after this call, since the buffer
+// may have moved. Returns the packed size in bytes.
+int ndb_builder_finalize(struct ndb_builder *builder, struct ndb_note **note) {
+	int strings_len = builder->strings.p - builder->strings.start;
+	unsigned char *end = builder->note_cur.p + strings_len;
+	int total_size = end - builder->note_cur.start;
+	struct ndb_note *packed;
+
+	// move the strings buffer next to the end of our ndb_note
+	memmove(builder->note_cur.p, builder->strings.start, strings_len);
+
+	// set the strings location
+	builder->note->strings = builder->note_cur.p - builder->note_cur.start;
+
+	// remove any extra data; on failure the old block is still valid, so
+	// keep using it rather than leaking it and returning NULL
+	packed = realloc(builder->note_cur.start, total_size);
+	if (!packed)
+		packed = (struct ndb_note *)builder->note_cur.start;
+
+	builder->note = packed;
+	*note = packed;
+
+	return total_size;
+}
+
+// Accessor for the note currently held by the builder.
+struct ndb_note *ndb_builder_note(struct ndb_builder *builder) {
+	struct ndb_note *current = builder->note;
+
+	return current;
+}
+
+// Turn `str` into a packed_str reference for the note being built.
+//
+// Strings of length 0, 1 or 2 are stored inline in the packed_str itself.
+// Longer strings are appended, NUL-terminated, to the builder's string area
+// and referenced by offset; an identical string that was stored earlier is
+// reused instead of being written again.
+//
+// Returns 1 on success with *pstr filled in. Returns 0 if the string area is
+// full; in that case nothing is left behind in the string area.
+int ndb_builder_make_string(struct ndb_builder *builder, const char *str, union packed_str *pstr) {
+	int len = strlen(str);
+	unsigned char *mark;
+	uint32_t loc;
+
+	if (len == 0) {
+		*pstr = ndb_char_to_packed_str(0);
+		return 1;
+	} else if (len == 1) {
+		*pstr = ndb_char_to_packed_str(str[0]);
+		return 1;
+	} else if (len == 2) {
+		*pstr = ndb_chars_to_packed_str(str[0], str[1]);
+		return 1;
+	}
+
+	// find existing matching string to avoid duplicate strings
+	int indices = cursor_count(&builder->str_indices, sizeof(uint32_t));
+	for (int i = 0; i < indices; i++) {
+		uint32_t index = ((uint32_t*)builder->str_indices.start)[i];
+		const char *some_str = (const char*)builder->strings.start + index;
+
+		if (!strcmp(some_str, str)) {
+			// found an existing matching str, use that index
+			*pstr = ndb_offset_str(index);
+			return 1;
+		}
+	}
+
+	// no string found, push a new one
+	mark = builder->strings.p;
+	loc = mark - builder->strings.start;
+	if (!(cursor_push(&builder->strings, (unsigned char*)str, len) &&
+	      cursor_push_byte(&builder->strings, '\0'))) {
+		// undo a partial write so no unterminated bytes stay in the
+		// string table
+		builder->strings.p = mark;
+		return 0;
+	}
+	*pstr = ndb_offset_str(loc);
+
+	// record in builder indices. ignore return value, if we can't cache it
+	// then whatever
+	cursor_push_u32(&builder->str_indices, loc);
+
+	return 1;
+}
+
+// Store `content` as the note's content string.
+// Returns the result of ndb_builder_make_string (1 on success, 0 on failure).
+int ndb_builder_set_content(struct ndb_builder *builder, const char *content) {
+	union packed_str *slot = &builder->note->content;
+
+	return ndb_builder_make_string(builder, content, slot);
+}
+
+// Copy the 32-byte public key at `pubkey` into the note.
+void ndb_builder_set_pubkey(struct ndb_builder *builder, unsigned char *pubkey) {
+	struct ndb_note *note = builder->note;
+
+	// note->pubkey is a 32-byte array
+	memcpy(note->pubkey, pubkey, sizeof(note->pubkey));
+}
+
+// Copy the 32-byte event id at `id` into the note.
+void ndb_builder_set_id(struct ndb_builder *builder, unsigned char *id) {
+	struct ndb_note *note = builder->note;
+
+	// note->id is a 32-byte array
+	memcpy(note->id, id, sizeof(note->id));
+}
+
+// Copy the 64-byte signature at `signature` into the note.
+void ndb_builder_set_signature(struct ndb_builder *builder, unsigned char *signature) {
+	struct ndb_note *note = builder->note;
+
+	// note->signature is a 64-byte array
+	memcpy(note->signature, signature, sizeof(note->signature));
+}
+
+// Set the note's kind field.
+void ndb_builder_set_kind(struct ndb_builder *builder, uint32_t kind) {
+	struct ndb_note *note = builder->note;
+
+	note->kind = kind;
+}
+
+// Write a tag header, which consists only of its 16-bit element count.
+static inline int cursor_push_tag(struct cursor *cur, struct ndb_tag *tag) {
+	uint16_t n = tag->count;
+
+	return cursor_push_u16(cur, n);
+}
+
+// Append one tag made of `num_strs` strings to the note.
+//
+// The tag is written to the note area as a 16-bit count followed by one
+// packed_str per string. Returns 1 on success. Returns 0 if the note area or
+// the string area runs out of space; the note area and the note's tag count
+// are then left exactly as they were before the call.
+int ndb_builder_add_tag(struct ndb_builder *builder, const char **strs, uint16_t num_strs) {
+	int i;
+	union packed_str pstr;
+	struct ndb_tag tag;
+	// remember where this tag starts so a failure can discard it whole
+	unsigned char *tag_start = builder->note_cur.p;
+
+	tag.count = num_strs;
+
+	if (!cursor_push_tag(&builder->note_cur, &tag))
+		return 0;
+
+	for (i = 0; i < num_strs; i++) {
+		if (!ndb_builder_make_string(builder, strs[i], &pstr) ||
+		    !cursor_push_u32(&builder->note_cur, pstr.offset)) {
+			builder->note_cur.p = tag_start;
+			return 0;
+		}
+	}
+
+	// only count the tag once it has been written completely
+	builder->note->tags.count++;
+
+	return 1;
+}
+
diff --git a/nostrdb.h b/nostrdb.h
@@ -0,0 +1,172 @@
+#ifndef NOSTRDB_H
+#define NOSTRDB_H
+
+#include <inttypes.h>
+#include "cursor.h"
+
+// these must be byte-aligned, they are directly accessing the serialized data
+// representation
+#pragma pack(push, 1)
+
+// we assume little endian everywhere. sorry not sorry.
+// A 4-byte string reference. It is either an offset into the note's string
+// table (built by ndb_offset_str, which clears bit 31) or up to two characters
+// stored inline with `flag` set to 0xFF (ndb_char_to_packed_str and
+// ndb_chars_to_packed_str). ndb_str_is_packed tells the two apart by testing
+// bit 31 of `offset`.
+union packed_str {
+ uint32_t offset;
+
+ struct {
+ char str[3]; // inline characters plus NUL terminator
+ unsigned char flag; // 0xFF when the string is stored inline
+ } packed;
+
+ unsigned char bytes[4]; // raw byte view, used for pointer arithmetic
+};
+
+// One serialized tag: an element count followed by that many packed_str
+// entries laid out directly after it.
+struct ndb_tag {
+ uint16_t count;
+ union packed_str strs[0];
+};
+
+// The serialized tag list: a tag count followed by the tags themselves.
+// Tags have different sizes, so they must be walked with
+// ndb_tags_iterate_start / ndb_tags_iterate_next rather than indexed.
+struct ndb_tags {
+ uint16_t count;
+ struct ndb_tag tag[0];
+};
+
+// v1
+// Serialized note header. The variable-length tag data follows the header,
+// and ndb_builder_finalize places the string table after the tag data.
+struct ndb_note {
+ unsigned char version; // v=1
+ unsigned char padding[3]; // keep things aligned
+ unsigned char id[32];
+ unsigned char pubkey[32];
+ unsigned char signature[64];
+
+ uint32_t created_at; // NOTE(review): no builder setter exists in this commit
+ uint32_t kind;
+ union packed_str content;
+ uint32_t strings; // byte offset from the start of the note to the string table
+ uint32_t json; // NOTE(review): only zeroed by ndb_builder_new in this commit
+
+ // nothing can come after tags since it contains variadic data
+ struct ndb_tags tags;
+};
+
+#pragma pack(pop)
+
+// Working state for assembling a note. ndb_builder_new carves one allocation
+// into the three cursor regions below.
+struct ndb_builder {
+ struct cursor note_cur; // note header and tag data (first half of the buffer)
+ struct cursor strings; // NUL-terminated string table (second half)
+ struct cursor str_indices; // uint32_t offsets of stored strings, for de-duplication
+ struct ndb_note *note; // start of the buffer, viewed as the note header
+ int size; // combined byte size of the note and string regions
+};
+
+// Position of a walk over a note's tags; see ndb_tags_iterate_start and
+// ndb_tags_iterate_next.
+struct ndb_iterator {
+ struct ndb_note *note; // note being iterated
+ struct ndb_tag *tag; // tag at the current position
+
+ // current outer index
+ int index;
+};
+
+// Builder API, implemented in nostrdb.c.
+
+// Allocate the builder's buffer; *bufsize overrides the default size when
+// bufsize is non-NULL. Returns 1 on success, 0 on failure.
+int ndb_builder_new(struct ndb_builder *builder, int *bufsize);
+// Pack the note and its strings together, store the resulting buffer in *note
+// and return its size in bytes.
+int ndb_builder_finalize(struct ndb_builder *builder, struct ndb_note **note);
+// Set the content string. Returns 1 on success, 0 if it could not be stored.
+int ndb_builder_set_content(struct ndb_builder *builder, const char *content);
+// Copy 64 bytes from `signature` into the note.
+void ndb_builder_set_signature(struct ndb_builder *builder, unsigned char *signature);
+// Copy 32 bytes from `pubkey` into the note.
+void ndb_builder_set_pubkey(struct ndb_builder *builder, unsigned char *pubkey);
+// Copy 32 bytes from `id` into the note.
+void ndb_builder_set_id(struct ndb_builder *builder, unsigned char *id);
+// Set the note's kind.
+void ndb_builder_set_kind(struct ndb_builder *builder, uint32_t kind);
+// Append a tag made of `num_strs` strings. Returns 1 on success, 0 when the
+// builder runs out of space.
+int ndb_builder_add_tag(struct ndb_builder *builder, const char **strs, uint16_t num_strs);
+
+// Pointer to the note's 32-byte id, inside the note itself (no copy).
+static inline unsigned char *ndb_note_id(struct ndb_note *note) {
+	return &note->id[0];
+}
+
+// Pointer to the note's 32-byte pubkey, inside the note itself (no copy).
+static inline unsigned char *ndb_note_pubkey(struct ndb_note *note) {
+	return &note->pubkey[0];
+}
+
+// Pointer to the note's 64-byte signature, inside the note itself (no copy).
+static inline unsigned char *ndb_note_signature(struct ndb_note *note) {
+	return &note->signature[0];
+}
+
+// The note's created_at field.
+static inline uint32_t ndb_note_created_at(struct ndb_note *note) {
+	uint32_t created = note->created_at;
+
+	return created;
+}
+
+// Returns 1 when `str` holds its characters inline (bit 31 of the offset is
+// set), 0 when it is an offset into the string table.
+static inline int ndb_str_is_packed(union packed_str str) {
+	return (str.offset & 0x80000000u) ? 1 : 0;
+}
+
+// View `bytes` as a note. Returns a null pointer unless the version byte is 1.
+// No other validation is performed.
+static inline struct ndb_note *ndb_note_from_bytes(unsigned char *bytes) {
+	struct ndb_note *candidate = (struct ndb_note *)bytes;
+
+	return candidate->version == 1 ? candidate : 0;
+}
+
+// Build a packed_str that refers to the string table by offset.
+static inline union packed_str ndb_offset_str(uint32_t offset) {
+	// bit 31 is reserved as the "inline" marker, so strip it; this keeps an
+	// accidental value such as -1 from reading as an inline string
+	union packed_str ref = { .offset = offset & 0x7FFFFFFF };
+
+	return ref;
+}
+
+// Build an inline packed_str holding the single character `c` (or the empty
+// string when `c` is 0).
+//
+// All four bytes are given a defined value, so the serialized form of the
+// same string is always identical and no indeterminate byte is written out.
+static inline union packed_str
+ndb_char_to_packed_str(char c) {
+	union packed_str str = {
+		.packed = { .str = { c, '\0', '\0' }, .flag = 0xFF }
+	};
+
+	return str;
+}
+
+// Build an inline packed_str holding the two characters `c1` and `c2`,
+// followed by a NUL terminator and the 0xFF inline flag.
+static inline union packed_str
+ndb_chars_to_packed_str(char c1, char c2) {
+	union packed_str pair = {
+		.packed = { .str = { c1, c2, '\0' }, .flag = 0xFF }
+	};
+
+	return pair;
+}
+
+// Resolve a packed_str to a NUL-terminated C string.
+// For inline strings the result points into `*str` itself; otherwise it
+// points into the note's string table.
+static inline const char *
+ndb_note_string(struct ndb_note *note, union packed_str *str) {
+	const char *table;
+
+	// short strings live inside the packed_str
+	if (ndb_str_is_packed(*str))
+		return str->packed.str;
+
+	table = (const char *)note + note->strings;
+	return table + str->offset;
+}
+
+// Return element `index` of `tag` as a C string, or a null pointer when
+// `index` is negative or not smaller than the tag's element count.
+static inline const char *
+ndb_note_tag_index(struct ndb_note *note, struct ndb_tag *tag, int index) {
+	// a negative index would read memory before the tag's elements
+	if (index < 0 || index >= tag->count) {
+		return 0;
+	}
+
+	return ndb_note_string(note, &tag->strs[index]);
+}
+
+// Position `iter` on the first tag of `note`.
+//
+// Returns 1 when the note has at least one tag and that first tag has at
+// least one element, 0 otherwise. The note's tag count is checked first, so
+// a note without tags never has bytes beyond its tag area read as a tag.
+static inline int
+ndb_tags_iterate_start(struct ndb_note *note, struct ndb_iterator *iter) {
+	iter->note = note;
+	iter->tag = note->tags.tag;
+	iter->index = 0;
+
+	return note->tags.count != 0 && iter->tag->count != 0;
+}
+
+// Advance `iter` to the next tag.
+// Returns 1 if there was one, 0 once the note's tag count is reached. The
+// index is incremented in either case.
+static inline int
+ndb_tags_iterate_next(struct ndb_iterator *iter) {
+	struct ndb_tag *cur = iter->tag;
+	uint32_t skip;
+
+	iter->index++;
+	if (iter->index >= iter->note->tags.count)
+		return 0;
+
+	// the next tag begins right after the current tag's elements
+	skip = cur->count * sizeof(cur->strs[0]);
+	iter->tag = (struct ndb_tag *)(cur->strs[0].bytes + skip);
+
+	return 1;
+}
+
+#endif
diff --git a/test.c b/test.c
@@ -0,0 +1,68 @@
+
+#include "nostrdb.h"
+
+#include <stdio.h>
+#include <assert.h>
+
+// Element count of a true array (not a pointer). The argument is
+// parenthesized so that expressions expand correctly.
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+// Build a note with content, id, pubkey, signature and two tags, finalize it,
+// check the tags through the iterator API, then write the serialized note to
+// stdout. Returns 0 on success, 1 if the note could not be written.
+int main(int argc, const char *argv[]) {
+	struct ndb_builder builder, *b = &builder;
+	struct ndb_note *note;
+	int len, ok;
+
+	(void)argc;
+	(void)argv;
+
+	unsigned char id[32];
+	memset(id, 1, 32);
+
+	unsigned char pubkey[32];
+	memset(pubkey, 2, 32);
+
+	unsigned char sig[64];
+	memset(sig, 3, 64);
+
+	const char *hex_pk = "5d9b81b2d4d5609c5565286fc3b511dc6b9a1b3d7d1174310c624d61d1f82bb9";
+	const char *tag[] = { "p", hex_pk };
+	const char *word_tag[] = { "word", "words", "w" };
+
+	// results go through `ok` so the calls still run when assert() is
+	// compiled out with NDEBUG
+	ok = ndb_builder_new(b, 0);
+	assert(ok);
+	note = builder.note;
+
+	memset(note->padding, 3, sizeof(note->padding));
+
+	ok = ndb_builder_set_content(b, "hello, world!");
+	assert(ok);
+	ndb_builder_set_id(b, id);
+	ndb_builder_set_pubkey(b, pubkey);
+	ndb_builder_set_signature(b, sig);
+	ok = ndb_builder_add_tag(b, tag, ARRAY_SIZE(tag));
+	assert(ok);
+	ok = ndb_builder_add_tag(b, word_tag, ARRAY_SIZE(word_tag));
+	assert(ok);
+
+	len = ndb_builder_finalize(b, &note);
+	assert(note);
+	assert(len > 0);
+
+	assert(note->tags.count == 2);
+	assert(!strcmp(ndb_note_string(note, &note->content), "hello, world!"));
+
+	// test iterator
+	struct ndb_iterator iter, *it = &iter;
+
+	ok = ndb_tags_iterate_start(note, it);
+	assert(ok);
+	assert(it->tag->count == 2);
+	const char *p = ndb_note_string(note, &it->tag->strs[0]);
+	const char *hpk = ndb_note_string(note, &it->tag->strs[1]);
+	assert(hpk);
+	assert(!ndb_str_is_packed(it->tag->strs[1]));
+	assert(!strcmp(hpk, hex_pk));
+	assert(!strcmp(p, "p"));
+
+	ok = ndb_tags_iterate_next(it);
+	assert(ok);
+	assert(it->tag->count == 3);
+	assert(!strcmp(ndb_note_string(note, &it->tag->strs[0]), "word"));
+	assert(!strcmp(ndb_note_string(note, &it->tag->strs[1]), "words"));
+	assert(!strcmp(ndb_note_string(note, &it->tag->strs[2]), "w"));
+
+	ok = ndb_tags_iterate_next(it);
+	assert(!ok);
+
+	if (fwrite(note, len, 1, stdout) != 1)
+		return 1;
+
+	return 0;
+}