nostrdb

an unfairly fast embedded nostr database backed by lmdb
git clone git://jb55.com/nostrdb
Log | Files | Refs | README | LICENSE

commit 62dc9eefc2b3180d3fc98b202dc7fc8456375887
parent b3102474fb73de8e9843d0bcd58db28d220a1b86
Author: William Casarin <jb55@jb55.com>
Date:   Sat, 22 Jul 2023 07:01:58 -0700

switch to 2-byte string indices and remove packed string

Diffstat:
M nostrdb.c | 40 +++++++++++++++-------------------------
M nostrdb.h | 70 +++++++++++++---------------------------------------------------------
M test.c | 3 +--
3 files changed, 29 insertions(+), 84 deletions(-)

diff --git a/nostrdb.c b/nostrdb.c @@ -4,6 +4,7 @@ #include "hex.h" #include "cursor.h" #include <stdlib.h> +#include <assert.h> struct ndb_json_parser { const char *json; @@ -16,7 +17,7 @@ struct ndb_json_parser { static inline int cursor_push_tag(struct cursor *cur, struct ndb_tag *tag) { - return cursor_push_u16(cur, tag->count); + return cursor_push_u32(cur, tag->count); } int ndb_builder_new(struct ndb_builder *builder, unsigned char *buf, @@ -122,30 +123,19 @@ struct ndb_note * ndb_builder_note(struct ndb_builder *builder) } int ndb_builder_make_string(struct ndb_builder *builder, const char *str, - int len, union packed_str *pstr) + int len, uint16_t *pstr) { - uint32_t loc; - - if (len == 0) { - *pstr = ndb_char_to_packed_str(0); - return 1; - } else if (len == 1) { - *pstr = ndb_char_to_packed_str(str[0]); - return 1; - } else if (len == 2) { - *pstr = ndb_chars_to_packed_str(str[0], str[1]); - return 1; - } + uint16_t loc; // find existing matching string to avoid duplicate strings - int indices = cursor_count(&builder->str_indices, sizeof(uint32_t)); + int indices = cursor_count(&builder->str_indices, sizeof(uint16_t)); for (int i = 0; i < indices; i++) { - uint32_t index = ((uint32_t*)builder->str_indices.start)[i]; - const char *some_str = (const char*)builder->strings.start + index; + uint16_t index = ((uint16_t*)builder->str_indices.start)[i]; + const char *some_str = ((const char*)builder->strings.start) + index; if (!strcmp(some_str, str)) { // found an existing matching str, use that index - *pstr = ndb_offset_str(index); + *pstr = index; return 1; } } @@ -156,11 +146,11 @@ int ndb_builder_make_string(struct ndb_builder *builder, const char *str, cursor_push_byte(&builder->strings, '\0'))) { return 0; } - *pstr = ndb_offset_str(loc); + *pstr = loc; // record in builder indices. 
ignore return value, if we can't cache it // then whatever - cursor_push_u32(&builder->str_indices, loc); + cursor_push_u16(&builder->str_indices, loc); return 1; } @@ -222,9 +212,9 @@ static inline int ndb_builder_process_json_tags(struct ndb_json_parser *p, return 1; for (int i = 0; i < array->size; i++) { - if (!ndb_builder_tag_from_json_array(p, &tag[i+1])) + if (!ndb_builder_tag_from_json_array(p, &tag[i+1])) return 0; - tag += tag[i+1].size; + tag += tag[i+1].size; } return 1; @@ -334,10 +324,10 @@ int ndb_builder_new_tag(struct ndb_builder *builder) inline int ndb_builder_push_tag_str(struct ndb_builder *builder, const char *str, int len) { - union packed_str pstr; - if (!ndb_builder_make_string(builder, str, len, &pstr)) + uint16_t str_ptr; + if (!ndb_builder_make_string(builder, str, len, &str_ptr)) return 0; - if (!cursor_push_u32(&builder->note_cur, pstr.offset)) + if (!cursor_push_u16(&builder->note_cur, str_ptr)) return 0; builder->current_tag->count++; return 1; diff --git a/nostrdb.h b/nostrdb.h @@ -2,31 +2,22 @@ #define NOSTRDB_H #include <inttypes.h> +#include <assert.h> #include "cursor.h" // these must be byte-aligned, they are directly accessing the serialized data // representation #pragma pack(push, 1) -union packed_str { - uint32_t offset; - - struct { - char str[3]; - // we assume little endian everywhere. sorry not sorry. 
- unsigned char flag; - } packed; - - unsigned char bytes[4]; -}; - struct ndb_tag { uint16_t count; - union packed_str strs[0]; + uint16_t padding; + uint16_t strs[0]; }; struct ndb_tags { uint16_t count; + uint16_t padding; struct ndb_tag tag[0]; }; @@ -40,7 +31,8 @@ struct ndb_note { uint32_t created_at; uint32_t kind; - union packed_str content; + uint16_t content; + uint16_t content_padding; uint32_t strings; uint32_t json; @@ -79,24 +71,15 @@ int ndb_builder_new_tag(struct ndb_builder *builder); int ndb_builder_push_tag_str(struct ndb_builder *builder, const char *str, int len); // BYE BUILDER -static inline int ndb_str_is_packed(union packed_str str) +static inline const char * ndb_note_str(struct ndb_note *note, uint16_t str) { - return (str.offset >> 31) & 0x1; -} - -static inline const char * ndb_note_str(struct ndb_note *note, - union packed_str *str) -{ - if (ndb_str_is_packed(*str)) - return str->packed.str; - - return ((const char *)note) + note->strings + str->offset; + return ((const char *)note) + note->strings + str; } static inline const char * ndb_tag_str(struct ndb_note *note, struct ndb_tag *tag, int ind) { - return ndb_note_str(note, &tag->strs[ind]); + return ndb_note_str(note, tag->strs[ind]); } static inline int ndb_tag_matches_char(struct ndb_note *note, @@ -138,7 +121,7 @@ static inline uint32_t ndb_note_created_at(struct ndb_note *note) static inline const char * ndb_note_content(struct ndb_note *note) { - return ndb_note_str(note, &note->content); + return ndb_note_str(note, note->content); } static inline struct ndb_note * ndb_note_from_bytes(unsigned char *bytes) @@ -149,33 +132,6 @@ static inline struct ndb_note * ndb_note_from_bytes(unsigned char *bytes) return note; } -static inline union packed_str ndb_offset_str(uint32_t offset) -{ - // ensure accidents like -1 don't corrupt our packed_str - union packed_str str; - str.offset = offset & 0x7FFFFFFF; - return str; -} - -static inline union packed_str ndb_char_to_packed_str(char c) 
-{ - union packed_str str; - str.packed.flag = 0xFF; - str.packed.str[0] = c; - str.packed.str[1] = '\0'; - return str; -} - -static inline union packed_str ndb_chars_to_packed_str(char c1, char c2) -{ - union packed_str str; - str.packed.flag = 0xFF; - str.packed.str[0] = c1; - str.packed.str[1] = c2; - str.packed.str[2] = '\0'; - return str; -} - static inline const char * ndb_note_tag_index(struct ndb_note *note, struct ndb_tag *tag, int index) { @@ -183,7 +139,7 @@ static inline const char * ndb_note_tag_index(struct ndb_note *note, return 0; } - return ndb_note_str(note, &tag->strs[index]); + return ndb_note_str(note, tag->strs[index]); } static inline int ndb_tags_iterate_start(struct ndb_note *note, @@ -201,8 +157,8 @@ static inline int ndb_tags_iterate_next(struct ndb_iterator *iter) struct ndb_tags *tags = &iter->note->tags; if (++iter->index < tags->count) { - uint32_t tag_data_size = iter->tag->count * sizeof(iter->tag->strs[0]); - iter->tag = (struct ndb_tag *)(iter->tag->strs[0].bytes + tag_data_size); + uint16_t tag_data_size = iter->tag->count * sizeof(iter->tag->strs[0]); + iter->tag = (struct ndb_tag *)(((unsigned char *)&iter->tag->strs[0]) + tag_data_size); return 1; } diff --git a/test.c b/test.c @@ -60,9 +60,8 @@ static void test_basic_event() { const char *p = ndb_iter_tag_str(it, 0); const char *hpk = ndb_iter_tag_str(it, 1); assert(hpk); - assert(!ndb_str_is_packed(it->tag->strs[1])); - assert(!strcmp(hpk, hex_pk)); assert(!strcmp(p, "p")); + assert(!strcmp(hpk, hex_pk)); ok = ndb_tags_iterate_next(it); assert(ok);