nostrdb

an unfairly fast embedded nostr database backed by lmdb
git clone git://jb55.com/nostrdb
Log | Files | Refs | README | LICENSE

commit f56f983a5688c367bb1c8e9f5e6cbf5fe6ac3062
parent 625007b036ccb819d7e922a38f60ab47574ed742
Author: William Casarin <jb55@jb55.com>
Date:   Sat, 22 Jul 2023 07:38:34 -0700

Revert "switch to 2-byte string indices and remove packed string"

This reverts commit 62dc9eefc2b3180d3fc98b202dc7fc8456375887.

Diffstat:
M nostrdb.c | 40 +++++++++++++++++++++++++---------------
M nostrdb.h | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------
M test.c | 3 ++-
3 files changed, 84 insertions(+), 29 deletions(-)

diff --git a/nostrdb.c b/nostrdb.c @@ -4,7 +4,6 @@ #include "hex.h" #include "cursor.h" #include <stdlib.h> -#include <assert.h> struct ndb_json_parser { const char *json; @@ -17,7 +16,7 @@ struct ndb_json_parser { static inline int cursor_push_tag(struct cursor *cur, struct ndb_tag *tag) { - return cursor_push_u32(cur, tag->count); + return cursor_push_u16(cur, tag->count); } int ndb_builder_new(struct ndb_builder *builder, unsigned char *buf, @@ -123,19 +122,30 @@ struct ndb_note * ndb_builder_note(struct ndb_builder *builder) } int ndb_builder_make_string(struct ndb_builder *builder, const char *str, - int len, uint16_t *pstr) + int len, union packed_str *pstr) { - uint16_t loc; + uint32_t loc; + + if (len == 0) { + *pstr = ndb_char_to_packed_str(0); + return 1; + } else if (len == 1) { + *pstr = ndb_char_to_packed_str(str[0]); + return 1; + } else if (len == 2) { + *pstr = ndb_chars_to_packed_str(str[0], str[1]); + return 1; + } // find existing matching string to avoid duplicate strings - int indices = cursor_count(&builder->str_indices, sizeof(uint16_t)); + int indices = cursor_count(&builder->str_indices, sizeof(uint32_t)); for (int i = 0; i < indices; i++) { - uint16_t index = ((uint16_t*)builder->str_indices.start)[i]; - const char *some_str = ((const char*)builder->strings.start) + index; + uint32_t index = ((uint32_t*)builder->str_indices.start)[i]; + const char *some_str = (const char*)builder->strings.start + index; if (!strcmp(some_str, str)) { // found an existing matching str, use that index - *pstr = index; + *pstr = ndb_offset_str(index); return 1; } } @@ -146,11 +156,11 @@ int ndb_builder_make_string(struct ndb_builder *builder, const char *str, cursor_push_byte(&builder->strings, '\0'))) { return 0; } - *pstr = loc; + *pstr = ndb_offset_str(loc); // record in builder indices. 
ignore return value, if we can't cache it // then whatever - cursor_push_u16(&builder->str_indices, loc); + cursor_push_u32(&builder->str_indices, loc); return 1; } @@ -212,9 +222,9 @@ static inline int ndb_builder_process_json_tags(struct ndb_json_parser *p, return 1; for (int i = 0; i < array->size; i++) { - if (!ndb_builder_tag_from_json_array(p, &tag[i+1])) + if (!ndb_builder_tag_from_json_array(p, &tag[i+1])) return 0; - tag += tag[i+1].size; + tag += tag[i+1].size; } return 1; @@ -324,10 +334,10 @@ int ndb_builder_new_tag(struct ndb_builder *builder) inline int ndb_builder_push_tag_str(struct ndb_builder *builder, const char *str, int len) { - uint16_t str_ptr; - if (!ndb_builder_make_string(builder, str, len, &str_ptr)) + union packed_str pstr; + if (!ndb_builder_make_string(builder, str, len, &pstr)) return 0; - if (!cursor_push_u16(&builder->note_cur, str_ptr)) + if (!cursor_push_u32(&builder->note_cur, pstr.offset)) return 0; builder->current_tag->count++; return 1; diff --git a/nostrdb.h b/nostrdb.h @@ -2,22 +2,31 @@ #define NOSTRDB_H #include <inttypes.h> -#include <assert.h> #include "cursor.h" // these must be byte-aligned, they are directly accessing the serialized data // representation #pragma pack(push, 1) +union packed_str { + uint32_t offset; + + struct { + char str[3]; + // we assume little endian everywhere. sorry not sorry. 
+ unsigned char flag; + } packed; + + unsigned char bytes[4]; +}; + struct ndb_tag { uint16_t count; - uint16_t padding; - uint16_t strs[0]; + union packed_str strs[0]; }; struct ndb_tags { uint16_t count; - uint16_t padding; struct ndb_tag tag[0]; }; @@ -31,8 +40,7 @@ struct ndb_note { uint32_t created_at; uint32_t kind; - uint16_t content; - uint16_t content_padding; + union packed_str content; uint32_t strings; uint32_t json; @@ -71,15 +79,24 @@ int ndb_builder_new_tag(struct ndb_builder *builder); int ndb_builder_push_tag_str(struct ndb_builder *builder, const char *str, int len); // BYE BUILDER -static inline const char * ndb_note_str(struct ndb_note *note, uint16_t str) +static inline int ndb_str_is_packed(union packed_str str) { - return ((const char *)note) + note->strings + str; + return (str.offset >> 31) & 0x1; +} + +static inline const char * ndb_note_str(struct ndb_note *note, + union packed_str *str) +{ + if (ndb_str_is_packed(*str)) + return str->packed.str; + + return ((const char *)note) + note->strings + str->offset; } static inline const char * ndb_tag_str(struct ndb_note *note, struct ndb_tag *tag, int ind) { - return ndb_note_str(note, tag->strs[ind]); + return ndb_note_str(note, &tag->strs[ind]); } static inline int ndb_tag_matches_char(struct ndb_note *note, @@ -121,7 +138,7 @@ static inline uint32_t ndb_note_created_at(struct ndb_note *note) static inline const char * ndb_note_content(struct ndb_note *note) { - return ndb_note_str(note, note->content); + return ndb_note_str(note, &note->content); } static inline struct ndb_note * ndb_note_from_bytes(unsigned char *bytes) @@ -132,6 +149,33 @@ static inline struct ndb_note * ndb_note_from_bytes(unsigned char *bytes) return note; } +static inline union packed_str ndb_offset_str(uint32_t offset) +{ + // ensure accidents like -1 don't corrupt our packed_str + union packed_str str; + str.offset = offset & 0x7FFFFFFF; + return str; +} + +static inline union packed_str ndb_char_to_packed_str(char c) 
+{ + union packed_str str; + str.packed.flag = 0xFF; + str.packed.str[0] = c; + str.packed.str[1] = '\0'; + return str; +} + +static inline union packed_str ndb_chars_to_packed_str(char c1, char c2) +{ + union packed_str str; + str.packed.flag = 0xFF; + str.packed.str[0] = c1; + str.packed.str[1] = c2; + str.packed.str[2] = '\0'; + return str; +} + static inline const char * ndb_note_tag_index(struct ndb_note *note, struct ndb_tag *tag, int index) { @@ -139,7 +183,7 @@ static inline const char * ndb_note_tag_index(struct ndb_note *note, return 0; } - return ndb_note_str(note, tag->strs[index]); + return ndb_note_str(note, &tag->strs[index]); } static inline int ndb_tags_iterate_start(struct ndb_note *note, @@ -157,8 +201,8 @@ static inline int ndb_tags_iterate_next(struct ndb_iterator *iter) struct ndb_tags *tags = &iter->note->tags; if (++iter->index < tags->count) { - uint16_t tag_data_size = iter->tag->count * sizeof(iter->tag->strs[0]); - iter->tag = (struct ndb_tag *)(((unsigned char *)&iter->tag->strs[0]) + tag_data_size); + uint32_t tag_data_size = iter->tag->count * sizeof(iter->tag->strs[0]); + iter->tag = (struct ndb_tag *)(iter->tag->strs[0].bytes + tag_data_size); return 1; } diff --git a/test.c b/test.c @@ -61,8 +61,9 @@ static void test_basic_event() { const char *p = ndb_iter_tag_str(it, 0); const char *hpk = ndb_iter_tag_str(it, 1); assert(hpk); - assert(!strcmp(p, "p")); + assert(!ndb_str_is_packed(it->tag->strs[1])); assert(!strcmp(hpk, hex_pk)); + assert(!strcmp(p, "p")); ok = ndb_tags_iterate_next(it); assert(ok);