nostrdb

an unfairly fast embedded nostr database backed by lmdb
git clone git://jb55.com/nostrdb
Log | Files | Refs | README | LICENSE

commit 85fde454dccfbfaa13d3a38330986c5029acc87d
parent 662795889f04f7860737e1b38cc2ea0afed00b76
Author: William Casarin <jb55@jb55.com>
Date:   Sat, 22 Jul 2023 10:52:39 -0700

pack pubkeys and id strings

cuts storage requirements in half

Diffstat:
M nostrdb.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------
M nostrdb.h | 88 ++++++++++++++++++++++++++++++++++++++++---------------------------------------
M test.c | 42 ++++++++++++++++++++++--------------------
3 files changed, 131 insertions(+), 98 deletions(-)

diff --git a/nostrdb.c b/nostrdb.c @@ -57,24 +57,6 @@ int ndb_builder_new(struct ndb_builder *builder, unsigned char *buf, return 1; } -/// Check for small strings to pack -static inline int ndb_builder_try_compact_str(struct ndb_builder *builder, - const char *str, int len, - union packed_str *pstr) -{ - if (len == 0) { - *pstr = ndb_char_to_packed_str(0); - return 1; - } else if (len == 1) { - *pstr = ndb_char_to_packed_str(str[0]); - return 1; - } else if (len == 2) { - *pstr = ndb_chars_to_packed_str(str[0], str[1]); - return 1; - } - - return 0; -} static inline int ndb_json_parser_init(struct ndb_json_parser *p, @@ -146,7 +128,7 @@ struct ndb_note * ndb_builder_note(struct ndb_builder *builder) /// builder phase just for this purpose. static inline int ndb_builder_find_str(struct ndb_builder *builder, const char *str, int len, - union packed_str *pstr) + union ndb_packed_str *pstr) { // find existing matching string to avoid duplicate strings int indices = cursor_count(&builder->str_indices, sizeof(uint32_t)); @@ -165,7 +147,7 @@ static inline int ndb_builder_find_str(struct ndb_builder *builder, } static int ndb_builder_push_str(struct ndb_builder *builder, const char *str, - int len, union packed_str *pstr) + int len, union ndb_packed_str *pstr) { uint32_t loc; @@ -185,9 +167,52 @@ static int ndb_builder_push_str(struct ndb_builder *builder, const char *str, return 1; } +static int ndb_builder_push_packed_id(struct ndb_builder *builder, + unsigned char *id, + union ndb_packed_str *pstr) +{ + if (ndb_builder_find_str(builder, (const char*)id, 32, pstr)) { + pstr->packed.flag = NDB_PACKED_ID; + return 1; + } + + if (ndb_builder_push_str(builder, (const char*)id, 32, pstr)) { + pstr->packed.flag = NDB_PACKED_ID; + return 1; + } + + return 0; +} + + +/// Check for small strings to pack +static inline int ndb_builder_try_compact_str(struct ndb_builder *builder, + const char *str, int len, + union ndb_packed_str *pstr, + int pack_ids) +{ + unsigned char 
id_buf[32]; + + if (len == 0) { + *pstr = ndb_char_to_packed_str(0); + return 1; + } else if (len == 1) { + *pstr = ndb_char_to_packed_str(str[0]); + return 1; + } else if (len == 2) { + *pstr = ndb_chars_to_packed_str(str[0], str[1]); + return 1; + } else if (pack_ids && len == 64 && hex_decode(str, 64, id_buf, 32)) { + return ndb_builder_push_packed_id(builder, id_buf, pstr); + } + + return 0; +} + + static int ndb_builder_push_unpacked_str(struct ndb_builder *builder, const char *str, int len, - union packed_str *pstr) + union ndb_packed_str *pstr) { if (ndb_builder_find_str(builder, str, len, pstr)) return 1; @@ -196,9 +221,9 @@ static int ndb_builder_push_unpacked_str(struct ndb_builder *builder, } int ndb_builder_make_str(struct ndb_builder *builder, const char *str, int len, - union packed_str *pstr) + union ndb_packed_str *pstr, int pack_ids) { - if (ndb_builder_try_compact_str(builder, str, len, pstr)) + if (ndb_builder_try_compact_str(builder, str, len, pstr, pack_ids)) return 1; return ndb_builder_push_unpacked_str(builder, str, len, pstr); @@ -207,8 +232,10 @@ int ndb_builder_make_str(struct ndb_builder *builder, const char *str, int len, int ndb_builder_set_content(struct ndb_builder *builder, const char *content, int len) { + int pack_ids = 0; builder->note->content_length = len; - return ndb_builder_make_str(builder, content, len, &builder->note->content); + return ndb_builder_make_str(builder, content, len, + &builder->note->content, pack_ids); } @@ -228,7 +255,7 @@ static inline int toksize(jsmntok_t *tok) } static int ndb_builder_finalize_tag(struct ndb_builder *builder, - union packed_str offset) + union ndb_packed_str offset) { if (!cursor_push_u32(&builder->note_cur, offset.offset)) return 0; @@ -239,8 +266,8 @@ static int ndb_builder_finalize_tag(struct ndb_builder *builder, /// Unescape and push json strings static int ndb_builder_make_json_str(struct ndb_builder *builder, const char *str, int len, - union packed_str *pstr, - int *written) + 
union ndb_packed_str *pstr, + int *written, int pack_ids) { // let's not care about de-duping these. we should just unescape // in-place directly into the strings table. @@ -249,7 +276,7 @@ static int ndb_builder_make_json_str(struct ndb_builder *builder, unsigned char *builder_start; // always try compact strings first - if (ndb_builder_try_compact_str(builder, str, len, pstr)) + if (ndb_builder_try_compact_str(builder, str, len, pstr, pack_ids)) return 1; end = str + len; @@ -327,8 +354,9 @@ static int ndb_builder_make_json_str(struct ndb_builder *builder, static int ndb_builder_push_json_tag(struct ndb_builder *builder, const char *str, int len) { - union packed_str pstr; - if (!ndb_builder_make_json_str(builder, str, len, &pstr, NULL)) + union ndb_packed_str pstr; + int pack_ids = 1; + if (!ndb_builder_make_json_str(builder, str, len, &pstr, NULL, pack_ids)) return 0; return ndb_builder_finalize_tag(builder, pstr); } @@ -474,13 +502,13 @@ int ndb_note_from_json(const char *json, int len, struct ndb_note **note, } else if (jsoneq(json, tok, tok_len, "content")) { // content tok = &parser.toks[i+1]; - union packed_str pstr; + union ndb_packed_str pstr; tok_len = toksize(tok); - int written; + int written, pack_ids = 0; if (!ndb_builder_make_json_str(&parser.builder, json + tok->start, tok_len, &pstr, - &written)) { + &written, pack_ids)) { return 0; } parser.builder.note->content_length = written; @@ -531,8 +559,9 @@ int ndb_builder_new_tag(struct ndb_builder *builder) inline int ndb_builder_push_tag_str(struct ndb_builder *builder, const char *str, int len) { - union packed_str pstr; - if (!ndb_builder_make_str(builder, str, len, &pstr)) + union ndb_packed_str pstr; + int pack_ids = 1; + if (!ndb_builder_make_str(builder, str, len, &pstr, pack_ids)) return 0; return ndb_builder_finalize_tag(builder, pstr); } diff --git a/nostrdb.h b/nostrdb.h @@ -4,25 +4,36 @@ #include <inttypes.h> #include "cursor.h" +struct ndb_str { + unsigned char flag; + union { + const 
char *str; + unsigned char *id; + }; +}; + // these must be byte-aligned, they are directly accessing the serialized data // representation #pragma pack(push, 1) -union packed_str { - uint32_t offset; +/// We can store byte data in the string table, so +#define NDB_PACKED_STR 0x1 +#define NDB_PACKED_ID 0x2 +union ndb_packed_str { struct { char str[3]; // we assume little endian everywhere. sorry not sorry. - unsigned char flag; + unsigned char flag; // NDB_PACKED_STR, etc } packed; + uint32_t offset; unsigned char bytes[4]; }; struct ndb_tag { uint16_t count; - union packed_str strs[0]; + union ndb_packed_str strs[0]; }; struct ndb_tags { @@ -41,7 +52,7 @@ struct ndb_note { uint32_t created_at; uint32_t kind; uint32_t content_length; - union packed_str content; + union ndb_packed_str content; uint32_t strings; uint32_t json; @@ -80,23 +91,23 @@ int ndb_builder_new_tag(struct ndb_builder *builder); int ndb_builder_push_tag_str(struct ndb_builder *builder, const char *str, int len); // BYE BUILDER -static inline int ndb_str_is_packed(union packed_str str) +static inline struct ndb_str ndb_note_str(struct ndb_note *note, + union ndb_packed_str *pstr) { - return (str.offset >> 31) & 0x1; -} - + struct ndb_str str; + str.flag = pstr->packed.flag; -static inline const char * ndb_note_str(struct ndb_note *note, - union packed_str *str) -{ - if (ndb_str_is_packed(*str)) - return str->packed.str; + if (str.flag == NDB_PACKED_STR) { + str.str = pstr->packed.str; + return str; + } - return ((const char *)note) + note->strings + str->offset; + str.str = ((const char *)note) + note->strings + (pstr->offset & 0xFFFFFF); + return str; } -static inline const char * ndb_tag_str(struct ndb_note *note, - struct ndb_tag *tag, int ind) +static inline struct ndb_str ndb_tag_str(struct ndb_note *note, + struct ndb_tag *tag, int ind) { return ndb_note_str(note, &tag->strs[ind]); } @@ -104,16 +115,16 @@ static inline const char * ndb_tag_str(struct ndb_note *note, static inline int 
ndb_tag_matches_char(struct ndb_note *note, struct ndb_tag *tag, int ind, char c) { - const char *str = ndb_tag_str(note, tag, ind); - if (str[0] == '\0') + struct ndb_str str = ndb_tag_str(note, tag, ind); + if (str.str[0] == '\0') return 0; - else if (str[0] == c) + else if (str.str[0] == c) return 1; return 0; } -static inline const char * ndb_iter_tag_str(struct ndb_iterator *iter, - int ind) +static inline struct ndb_str ndb_iter_tag_str(struct ndb_iterator *iter, + int ind) { return ndb_tag_str(iter->note, iter->tag, ind); } @@ -143,9 +154,9 @@ static inline uint32_t ndb_note_kind(struct ndb_note *note) return note->kind; } -static inline const char * ndb_note_content(struct ndb_note *note) +static inline const char *ndb_note_content(struct ndb_note *note) { - return ndb_note_str(note, &note->content); + return ndb_note_str(note, &note->content).str; } static inline uint32_t ndb_note_content_length(struct ndb_note *note) @@ -161,43 +172,34 @@ static inline struct ndb_note * ndb_note_from_bytes(unsigned char *bytes) return note; } -static inline union packed_str ndb_offset_str(uint32_t offset) +static inline union ndb_packed_str ndb_offset_str(uint32_t offset) { // ensure accidents like -1 don't corrupt our packed_str - union packed_str str; - str.offset = offset & 0x7FFFFFFF; + union ndb_packed_str str; + // most significant byte is reserved for ndb_packtype + str.offset = offset & 0xFFFFFF; return str; } -static inline union packed_str ndb_char_to_packed_str(char c) +static inline union ndb_packed_str ndb_char_to_packed_str(char c) { - union packed_str str; - str.packed.flag = 0xFF; + union ndb_packed_str str; + str.packed.flag = NDB_PACKED_STR; str.packed.str[0] = c; str.packed.str[1] = '\0'; return str; } -static inline union packed_str ndb_chars_to_packed_str(char c1, char c2) +static inline union ndb_packed_str ndb_chars_to_packed_str(char c1, char c2) { - union packed_str str; - str.packed.flag = 0xFF; + union ndb_packed_str str; + str.packed.flag = 
NDB_PACKED_STR; str.packed.str[0] = c1; str.packed.str[1] = c2; str.packed.str[2] = '\0'; return str; } -static inline const char * ndb_note_tag_index(struct ndb_note *note, - struct ndb_tag *tag, int index) -{ - if (index >= tag->count) { - return 0; - } - - return ndb_note_str(note, &tag->strs[index]); -} - static inline int ndb_tags_iterate_start(struct ndb_note *note, struct ndb_iterator *iter) { diff --git a/test.c b/test.c @@ -31,9 +31,7 @@ static void test_basic_event() { memset(note->padding, 3, sizeof(note->padding)); - const char *content = "hello, world!"; - - ok = ndb_builder_set_content(b, content, strlen(content)); assert(ok); + ok = ndb_builder_set_content(b, hex_pk, strlen(hex_pk)); assert(ok); ndb_builder_set_id(b, id); assert(ok); ndb_builder_set_pubkey(b, pubkey); assert(ok); ndb_builder_set_signature(b, sig); assert(ok); @@ -50,6 +48,8 @@ static void test_basic_event() { ok = ndb_builder_finalize(b, &note); assert(ok); + // content should never be packed id + assert(note->content.packed.flag != NDB_PACKED_ID); assert(note->tags.count == 2); // test iterator @@ -58,19 +58,21 @@ static void test_basic_event() { ok = ndb_tags_iterate_start(note, it); assert(ok); assert(it->tag->count == 2); - const char *p = ndb_iter_tag_str(it, 0); - const char *hpk = ndb_iter_tag_str(it, 1); - assert(hpk); - assert(!ndb_str_is_packed(it->tag->strs[1])); - assert(!strcmp(hpk, hex_pk)); + const char *p = ndb_iter_tag_str(it, 0).str; + struct ndb_str hpk = ndb_iter_tag_str(it, 1); + + hex_decode(hex_pk, 64, id, 32); + + assert(hpk.flag == NDB_PACKED_ID); + assert(memcmp(hpk.id, id, 32) == 0); assert(!strcmp(p, "p")); ok = ndb_tags_iterate_next(it); assert(ok); assert(it->tag->count == 3); - assert(!strcmp(ndb_iter_tag_str(it, 0), "word")); - assert(!strcmp(ndb_iter_tag_str(it, 1), "words")); - assert(!strcmp(ndb_iter_tag_str(it, 2), "w")); + assert(!strcmp(ndb_iter_tag_str(it, 0).str, "word")); + assert(!strcmp(ndb_iter_tag_str(it, 1).str, "words")); + 
assert(!strcmp(ndb_iter_tag_str(it, 2).str, "w")); ok = ndb_tags_iterate_next(it); assert(!ok); @@ -108,7 +110,7 @@ static void test_parse_contact_list() size = ndb_note_from_json((const char*)json, written, &note, buf, alloc_size); printf("ndb_note_from_json size %d\n", size); assert(size > 0); - assert(size == 59062); + assert(size == 34062); const char* expected_content = "{\"wss://nos.lol\":{\"write\":true,\"read\":true}," @@ -137,7 +139,7 @@ static void test_parse_contact_list() } static void test_parse_json() { - char hex_id[65] = {0}; + char hex_id[32] = {0}; unsigned char buffer[1024]; struct ndb_note *note; #define HEX_ID "5004a081e397c6da9dc2f2d6b3134006a9d0e8c1b46689d9fe150bb2f21a204d" @@ -152,24 +154,24 @@ static void test_parse_json() { const char *content = ndb_note_content(note); unsigned char *id = ndb_note_id(note); - hex_encode(id, 32, hex_id, sizeof(hex_id)); + hex_decode(HEX_ID, 64, hex_id, sizeof(hex_id)); assert(!strcmp(content, "共通語")); - assert(!strcmp(HEX_ID, hex_id)); + assert(!memcmp(id, hex_id, 32)); assert(note->tags.count == 2); struct ndb_iterator iter, *it = &iter; ok = ndb_tags_iterate_start(note, it); assert(ok); assert(it->tag->count == 2); - assert(!strcmp(ndb_iter_tag_str(it, 0), "p")); - assert(!strcmp(ndb_iter_tag_str(it, 1), HEX_ID)); + assert(!strcmp(ndb_iter_tag_str(it, 0).str, "p")); + assert(!memcmp(ndb_iter_tag_str(it, 1).id, hex_id, 32)); ok = ndb_tags_iterate_next(it); assert(ok); assert(it->tag->count == 3); - assert(!strcmp(ndb_iter_tag_str(it, 0), "word")); - assert(!strcmp(ndb_iter_tag_str(it, 1), "words")); - assert(!strcmp(ndb_iter_tag_str(it, 2), "w")); + assert(!strcmp(ndb_iter_tag_str(it, 0).str, "word")); + assert(!strcmp(ndb_iter_tag_str(it, 1).str, "words")); + assert(!strcmp(ndb_iter_tag_str(it, 2).str, "w")); } int main(int argc, const char *argv[]) {