commit 85fde454dccfbfaa13d3a38330986c5029acc87d
parent 662795889f04f7860737e1b38cc2ea0afed00b76
Author: William Casarin <jb55@jb55.com>
Date: Sat, 22 Jul 2023 10:52:39 -0700
pack pubkeys and id strings
cuts storage requirements in half
Diffstat:
M | nostrdb.c | | | 99 | +++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------- |
M | nostrdb.h | | | 88 | ++++++++++++++++++++++++++++++++++++++++--------------------------------------- |
M | test.c | | | 42 | ++++++++++++++++++++++-------------------- |
3 files changed, 131 insertions(+), 98 deletions(-)
diff --git a/nostrdb.c b/nostrdb.c
@@ -57,24 +57,6 @@ int ndb_builder_new(struct ndb_builder *builder, unsigned char *buf,
return 1;
}
-/// Check for small strings to pack
-static inline int ndb_builder_try_compact_str(struct ndb_builder *builder,
- const char *str, int len,
- union packed_str *pstr)
-{
- if (len == 0) {
- *pstr = ndb_char_to_packed_str(0);
- return 1;
- } else if (len == 1) {
- *pstr = ndb_char_to_packed_str(str[0]);
- return 1;
- } else if (len == 2) {
- *pstr = ndb_chars_to_packed_str(str[0], str[1]);
- return 1;
- }
-
- return 0;
-}
static inline int ndb_json_parser_init(struct ndb_json_parser *p,
@@ -146,7 +128,7 @@ struct ndb_note * ndb_builder_note(struct ndb_builder *builder)
/// builder phase just for this purpose.
static inline int ndb_builder_find_str(struct ndb_builder *builder,
const char *str, int len,
- union packed_str *pstr)
+ union ndb_packed_str *pstr)
{
// find existing matching string to avoid duplicate strings
int indices = cursor_count(&builder->str_indices, sizeof(uint32_t));
@@ -165,7 +147,7 @@ static inline int ndb_builder_find_str(struct ndb_builder *builder,
}
static int ndb_builder_push_str(struct ndb_builder *builder, const char *str,
- int len, union packed_str *pstr)
+ int len, union ndb_packed_str *pstr)
{
uint32_t loc;
@@ -185,9 +167,52 @@ static int ndb_builder_push_str(struct ndb_builder *builder, const char *str,
return 1;
}
+static int ndb_builder_push_packed_id(struct ndb_builder *builder,
+ unsigned char *id,
+ union ndb_packed_str *pstr)
+{
+ if (ndb_builder_find_str(builder, (const char*)id, 32, pstr)) {
+ pstr->packed.flag = NDB_PACKED_ID;
+ return 1;
+ }
+
+ if (ndb_builder_push_str(builder, (const char*)id, 32, pstr)) {
+ pstr->packed.flag = NDB_PACKED_ID;
+ return 1;
+ }
+
+ return 0;
+}
+
+
+/// Check for small strings to pack
+static inline int ndb_builder_try_compact_str(struct ndb_builder *builder,
+ const char *str, int len,
+ union ndb_packed_str *pstr,
+ int pack_ids)
+{
+ unsigned char id_buf[32];
+
+ if (len == 0) {
+ *pstr = ndb_char_to_packed_str(0);
+ return 1;
+ } else if (len == 1) {
+ *pstr = ndb_char_to_packed_str(str[0]);
+ return 1;
+ } else if (len == 2) {
+ *pstr = ndb_chars_to_packed_str(str[0], str[1]);
+ return 1;
+ } else if (pack_ids && len == 64 && hex_decode(str, 64, id_buf, 32)) {
+ return ndb_builder_push_packed_id(builder, id_buf, pstr);
+ }
+
+ return 0;
+}
+
+
static int ndb_builder_push_unpacked_str(struct ndb_builder *builder,
const char *str, int len,
- union packed_str *pstr)
+ union ndb_packed_str *pstr)
{
if (ndb_builder_find_str(builder, str, len, pstr))
return 1;
@@ -196,9 +221,9 @@ static int ndb_builder_push_unpacked_str(struct ndb_builder *builder,
}
int ndb_builder_make_str(struct ndb_builder *builder, const char *str, int len,
- union packed_str *pstr)
+ union ndb_packed_str *pstr, int pack_ids)
{
- if (ndb_builder_try_compact_str(builder, str, len, pstr))
+ if (ndb_builder_try_compact_str(builder, str, len, pstr, pack_ids))
return 1;
return ndb_builder_push_unpacked_str(builder, str, len, pstr);
@@ -207,8 +232,10 @@ int ndb_builder_make_str(struct ndb_builder *builder, const char *str, int len,
int ndb_builder_set_content(struct ndb_builder *builder, const char *content,
int len)
{
+ int pack_ids = 0;
builder->note->content_length = len;
- return ndb_builder_make_str(builder, content, len, &builder->note->content);
+ return ndb_builder_make_str(builder, content, len,
+ &builder->note->content, pack_ids);
}
@@ -228,7 +255,7 @@ static inline int toksize(jsmntok_t *tok)
}
static int ndb_builder_finalize_tag(struct ndb_builder *builder,
- union packed_str offset)
+ union ndb_packed_str offset)
{
if (!cursor_push_u32(&builder->note_cur, offset.offset))
return 0;
@@ -239,8 +266,8 @@ static int ndb_builder_finalize_tag(struct ndb_builder *builder,
/// Unescape and push json strings
static int ndb_builder_make_json_str(struct ndb_builder *builder,
const char *str, int len,
- union packed_str *pstr,
- int *written)
+ union ndb_packed_str *pstr,
+ int *written, int pack_ids)
{
// let's not care about de-duping these. we should just unescape
// in-place directly into the strings table.
@@ -249,7 +276,7 @@ static int ndb_builder_make_json_str(struct ndb_builder *builder,
unsigned char *builder_start;
// always try compact strings first
- if (ndb_builder_try_compact_str(builder, str, len, pstr))
+ if (ndb_builder_try_compact_str(builder, str, len, pstr, pack_ids))
return 1;
end = str + len;
@@ -327,8 +354,9 @@ static int ndb_builder_make_json_str(struct ndb_builder *builder,
static int ndb_builder_push_json_tag(struct ndb_builder *builder,
const char *str, int len)
{
- union packed_str pstr;
- if (!ndb_builder_make_json_str(builder, str, len, &pstr, NULL))
+ union ndb_packed_str pstr;
+ int pack_ids = 1;
+ if (!ndb_builder_make_json_str(builder, str, len, &pstr, NULL, pack_ids))
return 0;
return ndb_builder_finalize_tag(builder, pstr);
}
@@ -474,13 +502,13 @@ int ndb_note_from_json(const char *json, int len, struct ndb_note **note,
} else if (jsoneq(json, tok, tok_len, "content")) {
// content
tok = &parser.toks[i+1];
- union packed_str pstr;
+ union ndb_packed_str pstr;
tok_len = toksize(tok);
- int written;
+ int written, pack_ids = 0;
if (!ndb_builder_make_json_str(&parser.builder,
json + tok->start,
tok_len, &pstr,
- &written)) {
+ &written, pack_ids)) {
return 0;
}
parser.builder.note->content_length = written;
@@ -531,8 +559,9 @@ int ndb_builder_new_tag(struct ndb_builder *builder)
inline int ndb_builder_push_tag_str(struct ndb_builder *builder,
const char *str, int len)
{
- union packed_str pstr;
- if (!ndb_builder_make_str(builder, str, len, &pstr))
+ union ndb_packed_str pstr;
+ int pack_ids = 1;
+ if (!ndb_builder_make_str(builder, str, len, &pstr, pack_ids))
return 0;
return ndb_builder_finalize_tag(builder, pstr);
}
diff --git a/nostrdb.h b/nostrdb.h
@@ -4,25 +4,36 @@
#include <inttypes.h>
#include "cursor.h"
+struct ndb_str {
+ unsigned char flag;
+ union {
+ const char *str;
+ unsigned char *id;
+ };
+};
+
// these must be byte-aligned, they are directly accessing the serialized data
// representation
#pragma pack(push, 1)
-union packed_str {
- uint32_t offset;
+/// We can store byte data in the string table, so the flag byte records whether an entry is an inline packed string or an offset to a raw 32-byte id
+#define NDB_PACKED_STR 0x1
+#define NDB_PACKED_ID 0x2
+union ndb_packed_str {
struct {
char str[3];
// we assume little endian everywhere. sorry not sorry.
- unsigned char flag;
+ unsigned char flag; // NDB_PACKED_STR, etc
} packed;
+ uint32_t offset;
unsigned char bytes[4];
};
struct ndb_tag {
uint16_t count;
- union packed_str strs[0];
+ union ndb_packed_str strs[0];
};
struct ndb_tags {
@@ -41,7 +52,7 @@ struct ndb_note {
uint32_t created_at;
uint32_t kind;
uint32_t content_length;
- union packed_str content;
+ union ndb_packed_str content;
uint32_t strings;
uint32_t json;
@@ -80,23 +91,23 @@ int ndb_builder_new_tag(struct ndb_builder *builder);
int ndb_builder_push_tag_str(struct ndb_builder *builder, const char *str, int len);
// BYE BUILDER
-static inline int ndb_str_is_packed(union packed_str str)
+static inline struct ndb_str ndb_note_str(struct ndb_note *note,
+ union ndb_packed_str *pstr)
{
- return (str.offset >> 31) & 0x1;
-}
-
+ struct ndb_str str;
+ str.flag = pstr->packed.flag;
-static inline const char * ndb_note_str(struct ndb_note *note,
- union packed_str *str)
-{
- if (ndb_str_is_packed(*str))
- return str->packed.str;
+ if (str.flag == NDB_PACKED_STR) {
+ str.str = pstr->packed.str;
+ return str;
+ }
- return ((const char *)note) + note->strings + str->offset;
+ str.str = ((const char *)note) + note->strings + (pstr->offset & 0xFFFFFF);
+ return str;
}
-static inline const char * ndb_tag_str(struct ndb_note *note,
- struct ndb_tag *tag, int ind)
+static inline struct ndb_str ndb_tag_str(struct ndb_note *note,
+ struct ndb_tag *tag, int ind)
{
return ndb_note_str(note, &tag->strs[ind]);
}
@@ -104,16 +115,16 @@ static inline const char * ndb_tag_str(struct ndb_note *note,
static inline int ndb_tag_matches_char(struct ndb_note *note,
struct ndb_tag *tag, int ind, char c)
{
- const char *str = ndb_tag_str(note, tag, ind);
- if (str[0] == '\0')
+ struct ndb_str str = ndb_tag_str(note, tag, ind);
+ if (str.str[0] == '\0')
return 0;
- else if (str[0] == c)
+ else if (str.str[0] == c)
return 1;
return 0;
}
-static inline const char * ndb_iter_tag_str(struct ndb_iterator *iter,
- int ind)
+static inline struct ndb_str ndb_iter_tag_str(struct ndb_iterator *iter,
+ int ind)
{
return ndb_tag_str(iter->note, iter->tag, ind);
}
@@ -143,9 +154,9 @@ static inline uint32_t ndb_note_kind(struct ndb_note *note)
return note->kind;
}
-static inline const char * ndb_note_content(struct ndb_note *note)
+static inline const char *ndb_note_content(struct ndb_note *note)
{
- return ndb_note_str(note, &note->content);
+ return ndb_note_str(note, &note->content).str;
}
static inline uint32_t ndb_note_content_length(struct ndb_note *note)
@@ -161,43 +172,34 @@ static inline struct ndb_note * ndb_note_from_bytes(unsigned char *bytes)
return note;
}
-static inline union packed_str ndb_offset_str(uint32_t offset)
+static inline union ndb_packed_str ndb_offset_str(uint32_t offset)
{
// ensure accidents like -1 don't corrupt our packed_str
- union packed_str str;
- str.offset = offset & 0x7FFFFFFF;
+ union ndb_packed_str str;
+ // most significant byte is reserved for the NDB_PACKED_* flag
+ str.offset = offset & 0xFFFFFF;
return str;
}
-static inline union packed_str ndb_char_to_packed_str(char c)
+static inline union ndb_packed_str ndb_char_to_packed_str(char c)
{
- union packed_str str;
- str.packed.flag = 0xFF;
+ union ndb_packed_str str;
+ str.packed.flag = NDB_PACKED_STR;
str.packed.str[0] = c;
str.packed.str[1] = '\0';
return str;
}
-static inline union packed_str ndb_chars_to_packed_str(char c1, char c2)
+static inline union ndb_packed_str ndb_chars_to_packed_str(char c1, char c2)
{
- union packed_str str;
- str.packed.flag = 0xFF;
+ union ndb_packed_str str;
+ str.packed.flag = NDB_PACKED_STR;
str.packed.str[0] = c1;
str.packed.str[1] = c2;
str.packed.str[2] = '\0';
return str;
}
-static inline const char * ndb_note_tag_index(struct ndb_note *note,
- struct ndb_tag *tag, int index)
-{
- if (index >= tag->count) {
- return 0;
- }
-
- return ndb_note_str(note, &tag->strs[index]);
-}
-
static inline int ndb_tags_iterate_start(struct ndb_note *note,
struct ndb_iterator *iter)
{
diff --git a/test.c b/test.c
@@ -31,9 +31,7 @@ static void test_basic_event() {
memset(note->padding, 3, sizeof(note->padding));
- const char *content = "hello, world!";
-
- ok = ndb_builder_set_content(b, content, strlen(content)); assert(ok);
+ ok = ndb_builder_set_content(b, hex_pk, strlen(hex_pk)); assert(ok);
ndb_builder_set_id(b, id); assert(ok);
ndb_builder_set_pubkey(b, pubkey); assert(ok);
ndb_builder_set_signature(b, sig); assert(ok);
@@ -50,6 +48,8 @@ static void test_basic_event() {
ok = ndb_builder_finalize(b, &note);
assert(ok);
+ // content should never be packed id
+ assert(note->content.packed.flag != NDB_PACKED_ID);
assert(note->tags.count == 2);
// test iterator
@@ -58,19 +58,21 @@ static void test_basic_event() {
ok = ndb_tags_iterate_start(note, it);
assert(ok);
assert(it->tag->count == 2);
- const char *p = ndb_iter_tag_str(it, 0);
- const char *hpk = ndb_iter_tag_str(it, 1);
- assert(hpk);
- assert(!ndb_str_is_packed(it->tag->strs[1]));
- assert(!strcmp(hpk, hex_pk));
+ const char *p = ndb_iter_tag_str(it, 0).str;
+ struct ndb_str hpk = ndb_iter_tag_str(it, 1);
+
+ hex_decode(hex_pk, 64, id, 32);
+
+ assert(hpk.flag == NDB_PACKED_ID);
+ assert(memcmp(hpk.id, id, 32) == 0);
assert(!strcmp(p, "p"));
ok = ndb_tags_iterate_next(it);
assert(ok);
assert(it->tag->count == 3);
- assert(!strcmp(ndb_iter_tag_str(it, 0), "word"));
- assert(!strcmp(ndb_iter_tag_str(it, 1), "words"));
- assert(!strcmp(ndb_iter_tag_str(it, 2), "w"));
+ assert(!strcmp(ndb_iter_tag_str(it, 0).str, "word"));
+ assert(!strcmp(ndb_iter_tag_str(it, 1).str, "words"));
+ assert(!strcmp(ndb_iter_tag_str(it, 2).str, "w"));
ok = ndb_tags_iterate_next(it);
assert(!ok);
@@ -108,7 +110,7 @@ static void test_parse_contact_list()
size = ndb_note_from_json((const char*)json, written, &note, buf, alloc_size);
printf("ndb_note_from_json size %d\n", size);
assert(size > 0);
- assert(size == 59062);
+ assert(size == 34062);
const char* expected_content =
"{\"wss://nos.lol\":{\"write\":true,\"read\":true},"
@@ -137,7 +139,7 @@ static void test_parse_contact_list()
}
static void test_parse_json() {
- char hex_id[65] = {0};
+ char hex_id[32] = {0};
unsigned char buffer[1024];
struct ndb_note *note;
#define HEX_ID "5004a081e397c6da9dc2f2d6b3134006a9d0e8c1b46689d9fe150bb2f21a204d"
@@ -152,24 +154,24 @@ static void test_parse_json() {
const char *content = ndb_note_content(note);
unsigned char *id = ndb_note_id(note);
- hex_encode(id, 32, hex_id, sizeof(hex_id));
+ hex_decode(HEX_ID, 64, hex_id, sizeof(hex_id));
assert(!strcmp(content, "共通語"));
- assert(!strcmp(HEX_ID, hex_id));
+ assert(!memcmp(id, hex_id, 32));
assert(note->tags.count == 2);
struct ndb_iterator iter, *it = &iter;
ok = ndb_tags_iterate_start(note, it); assert(ok);
assert(it->tag->count == 2);
- assert(!strcmp(ndb_iter_tag_str(it, 0), "p"));
- assert(!strcmp(ndb_iter_tag_str(it, 1), HEX_ID));
+ assert(!strcmp(ndb_iter_tag_str(it, 0).str, "p"));
+ assert(!memcmp(ndb_iter_tag_str(it, 1).id, hex_id, 32));
ok = ndb_tags_iterate_next(it); assert(ok);
assert(it->tag->count == 3);
- assert(!strcmp(ndb_iter_tag_str(it, 0), "word"));
- assert(!strcmp(ndb_iter_tag_str(it, 1), "words"));
- assert(!strcmp(ndb_iter_tag_str(it, 2), "w"));
+ assert(!strcmp(ndb_iter_tag_str(it, 0).str, "word"));
+ assert(!strcmp(ndb_iter_tag_str(it, 1).str, "words"));
+ assert(!strcmp(ndb_iter_tag_str(it, 2).str, "w"));
}
int main(int argc, const char *argv[]) {