commit f56f983a5688c367bb1c8e9f5e6cbf5fe6ac3062
parent 625007b036ccb819d7e922a38f60ab47574ed742
Author: William Casarin <jb55@jb55.com>
Date: Sat, 22 Jul 2023 07:38:34 -0700
Revert "switch to 2-byte string indices and remove packed string"
This reverts commit 62dc9eefc2b3180d3fc98b202dc7fc8456375887.
Diffstat:
M | nostrdb.c | | | 40 | +++++++++++++++++++++++++--------------- |
M | nostrdb.h | | | 70 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------- |
M | test.c | | | 3 | ++- |
3 files changed, 84 insertions(+), 29 deletions(-)
diff --git a/nostrdb.c b/nostrdb.c
@@ -4,7 +4,6 @@
#include "hex.h"
#include "cursor.h"
#include <stdlib.h>
-#include <assert.h>
struct ndb_json_parser {
const char *json;
@@ -17,7 +16,7 @@ struct ndb_json_parser {
static inline int cursor_push_tag(struct cursor *cur, struct ndb_tag *tag)
{
- return cursor_push_u32(cur, tag->count);
+ return cursor_push_u16(cur, tag->count);
}
int ndb_builder_new(struct ndb_builder *builder, unsigned char *buf,
@@ -123,19 +122,30 @@ struct ndb_note * ndb_builder_note(struct ndb_builder *builder)
}
int ndb_builder_make_string(struct ndb_builder *builder, const char *str,
- int len, uint16_t *pstr)
+ int len, union packed_str *pstr)
{
- uint16_t loc;
+ uint32_t loc;
+
+ if (len == 0) {
+ *pstr = ndb_char_to_packed_str(0);
+ return 1;
+ } else if (len == 1) {
+ *pstr = ndb_char_to_packed_str(str[0]);
+ return 1;
+ } else if (len == 2) {
+ *pstr = ndb_chars_to_packed_str(str[0], str[1]);
+ return 1;
+ }
// find existing matching string to avoid duplicate strings
- int indices = cursor_count(&builder->str_indices, sizeof(uint16_t));
+ int indices = cursor_count(&builder->str_indices, sizeof(uint32_t));
for (int i = 0; i < indices; i++) {
- uint16_t index = ((uint16_t*)builder->str_indices.start)[i];
- const char *some_str = ((const char*)builder->strings.start) + index;
+ uint32_t index = ((uint32_t*)builder->str_indices.start)[i];
+ const char *some_str = (const char*)builder->strings.start + index;
if (!strcmp(some_str, str)) {
// found an existing matching str, use that index
- *pstr = index;
+ *pstr = ndb_offset_str(index);
return 1;
}
}
@@ -146,11 +156,11 @@ int ndb_builder_make_string(struct ndb_builder *builder, const char *str,
cursor_push_byte(&builder->strings, '\0'))) {
return 0;
}
- *pstr = loc;
+ *pstr = ndb_offset_str(loc);
// record in builder indices. ignore return value, if we can't cache it
// then whatever
- cursor_push_u16(&builder->str_indices, loc);
+ cursor_push_u32(&builder->str_indices, loc);
return 1;
}
@@ -212,9 +222,9 @@ static inline int ndb_builder_process_json_tags(struct ndb_json_parser *p,
return 1;
for (int i = 0; i < array->size; i++) {
- if (!ndb_builder_tag_from_json_array(p, &tag[i+1]))
+ if (!ndb_builder_tag_from_json_array(p, &tag[i+1]))
return 0;
- tag += tag[i+1].size;
+ tag += tag[i+1].size;
}
return 1;
@@ -324,10 +334,10 @@ int ndb_builder_new_tag(struct ndb_builder *builder)
inline int ndb_builder_push_tag_str(struct ndb_builder *builder,
const char *str, int len)
{
- uint16_t str_ptr;
- if (!ndb_builder_make_string(builder, str, len, &str_ptr))
+ union packed_str pstr;
+ if (!ndb_builder_make_string(builder, str, len, &pstr))
return 0;
- if (!cursor_push_u16(&builder->note_cur, str_ptr))
+ if (!cursor_push_u32(&builder->note_cur, pstr.offset))
return 0;
builder->current_tag->count++;
return 1;
diff --git a/nostrdb.h b/nostrdb.h
@@ -2,22 +2,31 @@
#define NOSTRDB_H
#include <inttypes.h>
-#include <assert.h>
#include "cursor.h"
// these must be byte-aligned, they are directly accessing the serialized data
// representation
#pragma pack(push, 1)
+union packed_str {
+ uint32_t offset;
+
+ struct {
+ char str[3];
+ // we assume little endian everywhere. sorry not sorry.
+ unsigned char flag;
+ } packed;
+
+ unsigned char bytes[4];
+};
+
struct ndb_tag {
uint16_t count;
- uint16_t padding;
- uint16_t strs[0];
+ union packed_str strs[0];
};
struct ndb_tags {
uint16_t count;
- uint16_t padding;
struct ndb_tag tag[0];
};
@@ -31,8 +40,7 @@ struct ndb_note {
uint32_t created_at;
uint32_t kind;
- uint16_t content;
- uint16_t content_padding;
+ union packed_str content;
uint32_t strings;
uint32_t json;
@@ -71,15 +79,24 @@ int ndb_builder_new_tag(struct ndb_builder *builder);
int ndb_builder_push_tag_str(struct ndb_builder *builder, const char *str, int len);
// BYE BUILDER
-static inline const char * ndb_note_str(struct ndb_note *note, uint16_t str)
+static inline int ndb_str_is_packed(union packed_str str)
{
- return ((const char *)note) + note->strings + str;
+ return (str.offset >> 31) & 0x1;
+}
+
+static inline const char * ndb_note_str(struct ndb_note *note,
+ union packed_str *str)
+{
+ if (ndb_str_is_packed(*str))
+ return str->packed.str;
+
+ return ((const char *)note) + note->strings + str->offset;
}
static inline const char * ndb_tag_str(struct ndb_note *note,
struct ndb_tag *tag, int ind)
{
- return ndb_note_str(note, tag->strs[ind]);
+ return ndb_note_str(note, &tag->strs[ind]);
}
static inline int ndb_tag_matches_char(struct ndb_note *note,
@@ -121,7 +138,7 @@ static inline uint32_t ndb_note_created_at(struct ndb_note *note)
static inline const char * ndb_note_content(struct ndb_note *note)
{
- return ndb_note_str(note, note->content);
+ return ndb_note_str(note, &note->content);
}
static inline struct ndb_note * ndb_note_from_bytes(unsigned char *bytes)
@@ -132,6 +149,33 @@ static inline struct ndb_note * ndb_note_from_bytes(unsigned char *bytes)
return note;
}
+static inline union packed_str ndb_offset_str(uint32_t offset)
+{
+ // ensure accidents like -1 don't corrupt our packed_str
+ union packed_str str;
+ str.offset = offset & 0x7FFFFFFF;
+ return str;
+}
+
+static inline union packed_str ndb_char_to_packed_str(char c)
+{
+ union packed_str str;
+ str.packed.flag = 0xFF;
+ str.packed.str[0] = c;
+ str.packed.str[1] = '\0';
+ return str;
+}
+
+static inline union packed_str ndb_chars_to_packed_str(char c1, char c2)
+{
+ union packed_str str;
+ str.packed.flag = 0xFF;
+ str.packed.str[0] = c1;
+ str.packed.str[1] = c2;
+ str.packed.str[2] = '\0';
+ return str;
+}
+
static inline const char * ndb_note_tag_index(struct ndb_note *note,
struct ndb_tag *tag, int index)
{
@@ -139,7 +183,7 @@ static inline const char * ndb_note_tag_index(struct ndb_note *note,
return 0;
}
- return ndb_note_str(note, tag->strs[index]);
+ return ndb_note_str(note, &tag->strs[index]);
}
static inline int ndb_tags_iterate_start(struct ndb_note *note,
@@ -157,8 +201,8 @@ static inline int ndb_tags_iterate_next(struct ndb_iterator *iter)
struct ndb_tags *tags = &iter->note->tags;
if (++iter->index < tags->count) {
- uint16_t tag_data_size = iter->tag->count * sizeof(iter->tag->strs[0]);
- iter->tag = (struct ndb_tag *)(((unsigned char *)&iter->tag->strs[0]) + tag_data_size);
+ uint32_t tag_data_size = iter->tag->count * sizeof(iter->tag->strs[0]);
+ iter->tag = (struct ndb_tag *)(iter->tag->strs[0].bytes + tag_data_size);
return 1;
}
diff --git a/test.c b/test.c
@@ -61,8 +61,9 @@ static void test_basic_event() {
const char *p = ndb_iter_tag_str(it, 0);
const char *hpk = ndb_iter_tag_str(it, 1);
assert(hpk);
- assert(!strcmp(p, "p"));
+ assert(!ndb_str_is_packed(it->tag->strs[1]));
assert(!strcmp(hpk, hex_pk));
+ assert(!strcmp(p, "p"));
ok = ndb_tags_iterate_next(it);
assert(ok);