nostrdb

an unfairly fast embedded nostr database backed by lmdb
git clone git://jb55.com/nostrdb
Log | Files | Refs | Submodules | README | LICENSE

commit d6b14c0fc3010e640246018b4ec2a52f8cd4973f
parent 6fddb176492af955df0e9682b19cc2342efd93c3
Author: William Casarin <jb55@jb55.com>
Date:   Thu,  8 Feb 2024 15:07:06 -0800

filter: add ndb_filter_end

This is a pretty scary looking function that realloc our large variable
filter buffer into a compact one. This saves up a bunch of memory when
we are done building the filter.

Signed-off-by: William Casarin <jb55@jb55.com>

Diffstat:
Msrc/nostrdb.c | 95++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
Msrc/nostrdb.h | 3++-
Mtest.c | 16+++++++++++++---
3 files changed, 102 insertions(+), 12 deletions(-)

diff --git a/src/nostrdb.c b/src/nostrdb.c @@ -509,6 +509,92 @@ static void lowercase_strncpy(char *dst, const char *src, int n) { } } +static inline int ndb_filter_elem_is_ptr(struct ndb_filter_field *field) { + return field->elem_type == NDB_ELEMENT_STRING || field->elem_type == NDB_ELEMENT_ID; +} + +// "Finalize" the filter. This resizes the allocated heap buffers so that they +// are as small as possible. This also prevents new fields from being added +int ndb_filter_end(struct ndb_filter *filter) +{ + int i, k; +#ifdef DEBUG + size_t orig_size; +#endif + size_t data_len, elem_len; + struct ndb_filter_elements *els; + if (filter->finalized == 1) + return 0; + + // move the data buffer to the end of the element buffer and update + // all of the element pointers accordingly + data_len = filter->data_buf.p - filter->data_buf.start; + elem_len = filter->elem_buf.p - filter->elem_buf.start; +#ifdef DEBUG + orig_size = filter->data_buf.end - filter->elem_buf.start; +#endif + + // first we delta-ize the element pointers so that they are relative to + // the start of the delta buf. we will re-add these after we move the + // memory + for (i = 0; i < filter->num_elements; i++) { + els = filter->elements[i]; + + // realloc could move this whole thing, so subtract + // the element pointers as well + filter->elements[i] = (struct ndb_filter_elements *) + ((unsigned char *)filter->elements[i] - + filter->elem_buf.start); + + if (!ndb_filter_elem_is_ptr(&els->field)) + continue; + + for (k = 0; k < els->count; k++) { + els->elements[k].id = (unsigned char *) + ((unsigned char *)els->elements[k].id - + filter->data_buf.start); + } + } + + // cap the elem buff + filter->elem_buf.end = filter->elem_buf.p; + + // move the data buffer to the end of the element buffer + memmove(filter->elem_buf.p, filter->data_buf.start, data_len); + + // realloc the whole thing + filter->elem_buf.start = realloc(filter->elem_buf.start, elem_len + data_len); + filter->elem_buf.end = filter->elem_buf.start + elem_len; + filter->elem_buf.p = filter->elem_buf.end; + + filter->data_buf.start = filter->elem_buf.end; + filter->data_buf.end = filter->data_buf.start + data_len; + filter->data_buf.p = filter->data_buf.end; + + // un-deltaize the pointers + for (i = 0; i < filter->num_elements; i++) { + filter->elements[i] = (struct ndb_filter_elements *) + ((unsigned char *)filter->elem_buf.start + + (size_t)filter->elements[i]); + els = filter->elements[i]; + + if (!ndb_filter_elem_is_ptr(&els->field)) + continue; + + for (k = 0; k < els->count; k++) { + els->elements[k].id = + (size_t)els->elements[k].id + + filter->data_buf.start; + } + } + + filter->finalized = 1; + + ndb_debug("ndb_filter_end: %ld -> %ld\n", orig_size, elem_len + data_len); + + return 1; +} + int ndb_filter_init(struct ndb_filter *filter) { struct cursor cur; @@ -538,18 +624,11 @@ int ndb_filter_init(struct ndb_filter *filter) filter->num_elements = 0; filter->elements[0] = (struct ndb_filter_elements*) buf; filter->current = NULL; + filter->finalized = 0; return 1; } -void ndb_filter_reset(struct ndb_filter *filter) -{ - filter->num_elements = 0; - filter->elem_buf.p = filter->elem_buf.start; - filter->data_buf.p = filter->data_buf.start; - filter->current = NULL; -} - void ndb_filter_destroy(struct ndb_filter *filter) { if (filter->elem_buf.start) diff --git a/src/nostrdb.h b/src/nostrdb.h @@ -241,6 +241,7 @@ struct ndb_filter { struct cursor elem_buf; struct cursor data_buf; int num_elements; + int finalized; struct ndb_filter_elements *current; struct ndb_filter_elements *elements[NDB_NUM_FILTERS]; }; @@ -477,7 +478,7 @@ int ndb_filter_add_str_element(struct ndb_filter *, const char *str); int ndb_filter_start_field(struct ndb_filter *, enum ndb_filter_fieldtype); int ndb_filter_start_tag_field(struct ndb_filter *, char tag); int ndb_filter_matches(struct ndb_filter *, struct ndb_note *); -void ndb_filter_reset(struct ndb_filter *); +int ndb_filter_end(struct ndb_filter *); void ndb_filter_end_field(struct ndb_filter *); void ndb_filter_destroy(struct ndb_filter *); diff --git a/test.c b/test.c @@ -70,6 +70,7 @@ static void test_filters() assert(ndb_filter_start_field(f, NDB_FILTER_KINDS) == 0); assert(ndb_filter_start_field(f, NDB_FILTER_TAGS) == 0); ndb_filter_end_field(f); + ndb_filter_end(f); // should be sorted after end assert(current->elements[0].integer == 2); @@ -88,7 +89,8 @@ static void test_filters() assert(ndb_filter_matches(f, note)); // don't free, just reset data pointers - ndb_filter_reset(f); + ndb_filter_destroy(f); + ndb_filter_init(f); // now try generic matches assert(ndb_filter_start_tag_field(f, 't')); @@ -97,6 +99,7 @@ static void test_filters() assert(ndb_filter_start_field(f, NDB_FILTER_KINDS)); assert(ndb_filter_add_int_element(f, 3)); ndb_filter_end_field(f); + ndb_filter_end(f); // shouldn't match the kind filter assert(!ndb_filter_matches(f, note)); @@ -106,10 +109,12 @@ static void test_filters() // now it should assert(ndb_filter_matches(f, note)); - ndb_filter_reset(f); + ndb_filter_destroy(f); + ndb_filter_init(f); assert(ndb_filter_start_field(f, NDB_FILTER_AUTHORS)); assert(ndb_filter_add_id_element(f, ndb_note_pubkey(note))); ndb_filter_end_field(f); + ndb_filter_end(f); assert(f->current == NULL); assert(ndb_filter_matches(f, note)); @@ -1164,6 +1169,7 @@ static void test_tag_query() ndb_filter_start_tag_field(f, 't'); ndb_filter_add_str_element(f, "hashtag"); ndb_filter_end_field(f); + ndb_filter_end(f); assert((subid = ndb_subscribe(ndb, f, 1))); assert(ndb_process_event(ndb, ev, strlen(ev))); @@ -1222,6 +1228,7 @@ static void test_query() ndb_filter_add_id_element(f, id); ndb_filter_add_id_element(f, id2); ndb_filter_end_field(f); + ndb_filter_end(f); assert((subid = ndb_subscribe(ndb, f, 1))); @@ -1238,13 +1245,15 @@ static void test_query() assert(count == 2); assert(0 == memcmp(ndb_note_id(results[0].note), id2, 32)); - ndb_filter_reset(f); + ndb_filter_destroy(f); + ndb_filter_init(f); ndb_filter_start_field(f, NDB_FILTER_KINDS); ndb_filter_add_int_element(f, 2); ndb_filter_end_field(f); ndb_filter_start_field(f, NDB_FILTER_LIMIT); ndb_filter_add_int_element(f, 2); ndb_filter_end_field(f); + ndb_filter_end(f); count = 0; assert(ndb_query(&txn, f, 1, results, cap, &count)); @@ -1358,6 +1367,7 @@ static void test_subscriptions() assert(ndb_filter_start_field(f, NDB_FILTER_KINDS)); assert(ndb_filter_add_int_element(f, 1337)); ndb_filter_end_field(f); + ndb_filter_end(f); assert((subid = ndb_subscribe(ndb, f, 1)));