commit d6b14c0fc3010e640246018b4ec2a52f8cd4973f
parent 6fddb176492af955df0e9682b19cc2342efd93c3
Author: William Casarin <jb55@jb55.com>
Date: Thu, 8 Feb 2024 15:07:06 -0800
filter: add ndb_filter_end
This is a pretty scary looking function that realloc our large variable
filter buffer into a compact one. This saves up a bunch of memory when
we are done building the filter.
Signed-off-by: William Casarin <jb55@jb55.com>
Diffstat:
3 files changed, 102 insertions(+), 12 deletions(-)
diff --git a/src/nostrdb.c b/src/nostrdb.c
@@ -509,6 +509,92 @@ static void lowercase_strncpy(char *dst, const char *src, int n) {
}
}
+static inline int ndb_filter_elem_is_ptr(struct ndb_filter_field *field) {
+ return field->elem_type == NDB_ELEMENT_STRING || field->elem_type == NDB_ELEMENT_ID;
+}
+
+// "Finalize" the filter. This resizes the allocated heap buffers so that they
+// are as small as possible. This also prevents new fields from being added
+int ndb_filter_end(struct ndb_filter *filter)
+{
+ int i, k;
+#ifdef DEBUG
+ size_t orig_size;
+#endif
+ size_t data_len, elem_len;
+ struct ndb_filter_elements *els;
+ if (filter->finalized == 1)
+ return 0;
+
+ // move the data buffer to the end of the element buffer and update
+ // all of the element pointers accordingly
+ data_len = filter->data_buf.p - filter->data_buf.start;
+ elem_len = filter->elem_buf.p - filter->elem_buf.start;
+#ifdef DEBUG
+ orig_size = filter->data_buf.end - filter->elem_buf.start;
+#endif
+
+ // first we delta-ize the element pointers so that they are relative to
+ // the start of the delta buf. we will re-add these after we move the
+ // memory
+ for (i = 0; i < filter->num_elements; i++) {
+ els = filter->elements[i];
+
+ // realloc could move this whole thing, so subtract
+ // the element pointers as well
+ filter->elements[i] = (struct ndb_filter_elements *)
+ ((unsigned char *)filter->elements[i] -
+ filter->elem_buf.start);
+
+ if (!ndb_filter_elem_is_ptr(&els->field))
+ continue;
+
+ for (k = 0; k < els->count; k++) {
+ els->elements[k].id = (unsigned char *)
+ ((unsigned char *)els->elements[k].id -
+ filter->data_buf.start);
+ }
+ }
+
+ // cap the elem buff
+ filter->elem_buf.end = filter->elem_buf.p;
+
+ // move the data buffer to the end of the element buffer
+ memmove(filter->elem_buf.p, filter->data_buf.start, data_len);
+
+ // realloc the whole thing
+ filter->elem_buf.start = realloc(filter->elem_buf.start, elem_len + data_len);
+ filter->elem_buf.end = filter->elem_buf.start + elem_len;
+ filter->elem_buf.p = filter->elem_buf.end;
+
+ filter->data_buf.start = filter->elem_buf.end;
+ filter->data_buf.end = filter->data_buf.start + data_len;
+ filter->data_buf.p = filter->data_buf.end;
+
+ // un-deltaize the pointers
+ for (i = 0; i < filter->num_elements; i++) {
+ filter->elements[i] = (struct ndb_filter_elements *)
+ ((unsigned char *)filter->elem_buf.start +
+ (size_t)filter->elements[i]);
+ els = filter->elements[i];
+
+ if (!ndb_filter_elem_is_ptr(&els->field))
+ continue;
+
+ for (k = 0; k < els->count; k++) {
+ els->elements[k].id =
+ (size_t)els->elements[k].id +
+ filter->data_buf.start;
+ }
+ }
+
+ filter->finalized = 1;
+
+ ndb_debug("ndb_filter_end: %ld -> %ld\n", orig_size, elem_len + data_len);
+
+ return 1;
+}
+
int ndb_filter_init(struct ndb_filter *filter)
{
struct cursor cur;
@@ -538,18 +624,11 @@ int ndb_filter_init(struct ndb_filter *filter)
filter->num_elements = 0;
filter->elements[0] = (struct ndb_filter_elements*) buf;
filter->current = NULL;
+ filter->finalized = 0;
return 1;
}
-void ndb_filter_reset(struct ndb_filter *filter)
-{
- filter->num_elements = 0;
- filter->elem_buf.p = filter->elem_buf.start;
- filter->data_buf.p = filter->data_buf.start;
- filter->current = NULL;
-}
-
void ndb_filter_destroy(struct ndb_filter *filter)
{
if (filter->elem_buf.start)
diff --git a/src/nostrdb.h b/src/nostrdb.h
@@ -241,6 +241,7 @@ struct ndb_filter {
struct cursor elem_buf;
struct cursor data_buf;
int num_elements;
+ int finalized;
struct ndb_filter_elements *current;
struct ndb_filter_elements *elements[NDB_NUM_FILTERS];
};
@@ -477,7 +478,7 @@ int ndb_filter_add_str_element(struct ndb_filter *, const char *str);
int ndb_filter_start_field(struct ndb_filter *, enum ndb_filter_fieldtype);
int ndb_filter_start_tag_field(struct ndb_filter *, char tag);
int ndb_filter_matches(struct ndb_filter *, struct ndb_note *);
-void ndb_filter_reset(struct ndb_filter *);
+int ndb_filter_end(struct ndb_filter *);
void ndb_filter_end_field(struct ndb_filter *);
void ndb_filter_destroy(struct ndb_filter *);
diff --git a/test.c b/test.c
@@ -70,6 +70,7 @@ static void test_filters()
assert(ndb_filter_start_field(f, NDB_FILTER_KINDS) == 0);
assert(ndb_filter_start_field(f, NDB_FILTER_TAGS) == 0);
ndb_filter_end_field(f);
+ ndb_filter_end(f);
// should be sorted after end
assert(current->elements[0].integer == 2);
@@ -88,7 +89,8 @@ static void test_filters()
assert(ndb_filter_matches(f, note));
// don't free, just reset data pointers
- ndb_filter_reset(f);
+ ndb_filter_destroy(f);
+ ndb_filter_init(f);
// now try generic matches
assert(ndb_filter_start_tag_field(f, 't'));
@@ -97,6 +99,7 @@ static void test_filters()
assert(ndb_filter_start_field(f, NDB_FILTER_KINDS));
assert(ndb_filter_add_int_element(f, 3));
ndb_filter_end_field(f);
+ ndb_filter_end(f);
// shouldn't match the kind filter
assert(!ndb_filter_matches(f, note));
@@ -106,10 +109,12 @@ static void test_filters()
// now it should
assert(ndb_filter_matches(f, note));
- ndb_filter_reset(f);
+ ndb_filter_destroy(f);
+ ndb_filter_init(f);
assert(ndb_filter_start_field(f, NDB_FILTER_AUTHORS));
assert(ndb_filter_add_id_element(f, ndb_note_pubkey(note)));
ndb_filter_end_field(f);
+ ndb_filter_end(f);
assert(f->current == NULL);
assert(ndb_filter_matches(f, note));
@@ -1164,6 +1169,7 @@ static void test_tag_query()
ndb_filter_start_tag_field(f, 't');
ndb_filter_add_str_element(f, "hashtag");
ndb_filter_end_field(f);
+ ndb_filter_end(f);
assert((subid = ndb_subscribe(ndb, f, 1)));
assert(ndb_process_event(ndb, ev, strlen(ev)));
@@ -1222,6 +1228,7 @@ static void test_query()
ndb_filter_add_id_element(f, id);
ndb_filter_add_id_element(f, id2);
ndb_filter_end_field(f);
+ ndb_filter_end(f);
assert((subid = ndb_subscribe(ndb, f, 1)));
@@ -1238,13 +1245,15 @@ static void test_query()
assert(count == 2);
assert(0 == memcmp(ndb_note_id(results[0].note), id2, 32));
- ndb_filter_reset(f);
+ ndb_filter_destroy(f);
+ ndb_filter_init(f);
ndb_filter_start_field(f, NDB_FILTER_KINDS);
ndb_filter_add_int_element(f, 2);
ndb_filter_end_field(f);
ndb_filter_start_field(f, NDB_FILTER_LIMIT);
ndb_filter_add_int_element(f, 2);
ndb_filter_end_field(f);
+ ndb_filter_end(f);
count = 0;
assert(ndb_query(&txn, f, 1, results, cap, &count));
@@ -1358,6 +1367,7 @@ static void test_subscriptions()
assert(ndb_filter_start_field(f, NDB_FILTER_KINDS));
assert(ndb_filter_add_int_element(f, 1337));
ndb_filter_end_field(f);
+ ndb_filter_end(f);
assert((subid = ndb_subscribe(ndb, f, 1)));