commit 9adb0b84dba5279e0e903b1e572d59750ed42ea0
parent b69e24109072e6c9712e254b751d886d810b4ab7
Author: William Casarin <jb55@jb55.com>
Date: Tue, 8 Apr 2025 16:29:16 -0700
filter: add initial custom filtering logic
This adds some helpers for adding custom filtering logic
to nostr filters. These are just a callback and a closure.
There can only be one custom callback filter per filter.
Fixes: https://github.com/damus-io/nostrdb/issues/33
Signed-off-by: William Casarin <jb55@jb55.com>
Diffstat:
M | src/nostrdb.c | | | 74 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | src/nostrdb.h | | | 15 | ++++++++++++++- |
M | test.c | | | 98 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
3 files changed, 186 insertions(+), 1 deletion(-)
diff --git a/src/nostrdb.c b/src/nostrdb.c
@@ -671,6 +671,12 @@ ndb_filter_elements_data(const struct ndb_filter *filter, int offset)
return data;
}
+// Look up the callback/closure pair stored for a custom filter field.
+//
+// Only the first element of `els` is consulted: its value is used as an
+// offset into the filter's data buffer, and the bytes found there are
+// reinterpreted as a struct ndb_filter_custom.
+struct ndb_filter_custom *
+ndb_filter_get_custom_element(const struct ndb_filter *filter, const struct ndb_filter_elements *els)
+{
+	int offset = els->elements[0];
+
+	return (struct ndb_filter_custom *)ndb_filter_elements_data(filter, offset);
+}
+
unsigned char *
ndb_filter_get_id_element(const struct ndb_filter *filter, const struct ndb_filter_elements *els, int index)
{
@@ -754,6 +760,7 @@ static const char *ndb_filter_field_name(enum ndb_filter_fieldtype field)
case NDB_FILTER_LIMIT: return "limit";
case NDB_FILTER_SEARCH: return "search";
case NDB_FILTER_RELAYS: return "relays";
+ case NDB_FILTER_CUSTOM: return "custom";
}
return "unknown";
@@ -821,6 +828,10 @@ static int ndb_filter_add_element(struct ndb_filter *filter, union ndb_filter_el
offset = filter->data_buf.p - filter->data_buf.start;
switch (current->field.type) {
+ case NDB_FILTER_CUSTOM:
+ if (!cursor_push(&filter->data_buf, (unsigned char *)&el, sizeof(el)))
+ return 0;
+ break;
case NDB_FILTER_IDS:
case NDB_FILTER_AUTHORS:
if (!cursor_push(&filter->data_buf, (unsigned char *)el.id, 32))
@@ -861,6 +872,7 @@ static int ndb_filter_add_element(struct ndb_filter *filter, union ndb_filter_el
case NDB_ELEMENT_INT:
// ints are not allowed in tag filters
case NDB_ELEMENT_UNKNOWN:
+ case NDB_ELEMENT_CUSTOM:
return 0;
}
// push a pointer of the string in the databuf as an element
@@ -925,6 +937,7 @@ int ndb_filter_add_str_element_len(struct ndb_filter *filter, const char *str, i
case NDB_FILTER_IDS:
case NDB_FILTER_AUTHORS:
case NDB_FILTER_KINDS:
+ case NDB_FILTER_CUSTOM:
return 0;
case NDB_FILTER_SEARCH:
if (current->count == 1) {
@@ -951,6 +964,41 @@ int ndb_filter_add_str_element(struct ndb_filter *filter, const char *str)
return ndb_filter_add_str_element_len(filter, str, strlen(str));
}
+// Attach a custom callback to the NDB_FILTER_CUSTOM field that is currently
+// being built on `filter`.
+//
+// `cb` is stored together with `ctx` and is later invoked as cb(ctx, note)
+// when notes are matched against the filter.
+//
+// Returns 0 if `cb` is NULL, if no field is in progress, if the current
+// field is not NDB_FILTER_CUSTOM, if the field already holds a callback, or
+// if the element type can't be set. Otherwise returns the result of
+// ndb_filter_add_element.
+int ndb_filter_add_custom_filter_element(struct ndb_filter *filter, ndb_filter_callback_fn *cb, void *ctx)
+{
+	union ndb_filter_element el;
+	struct ndb_filter_elements *current;
+
+	// a NULL callback would only blow up later, when a note is matched
+	if (cb == NULL)
+		return 0;
+
+	if (!(current = ndb_filter_current_element(filter)))
+		return 0;
+
+	// every field type is listed so the compiler can flag new ones
+	switch (current->field.type) {
+	case NDB_FILTER_CUSTOM:
+		break;
+	case NDB_FILTER_IDS:
+	case NDB_FILTER_AUTHORS:
+	case NDB_FILTER_TAGS:
+	case NDB_FILTER_SEARCH:
+	case NDB_FILTER_RELAYS:
+	case NDB_FILTER_KINDS:
+	case NDB_FILTER_SINCE:
+	case NDB_FILTER_UNTIL:
+	case NDB_FILTER_LIMIT:
+		return 0;
+	}
+
+	// only the first element of a custom field is ever read back, so a
+	// second callback would be silently ignored; refuse it instead
+	if (current->count >= 1)
+		return 0;
+
+	if (!ndb_filter_set_elem_type(filter, NDB_ELEMENT_CUSTOM))
+		return 0;
+
+	// the whole union is copied into the filter's data buffer, so make
+	// sure no indeterminate bytes go along with it
+	memset(&el, 0, sizeof(el));
+	el.custom_filter.cb = cb;
+	el.custom_filter.ctx = ctx;
+
+	return ndb_filter_add_element(filter, el);
+}
+
int ndb_filter_add_int_element(struct ndb_filter *filter, uint64_t integer)
{
union ndb_filter_element el;
@@ -964,6 +1012,7 @@ int ndb_filter_add_int_element(struct ndb_filter *filter, uint64_t integer)
case NDB_FILTER_TAGS:
case NDB_FILTER_SEARCH:
case NDB_FILTER_RELAYS:
+ case NDB_FILTER_CUSTOM:
return 0;
case NDB_FILTER_KINDS:
case NDB_FILTER_SINCE:
@@ -996,6 +1045,7 @@ int ndb_filter_add_id_element(struct ndb_filter *filter, const unsigned char *id
case NDB_FILTER_KINDS:
case NDB_FILTER_SEARCH:
case NDB_FILTER_RELAYS:
+ case NDB_FILTER_CUSTOM:
return 0;
case NDB_FILTER_IDS:
case NDB_FILTER_AUTHORS:
@@ -1079,6 +1129,7 @@ static int ndb_tag_filter_matches(struct ndb_filter *filter,
case NDB_ELEMENT_INT:
// int elements int tag queries are not supported
case NDB_ELEMENT_UNKNOWN:
+ case NDB_ELEMENT_CUSTOM:
return 0;
}
}
@@ -1166,6 +1217,7 @@ static int ndb_filter_matches_with(struct ndb_filter *filter,
int i, j;
struct ndb_filter_elements *els;
struct search_id_state state;
+ struct ndb_filter_custom *custom;
state.filter = filter;
@@ -1244,6 +1296,12 @@ static int ndb_filter_matches_with(struct ndb_filter *filter,
// the search index will be walked for these kinds
// of queries.
continue;
+ case NDB_FILTER_CUSTOM:
+ custom = ndb_filter_get_custom_element(filter, els);
+ if (custom->cb(custom->ctx, note))
+ continue;
+ break;
+
case NDB_FILTER_LIMIT:
cont:
continue;
@@ -1297,6 +1355,7 @@ static int ndb_filter_field_eq(struct ndb_filter *a_filt,
const char *a_str, *b_str;
unsigned char *a_id, *b_id;
uint64_t a_int, b_int;
+ struct ndb_filter_custom *a_custom, *b_custom;
if (a_field->count != b_field->count)
return 0;
@@ -1318,6 +1377,11 @@ static int ndb_filter_field_eq(struct ndb_filter *a_filt,
for (i = 0; i < a_field->count; i++) {
switch (a_field->field.elem_type) {
+ case NDB_ELEMENT_CUSTOM:
+ a_custom = ndb_filter_get_custom_element(a_filt, a_field);
+ b_custom = ndb_filter_get_custom_element(b_filt, b_field);
+ if (memcmp(a_custom, b_custom, sizeof(*a_custom)))
+ return 0;
case NDB_ELEMENT_UNKNOWN:
return 0;
case NDB_ELEMENT_STRING:
@@ -1373,6 +1437,7 @@ void ndb_filter_end_field(struct ndb_filter *filter)
// TODO: generic tag search sorting
break;
case NDB_FILTER_SINCE:
+ case NDB_FILTER_CUSTOM:
case NDB_FILTER_UNTIL:
case NDB_FILTER_LIMIT:
case NDB_FILTER_SEARCH:
@@ -6412,6 +6477,9 @@ static int cursor_push_json_elem_array(struct cursor *cur,
for (i = 0; i < elems->count; i++) {
switch (elems->field.elem_type) {
+ case NDB_ELEMENT_CUSTOM:
+ // can't serialize custom functions
+ break;
case NDB_ELEMENT_STRING:
str = ndb_filter_get_string_element(filter, elems, i);
if (!cursor_push_jsonstr(cur, str))
@@ -6464,6 +6532,9 @@ int ndb_filter_json(const struct ndb_filter *filter, char *buf, int buflen)
for (i = 0; i < filter->num_elements; i++) {
elems = ndb_filter_get_elements(filter, i);
switch (elems->field.type) {
+ case NDB_FILTER_CUSTOM:
+ // nothing to encode these as
+ break;
case NDB_FILTER_IDS:
if (!cursor_push_str(c, "\"ids\":"))
return 0;
@@ -7325,6 +7396,9 @@ static int ndb_filter_parse_json(struct ndb_json_parser *parser,
// we parsed a top-level field
switch(field) {
+ case NDB_FILTER_CUSTOM:
+ // can't really parse these yet
+ break;
case NDB_FILTER_AUTHORS:
case NDB_FILTER_IDS:
if (!ndb_filter_parse_json_ids(parser, filter)) {
diff --git a/src/nostrdb.h b/src/nostrdb.h
@@ -2,6 +2,7 @@
#define NOSTRDB_H
#include <inttypes.h>
+#include <stdbool.h>
#include "win.h"
#include "cursor.h"
@@ -48,6 +49,7 @@ struct ndb_t {
};
struct ndb_str {
+ // NDB_PACKED_STR, NDB_PACKED_ID
unsigned char flag;
union {
const char *str;
@@ -163,8 +165,9 @@ enum ndb_filter_fieldtype {
NDB_FILTER_LIMIT = 7,
NDB_FILTER_SEARCH = 8,
NDB_FILTER_RELAYS = 9,
+ NDB_FILTER_CUSTOM = 10,
};
-#define NDB_NUM_FILTERS 7
+#define NDB_NUM_FILTERS 10
// when matching generic tags, we need to know if we're dealing with
// a pointer to a 32-byte ID or a null terminated string
@@ -173,6 +176,7 @@ enum ndb_generic_element_type {
NDB_ELEMENT_STRING = 1,
NDB_ELEMENT_ID = 2,
NDB_ELEMENT_INT = 3,
+ NDB_ELEMENT_CUSTOM = 4,
};
enum ndb_search_order {
@@ -250,10 +254,18 @@ struct ndb_filter_string {
int len;
};
+typedef bool ndb_filter_callback_fn(void *, struct ndb_note *); // custom filter predicate, invoked as cb(ctx, note) during matching; true lets matching continue, false does not (presumably rejecting the note)
+
+struct ndb_filter_custom { // callback + closure pair stored as the single element of an NDB_FILTER_CUSTOM field
+ void *ctx; // opaque user pointer, handed back to cb as its first argument
+ ndb_filter_callback_fn *cb; // predicate called with (ctx, note) while matching
+};
+
union ndb_filter_element { // one value being added to a filter field; which member is meaningful depends on the field being built
struct ndb_filter_string string;
const unsigned char *id; // ids/authors fields: 32 bytes are copied from this pointer into the filter's data buffer
uint64_t integer;
+ struct ndb_filter_custom custom_filter; // custom fields: the callback and its closure pointer
};
struct ndb_filter_field {
@@ -546,6 +558,7 @@ int ndb_filter_init_with(struct ndb_filter *filter, int pages);
int ndb_filter_add_id_element(struct ndb_filter *, const unsigned char *id);
int ndb_filter_add_int_element(struct ndb_filter *, uint64_t integer);
int ndb_filter_add_str_element(struct ndb_filter *, const char *str);
+int ndb_filter_add_custom_filter_element(struct ndb_filter *filter, ndb_filter_callback_fn *cb, void *ctx);
int ndb_filter_eq(const struct ndb_filter *, const struct ndb_filter *);
/// is `a` a subset of `b`
diff --git a/test.c b/test.c
@@ -1901,7 +1901,105 @@ static void test_nip50_profile_search() {
printf("ok test_nip50_profile_search\n");
}
+// Custom filter callback used by test_custom_filter: accepts a note unless
+// it carries an "e" tag whose 4th element is a "reply" or "root" marker,
+// i.e. unless the note is a reply to something else.
+//
+// `ctx` is the closure pointer supplied when the filter was built; this
+// callback does not need it.
+static bool only_threads_filter(void *ctx, struct ndb_note *note)
+{
+	struct ndb_iterator it;
+	struct ndb_str key, marker;
+
+	(void)ctx;
+
+	ndb_tags_iterate_start(note, &it);
+
+	while (ndb_tags_iterate_next(&it)) {
+		// the marker sits in the 4th slot: ["e", <id>, <relay>, <marker>, ...]
+		if (ndb_tag_count(it.tag) < 4)
+			continue;
+
+		// the key has to be exactly "e", not merely start with 'e',
+		// and must be a string before we look at it as one
+		key = ndb_iter_tag_str(&it, 0);
+		if (key.flag == NDB_PACKED_ID || strcmp(key.str, "e") != 0)
+			continue;
+
+		marker = ndb_iter_tag_str(&it, 3);
+		if (marker.flag == NDB_PACKED_ID)
+			continue;
+
+		// if it has a reply or root marker, then this is a reply
+		if (strcmp(marker.str, "reply") == 0 || strcmp(marker.str, "root") == 0)
+			return false;
+	}
+
+	return true;
+}
+
+// Ingests a root note and a reply, then queries with a filter whose only
+// field is a custom callback (only_threads_filter) and checks that just the
+// root note comes back.
+//
+// A second, plain kind-1 filter is used for the subscription so the test can
+// wait until both notes have been written before querying.
+static void test_custom_filter()
+{
+	struct ndb *ndb;
+	struct ndb_txn txn;
+	struct ndb_config config;
+	struct ndb_filter filter, *f = &filter;
+	struct ndb_filter filter2, *f2 = &filter2;
+	int count = 0; // stays 0 if the query never fills it in
+	uint64_t sub_id, note_key;
+	struct ndb_query_result results[2];
+	struct ndb_ingest_meta meta;
+
+	const char *root = "[\"EVENT\",{\"id\":\"3d3fba391ce6f83cf336b161f3de90bb2610c20dfb9f4de3a6dacb6b11362971\",\"pubkey\":\"32e1827635450ebb3c5a7d12c1f8e7b2b514439ac10a67eef3d9fd9c5c68e245\",\"created_at\":1744084064,\"kind\":1,\"tags\":[],\"content\":\"dire wolves are back LFG\",\"sig\":\"340a6ee8a859a1d78e50551dae7b24aaba7137647a6ac295acc97faa06ba33310593f5b4081dad188aee81266144f2312afb249939a2e07c14ca167af08e998f\"}]";
+
+	const char *reply_json = "[\"EVENT\",{\"id\":\"3a338522ee1e27056acccee65849de8deba426db1c71cbd61d105280bbb67ed2\",\"pubkey\":\"7cc328a08ddb2afdf9f9be77beff4c83489ff979721827d628a542f32a247c0e\",\"created_at\":1744151551,\"kind\":1,\"tags\":[[\"alt\",\"A short note: pokerdeck ♠️🦍🐧\"],[\"e\",\"086ccb1873fa10d4338713f24b034e17e543d8ad79c15ff39cf59f4d0cb7a2d6\",\"wss://nostr.wine/\",\"root\",\"32e1827635450ebb3c5a7d12c1f8e7b2b514439ac10a67eef3d9fd9c5c68e245\"],[\"p\",\"32e1827635450ebb3c5a7d12c1f8e7b2b514439ac10a67eef3d9fd9c5c68e245\",\"wss://nostr.wine\"]],\"content\":\"pokerdeck ♠️🦍🐧\",\"sig\":\"1099e47ba1ba962a8f2617c139e240d0369d81e70241dce7e73340e3230d8a3039c07114ed21f0e765e40f1b71fc2770fa5585994d27d2ece3b14e7b98f988d3\"}]";
+
+	ndb_default_config(&config);
+	assert(ndb_init(&ndb, test_dir, &config));
+
+	// query filter: nothing but the custom callback
+	assert(ndb_filter_init(f));
+	assert(ndb_filter_start_field(f, NDB_FILTER_CUSTOM));
+	assert(ndb_filter_add_custom_filter_element(f, only_threads_filter, NULL));
+	ndb_filter_end_field(f);
+	ndb_filter_end(f);
+
+	// subscription filter: every kind-1 note, so both ingests are seen
+	assert(ndb_filter_init(f2));
+	assert(ndb_filter_start_field(f2, NDB_FILTER_KINDS));
+	assert(ndb_filter_add_int_element(f2, 1));
+	ndb_filter_end_field(f2);
+	ndb_filter_end(f2);
+
+	sub_id = ndb_subscribe(ndb, f2, 1);
+
+	ndb_ingest_meta_init(&meta, 1, "test");
+
+	assert(ndb_process_event_with(ndb, root, strlen(root), &meta));
+	assert(ndb_process_event_with(ndb, reply_json, strlen(reply_json), &meta));
+
+	assert(ndb_wait_for_notes(ndb, sub_id, &note_key, 2) == 2);
+
+	ndb_begin_query(ndb, &txn);
+	ndb_query(&txn, f, 1, results, 2, &count);
+	ndb_end_query(&txn);
+
+	// the reply is rejected by the callback; only the root note is left
+	assert(count == 1);
+	assert(ndb_note_id(results[0].note)[0] == 0x3d);
+
+	// Cleanup
+	ndb_destroy(ndb);
+
+	printf("ok test_custom_filter\n");
+}
+
int main(int argc, const char *argv[]) {
+ test_custom_filter();
test_note_relay_index();
test_filter_search();
test_filter_parse_search_json();