nostrdb

an unfairly fast embedded nostr database backed by lmdb
git clone git://jb55.com/nostrdb
Log | Files | Refs | Submodules | README | LICENSE

commit 9adb0b84dba5279e0e903b1e572d59750ed42ea0
parent b69e24109072e6c9712e254b751d886d810b4ab7
Author: William Casarin <jb55@jb55.com>
Date:   Tue,  8 Apr 2025 16:29:16 -0700

filter: add initial custom filtering logic

This adds some helpers for attaching custom filtering logic
to nostr filters. A custom filter is just a callback plus a closure
(a context pointer that is passed back to the callback).
There can only be one custom callback filter per filter.

Fixes: https://github.com/damus-io/nostrdb/issues/33
Signed-off-by: William Casarin <jb55@jb55.com>

Diffstat:
M src/nostrdb.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/nostrdb.h | 15 ++++++++++++++-
M test.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 186 insertions(+), 1 deletion(-)

diff --git a/src/nostrdb.c b/src/nostrdb.c @@ -671,6 +671,12 @@ ndb_filter_elements_data(const struct ndb_filter *filter, int offset) return data; } +struct ndb_filter_custom * +ndb_filter_get_custom_element(const struct ndb_filter *filter, const struct ndb_filter_elements *els) +{ + return (struct ndb_filter_custom *)ndb_filter_elements_data(filter, els->elements[0]); +} + unsigned char * ndb_filter_get_id_element(const struct ndb_filter *filter, const struct ndb_filter_elements *els, int index) { @@ -754,6 +760,7 @@ static const char *ndb_filter_field_name(enum ndb_filter_fieldtype field) case NDB_FILTER_LIMIT: return "limit"; case NDB_FILTER_SEARCH: return "search"; case NDB_FILTER_RELAYS: return "relays"; + case NDB_FILTER_CUSTOM: return "custom"; } return "unknown"; @@ -821,6 +828,10 @@ static int ndb_filter_add_element(struct ndb_filter *filter, union ndb_filter_el offset = filter->data_buf.p - filter->data_buf.start; switch (current->field.type) { + case NDB_FILTER_CUSTOM: + if (!cursor_push(&filter->data_buf, (unsigned char *)&el, sizeof(el))) + return 0; + break; case NDB_FILTER_IDS: case NDB_FILTER_AUTHORS: if (!cursor_push(&filter->data_buf, (unsigned char *)el.id, 32)) @@ -861,6 +872,7 @@ static int ndb_filter_add_element(struct ndb_filter *filter, union ndb_filter_el case NDB_ELEMENT_INT: // ints are not allowed in tag filters case NDB_ELEMENT_UNKNOWN: + case NDB_ELEMENT_CUSTOM: return 0; } // push a pointer of the string in the databuf as an element @@ -925,6 +937,7 @@ int ndb_filter_add_str_element_len(struct ndb_filter *filter, const char *str, i case NDB_FILTER_IDS: case NDB_FILTER_AUTHORS: case NDB_FILTER_KINDS: + case NDB_FILTER_CUSTOM: return 0; case NDB_FILTER_SEARCH: if (current->count == 1) { @@ -951,6 +964,41 @@ int ndb_filter_add_str_element(struct ndb_filter *filter, const char *str) return ndb_filter_add_str_element_len(filter, str, strlen(str)); } +int ndb_filter_add_custom_filter_element(struct ndb_filter *filter, ndb_filter_callback_fn 
*cb, void *ctx) +{ + union ndb_filter_element el; + struct ndb_filter_elements *current; + struct ndb_filter_custom custom; + + custom.cb = cb; + custom.ctx = ctx; + + if (!(current = ndb_filter_current_element(filter))) + return 0; + + switch (current->field.type) { + case NDB_FILTER_CUSTOM: + break; + case NDB_FILTER_IDS: + case NDB_FILTER_AUTHORS: + case NDB_FILTER_TAGS: + case NDB_FILTER_SEARCH: + case NDB_FILTER_RELAYS: + case NDB_FILTER_KINDS: + case NDB_FILTER_SINCE: + case NDB_FILTER_UNTIL: + case NDB_FILTER_LIMIT: + return 0; + } + + if (!ndb_filter_set_elem_type(filter, NDB_ELEMENT_CUSTOM)) + return 0; + + el.custom_filter = custom; + + return ndb_filter_add_element(filter, el); +} + int ndb_filter_add_int_element(struct ndb_filter *filter, uint64_t integer) { union ndb_filter_element el; @@ -964,6 +1012,7 @@ int ndb_filter_add_int_element(struct ndb_filter *filter, uint64_t integer) case NDB_FILTER_TAGS: case NDB_FILTER_SEARCH: case NDB_FILTER_RELAYS: + case NDB_FILTER_CUSTOM: return 0; case NDB_FILTER_KINDS: case NDB_FILTER_SINCE: @@ -996,6 +1045,7 @@ int ndb_filter_add_id_element(struct ndb_filter *filter, const unsigned char *id case NDB_FILTER_KINDS: case NDB_FILTER_SEARCH: case NDB_FILTER_RELAYS: + case NDB_FILTER_CUSTOM: return 0; case NDB_FILTER_IDS: case NDB_FILTER_AUTHORS: @@ -1079,6 +1129,7 @@ static int ndb_tag_filter_matches(struct ndb_filter *filter, case NDB_ELEMENT_INT: // int elements int tag queries are not supported case NDB_ELEMENT_UNKNOWN: + case NDB_ELEMENT_CUSTOM: return 0; } } @@ -1166,6 +1217,7 @@ static int ndb_filter_matches_with(struct ndb_filter *filter, int i, j; struct ndb_filter_elements *els; struct search_id_state state; + struct ndb_filter_custom *custom; state.filter = filter; @@ -1244,6 +1296,12 @@ static int ndb_filter_matches_with(struct ndb_filter *filter, // the search index will be walked for these kinds // of queries. 
continue; + case NDB_FILTER_CUSTOM: + custom = ndb_filter_get_custom_element(filter, els); + if (custom->cb(custom->ctx, note)) + continue; + break; + case NDB_FILTER_LIMIT: cont: continue; @@ -1297,6 +1355,7 @@ static int ndb_filter_field_eq(struct ndb_filter *a_filt, const char *a_str, *b_str; unsigned char *a_id, *b_id; uint64_t a_int, b_int; + struct ndb_filter_custom *a_custom, *b_custom; if (a_field->count != b_field->count) return 0; @@ -1318,6 +1377,11 @@ static int ndb_filter_field_eq(struct ndb_filter *a_filt, for (i = 0; i < a_field->count; i++) { switch (a_field->field.elem_type) { + case NDB_ELEMENT_CUSTOM: + a_custom = ndb_filter_get_custom_element(a_filt, a_field); + b_custom = ndb_filter_get_custom_element(b_filt, b_field); + if (memcmp(a_custom, b_custom, sizeof(*a_custom))) + return 0; case NDB_ELEMENT_UNKNOWN: return 0; case NDB_ELEMENT_STRING: @@ -1373,6 +1437,7 @@ void ndb_filter_end_field(struct ndb_filter *filter) // TODO: generic tag search sorting break; case NDB_FILTER_SINCE: + case NDB_FILTER_CUSTOM: case NDB_FILTER_UNTIL: case NDB_FILTER_LIMIT: case NDB_FILTER_SEARCH: @@ -6412,6 +6477,9 @@ static int cursor_push_json_elem_array(struct cursor *cur, for (i = 0; i < elems->count; i++) { switch (elems->field.elem_type) { + case NDB_ELEMENT_CUSTOM: + // can't serialize custom functions + break; case NDB_ELEMENT_STRING: str = ndb_filter_get_string_element(filter, elems, i); if (!cursor_push_jsonstr(cur, str)) @@ -6464,6 +6532,9 @@ int ndb_filter_json(const struct ndb_filter *filter, char *buf, int buflen) for (i = 0; i < filter->num_elements; i++) { elems = ndb_filter_get_elements(filter, i); switch (elems->field.type) { + case NDB_FILTER_CUSTOM: + // nothing to encode these as + break; case NDB_FILTER_IDS: if (!cursor_push_str(c, "\"ids\":")) return 0; @@ -7325,6 +7396,9 @@ static int ndb_filter_parse_json(struct ndb_json_parser *parser, // we parsed a top-level field switch(field) { + case NDB_FILTER_CUSTOM: + // can't really parse these yet 
+ break; case NDB_FILTER_AUTHORS: case NDB_FILTER_IDS: if (!ndb_filter_parse_json_ids(parser, filter)) { diff --git a/src/nostrdb.h b/src/nostrdb.h @@ -2,6 +2,7 @@ #define NOSTRDB_H #include <inttypes.h> +#include <stdbool.h> #include "win.h" #include "cursor.h" @@ -48,6 +49,7 @@ struct ndb_t { }; struct ndb_str { + // NDB_PACKED_STR, NDB_PACKED_ID unsigned char flag; union { const char *str; @@ -163,8 +165,9 @@ enum ndb_filter_fieldtype { NDB_FILTER_LIMIT = 7, NDB_FILTER_SEARCH = 8, NDB_FILTER_RELAYS = 9, + NDB_FILTER_CUSTOM = 10, }; -#define NDB_NUM_FILTERS 7 +#define NDB_NUM_FILTERS 10 // when matching generic tags, we need to know if we're dealing with // a pointer to a 32-byte ID or a null terminated string @@ -173,6 +176,7 @@ enum ndb_generic_element_type { NDB_ELEMENT_STRING = 1, NDB_ELEMENT_ID = 2, NDB_ELEMENT_INT = 3, + NDB_ELEMENT_CUSTOM = 4, }; enum ndb_search_order { @@ -250,10 +254,18 @@ struct ndb_filter_string { int len; }; +typedef bool ndb_filter_callback_fn(void *, struct ndb_note *); + +struct ndb_filter_custom { + void *ctx; + ndb_filter_callback_fn *cb; +}; + union ndb_filter_element { struct ndb_filter_string string; const unsigned char *id; uint64_t integer; + struct ndb_filter_custom custom_filter; }; struct ndb_filter_field { @@ -546,6 +558,7 @@ int ndb_filter_init_with(struct ndb_filter *filter, int pages); int ndb_filter_add_id_element(struct ndb_filter *, const unsigned char *id); int ndb_filter_add_int_element(struct ndb_filter *, uint64_t integer); int ndb_filter_add_str_element(struct ndb_filter *, const char *str); +int ndb_filter_add_custom_filter_element(struct ndb_filter *filter, ndb_filter_callback_fn *cb, void *ctx); int ndb_filter_eq(const struct ndb_filter *, const struct ndb_filter *); /// is `a` a subset of `b` diff --git a/test.c b/test.c @@ -1901,7 +1901,105 @@ static void test_nip50_profile_search() { printf("ok test_nip50_profile_search\n"); } +static bool only_threads_filter(void *ctx, struct ndb_note *note) +{ + struct 
ndb_iterator it; + struct ndb_str str; + const char *c; + + ndb_tags_iterate_start(note, &it); + + while (ndb_tags_iterate_next(&it)) { + if (ndb_tag_count(it.tag) < 4) + continue; + + str = ndb_iter_tag_str(&it, 0); + if (str.str[0] != 'e') + continue; + + str = ndb_iter_tag_str(&it, 3); + if (str.flag == NDB_PACKED_ID) + continue; + + if (str.str[0] != 'r') + continue; + + // if it has a reply or root marker, then this is a reply + c = &str.str[1]; + if (*c++ == 'e' && *c++ == 'p' && *c++ == 'l' && *c++ == 'y' && *c++ == '\0') { + return false; + } + + c = &str.str[1]; + if (*c++ == 'o' && *c++ == 'o' && *c++ == 't' && *c++ == '\0') { + return false; + } + } + + return true; +} + +static void test_custom_filter() +{ + struct ndb *ndb; + struct ndb_txn txn; + struct ndb_config config; + struct ndb_filter filter, *f = &filter; + struct ndb_filter filter2, *f2 = &filter2; + int count; + uint64_t sub_id, note_key; + struct ndb_query_result results[2]; + struct ndb_ingest_meta meta; + + const char *root = "[\"EVENT\",{\"id\":\"3d3fba391ce6f83cf336b161f3de90bb2610c20dfb9f4de3a6dacb6b11362971\",\"pubkey\":\"32e1827635450ebb3c5a7d12c1f8e7b2b514439ac10a67eef3d9fd9c5c68e245\",\"created_at\":1744084064,\"kind\":1,\"tags\":[],\"content\":\"dire wolves are back LFG\",\"sig\":\"340a6ee8a859a1d78e50551dae7b24aaba7137647a6ac295acc97faa06ba33310593f5b4081dad188aee81266144f2312afb249939a2e07c14ca167af08e998f\"}]"; + + const char *reply_json = "[\"EVENT\",{\"id\":\"3a338522ee1e27056acccee65849de8deba426db1c71cbd61d105280bbb67ed2\",\"pubkey\":\"7cc328a08ddb2afdf9f9be77beff4c83489ff979721827d628a542f32a247c0e\",\"created_at\":1744151551,\"kind\":1,\"tags\":[[\"alt\",\"A short note: pokerdeck 
♠️🦍🐧\"],[\"e\",\"086ccb1873fa10d4338713f24b034e17e543d8ad79c15ff39cf59f4d0cb7a2d6\",\"wss://nostr.wine/\",\"root\",\"32e1827635450ebb3c5a7d12c1f8e7b2b514439ac10a67eef3d9fd9c5c68e245\"],[\"p\",\"32e1827635450ebb3c5a7d12c1f8e7b2b514439ac10a67eef3d9fd9c5c68e245\",\"wss://nostr.wine\"]],\"content\":\"pokerdeck ♠️🦍🐧\",\"sig\":\"1099e47ba1ba962a8f2617c139e240d0369d81e70241dce7e73340e3230d8a3039c07114ed21f0e765e40f1b71fc2770fa5585994d27d2ece3b14e7b98f988d3\"}]"; + + ndb_default_config(&config); + assert(ndb_init(&ndb, test_dir, &config)); + + assert(ndb_filter_init(f)); + assert(ndb_filter_start_field(f, NDB_FILTER_KINDS)); + assert(ndb_filter_add_int_element(f, 1)); + ndb_filter_end_field(f); + ndb_filter_end(f); + + assert(ndb_filter_init(f)); + assert(ndb_filter_start_field(f, NDB_FILTER_CUSTOM)); + assert(ndb_filter_add_custom_filter_element(f, only_threads_filter, NULL)); + ndb_filter_end_field(f); + ndb_filter_end(f); + + assert(ndb_filter_init(f2)); + assert(ndb_filter_start_field(f2, NDB_FILTER_KINDS)); + assert(ndb_filter_add_int_element(f2, 1)); + ndb_filter_end_field(f2); + ndb_filter_end(f2); + + sub_id = ndb_subscribe(ndb, f2, 1); + + ndb_ingest_meta_init(&meta, 1, "test"); + + assert(ndb_process_event_with(ndb, root, strlen(root), &meta)); + assert(ndb_process_event_with(ndb, reply_json, strlen(reply_json), &meta)); + + assert(ndb_wait_for_notes(ndb, sub_id, &note_key, 2) == 2); + + ndb_begin_query(ndb, &txn); + ndb_query(&txn, f, 1, results, 2, &count); + ndb_end_query(&txn); + + assert(count == 1); + assert(ndb_note_id(results[0].note)[0] == 0x3d); + + // Cleanup + ndb_destroy(ndb); + + printf("ok test_custom_filter\n"); +} + int main(int argc, const char *argv[]) { + test_custom_filter(); test_note_relay_index(); test_filter_search(); test_filter_parse_search_json();