nostrdb

an unfairly fast embedded nostr database backed by lmdb
git clone git://jb55.com/nostrdb
Log | Files | Refs | Submodules | README | LICENSE

commit 6a7f7929af8f1805c2c808c21b8b7ad4af4e6a3d
parent 9a3b613bd44e9d6d7c0ab592934b2dd6ba904e05
Author: William Casarin <jb55@jb55.com>
Date:   Mon, 13 Jan 2025 11:29:37 -0800

nip50: add support for search field in filters

We will be using this for our NIP-50 search support.

Diffstat:
M src/nostrdb.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M src/nostrdb.h | 1 +
M test.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 123 insertions(+), 1 deletion(-)

diff --git a/src/nostrdb.c b/src/nostrdb.c @@ -742,6 +742,7 @@ static const char *ndb_filter_field_name(enum ndb_filter_fieldtype field) case NDB_FILTER_SINCE: return "since"; case NDB_FILTER_UNTIL: return "until"; case NDB_FILTER_LIMIT: return "limit"; + case NDB_FILTER_SEARCH: return "search"; } return "unknown"; @@ -825,6 +826,15 @@ static int ndb_filter_add_element(struct ndb_filter *filter, union ndb_filter_el return 0; offset = el.integer; break; + case NDB_FILTER_SEARCH: + if (current->field.elem_type != NDB_ELEMENT_STRING) { + return 0; + } + if (!cursor_push(&filter->data_buf, (unsigned char *)el.string.string, el.string.len)) + return 0; + if (!cursor_push_byte(&filter->data_buf, 0)) + return 0; + break; case NDB_FILTER_TAGS: switch (current->field.elem_type) { case NDB_ELEMENT_ID: @@ -887,7 +897,7 @@ int ndb_filter_add_str_element_len(struct ndb_filter *filter, const char *str, i if (!(current = ndb_filter_current_element(filter))) return 0; - // only generic queries are allowed to have strings + // only generic tags and search queries are allowed to have strings switch (current->field.type) { case NDB_FILTER_SINCE: case NDB_FILTER_UNTIL: @@ -896,6 +906,12 @@ int ndb_filter_add_str_element_len(struct ndb_filter *filter, const char *str, i case NDB_FILTER_AUTHORS: case NDB_FILTER_KINDS: return 0; + case NDB_FILTER_SEARCH: + if (current->count == 1) { + // you can't add more than one string to a search + return 0; + } + break; case NDB_FILTER_TAGS: break; } @@ -925,6 +941,7 @@ int ndb_filter_add_int_element(struct ndb_filter *filter, uint64_t integer) case NDB_FILTER_IDS: case NDB_FILTER_AUTHORS: case NDB_FILTER_TAGS: + case NDB_FILTER_SEARCH: return 0; case NDB_FILTER_KINDS: case NDB_FILTER_SINCE: @@ -955,6 +972,7 @@ int ndb_filter_add_id_element(struct ndb_filter *filter, const unsigned char *id case NDB_FILTER_UNTIL: case NDB_FILTER_LIMIT: case NDB_FILTER_KINDS: + case NDB_FILTER_SEARCH: return 0; case NDB_FILTER_IDS: case NDB_FILTER_AUTHORS: @@ -1146,6 
+1164,21 @@ static int ndb_filter_matches_with(struct ndb_filter *filter, assert(els->count == 1); if (note->created_at < els->elements[0]) continue; + break; + case NDB_FILTER_SEARCH: + // TODO: matching search filters will need an accelerated + // data structure, like our minimal perfect hashmap + // idea for mutewords. + // + // We'll also want to store tokenized words in the filter + // itself, so that we can walk over each word and check + // the hashmap to see if the note contains at least + // one word. + // + // For now we always return true, since we assume + // the search index will be walked for these kinds + // of queries. + continue; case NDB_FILTER_LIMIT: cont: continue; @@ -1267,6 +1300,7 @@ void ndb_filter_end_field(struct ndb_filter *filter) case NDB_FILTER_SINCE: case NDB_FILTER_UNTIL: case NDB_FILTER_LIMIT: + case NDB_FILTER_SEARCH: // don't need to sort these break; } @@ -5377,6 +5411,7 @@ static int cursor_push_json_elem_array(struct cursor *cur, int ndb_filter_json(const struct ndb_filter *filter, char *buf, int buflen) { + const char *str; struct cursor cur, *c = &cur; struct ndb_filter_elements *elems; int i; @@ -5400,6 +5435,14 @@ int ndb_filter_json(const struct ndb_filter *filter, char *buf, int buflen) if (!cursor_push_json_elem_array(c, filter, elems)) return 0; break; + case NDB_FILTER_SEARCH: + if (!cursor_push_str(c, "\"search\":")) + return 0; + if (!(str = ndb_filter_get_string_element(filter, elems, 0))) + return 0; + if (!cursor_push_jsonstr(c, str)) + return 0; + break; case NDB_FILTER_AUTHORS: if (!cursor_push_str(c, "\"authors\":")) return 0; @@ -6021,6 +6064,8 @@ ndb_filter_parse_field(const char *tok, int len, char *tagchar) return NDB_FILTER_UNTIL; } else if (len == 5 && !strncmp(tok, "limit", 5)) { return NDB_FILTER_LIMIT; + } else if (len == 6 && !strncmp(tok, "search", 6)) { + return NDB_FILTER_SEARCH; } return 0; @@ -6131,6 +6176,28 @@ static int ndb_filter_parse_json_elems(struct ndb_json_parser *parser, return 1; } 
+static int ndb_filter_parse_json_str(struct ndb_json_parser *parser, + struct ndb_filter *filter) +{ + jsmntok_t *tok; + const char *start; + int tok_len; + + tok = &parser->toks[parser->i]; + start = parser->json + tok->start; + tok_len = toksize(tok); + + if (tok->type != JSMN_STRING) + return 0; + + if (!ndb_filter_add_str_element_len(filter, start, tok_len)) + return 0; + + ndb_debug("added str elem '%.*s'\n", tok_len, start); + + return 1; +} + static int ndb_filter_parse_json_int(struct ndb_json_parser *parser, struct ndb_filter *filter) { @@ -6224,6 +6291,12 @@ static int ndb_filter_parse_json(struct ndb_json_parser *parser, return 0; } break; + case NDB_FILTER_SEARCH: + if (!ndb_filter_parse_json_str(parser, filter)) { + ndb_debug("failed to parse filter search str\n"); + return 0; + } + break; case NDB_FILTER_SINCE: case NDB_FILTER_UNTIL: case NDB_FILTER_LIMIT: diff --git a/src/nostrdb.h b/src/nostrdb.h @@ -156,6 +156,7 @@ enum ndb_filter_fieldtype { NDB_FILTER_SINCE = 5, NDB_FILTER_UNTIL = 6, NDB_FILTER_LIMIT = 7, + NDB_FILTER_SEARCH = 8, }; #define NDB_NUM_FILTERS 7 diff --git a/test.c b/test.c @@ -1733,7 +1733,55 @@ static void test_filter_is_subset() { assert(ndb_filter_is_subset_of(k, ki) == 0); } +static void test_filter_search() +{ + struct ndb_filter filter, *f = &filter; + + assert(ndb_filter_init_with(f, 2)); + + assert(ndb_filter_start_field(f, NDB_FILTER_SEARCH)); + assert(ndb_filter_add_str_element(f, "searchterm")); + assert(!ndb_filter_add_str_element(f, "searchterm 2")); + ndb_filter_end_field(f); + + assert(ndb_filter_end(f)); +} + +static void test_filter_parse_search_json() { + const char *json = "{\"search\":\"abc\",\"limit\":1}"; + unsigned char buf[1024]; + int i; + + struct ndb_filter filter, *f = &filter; + struct ndb_filter_elements *es; + + ndb_filter_init_with(f, 2); + assert(ndb_filter_from_json(json, strlen(json), f, buf, sizeof(buf))); + assert(filter.finalized); + + assert(f->num_elements == 2); + for (i = 0; i < 
f->num_elements; i++) { + es = ndb_filter_get_elements(f, i); + if (i == 0) { + assert(es->field.type == NDB_FILTER_SEARCH); + assert(es->count == 1); + assert(!strcmp(ndb_filter_get_string_element(f, es, 0), "abc")); + } else if (i == 1) { + assert(es->field.type == NDB_FILTER_LIMIT); + assert(es->count == 1); + assert(ndb_filter_get_int_element(es, 0) == 1); + } + } + + // test back to json + assert(ndb_filter_json(f, (char *)buf, sizeof(buf))); + printf("search json: '%s'\n", (const char *)buf); + assert(!strcmp((const char*)buf, json)); +} + int main(int argc, const char *argv[]) { + test_filter_search(); + test_filter_parse_search_json(); test_parse_filter_json(); test_filter_eq(); test_filter_is_subset();