nip50: add support for search field in filters - nostrdb - an unfairly fast embedded nostr database backed by lmdb

commit 6a7f7929af8f1805c2c808c21b8b7ad4af4e6a3d
parent 9a3b613bd44e9d6d7c0ab592934b2dd6ba904e05
Author: William Casarin <jb55@jb55.com>
Date:   Mon, 13 Jan 2025 11:29:37 -0800

nip50: add support for search field in filters

We will be using this for our NIP-50 search support.

Diffstat:
M src/nostrdb.c  | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M src/nostrdb.h  | 1 +
M test.c  | 48 ++++++++++++++++++++++++++++++++++++++++++++++++

3 files changed, 123 insertions(+), 1 deletion(-)
diff --git a/src/nostrdb.c b/src/nostrdb.c
@@ -742,6 +742,7 @@ static const char *ndb_filter_field_name(enum ndb_filter_fieldtype field)
 	case NDB_FILTER_SINCE: return "since";
 	case NDB_FILTER_UNTIL: return "until";
 	case NDB_FILTER_LIMIT: return "limit";
+	case NDB_FILTER_SEARCH: return "search";
 	}
 
 	return "unknown";
@@ -825,6 +826,15 @@ static int ndb_filter_add_element(struct ndb_filter *filter, union ndb_filter_el
 			return 0;
 		offset = el.integer;
 		break;
+	case NDB_FILTER_SEARCH:
+		if (current->field.elem_type != NDB_ELEMENT_STRING) {
+			return 0;
+		}
+		if (!cursor_push(&filter->data_buf, (unsigned char *)el.string.string, el.string.len))
+			return 0;
+		if (!cursor_push_byte(&filter->data_buf, 0))
+			return 0;
+		break;
 	case NDB_FILTER_TAGS:
 		switch (current->field.elem_type) {
 		case NDB_ELEMENT_ID:
@@ -887,7 +897,7 @@ int ndb_filter_add_str_element_len(struct ndb_filter *filter, const char *str, i
 	if (!(current = ndb_filter_current_element(filter)))
 		return 0;
 
-	// only generic queries are allowed to have strings
+	// only generic tags and search queries are allowed to have strings
 	switch (current->field.type) {
 	case NDB_FILTER_SINCE:
 	case NDB_FILTER_UNTIL:
@@ -896,6 +906,12 @@ int ndb_filter_add_str_element_len(struct ndb_filter *filter, const char *str, i
 	case NDB_FILTER_AUTHORS:
 	case NDB_FILTER_KINDS:
 		return 0;
+	case NDB_FILTER_SEARCH:
+		if (current->count == 1) {
+			// you can't add more than one string to a search
+			return 0;
+		}
+		break;
 	case NDB_FILTER_TAGS:
 		break;
 	}
@@ -925,6 +941,7 @@ int ndb_filter_add_int_element(struct ndb_filter *filter, uint64_t integer)
 	case NDB_FILTER_IDS:
 	case NDB_FILTER_AUTHORS:
 	case NDB_FILTER_TAGS:
+	case NDB_FILTER_SEARCH:
 		return 0;
 	case NDB_FILTER_KINDS:
 	case NDB_FILTER_SINCE:
@@ -955,6 +972,7 @@ int ndb_filter_add_id_element(struct ndb_filter *filter, const unsigned char *id
 	case NDB_FILTER_UNTIL:
 	case NDB_FILTER_LIMIT:
 	case NDB_FILTER_KINDS:
+	case NDB_FILTER_SEARCH:
 		return 0;
 	case NDB_FILTER_IDS:
 	case NDB_FILTER_AUTHORS:
@@ -1146,6 +1164,21 @@ static int ndb_filter_matches_with(struct ndb_filter *filter,
 			assert(els->count == 1);
 			if (note->created_at < els->elements[0])
 				continue;
+			break;
+		case NDB_FILTER_SEARCH:
+			// TODO: matching search filters will need an accelerated
+			// data structure, like our minimal perfect hashmap
+			// idea for mutewords.
+			//
+			// We'll also want to store tokenized words in the filter
+			// itself, so that we can walk over each word and check
+			// the hashmap to see if the note contains at least
+			// one word.
+			//
+			// For now we always return true, since we assume
+			// the search index will be walked for these kinds
+			// of queries.
+			continue;
 		case NDB_FILTER_LIMIT:
 cont:
 			continue;
@@ -1267,6 +1300,7 @@ void ndb_filter_end_field(struct ndb_filter *filter)
 	case NDB_FILTER_SINCE:
 	case NDB_FILTER_UNTIL:
 	case NDB_FILTER_LIMIT:
+	case NDB_FILTER_SEARCH:
 		// don't need to sort these
 		break;
 	}
@@ -5377,6 +5411,7 @@ static int cursor_push_json_elem_array(struct cursor *cur,
 
 int ndb_filter_json(const struct ndb_filter *filter, char *buf, int buflen)
 {
+	const char *str;
 	struct cursor cur, *c = &cur;
 	struct ndb_filter_elements *elems;
 	int i;
@@ -5400,6 +5435,14 @@ int ndb_filter_json(const struct ndb_filter *filter, char *buf, int buflen)
 			if (!cursor_push_json_elem_array(c, filter, elems))
 				return 0;
 			break;
+		case NDB_FILTER_SEARCH:
+			if (!cursor_push_str(c, "\"search\":"))
+				return 0;
+			if (!(str = ndb_filter_get_string_element(filter, elems, 0)))
+				return 0;
+			if (!cursor_push_jsonstr(c, str))
+				return 0;
+			break;
 		case NDB_FILTER_AUTHORS:
 			if (!cursor_push_str(c, "\"authors\":"))
 				return 0;
@@ -6021,6 +6064,8 @@ ndb_filter_parse_field(const char *tok, int len, char *tagchar)
 		return NDB_FILTER_UNTIL;
 	} else if (len == 5 && !strncmp(tok, "limit", 5)) {
 		return NDB_FILTER_LIMIT;
+	} else if (len == 6 && !strncmp(tok, "search", 6)) {
+		return NDB_FILTER_SEARCH;
 	}
 
 	return 0;
@@ -6131,6 +6176,28 @@ static int ndb_filter_parse_json_elems(struct ndb_json_parser *parser,
 	return 1;
 }
 
+static int ndb_filter_parse_json_str(struct ndb_json_parser *parser, // Parse the parser's current token as a JSON string and add it to `filter`.
+				     struct ndb_filter *filter)
+{ // Returns 1 on success; 0 if the token is not a JSMN_STRING or the add call fails.
+	jsmntok_t *tok;
+	const char *start;
+	int tok_len;
+
+	tok = &parser->toks[parser->i]; // token at the parser's current index
+	start = parser->json + tok->start; // token text begins at offset tok->start in the JSON buffer
+	tok_len = toksize(tok); // presumably the token's length in bytes; toksize is defined elsewhere, confirm
+
+	if (tok->type != JSMN_STRING) // only string tokens are accepted here
+		return 0;
+
+	if (!ndb_filter_add_str_element_len(filter, start, tok_len)) // text is handed over as pointer + length
+		return 0;
+
+	ndb_debug("added str elem '%.*s'\n", tok_len, start);
+
+	return 1;
+}
+
 static int ndb_filter_parse_json_int(struct ndb_json_parser *parser,
 				     struct ndb_filter *filter)
 {
@@ -6224,6 +6291,12 @@ static int ndb_filter_parse_json(struct ndb_json_parser *parser,
 				return 0;
 			}
 			break;
+		case NDB_FILTER_SEARCH:
+			if (!ndb_filter_parse_json_str(parser, filter)) {
+				ndb_debug("failed to parse filter search str\n");
+				return 0;
+			}
+			break;
 		case NDB_FILTER_SINCE:
 		case NDB_FILTER_UNTIL:
 		case NDB_FILTER_LIMIT:
diff --git a/src/nostrdb.h b/src/nostrdb.h
@@ -156,6 +156,7 @@ enum ndb_filter_fieldtype {
 	NDB_FILTER_SINCE   = 5,
 	NDB_FILTER_UNTIL   = 6,
 	NDB_FILTER_LIMIT   = 7,
+	NDB_FILTER_SEARCH  = 8,
 };
 #define NDB_NUM_FILTERS 7 // NOTE(review): the enum above now goes up to NDB_FILTER_SEARCH = 8 while this stays 7; confirm whether it needs bumping
 
diff --git a/test.c b/test.c
@@ -1733,7 +1733,55 @@ static void test_filter_is_subset() {
 	assert(ndb_filter_is_subset_of(k, ki) == 0);
 }
 
+static void test_filter_search() // Checks that a search field takes one string element and refuses a second.
+{
+	struct ndb_filter filter, *f = &filter;
+
+	assert(ndb_filter_init_with(f, 2)); // NOTE(review): setup calls sit inside assert(); they are compiled out if NDEBUG is defined
+
+	assert(ndb_filter_start_field(f, NDB_FILTER_SEARCH));
+	assert(ndb_filter_add_str_element(f, "searchterm")); // the first search string must be accepted
+	assert(!ndb_filter_add_str_element(f, "searchterm 2")); // a second search string must be rejected
+	ndb_filter_end_field(f);
+
+	assert(ndb_filter_end(f)); // NOTE(review): no cleanup call for `filter` is visible in this test; confirm whether one is needed
+}
+
+static void test_filter_parse_search_json() { // Round trip: parse a filter with "search" and "limit" from JSON, inspect it, serialize it back.
+	const char *json = "{\"search\":\"abc\",\"limit\":1}";
+	unsigned char buf[1024]; // handed to ndb_filter_from_json, then reused as the ndb_filter_json output buffer
+	int i;
+
+	struct ndb_filter filter, *f = &filter;
+	struct ndb_filter_elements *es;
+
+	ndb_filter_init_with(f, 2); // NOTE(review): return value is not checked here
+	assert(ndb_filter_from_json(json, strlen(json), f, buf, sizeof(buf)));
+	assert(filter.finalized); // the filter is expected to be finalized once ndb_filter_from_json succeeds
+
+	assert(f->num_elements == 2); // one field for "search", one for "limit"
+	for (i = 0; i < f->num_elements; i++) {
+		es = ndb_filter_get_elements(f, i);
+		if (i == 0) { // fields are expected in the same order as in the JSON input
+			assert(es->field.type == NDB_FILTER_SEARCH);
+			assert(es->count == 1);
+			assert(!strcmp(ndb_filter_get_string_element(f, es, 0), "abc"));
+		} else if (i == 1) {
+			assert(es->field.type == NDB_FILTER_LIMIT);
+			assert(es->count == 1);
+			assert(ndb_filter_get_int_element(es, 0) == 1);
+		}
+	}
+
+	// test back to json
+	assert(ndb_filter_json(f, (char *)buf, sizeof(buf)));
+	printf("search json: '%s'\n", (const char *)buf);
+	assert(!strcmp((const char*)buf, json)); // the serialized text must equal the input JSON exactly
+}
+
 int main(int argc, const char *argv[]) {
+	test_filter_search();
+	test_filter_parse_search_json();
 	test_parse_filter_json();
 	test_filter_eq();
 	test_filter_is_subset();

	nostrdb an unfairly fast embedded nostr database backed by lmdb
	git clone git://jb55.com/nostrdb
	Log \| Files \| Refs \| Submodules \| README \| LICENSE

M	src/nostrdb.c	\|	75	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M	src/nostrdb.h	\|	1	+
M	test.c	\|	48	++++++++++++++++++++++++++++++++++++++++++++++++