nostrdb

an unfairly fast embedded nostr database backed by lmdb
git clone git://jb55.com/nostrdb
Log | Files | Refs | Submodules | README | LICENSE

commit a3915f898309cf07c5188347ae2492674bc70d46
parent 8eb0af2f554b700c93f3a5ffb649f862862f60a3
Author: William Casarin <jb55@jb55.com>
Date:   Fri,  1 Dec 2023 17:04:01 -0800

search: allow searching from newest-to-oldest and oldest-to-newest

Diffstat:
MREADME.md | 4++--
Mndb.c | 24++++++++++++++++++++----
Mnostrdb.c | 89++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
Mnostrdb.h | 15++++++++++++++-
4 files changed, 106 insertions(+), 26 deletions(-)

diff --git a/README.md b/README.md @@ -38,7 +38,7 @@ usage: ndb [--skip-verification] [-d db_dir] <command> commands stat - search <fulltext query> + search [--oldest-first] [--limit 42] <fulltext query> import <line-delimited json file> settings @@ -60,7 +60,7 @@ nostrdb supports fulltext queries. You can import some test events like so: ``` $ make testdata/many-events.json $ ndb --skip-verification import testdata/many-events.json -$ ndb search 'nosy ostrich' +$ ndb search --limit 2 --oldest-first 'nosy ostrich' [01] K<'ostrich' 7 1671217526 note_id:253309> Q: What do you call a nosy ostrich? diff --git a/ndb.c b/ndb.c @@ -2,6 +2,7 @@ #include "nostrdb.h" #include <stdio.h> +#include <stdlib.h> #include <sys/stat.h> #include <sys/mman.h> #include <unistd.h> @@ -12,7 +13,7 @@ static int usage() printf("usage: ndb [--skip-verification] [-d db_dir] <command>\n\n"); printf("commands\n\n"); printf(" stat\n"); - printf(" search <fulltext query>\n"); + printf(" search [--oldest-first] [--limit 42] <fulltext query>\n"); printf(" import <line-delimited json file>\n\n"); printf("settings\n\n"); printf(" --skip-verification skip signature validation\n"); @@ -113,7 +114,7 @@ static void ndb_print_text_search_result(struct ndb_txn *txn, int main(int argc, char *argv[]) { struct ndb *ndb; - int i, flags; + int i, flags, limit; struct ndb_stat stat; struct ndb_txn txn; struct ndb_text_search_results results; @@ -122,7 +123,9 @@ int main(int argc, char *argv[]) unsigned char *data; size_t data_len; struct ndb_config config; + struct ndb_text_search_config search_config; ndb_default_config(&config); + ndb_default_text_search_config(&search_config); ndb_config_set_mapsize(&config, 1024ULL * 1024ULL * 1024ULL * 1024ULL /* 1 TiB */); if (argc < 2) { @@ -152,9 +155,22 @@ int main(int argc, char *argv[]) return 2; } - if (argc == 3 && !strcmp(argv[1], "search")) { + if (argc >= 3 && !strcmp(argv[1], "search")) { + for (i = 0; i < 2; i++) { + if (!strcmp(argv[2], "--oldest-first")) { + ndb_text_search_config_set_order(&search_config, NDB_ORDER_ASCENDING); + argv++; + argc--; + } else if (!strcmp(argv[2], "--limit")) { + limit = atoi(argv[3]); + ndb_text_search_config_set_limit(&search_config, limit); + argv += 2; + argc -= 2; + } + } + ndb_begin_query(ndb, &txn); - ndb_text_search(&txn, argv[2], &results, 999); + ndb_text_search(&txn, argv[2], &results, &search_config); // print results for now for (i = 0; i < results.num_results; i++) { diff --git a/nostrdb.c b/nostrdb.c @@ -177,7 +177,7 @@ static int ndb_make_text_search_key(unsigned char *buf, int bufsize, // TODO: need update this to uint64_t // we push this first because our query function can pull this off // quicky to check matches - if (!push_varint(&cur, (int)note_id)) + if (!push_varint(&cur, (int32_t)note_id)) return 0; // string length @@ -238,12 +238,14 @@ static int ndb_make_text_search_key_high(unsigned char *buf, int bufsize, int *keysize) { uint64_t timestamp, note_id; - timestamp = UINT64_MAX; - note_id = UINT64_MAX; + timestamp = INT32_MAX; + note_id = INT32_MAX; return ndb_make_text_search_key(buf, bufsize, 0, wordlen, word, timestamp, note_id, keysize); } +typedef int (*ndb_text_search_key_order_fn)(unsigned char *buf, int bufsize, int wordlen, const char *word, int *keysize); + /** From LMDB: Compare two items lexically */ static int mdb_cmp_memn(const MDB_val *a, const MDB_val *b) { int diff; @@ -2469,7 +2471,8 @@ static int prefix_count(const char *str1, int len1, const char *str2, int len2) static int ndb_text_search_next_word(MDB_cursor *cursor, MDB_cursor_op op, MDB_val *k, struct ndb_word *search_word, struct ndb_text_search_result *last_result, - struct ndb_text_search_result *result) + struct ndb_text_search_result *result, + MDB_cursor_op order_op) { struct cursor key_cursor; MDB_val v; @@ -2481,8 +2484,18 @@ static int ndb_text_search_next_word(MDB_cursor *cursor, MDB_cursor_op op, // key. // // Subsequent searches should use MDB_NEXT - if (mdb_cursor_get(cursor, k, &v, op)) - return 0; + if (mdb_cursor_get(cursor, k, &v, op)) { + // we should only do this if we're going in reverse + if (op == MDB_SET_RANGE && order_op == MDB_PREV) { + // if set range worked and our key exists, it should be + // the one right before this one + if (mdb_cursor_get(cursor, k, &v, MDB_PREV)) + return 0; + } else { + return 0; + } + } + make_cursor(k->mv_data, k->mv_data + k->mv_size, &key_cursor); @@ -2566,28 +2579,68 @@ static void ndb_text_search_results_init( results->num_results = 0; } +static void ndb_print_text_search_key(struct ndb_text_search_key *key) +{ + printf("K<'%.*s' %d %" PRIu64 " note_id:%" PRIu64 ">", key->str_len, key->str, + key->word_index, + key->timestamp, + key->note_id); +} + +void ndb_default_text_search_config(struct ndb_text_search_config *cfg) +{ + cfg->order = NDB_ORDER_DESCENDING; + cfg->limit = MAX_TEXT_SEARCH_RESULTS; +} + +void ndb_text_search_config_set_order(struct ndb_text_search_config *cfg, + enum ndb_search_order order) +{ + cfg->order = order; +} + +void ndb_text_search_config_set_limit(struct ndb_text_search_config *cfg, int limit) +{ + cfg->limit = limit; +} + int ndb_text_search(struct ndb_txn *txn, const char *query, - struct ndb_text_search_results *results, int limit) + struct ndb_text_search_results *results, + struct ndb_text_search_config *config) { unsigned char buffer[1024], *buf; unsigned char saved_buf[1024], *saved; struct ndb_text_search_result *result, *last_result; struct ndb_text_search_result candidate, last_candidate; struct ndb_search_words search_words; + //struct ndb_text_search_key search_key; struct ndb_word *search_word; struct cursor cur; + ndb_text_search_key_order_fn key_order_fn; MDB_dbi text_db; MDB_cursor *cursor; MDB_val k, v; - int i, j, keysize, saved_size; - MDB_cursor_op op; + int i, j, keysize, saved_size, limit; + MDB_cursor_op op, order_op; //int num_note_ids; saved = NULL; ndb_text_search_results_init(results); ndb_search_words_init(&search_words); + + // search config + limit = MAX_TEXT_SEARCH_RESULTS; + order_op = MDB_PREV; + key_order_fn = ndb_make_text_search_key_high; + if (config) { + if (config->order == NDB_ORDER_ASCENDING) { + order_op = MDB_NEXT; + key_order_fn = ndb_make_text_search_key_low; + } + limit = min(limit, config->limit); + } + // end search config - //num_note_ids = 0; text_db = txn->lmdb->dbs[NDB_DB_NOTE_TEXT]; make_cursor((unsigned char *)query, (unsigned char *)query + strlen(query), &cur); @@ -2600,8 +2653,6 @@ int ndb_text_search(struct ndb_txn *txn, const char *query, return 0; } - limit = min(MAX_TEXT_SEARCH_RESULTS, limit); - // for each word, we recursively find all of the submatches while (results->num_results < limit) { last_result = NULL; @@ -2619,18 +2670,17 @@ int ndb_text_search(struct ndb_txn *txn, const char *query, // reposition the cursor so we can continue if (mdb_cursor_get(cursor, &k, &v, MDB_SET_RANGE)) - break; + return 0; - op = MDB_NEXT; + op = order_op; } else { // construct a packed fulltext search key using this // word this key doesn't contain any timestamp or index // info, so it should range match instead of exact // match - if (!ndb_make_text_search_key_low( - buffer, sizeof(buffer), - search_words.words[0].word_len, - search_words.words[0].word, &keysize)) + if (!key_order_fn(buffer, sizeof(buffer), + search_words.words[0].word_len, + search_words.words[0].word, &keysize)) { // word is too big to fit in 1024-sized key continue; @@ -2677,7 +2727,8 @@ int ndb_text_search(struct ndb_txn *txn, const char *query, if (!ndb_text_search_next_word(cursor, op, &k, search_word, last_result, - &candidate)) { + &candidate, + order_op)) { break; } diff --git a/nostrdb.h b/nostrdb.h @@ -277,6 +277,11 @@ enum ndb_generic_element_type { NDB_ELEMENT_ID = 2, }; +enum ndb_search_order { + NDB_ORDER_DESCENDING, + NDB_ORDER_ASCENDING, +}; + union ndb_filter_element { const char *string; const unsigned char *id; @@ -311,6 +316,11 @@ struct ndb_config { ndb_ingest_filter_fn ingest_filter; }; +struct ndb_text_search_config { + enum ndb_search_order order; + int limit; +}; + // CONFIG void ndb_default_config(struct ndb_config *); void ndb_config_set_ingest_threads(struct ndb_config *config, int threads); @@ -377,7 +387,10 @@ void ndb_filter_end_field(struct ndb_filter *); void ndb_filter_free(struct ndb_filter *filter); // FULLTEXT SEARCH -int ndb_text_search(struct ndb_txn *txn, const char *query, struct ndb_text_search_results *, int limit); +int ndb_text_search(struct ndb_txn *txn, const char *query, struct ndb_text_search_results *, struct ndb_text_search_config *); +void ndb_default_text_search_config(struct ndb_text_search_config *); +void ndb_text_search_config_set_order(struct ndb_text_search_config *, enum ndb_search_order); +void ndb_text_search_config_set_limit(struct ndb_text_search_config *, int limit); // stats int ndb_stat(struct ndb *ndb, struct ndb_stat *stat);