commit 6e9b1aa0f848292eb5a52b41bad48b0478ac0c07
parent 8813d9ebec3f6f8f0e74b960bae248d1e6b20aaf
Author: William Casarin <jb55@jb55.com>
Date: Tue, 28 Nov 2023 10:51:12 -0800
search: make search case insensitive
Diffstat:
3 files changed, 27 insertions(+), 3 deletions(-)
diff --git a/cursor.h b/cursor.h
@@ -50,6 +50,20 @@ static inline int cursor_push(struct cursor *cursor, unsigned char *data, int le
return 1;
}
+/* Append len bytes of str to cur, lowercasing each byte with tolower().
+ * Returns 1 on success, or 0 (writing nothing) if cur->p + len would reach cur->end. */
+static inline int cursor_push_lowercase(struct cursor *cur, const char *str, int len)
+{
+	int i;
+	if (unlikely(cur->p + len >= cur->end))
+		return 0;
+	/* tolower() is undefined for negative values other than EOF, so go via unsigned char */
+	for (i = 0; i < len; i++)
+		cur->p[i] = tolower((unsigned char)str[i]);
+	cur->p += len;
+	return 1;
+}
+
static inline int cursor_push_str(struct cursor *cursor, const char *str)
{
return cursor_push(cursor, (unsigned char*)str, (int)strlen(str));
diff --git a/nostrdb.c b/nostrdb.c
@@ -164,8 +164,8 @@ static int ndb_make_text_search_key(unsigned char *buf, int bufsize,
if (!push_varint(&cur, word_len))
return 0;
- // non-null terminated string
- if (!cursor_push(&cur, (unsigned char*)str, word_len))
+ // non-null terminated, lowercase string
+ if (!cursor_push_lowercase(&cur, str, word_len))
return 0;
// the index of the word in the content so that we can do more accurate
diff --git a/test.c b/test.c
@@ -948,16 +948,26 @@ static void test_fulltext()
size_t mapsize;
int ingester_threads;
struct ndb_txn txn;
+ int written;
+ static const int alloc_size = 2 << 18;
+ char *json = malloc(alloc_size);
mapsize = 1024 * 1024 * 100;
ingester_threads = 1;
assert(ndb_init(&ndb, test_dir, mapsize, ingester_threads, 0));
+ read_file("testdata/search.json", (unsigned char*)json, alloc_size, &written);
+ assert(ndb_process_client_events(ndb, json, written));
+ ndb_destroy(ndb);
+ assert(ndb_init(&ndb, test_dir, mapsize, ingester_threads, 0));
+
ndb_begin_query(ndb, &txn);
- ndb_text_search(&txn, "bits after");
+ ndb_text_search(&txn, "Jumped Over");
ndb_end_query(&txn);
ndb_destroy(ndb);
+
+ free(json);
}
int main(int argc, const char *argv[]) {