nostrdb

an unfairly fast embedded nostr database backed by lmdb
git clone git://jb55.com/nostrdb
Log | Files | Refs | Submodules | README | LICENSE

commit 21e060ae740d4d27c854a30a79930c569c492ca2
parent e68ea9594346104531583be690c8599e7936c00a
Author: William Casarin <jb55@jb55.com>
Date:   Thu, 10 Aug 2023 10:21:51 -0700

add optimized memchr function for processing newlines

I will need this for processing many events at once

Diffstat:
MMakefile | 2+-
Amemchr.h | 84+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mnostrdb.c | 1+
Mtest.c | 33+++++++++++++++++++++++++++++++++
4 files changed, 119 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile @@ -1,5 +1,5 @@ CFLAGS = -Wall -Wno-unused-function -Werror -O2 -g -Ideps/secp256k1/include -Ideps/lmdb -HEADERS = sha256.h nostrdb.h cursor.h hex.h jsmn.h config.h sha256.h random.h +HEADERS = sha256.h nostrdb.h cursor.h hex.h jsmn.h config.h sha256.h random.h memchr.h SRCS = nostrdb.c sha256.c LDS = $(SRCS) $(ARS) ARS = deps/lmdb/liblmdb.a deps/secp256k1/.libs/libsecp256k1.a diff --git a/memchr.h b/memchr.h @@ -0,0 +1,84 @@ + + +#ifndef FAST_MEMCHR_H +#define FAST_MEMCHR_H + +#include <arm_neon.h> +#include <string.h> + +#ifdef __ARM_NEON +#define vector_strchr neon_strchr +#else +#define vector_strchr native_memchr +#endif + +#ifdef __ARM_NEON +static const char *neon_strchr(const char *str, char c, size_t length) { + const char* end = str + length; + + // Alignment handling + while (str < end && ((size_t)str & 0xF)) { + if (*str == c) + return str; + ++str; + } + + uint8x16_t searchChar = vdupq_n_u8(c); + + while (str + 16 <= end) { + uint8x16_t chunk = vld1q_u8((const uint8_t*)str); + uint8x16_t comparison = vceqq_u8(chunk, searchChar); + + // Check first 64 bits + uint64_t result0 = + vgetq_lane_u64(vreinterpretq_u64_u8(comparison), 0); + + if (result0) { + /* + printf("Got Result0: %016llx @ str'%s', bits %d\n", + result0, str, __builtin_ctzll(result0)/8); + */ + return str + __builtin_ctzll(result0)/8; + } + + // Check second 64 bits + uint64_t result1 = vgetq_lane_u64(vreinterpretq_u64_u8(comparison), 1); + if (result1) { + /* + int bits = __builtin_ctzll(result1); + int bit_index = bits / 8; + printf("Got Result1: %016llx @ str'%s', str+8='%s', str+8+bind='%s' bits=%d bit_index=%d\n", + result1, str, str + 8, str + 8 + bit_index, bits, bit_index); + */ + return str + 8 + __builtin_ctzll(result1)/8; + } + + str += 16; + } + + // Handle remaining unaligned characters + for (; str < end; ++str) { + if (*str == c) + return str; + } + + return NULL; +} +#endif + +static inline const char *native_memchr(const char *str, char c, size_t length) { + const void *result = memchr(str, c, length); + return (const char *) result; +} + +static inline const char *fast_strchr(const char *str, char c, size_t length) +{ + if (length >= 16) { + return vector_strchr(str, c, length); + } + + return native_memchr(str, c, length); +} + + +#endif // FAST_MEMCHR_H diff --git a/nostrdb.c b/nostrdb.c @@ -9,6 +9,7 @@ #include "util.h" #include "threadpool.h" #include "protected_queue.h" +#include "memchr.h" #include <stdlib.h> #include <limits.h> #include <assert.h> diff --git a/test.c b/test.c @@ -3,6 +3,7 @@ #include "hex.h" #include "io.h" #include "protected_queue.h" +#include "memchr.h" #include <stdio.h> #include <assert.h> @@ -460,6 +461,36 @@ static void test_queue_boundary_conditions() { assert(old_count == q.count); } +static void test_fast_strchr() +{ + // Test 1: Basic test + const char *testStr1 = "Hello, World!"; + assert(fast_strchr(testStr1, 'W', strlen(testStr1)) == testStr1 + 7); + + // Test 2: Character not present in the string + assert(fast_strchr(testStr1, 'X', strlen(testStr1)) == NULL); + + // Test 3: Multiple occurrences of the character + const char *testStr2 = "Multiple occurrences."; + assert(fast_strchr(testStr2, 'u', strlen(testStr2)) == testStr2 + 1); + + // Test 4: Check with an empty string + const char *testStr3 = ""; + assert(fast_strchr(testStr3, 'a', strlen(testStr3)) == NULL); + + // Test 5: Check with a one-character string + const char *testStr4 = "a"; + assert(fast_strchr(testStr4, 'a', strlen(testStr4)) == testStr4); + + // Test 6: Check the last character in the string + const char *testStr5 = "Last character check"; + assert(fast_strchr(testStr5, 'k', strlen(testStr5)) == testStr5 + 19); + + // Test 7: Large string test (>16 bytes) + char *testStr6 = "This is a test for large strings with more than 16 bytes."; + assert(fast_strchr(testStr6, 'm', strlen(testStr6)) == testStr6 + 38); +} + int main(int argc, const char *argv[]) { test_basic_event(); test_empty_tags(); @@ -477,6 +508,8 @@ int main(int argc, const char *argv[]) { test_queue_thread_safety(); test_queue_boundary_conditions(); + // memchr stuff + test_fast_strchr(); printf("All tests passed!\n"); // Print this if all tests pass. }