remove all dynamic allocations - nostrdb - an unfairly fast embedded nostr database backed by lmdb

commit 74a1f816f0b3f385227b39d990a9472eaab11bb6
parent 14f17b6d4f70c4a7ae2cc3f130603da96ec5802f
Author: William Casarin <jb55@jb55.com>
Date:   Thu, 20 Jul 2023 12:08:18 -0700

remove all dynamic allocations

The storage buffer is now passed in from the caller. The size is
returned from finalize so the caller can resize if needed.

Diffstat:
M .gitignore  | 2 ++
M Makefile  | 2 +-
M nostrdb.c  | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
M nostrdb.h  | 9 +++++----
M test.c  | 31 ++++++++++++++++++++-----------

5 files changed, 123 insertions(+), 63 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,5 @@ tags
 test
 .buildcmd
 .build-result
+.envrc
+shell.nix
diff --git a/Makefile b/Makefile
@@ -1,4 +1,4 @@
-CFLAGS = -Wall -Wno-unused-function -Werror -O3 -DJSMN_PARENT_LINKS
+CFLAGS = -Wall -Wno-unused-function -Werror -O1 -g -DJSMN_PARENT_LINKS -DJSMN_STRICT
 
 check: test
 	./test
diff --git a/nostrdb.c b/nostrdb.c
@@ -6,8 +6,13 @@
 #include <stdlib.h>
 
 struct ndb_json_parser {
+	const char *json;
+	int json_len;
 	struct ndb_builder builder;
-	struct cursor buf;
+	jsmn_parser json_parser;
+	jsmntok_t *toks, *toks_end;
+	int num_tokens;
+	struct cursor mem;
 };
 
 static inline int
@@ -16,26 +21,32 @@ cursor_push_tag(struct cursor *cur, struct ndb_tag *tag) {
 }
 
 int
-ndb_builder_new(struct ndb_builder *builder, int *bufsize) {
+ndb_builder_new(struct ndb_builder *builder, unsigned char *buf, int bufsize) {
 	struct ndb_note *note;
 	struct cursor mem;
-	// 1MB + 0.5MB
-	builder->size = 1024 * 1024;
-	if (bufsize)
-		builder->size = *bufsize;
+	int half, size, str_indices_size;
+
+	// come on bruh
+	if (bufsize < sizeof(struct ndb_note) * 2)
+		return 0;
 
-	int str_indices_size = builder->size / 32;
-	unsigned char *bytes = malloc(builder->size + str_indices_size);
-	if (!bytes) return 0;
+	str_indices_size = bufsize / 32;
+	size = bufsize - str_indices_size;
+	half = size / 2;
 
-	make_cursor(bytes, bytes + builder->size + str_indices_size, &mem);
+	//debug("size %d half %d str_indices %d\n", size, half, str_indices_size);
 
-	note = builder->note = (struct ndb_note *)bytes;
-	size_t half = builder->size >> 1;
+	// make a safe cursor of our available memory
+	make_cursor(buf, buf + bufsize, &mem);
 
-	if (!cursor_slice(&mem, &builder->note_cur, half)) return 0;
-	if (!cursor_slice(&mem, &builder->strings, half)) return 0;
-	if (!cursor_slice(&mem, &builder->str_indices, str_indices_size)) return 0;
+	note = builder->note = (struct ndb_note *)buf;
+
+	// take slices of the memory into subcursors
+	if (!(cursor_slice(&mem, &builder->note_cur, half) &&
+	      cursor_slice(&mem, &builder->strings, half) &&
+	      cursor_slice(&mem, &builder->str_indices, str_indices_size))) {
+		return 0;
+	}
 
 	memset(note, 0, sizeof(*note));
 	builder->note_cur.p += sizeof(*note);
@@ -45,6 +56,40 @@ ndb_builder_new(struct ndb_builder *builder, int *bufsize) {
 	return 1;
 }
 
+// Split buf: first half -> ndb_builder, second half -> jsmn token storage.
+// Returns 1 on success, 0 if ndb_builder_new rejects its half.
+static inline int
+ndb_json_parser_init(struct ndb_json_parser *p, const char *json, int json_len, unsigned char *buf, int bufsize) {
+	int half = bufsize / 2;
+
+	// offset in bytes *before* casting; jsmntok_t* arithmetic would scale
+	// by sizeof(jsmntok_t) and point far outside the caller's buffer
+	p->toks = (jsmntok_t*)(buf + half);
+	p->toks_end = (jsmntok_t*)(buf + bufsize);
+	p->num_tokens = 0;
+	p->json = json;
+	p->json_len = json_len;
+
+	// ndb_builder gets the first half of the buffer, and jsmn gets the
+	// second half: one big chunk upfront, subdivided by submodules, with
+	// no dynamic allocation. NOTE(review): buf + half must be suitably
+	// aligned for jsmntok_t -- confirm callers guarantee this.
+	if (!ndb_builder_new(&p->builder, buf, half))
+		return 0;
+
+	jsmn_init(&p->json_parser);
+	return 1;
+}
+
+// Tokenize p->json into p->toks; a negative result means jsmn_parse failed.
+static inline int
+ndb_json_parser_parse(struct ndb_json_parser *p) {
+	// capacity in tokens: byte span of the token region / token size
+	int cap = ((unsigned char *)p->toks_end - (unsigned char *)p->toks) / sizeof(*p->toks);
+	p->num_tokens = jsmn_parse(&p->json_parser, p->json, p->json_len, p->toks, cap);
+	return p->num_tokens;
+}
+
 int
 ndb_builder_finalize(struct ndb_builder *builder, struct ndb_note **note) {
 	int strings_len = builder->strings.p - builder->strings.start;
@@ -57,8 +102,10 @@ ndb_builder_finalize(struct ndb_builder *builder, struct ndb_note **note) {
 	// set the strings location
 	builder->note->strings = builder->note_cur.p - builder->note_cur.start;
 
-	// remove any extra data
-	*note = realloc(builder->note_cur.start, total_size);
+	// record the total size
+	//builder->note->size = total_size;
+
+	*note = builder->note;
 
 	return total_size;
 }
@@ -153,71 +200,72 @@ ndb_builder_process_json_tags(const char *json, jsmntok_t *array, struct ndb_bui
 
 
 int
-ndb_note_from_json(const char *json, int len, struct ndb_note **note) {
-	jsmntok_t toks[4096], *tok = NULL;
-	unsigned char buf[64];
-	struct ndb_builder builder;
-	jsmn_parser p;
+ndb_note_from_json(const char *json, int len, struct ndb_note **note,
+		   unsigned char *buf, int bufsize)
+{
+	jsmntok_t *tok = NULL;
+	unsigned char hexbuf[64];
 
-	int i, r, tok_len;
+	int i, tok_len;
 	const char *start;
+	struct ndb_json_parser parser;
 
-	jsmn_init(&p);
-	ndb_builder_new(&builder, &len);
+	ndb_json_parser_init(&parser, json, len, buf, bufsize);
 
-	r = jsmn_parse(&p, json, len, toks, sizeof(toks)/sizeof(toks[0]));
+	if (ndb_json_parser_parse(&parser) < 0)
+		return 0;
 
-	if (r < 0) return 0;
-	if (r < 1 || toks[0].type != JSMN_OBJECT) return 0;
+	if (parser.num_tokens < 1 || parser.toks[0].type != JSMN_OBJECT)
+		return 0;
 
-	for (i = 1; i < r; i++) {
-		tok = &toks[i];
+	for (i = 1; i < parser.num_tokens; i++) {
+		tok = &parser.toks[i];
 		start = json + tok->start;
 		tok_len = toksize(tok);
 
 		//printf("toplevel %.*s %d\n", tok_len, json + tok->start, tok->type);
-		if (tok_len == 0 || i + 1 >= r)
+		if (tok_len == 0 || i + 1 >= parser.num_tokens)
 			continue;
 
 		if (start[0] == 'p' && jsoneq(json, tok, tok_len, "pubkey")) {
 			// pubkey
-			tok = &toks[i+1];
-			hex_decode(json + tok->start, toksize(tok), buf, sizeof(buf));
-			ndb_builder_set_pubkey(&builder, buf);
+			tok = &parser.toks[i+1];
+			hex_decode(json + tok->start, toksize(tok), hexbuf, sizeof(hexbuf));
+			ndb_builder_set_pubkey(&parser.builder, hexbuf);
 		} else if (tok_len == 2 && start[0] == 'i' && start[1] == 'd') {
 			// id
-			tok = &toks[i+1];
-			hex_decode(json + tok->start, toksize(tok), buf, sizeof(buf));
+			tok = &parser.toks[i+1];
+			hex_decode(json + tok->start, toksize(tok), hexbuf, sizeof(hexbuf));
 			// TODO: validate id
-			ndb_builder_set_id(&builder, buf);
+			ndb_builder_set_id(&parser.builder, hexbuf);
 		} else if (tok_len == 3 && start[0] == 's' && start[1] == 'i' && start[2] == 'g') {
 			// sig
-			tok = &toks[i+1];
-			hex_decode(json + tok->start, toksize(tok), buf, sizeof(buf));
-			ndb_builder_set_signature(&builder, buf);
+			tok = &parser.toks[i+1];
+			hex_decode(json + tok->start, toksize(tok), hexbuf, sizeof(hexbuf));
+			ndb_builder_set_signature(&parser.builder, hexbuf);
 		} else if (start[0] == 'k' && jsoneq(json, tok, tok_len, "kind")) {
 			// kind
-			tok = &toks[i+1];
+			tok = &parser.toks[i+1];
 			printf("json_kind %.*s\n", toksize(tok), json + tok->start);
 		} else if (start[0] == 'c') {
 			if (jsoneq(json, tok, tok_len, "created_at")) {
 				// created_at
-				tok = &toks[i+1];
+				tok = &parser.toks[i+1];
 				printf("json_created_at %.*s\n", toksize(tok), json + tok->start);
 			} else if (jsoneq(json, tok, tok_len, "content")) {
 				// content
-				tok = &toks[i+1];
-				if (!ndb_builder_set_content(&builder, json + tok->start, toksize(tok)))
+				tok = &parser.toks[i+1];
+				if (!ndb_builder_set_content(&parser.builder, json + tok->start, toksize(tok)))
 					return 0;
 			}
 		} else if (start[0] == 't' && jsoneq(json, tok, tok_len, "tags")) {
-			tok = &toks[i+1];
-			ndb_builder_process_json_tags(json, tok, &builder);
+			tok = &parser.toks[i+1];
+			ndb_builder_process_json_tags(json, tok, &parser.builder);
 			i += tok->size;
 		}
 	}
 
-	return ndb_builder_finalize(&builder, note);
+	return ndb_builder_finalize(&parser.builder, note);
 }
 
 void
diff --git a/nostrdb.h b/nostrdb.h
@@ -55,7 +55,6 @@ struct ndb_builder {
 	struct cursor strings;
 	struct cursor str_indices;
 	struct ndb_note *note;
-	int size;
 };
 
 struct ndb_iterator {
@@ -66,8 +65,9 @@ struct ndb_iterator {
 	int index;
 };
 
-int ndb_note_from_json(const char *json, int len, struct ndb_note **);
-int ndb_builder_new(struct ndb_builder *builder, int *bufsize);
+// HI BUILDER
+int ndb_note_from_json(const char *json, int len, struct ndb_note **, unsigned char *buf, int buflen);
+int ndb_builder_new(struct ndb_builder *builder, unsigned char *buf, int bufsize);
 int ndb_builder_finalize(struct ndb_builder *builder, struct ndb_note **note);
 int ndb_builder_set_content(struct ndb_builder *builder, const char *content, int len);
 void ndb_builder_set_signature(struct ndb_builder *builder, unsigned char *signature);
@@ -75,6 +75,7 @@ void ndb_builder_set_pubkey(struct ndb_builder *builder, unsigned char *pubkey);
 void ndb_builder_set_id(struct ndb_builder *builder, unsigned char *id);
 void ndb_builder_set_kind(struct ndb_builder *builder, uint32_t kind);
 int ndb_builder_add_tag(struct ndb_builder *builder, const char **strs, uint16_t num_strs);
+// BYE BUILDER
 
 static inline int
 ndb_str_is_packed(union packed_str str) {
@@ -164,7 +165,7 @@ ndb_tags_iterate_start(struct ndb_note *note, struct ndb_iterator *iter) {
 	iter->tag = note->tags.tag;
 	iter->index = 0;
 
-	return iter->tag->count != 0;
+	return note->tags.count != 0 && iter->tag->count != 0;
 }
 
 static inline int
diff --git a/test.c b/test.c
@@ -8,6 +8,7 @@
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
 
 static void test_basic_event() {
+	unsigned char buf[512];
 	struct ndb_builder builder, *b = &builder;
 	struct ndb_note *note;
 	int ok;
@@ -26,20 +27,23 @@ static void test_basic_event() {
 	const char *tag[] = { "p", hex_pk };
 	const char *word_tag[] = { "word", "words", "w" };
 
-	ndb_builder_new(b, 0);
+	ok = ndb_builder_new(b, buf, sizeof(buf));
+	assert(ok);
 	note = builder.note;
 
 	memset(note->padding, 3, sizeof(note->padding));
 
 	const char *content = "hello, world!";
-	ndb_builder_set_content(b, content, strlen(content));
-	ndb_builder_set_id(b, id);
-	ndb_builder_set_pubkey(b, pubkey);
-	ndb_builder_set_signature(b, sig);
-	ndb_builder_add_tag(b, tag, ARRAY_SIZE(tag));
-	ndb_builder_add_tag(b, word_tag, ARRAY_SIZE(word_tag));
 
-	ndb_builder_finalize(b, &note);
+	ok = ndb_builder_set_content(b, content, strlen(content)); assert(ok);
+	ndb_builder_set_id(b, id); assert(ok);
+	ndb_builder_set_pubkey(b, pubkey); assert(ok);
+	ndb_builder_set_signature(b, sig); assert(ok);
+	ok = ndb_builder_add_tag(b, tag, ARRAY_SIZE(tag)); assert(ok);
+	ok = ndb_builder_add_tag(b, word_tag, ARRAY_SIZE(word_tag)); assert(ok);
+
+	ok = ndb_builder_finalize(b, &note);
+	assert(ok);
 
 	assert(note->tags.count == 2);
 
@@ -72,9 +76,13 @@ static void test_empty_tags() {
 	struct ndb_iterator iter, *it = &iter;
 	struct ndb_note *note;
 	int ok;
+	unsigned char buf[1024];
 
-	ndb_builder_new(b, 0);
-	ndb_builder_finalize(b, &note);
+	ok = ndb_builder_new(b, buf, sizeof(buf));
+	assert(ok);
+
+	ok = ndb_builder_finalize(b, &note);
+	assert(ok);
 
 	assert(note->tags.count == 0);
 
@@ -85,13 +93,14 @@ static void test_empty_tags() {
 
 static void test_parse_json() {
 	char hex_id[65] = {0};
+	unsigned char buffer[1024];
 	struct ndb_note *note;
 #define HEX_ID "5004a081e397c6da9dc2f2d6b3134006a9d0e8c1b46689d9fe150bb2f21a204d"
 #define HEX_PK "b169f596968917a1abeb4234d3cf3aa9baee2112e58998d17c6db416ad33fe40"
 	static const char *json = 
 		"{\"id\": \"" HEX_ID "\",\"pubkey\": \"" HEX_PK "\",\"created_at\": 1689836342,\"kind\": 1,\"tags\": [[\"p\",\"" HEX_ID "\"], [\"word\", \"words\"]],\"content\": \"共通語\",\"sig\": \"e4d528651311d567f461d7be916c37cbf2b4d530e672f29f15f353291ed6df60c665928e67d2f18861c5ca88\"}";
 
-	ndb_note_from_json(json, strlen(json), &note);
+	ndb_note_from_json(json, strlen(json), &note, buffer, sizeof(buffer));
 
 	const char *content = ndb_note_content(note);
 	unsigned char *id = ndb_note_id(note);

	nostrdb an unfairly fast embedded nostr database backed by lmdb
	git clone git://jb55.com/nostrdb
	Log \| Files \| Refs \| README \| LICENSE

M	.gitignore	\|	2	++
M	Makefile	\|	2	+-
M	nostrdb.c	\|	142	+++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
M	nostrdb.h	\|	9	+++++----
M	test.c	\|	31	++++++++++++++++++++-----------