nostrdb

an unfairly fast embedded nostr database backed by lmdb
git clone git://jb55.com/nostrdb
Log | Files | Refs | README | LICENSE

commit 74a1f816f0b3f385227b39d990a9472eaab11bb6
parent 14f17b6d4f70c4a7ae2cc3f130603da96ec5802f
Author: William Casarin <jb55@jb55.com>
Date:   Thu, 20 Jul 2023 12:08:18 -0700

remove all dynamic allocations

The storage buffer is now passed in from the caller. The size is
returned from finalize so the caller can resize if needed.

Diffstat:
M .gitignore | 2 ++
M Makefile | 2 +-
M nostrdb.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
M nostrdb.h | 9 +++++----
M test.c | 31 ++++++++++++++++++++-----------
5 files changed, 123 insertions(+), 63 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -2,3 +2,5 @@ tags test .buildcmd .build-result +.envrc +shell.nix diff --git a/Makefile b/Makefile @@ -1,4 +1,4 @@ -CFLAGS = -Wall -Wno-unused-function -Werror -O3 -DJSMN_PARENT_LINKS +CFLAGS = -Wall -Wno-unused-function -Werror -O1 -g -DJSMN_PARENT_LINKS -DJSMN_STRICT check: test ./test diff --git a/nostrdb.c b/nostrdb.c @@ -6,8 +6,13 @@ #include <stdlib.h> struct ndb_json_parser { + const char *json; + int json_len; struct ndb_builder builder; - struct cursor buf; + jsmn_parser json_parser; + jsmntok_t *toks, *toks_end; + int num_tokens; + struct cursor mem; }; static inline int @@ -16,26 +21,32 @@ cursor_push_tag(struct cursor *cur, struct ndb_tag *tag) { } int -ndb_builder_new(struct ndb_builder *builder, int *bufsize) { +ndb_builder_new(struct ndb_builder *builder, unsigned char *buf, int bufsize) { struct ndb_note *note; struct cursor mem; - // 1MB + 0.5MB - builder->size = 1024 * 1024; - if (bufsize) - builder->size = *bufsize; + int half, size, str_indices_size; + + // come on bruh + if (bufsize < sizeof(struct ndb_note) * 2) + return 0; - int str_indices_size = builder->size / 32; - unsigned char *bytes = malloc(builder->size + str_indices_size); - if (!bytes) return 0; + str_indices_size = bufsize / 32; + size = bufsize - str_indices_size; + half = size / 2; - make_cursor(bytes, bytes + builder->size + str_indices_size, &mem); + //debug("size %d half %d str_indices %d\n", size, half, str_indices_size); - note = builder->note = (struct ndb_note *)bytes; - size_t half = builder->size >> 1; + // make a safe cursor of our available memory + make_cursor(buf, buf + bufsize, &mem); - if (!cursor_slice(&mem, &builder->note_cur, half)) return 0; - if (!cursor_slice(&mem, &builder->strings, half)) return 0; - if (!cursor_slice(&mem, &builder->str_indices, str_indices_size)) return 0; + note = builder->note = (struct ndb_note *)buf; + + // take slices of the memory into subcursors + if (!(cursor_slice(&mem, &builder->note_cur, 
half) && + cursor_slice(&mem, &builder->strings, half) && + cursor_slice(&mem, &builder->str_indices, str_indices_size))) { + return 0; + } memset(note, 0, sizeof(*note)); builder->note_cur.p += sizeof(*note); @@ -45,6 +56,40 @@ ndb_builder_new(struct ndb_builder *builder, int *bufsize) { return 1; } +static inline int +ndb_json_parser_init(struct ndb_json_parser *p, const char *json, int json_len, unsigned char *buf, int bufsize) { + int half = bufsize / 2; + + p->toks = (jsmntok_t*)buf + half; + p->toks_end = (jsmntok_t*)buf + bufsize; + p->num_tokens = 0; + p->json = json; + p->json_len = json_len; + + // ndb_builder gets the first half of the buffer, and jsmn gets the + // second half. I like this way of alloating memory (without actually + // dynamically allocating memory). You get one big chunk upfront and + // then submodules can recursively subdivide it. Maybe you could do + // something even more clever like golden-ratio style subdivision where + // the more important stuff gets a larger chunk and then it spirals + // downward into smaller chunks. Thanks for coming to my TED talk. 
+ if (!ndb_builder_new(&p->builder, buf, half)) + return 0; + + jsmn_init(&p->json_parser); + + return 1; +} + +static inline int +ndb_json_parser_parse(struct ndb_json_parser *p) { + int cap = (p->toks_end - p->toks)/sizeof(*p->toks); + p->num_tokens = + jsmn_parse(&p->json_parser, p->json, p->json_len, p->toks, cap); + + return p->num_tokens; +} + int ndb_builder_finalize(struct ndb_builder *builder, struct ndb_note **note) { int strings_len = builder->strings.p - builder->strings.start; @@ -57,8 +102,10 @@ ndb_builder_finalize(struct ndb_builder *builder, struct ndb_note **note) { // set the strings location builder->note->strings = builder->note_cur.p - builder->note_cur.start; - // remove any extra data - *note = realloc(builder->note_cur.start, total_size); + // record the total size + //builder->note->size = total_size; + + *note = builder->note; return total_size; } @@ -153,71 +200,72 @@ ndb_builder_process_json_tags(const char *json, jsmntok_t *array, struct ndb_bui int -ndb_note_from_json(const char *json, int len, struct ndb_note **note) { - jsmntok_t toks[4096], *tok = NULL; - unsigned char buf[64]; - struct ndb_builder builder; - jsmn_parser p; +ndb_note_from_json(const char *json, int len, struct ndb_note **note, + unsigned char *buf, int bufsize) +{ + jsmntok_t *tok = NULL; + unsigned char hexbuf[64]; - int i, r, tok_len; + int i, tok_len; const char *start; + struct ndb_json_parser parser; - jsmn_init(&p); - ndb_builder_new(&builder, &len); + ndb_json_parser_init(&parser, json, len, buf, bufsize); - r = jsmn_parse(&p, json, len, toks, sizeof(toks)/sizeof(toks[0])); + if (ndb_json_parser_parse(&parser) < 0) + return 0; - if (r < 0) return 0; - if (r < 1 || toks[0].type != JSMN_OBJECT) return 0; + if (parser.num_tokens < 1 || parser.toks[0].type != JSMN_OBJECT) + return 0; - for (i = 1; i < r; i++) { - tok = &toks[i]; + for (i = 1; i < parser.num_tokens; i++) { + tok = &parser.toks[i]; start = json + tok->start; tok_len = toksize(tok); 
//printf("toplevel %.*s %d\n", tok_len, json + tok->start, tok->type); - if (tok_len == 0 || i + 1 >= r) + if (tok_len == 0 || i + 1 >= parser.num_tokens) continue; if (start[0] == 'p' && jsoneq(json, tok, tok_len, "pubkey")) { // pubkey - tok = &toks[i+1]; - hex_decode(json + tok->start, toksize(tok), buf, sizeof(buf)); - ndb_builder_set_pubkey(&builder, buf); + tok = &parser.toks[i+1]; + hex_decode(json + tok->start, toksize(tok), hexbuf, sizeof(hexbuf)); + ndb_builder_set_pubkey(&parser.builder, hexbuf); } else if (tok_len == 2 && start[0] == 'i' && start[1] == 'd') { // id - tok = &toks[i+1]; - hex_decode(json + tok->start, toksize(tok), buf, sizeof(buf)); + tok = &parser.toks[i+1]; + hex_decode(json + tok->start, toksize(tok), hexbuf, sizeof(hexbuf)); // TODO: validate id - ndb_builder_set_id(&builder, buf); + ndb_builder_set_id(&parser.builder, hexbuf); } else if (tok_len == 3 && start[0] == 's' && start[1] == 'i' && start[2] == 'g') { // sig - tok = &toks[i+1]; - hex_decode(json + tok->start, toksize(tok), buf, sizeof(buf)); - ndb_builder_set_signature(&builder, buf); + tok = &parser.toks[i+1]; + hex_decode(json + tok->start, toksize(tok), hexbuf, sizeof(hexbuf)); + ndb_builder_set_signature(&parser.builder, hexbuf); } else if (start[0] == 'k' && jsoneq(json, tok, tok_len, "kind")) { // kind - tok = &toks[i+1]; + tok = &parser.toks[i+1]; printf("json_kind %.*s\n", toksize(tok), json + tok->start); } else if (start[0] == 'c') { if (jsoneq(json, tok, tok_len, "created_at")) { // created_at - tok = &toks[i+1]; + tok = &parser.toks[i+1]; printf("json_created_at %.*s\n", toksize(tok), json + tok->start); } else if (jsoneq(json, tok, tok_len, "content")) { // content - tok = &toks[i+1]; - if (!ndb_builder_set_content(&builder, json + tok->start, toksize(tok))) + tok = &parser.toks[i+1]; + if (!ndb_builder_set_content(&parser.builder, json + tok->start, toksize(tok))) return 0; } } else if (start[0] == 't' && jsoneq(json, tok, tok_len, "tags")) { - tok = 
&toks[i+1]; - ndb_builder_process_json_tags(json, tok, &builder); + tok = &parser.toks[i+1]; + ndb_builder_process_json_tags(json, tok, &parser.builder); i += tok->size; } } - return ndb_builder_finalize(&builder, note); + return ndb_builder_finalize(&parser.builder, note); } void diff --git a/nostrdb.h b/nostrdb.h @@ -55,7 +55,6 @@ struct ndb_builder { struct cursor strings; struct cursor str_indices; struct ndb_note *note; - int size; }; struct ndb_iterator { @@ -66,8 +65,9 @@ struct ndb_iterator { int index; }; -int ndb_note_from_json(const char *json, int len, struct ndb_note **); -int ndb_builder_new(struct ndb_builder *builder, int *bufsize); +// HI BUILDER +int ndb_note_from_json(const char *json, int len, struct ndb_note **, unsigned char *buf, int buflen); +int ndb_builder_new(struct ndb_builder *builder, unsigned char *buf, int bufsize); int ndb_builder_finalize(struct ndb_builder *builder, struct ndb_note **note); int ndb_builder_set_content(struct ndb_builder *builder, const char *content, int len); void ndb_builder_set_signature(struct ndb_builder *builder, unsigned char *signature); @@ -75,6 +75,7 @@ void ndb_builder_set_pubkey(struct ndb_builder *builder, unsigned char *pubkey); void ndb_builder_set_id(struct ndb_builder *builder, unsigned char *id); void ndb_builder_set_kind(struct ndb_builder *builder, uint32_t kind); int ndb_builder_add_tag(struct ndb_builder *builder, const char **strs, uint16_t num_strs); +// BYE BUILDER static inline int ndb_str_is_packed(union packed_str str) { @@ -164,7 +165,7 @@ ndb_tags_iterate_start(struct ndb_note *note, struct ndb_iterator *iter) { iter->tag = note->tags.tag; iter->index = 0; - return iter->tag->count != 0; + return note->tags.count != 0 && iter->tag->count != 0; } static inline int diff --git a/test.c b/test.c @@ -8,6 +8,7 @@ #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) static void test_basic_event() { + unsigned char buf[512]; struct ndb_builder builder, *b = &builder; struct ndb_note *note; int 
ok; @@ -26,20 +27,23 @@ static void test_basic_event() { const char *tag[] = { "p", hex_pk }; const char *word_tag[] = { "word", "words", "w" }; - ndb_builder_new(b, 0); + ok = ndb_builder_new(b, buf, sizeof(buf)); + assert(ok); note = builder.note; memset(note->padding, 3, sizeof(note->padding)); const char *content = "hello, world!"; - ndb_builder_set_content(b, content, strlen(content)); - ndb_builder_set_id(b, id); - ndb_builder_set_pubkey(b, pubkey); - ndb_builder_set_signature(b, sig); - ndb_builder_add_tag(b, tag, ARRAY_SIZE(tag)); - ndb_builder_add_tag(b, word_tag, ARRAY_SIZE(word_tag)); - ndb_builder_finalize(b, &note); + ok = ndb_builder_set_content(b, content, strlen(content)); assert(ok); + ndb_builder_set_id(b, id); assert(ok); + ndb_builder_set_pubkey(b, pubkey); assert(ok); + ndb_builder_set_signature(b, sig); assert(ok); + ok = ndb_builder_add_tag(b, tag, ARRAY_SIZE(tag)); assert(ok); + ok = ndb_builder_add_tag(b, word_tag, ARRAY_SIZE(word_tag)); assert(ok); + + ok = ndb_builder_finalize(b, &note); + assert(ok); assert(note->tags.count == 2); @@ -72,9 +76,13 @@ static void test_empty_tags() { struct ndb_iterator iter, *it = &iter; struct ndb_note *note; int ok; + unsigned char buf[1024]; - ndb_builder_new(b, 0); - ndb_builder_finalize(b, &note); + ok = ndb_builder_new(b, buf, sizeof(buf)); + assert(ok); + + ok = ndb_builder_finalize(b, &note); + assert(ok); assert(note->tags.count == 0); @@ -85,13 +93,14 @@ static void test_empty_tags() { static void test_parse_json() { char hex_id[65] = {0}; + unsigned char buffer[1024]; struct ndb_note *note; #define HEX_ID "5004a081e397c6da9dc2f2d6b3134006a9d0e8c1b46689d9fe150bb2f21a204d" #define HEX_PK "b169f596968917a1abeb4234d3cf3aa9baee2112e58998d17c6db416ad33fe40" static const char *json = "{\"id\": \"" HEX_ID "\",\"pubkey\": \"" HEX_PK "\",\"created_at\": 1689836342,\"kind\": 1,\"tags\": [[\"p\",\"" HEX_ID "\"], [\"word\", \"words\"]],\"content\": \"共通語\",\"sig\": 
\"e4d528651311d567f461d7be916c37cbf2b4d530e672f29f15f353291ed6df60c665928e67d2f18861c5ca88\"}"; - ndb_note_from_json(json, strlen(json), &note); + ndb_note_from_json(json, strlen(json), &note, buffer, sizeof(buffer)); const char *content = ndb_note_content(note); unsigned char *id = ndb_note_id(note);