nostrdb

an unfairly fast embedded nostr database backed by lmdb
git clone git://jb55.com/nostrdb
Log | Files | Refs | Submodules | README | LICENSE

commit 03b4d711aa0e5533cd996c6de53771eb4e41ca6d
parent a841b449102176241485b8882abd4e7002e6b520
Author: William Casarin <jb55@jb55.com>
Date:   Thu, 28 Dec 2023 13:54:22 -0800

blocks: add note block iterator

This adds an iterator API that walks the compact note block data in nostrdb
and pulls out one block at a time.

Diffstat:
MMakefile | 2+-
Asrc/block.c | 184+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/block.h | 29++++++++++++-----------------
Msrc/content_parser.c | 137++++++++++---------------------------------------------------------------------
Msrc/nostr_bech32.c | 4+++-
Msrc/nostr_bech32.h | 6+++---
Msrc/nostrdb.h | 37+++++++++++++++++++++++++++----------
Msrc/str_block.h | 2+-
Mtest.c | 37+++++++++++++++++++++++++++++++++++--
9 files changed, 283 insertions(+), 155 deletions(-)

diff --git a/Makefile b/Makefile @@ -2,7 +2,7 @@ CFLAGS = -Wall -Wno-misleading-indentation -Wno-unused-function -Werror -O2 -g - HEADERS = deps/lmdb/lmdb.h deps/secp256k1/include/secp256k1.h src/sha256.h src/nostrdb.h src/cursor.h src/hex.h src/jsmn.h src/config.h src/sha256.h src/random.h src/memchr.h src/cpu.h src/nostr_bech32.h src/block.h src/str_block.h $(C_BINDINGS) FLATCC_SRCS=deps/flatcc/src/runtime/json_parser.c deps/flatcc/src/runtime/verifier.c deps/flatcc/src/runtime/builder.c deps/flatcc/src/runtime/emitter.c deps/flatcc/src/runtime/refmap.c BOLT11_SRCS = src/bolt11/bolt11.c src/bolt11/bech32.c src/bolt11/tal.c src/bolt11/talstr.c src/bolt11/take.c src/bolt11/list.c src/bolt11/utf8.c src/bolt11/amount.c src/bolt11/hash_u5.c -SRCS = src/nostrdb.c src/sha256.c src/invoice.c src/nostr_bech32.c src/content_parser.c $(BOLT11_SRCS) $(FLATCC_SRCS) +SRCS = src/nostrdb.c src/sha256.c src/invoice.c src/nostr_bech32.c src/content_parser.c src/block.c $(BOLT11_SRCS) $(FLATCC_SRCS) LDS = $(OBJS) $(ARS) OBJS = $(SRCS:.c=.o) DEPS = $(OBJS) $(HEADERS) $(ARS) diff --git a/src/block.c b/src/block.c @@ -0,0 +1,184 @@ + + +#include "nostrdb.h" +#include "block.h" +#include <stdlib.h> + +struct ndb_block_iterator { + const char *content; + struct ndb_blocks *blocks; + struct ndb_block block; + struct cursor cur; +}; + +int push_str_block(struct cursor *buf, const char *content, struct ndb_str_block *block) { + return cursor_push_varint(buf, block->str - content) && + cursor_push_varint(buf, block->len); +} + +int pull_str_block(struct cursor *buf, const char *content, struct ndb_str_block *block) { + uint32_t start; + if (!cursor_pull_varint_u32(buf, &start)) + return 0; + + block->str = content + start; + + return cursor_pull_varint_u32(buf, &block->len); +} + +static int pull_nostr_bech32_type(struct cursor *cur, enum nostr_bech32_type *type) +{ + uint64_t inttype; + if (!cursor_pull_varint(cur, &inttype)) + return 0; + + if (inttype > NOSTR_BECH32_KNOWN_TYPES) + return 
0; + + *type = inttype; + return 1; +} + + +static int pull_bech32_mention(const char *content, struct cursor *cur, struct ndb_mention_bech32_block *block) { + uint16_t size; + unsigned char *start; + enum nostr_bech32_type type; + + start = cur->p; + + if (!pull_str_block(cur, content, &block->str)) + return 0; + + if (!cursor_pull_u16(cur, &size)) + return 0; + + if (!pull_nostr_bech32_type(cur, &type)) + return 0; + + if (!parse_nostr_bech32_buffer(cur, type, &block->bech32)) + return 0; + + cur->p = start + size; + return 1; +} + +static int pull_invoice(const char *content, struct cursor *cur, + struct ndb_invoice_block *block) +{ + if (!pull_str_block(cur, content, &block->invstr)) + return 0; + + return ndb_decode_invoice(cur, &block->invoice); +} + +static int pull_block_type(struct cursor *cur, enum ndb_block_type *type) +{ + uint32_t itype; + *type = 0; + if (!cursor_pull_varint_u32(cur, &itype)) + return 0; + + if (itype <= 0 || itype > NDB_NUM_BLOCK_TYPES) + return 0; + + *type = itype; + return 1; +} + +static int pull_block(const char *content, struct cursor *cur, struct ndb_block *block) +{ + unsigned char *start = cur->p; + + if (!pull_block_type(cur, &block->type)) + return 0; + + switch (block->type) { + case BLOCK_HASHTAG: + case BLOCK_TEXT: + case BLOCK_URL: + if (!pull_str_block(cur, content, &block->block.str)) + goto fail; + break; + + case BLOCK_MENTION_INDEX: + if (!cursor_pull_varint_u32(cur, &block->block.mention_index)) + goto fail; + break; + + case BLOCK_MENTION_BECH32: + if (!pull_bech32_mention(content, cur, &block->block.mention_bech32)) + goto fail; + break; + + case BLOCK_INVOICE: + // we only push invoice strs here + if (!pull_invoice(content, cur, &block->block.invoice)) + goto fail; + break; + } + + return 1; +fail: + cur->p = start; + return 0; +} + + +enum ndb_block_type ndb_get_block_type(struct ndb_block *block) { + return block->type; +} + +// BLOCK ITERATORS +struct ndb_block_iterator *ndb_blocks_iterate_start(const char 
*content, struct ndb_blocks *blocks) { + struct ndb_block_iterator *iter = malloc(sizeof(*iter)); + if (!iter) + return NULL; + + iter->blocks = blocks; + iter->content = content; + + make_cursor((unsigned char *)blocks->blocks, + blocks->blocks + blocks->blocks_size, &iter->cur); + + return iter; +} + +void ndb_blocks_iterate_free(struct ndb_block_iterator *iter) +{ + if (iter) + free(iter); +} + +struct ndb_block *ndb_blocks_iterate_next(struct ndb_block_iterator *iter) +{ + while (iter->cur.p < iter->cur.end) { + if (!pull_block(iter->content, &iter->cur, &iter->block)) + return NULL; + else + return &iter->block; + } + + return NULL; +} + +// STR BLOCKS +struct ndb_str_block *ndb_block_str(struct ndb_block *block) +{ + switch (block->type) { + case BLOCK_HASHTAG: + case BLOCK_TEXT: + case BLOCK_URL: + return &block->block.str; + case BLOCK_MENTION_INDEX: + return NULL; + case BLOCK_MENTION_BECH32: + return &block->block.mention_bech32.str; + case BLOCK_INVOICE: + return &block->block.invoice.invstr; + } + + return NULL; +} +//const char *ndb_str_block_ptr(struct ndb_str_block *); +//uint32_t ndb_str_block_len(struct ndb_str_block *); diff --git a/src/block.h b/src/block.h @@ -4,12 +4,14 @@ #include "invoice.h" #include "str_block.h" +#include "cursor.h" #include "nostr_bech32.h" +#include "nostrdb.h" #include <inttypes.h> #pragma pack(push, 1) -struct ndb_note_blocks { +struct ndb_blocks { unsigned char version; unsigned char padding[3]; @@ -17,41 +19,34 @@ struct ndb_note_blocks { uint32_t num_blocks; uint32_t blocks_size; // future expansion - uint32_t reserved[4]; + uint32_t reserved[2]; unsigned char blocks[0]; // see ndb_block definition }; #pragma pack(pop) -enum block_type { - BLOCK_HASHTAG = 1, - BLOCK_TEXT = 2, - BLOCK_MENTION_INDEX = 3, - BLOCK_MENTION_BECH32 = 4, - BLOCK_URL = 5, - BLOCK_INVOICE = 6, -}; - - struct ndb_mention_bech32_block { - struct str_block str; + struct ndb_str_block str; struct nostr_bech32 bech32; }; struct ndb_invoice_block { 
- struct str_block invstr; + struct ndb_str_block invstr; struct ndb_invoice invoice; }; -struct note_block { - enum block_type type; +struct ndb_block { + enum ndb_block_type type; union { - struct str_block str; + struct ndb_str_block str; struct ndb_invoice_block invoice; struct ndb_mention_bech32_block mention_bech32; uint32_t mention_index; } block; }; +int push_str_block(struct cursor *buf, const char *content, struct ndb_str_block *block); +int pull_str_block(struct cursor *buf, const char *content, struct ndb_str_block *block); + #endif // NDB_BLOCK_H diff --git a/src/content_parser.c b/src/content_parser.c @@ -1,6 +1,7 @@ #include "cursor.h" #include "nostr_bech32.h" #include "block.h" +#include "nostrdb.h" #include "invoice.h" #include "bolt11/bolt11.h" #include "bolt11/bech32.h" @@ -8,16 +9,14 @@ #include <string.h> #include "cursor.h" -#include "block.h" struct ndb_content_parser { int bech32_strs; struct cursor buffer; struct cursor content; - struct ndb_note_blocks *blocks; + struct ndb_blocks *blocks; }; - static int parse_digit(struct cursor *cur, int *digit) { int c; if ((c = peek_char(cur, 0)) == -1) @@ -34,7 +33,7 @@ static int parse_digit(struct cursor *cur, int *digit) { } -static int parse_mention_index(struct cursor *cur, struct note_block *block) { +static int parse_mention_index(struct cursor *cur, struct ndb_block *block) { int d1, d2, d3, ind; unsigned char *start = cur->p; @@ -65,7 +64,7 @@ static int parse_mention_index(struct cursor *cur, struct note_block *block) { return 1; } -static int parse_hashtag(struct cursor *cur, struct note_block *block) { +static int parse_hashtag(struct cursor *cur, struct ndb_block *block) { int c; unsigned char *start = cur->p; @@ -87,21 +86,6 @@ static int parse_hashtag(struct cursor *cur, struct note_block *block) { return 1; } -static int push_str_block(struct cursor *buf, const char *content, struct str_block *block) { - return cursor_push_varint(buf, block->str - content) && - 
cursor_push_varint(buf, block->len); -} - -static int pull_str_block(struct cursor *buf, const char *content, struct str_block *block) { - uint32_t start; - if (!cursor_pull_varint_u32(buf, &start)) - return 0; - - block->str = content + start; - - return cursor_pull_varint_u32(buf, &block->len); -} - // // decode and push a bech32 mention into our blocks output buffer. // @@ -116,7 +100,7 @@ static int pull_str_block(struct cursor *buf, const char *content, struct str_bl // This allows us to not duplicate all of the TLV encoding and decoding code // for our on-disk nostrdb format. // -static int push_bech32_mention(struct ndb_content_parser *p, struct str_block *bech32) +static int push_bech32_mention(struct ndb_content_parser *p, struct ndb_str_block *bech32) { // we decode the raw bech32 directly into the output buffer struct cursor u8, u5; @@ -178,7 +162,7 @@ fail: return 0; } -static int push_invoice_str(struct ndb_content_parser *p, struct str_block *str) +static int push_invoice_str(struct ndb_content_parser *p, struct ndb_str_block *str) { unsigned char *start; struct bolt11 *bolt11; @@ -204,97 +188,10 @@ static int push_invoice_str(struct ndb_content_parser *p, struct str_block *str) return 1; } -static int pull_nostr_bech32_type(struct cursor *cur, enum nostr_bech32_type *type) -{ - uint64_t inttype; - if (!cursor_pull_varint(cur, &inttype)) - return 0; - - if (inttype > NOSTR_BECH32_KNOWN_TYPES) - return 0; - - *type = inttype; - return 1; -} - -static int pull_bech32_mention(const char *content, struct cursor *cur, - struct ndb_mention_bech32_block *block) { - uint16_t size; - unsigned char *start; - enum nostr_bech32_type type; - - start = cur->p; - - if (!pull_str_block(cur, content, &block->str)) - return 0; - - if (!cursor_pull_u16(cur, &size)) - return 0; - - if (!pull_nostr_bech32_type(cur, &type)) - return 0; - - if (!parse_nostr_bech32_buffer(cur, type, &block->bech32)) - return 0; - - cur->p = start + size; - return 1; -} - -static int 
pull_invoice(const char *content, struct cursor *cur, - struct ndb_invoice_block *block) -{ - if (!pull_str_block(cur, content, &block->invstr)) - return 0; - - return ndb_decode_invoice(cur, &block->invoice); -} - -static int pull_block(const char *content, struct cursor *cur, struct note_block *block) -{ - unsigned char *start = cur->p; - uint32_t type; - - if (!cursor_pull_varint_u32(cur, &type)) - return 0; - - block->type = type; - - switch (block->type) { - case BLOCK_HASHTAG: - case BLOCK_TEXT: - case BLOCK_URL: - if (!pull_str_block(cur, content, &block->block.str)) - goto fail; - break; - - case BLOCK_MENTION_INDEX: - if (!cursor_pull_varint_u32(cur, &block->block.mention_index)) - goto fail; - break; - - case BLOCK_MENTION_BECH32: - if (!pull_bech32_mention(content, cur, &block->block.mention_bech32)) - goto fail; - break; - - case BLOCK_INVOICE: - // we only push invoice strs here - if (!pull_invoice(content, cur, &block->block.invoice)) - goto fail; - break; - } - - return 1; -fail: - cur->p = start; - return 0; -} - -int push_block(struct ndb_content_parser *p, struct note_block *block); +int push_block(struct ndb_content_parser *p, struct ndb_block *block); static int add_text_block(struct ndb_content_parser *p, const char *start, const char *end) { - struct note_block b; + struct ndb_block b; if (start == end) return 1; @@ -307,7 +204,7 @@ static int add_text_block(struct ndb_content_parser *p, const char *start, const } -int push_block(struct ndb_content_parser *p, struct note_block *block) +int push_block(struct ndb_content_parser *p, struct ndb_block *block) { unsigned char *start = p->buffer.p; @@ -473,7 +370,7 @@ static int consume_url_host(struct cursor *cur) return count != 0; } -static int parse_url(struct cursor *cur, struct note_block *block) { +static int parse_url(struct cursor *cur, struct ndb_block *block) { unsigned char *start = cur->p; unsigned char *host; unsigned char tmp[4096]; @@ -555,7 +452,7 @@ static int parse_url(struct 
cursor *cur, struct note_block *block) { return 1; } -static int parse_invoice(struct cursor *cur, struct note_block *block) { +static int parse_invoice(struct cursor *cur, struct ndb_block *block) { unsigned char *start, *end; // optional @@ -584,7 +481,7 @@ static int parse_invoice(struct cursor *cur, struct note_block *block) { } -static int parse_mention_bech32(struct cursor *cur, struct note_block *block) { +static int parse_mention_bech32(struct cursor *cur, struct ndb_block *block) { unsigned char *start = cur->p; enum nostr_bech32_type type; @@ -605,7 +502,7 @@ static int parse_mention_bech32(struct cursor *cur, struct note_block *block) { } static int add_text_then_block(struct ndb_content_parser *p, - struct note_block *block, + struct ndb_block *block, unsigned char **start, const unsigned char *pre_mention) { @@ -619,19 +516,19 @@ static int add_text_then_block(struct ndb_content_parser *p, int ndb_parse_content(unsigned char *buf, int buf_size, const char *content, int content_len, - struct ndb_note_blocks **blocks_p) + struct ndb_blocks **blocks_p) { int cp, c; struct ndb_content_parser parser; - struct note_block block; + struct ndb_block block; unsigned char *start, *pre_mention; make_cursor(buf, buf + buf_size, &parser.buffer); // allocate some space for the blocks header - *blocks_p = parser.blocks = (struct ndb_note_blocks *)buf; - parser.buffer.p += sizeof(struct ndb_note_blocks); + *blocks_p = parser.blocks = (struct ndb_blocks *)buf; + parser.buffer.p += sizeof(struct ndb_blocks); make_cursor((unsigned char *)content, (unsigned char*)content + content_len, &parser.content); diff --git a/src/nostr_bech32.c b/src/nostr_bech32.c @@ -8,6 +8,8 @@ #include "nostr_bech32.h" #include <stdlib.h> #include "cursor.h" +#include "str_block.h" +#include "nostrdb.h" #include "bolt11/bech32.h" #define MAX_TLVS 32 @@ -88,7 +90,7 @@ static int parse_nostr_bech32_nsec(struct cursor *cur, struct bech32_nsec *nsec) static int add_relay(struct relays *relays, 
struct nostr_tlv *tlv) { - struct str_block *str; + struct ndb_str_block *str; if (relays->num_relays + 1 > MAX_RELAYS) return 0; diff --git a/src/nostr_bech32.h b/src/nostr_bech32.h @@ -14,7 +14,7 @@ #define MAX_RELAYS 24 struct relays { - struct str_block relays[MAX_RELAYS]; + struct ndb_str_block relays[MAX_RELAYS]; int num_relays; }; @@ -54,12 +54,12 @@ struct bech32_nprofile { struct bech32_naddr { struct relays relays; - struct str_block identifier; + struct ndb_str_block identifier; const unsigned char *pubkey; }; struct bech32_nrelay { - struct str_block relay; + struct ndb_str_block relay; }; struct nostr_bech32 { diff --git a/src/nostrdb.h b/src/nostrdb.h @@ -20,7 +20,8 @@ struct ndb_json_parser; struct ndb; -struct ndb_note_blocks; +struct ndb_blocks; +struct ndb_block; struct ndb_note; struct ndb_tag; struct ndb_tags; @@ -198,12 +199,6 @@ struct ndb_builder { struct ndb_tag *current_tag; }; -/* -struct ndb_block_iterator { - struct note_block block; -}; -*/ - struct ndb_iterator { struct ndb_note *note; struct ndb_tag *tag; @@ -394,8 +389,30 @@ enum ndb_common_kind ndb_kind_to_common_kind(int kind); // CONTENT PARSER int ndb_parse_content(unsigned char *buf, int buf_size, const char *content, int content_len, - struct ndb_note_blocks **blocks_p); - -//int ndb_blocks_iterate_next(struct ndb_block_iterator *iter); + struct ndb_blocks **blocks_p); + +// BLOCKS +enum ndb_block_type { + BLOCK_HASHTAG = 1, + BLOCK_TEXT = 2, + BLOCK_MENTION_INDEX = 3, + BLOCK_MENTION_BECH32 = 4, + BLOCK_URL = 5, + BLOCK_INVOICE = 6, +}; +#define NDB_NUM_BLOCK_TYPES 6 + +enum ndb_block_type ndb_block_type(struct ndb_blocks *blocks); + +// BLOCK ITERATORS +struct ndb_block_iterator *ndb_blocks_iterate_start(const char *, struct ndb_blocks *); +void ndb_blocks_iterate_free(struct ndb_block_iterator *); +struct ndb_block *ndb_blocks_iterate_next(struct ndb_block_iterator *); + +// STR BLOCKS +enum ndb_block_type ndb_get_block_type(struct ndb_block *block); +struct ndb_str_block 
*ndb_block_str(struct ndb_block *); +const char *ndb_str_block_ptr(struct ndb_str_block *); +uint32_t ndb_str_block_len(struct ndb_str_block *); #endif diff --git a/src/str_block.h b/src/str_block.h @@ -4,7 +4,7 @@ #include <inttypes.h> -struct str_block { +struct ndb_str_block { const char *str; uint32_t len; }; diff --git a/test.c b/test.c @@ -774,14 +774,45 @@ static void test_strings_work_before_finalization() { } static void test_parse_content() { - const char *content = "hello,#world,https://github.com/damus-io"; - struct ndb_note_blocks *blocks; unsigned char buf[4096]; + const char *content = "hello,#world,https://github.com/damus-io"; + struct ndb_blocks *blocks; assert(ndb_parse_content(buf, sizeof(buf), content, strlen(content), &blocks)); assert(blocks->num_blocks == 4); } +static void test_url_parsing() { + unsigned char buf[4096]; +#define DAMUSIO "https://github.com/damus-io" +#define JB55COM "https://jb55.com/" +#define WIKIORG "http://wikipedia.org" + const char *content = DAMUSIO ", " JB55COM ", " WIKIORG; + struct ndb_blocks *blocks; + struct ndb_block *block; + struct ndb_str_block *str; + + assert(ndb_parse_content(buf, sizeof(buf), content, strlen(content), &blocks)); + printf("blocks %d\n", blocks->num_blocks); + assert(blocks->num_blocks == 5); + + struct ndb_block_iterator *iter = ndb_blocks_iterate_start(content, blocks); + int i = 0; + while ((block = ndb_blocks_iterate_next(iter))) { + str = ndb_block_str(block); + switch (i++) { + case 0: assert(!strncmp(str->str, DAMUSIO, str->len)); break; + case 1: assert(!strncmp(str->str, ", ", str->len)); break; + case 2: assert(!strncmp(str->str, JB55COM, str->len)); break; + case 3: assert(!strncmp(str->str, ", ", str->len)); break; + case 4: assert(!strncmp(str->str, WIKIORG, str->len)); break; + } + } + + assert(i == 5); +} + + static void test_bech32_objects() { struct nostr_bech32 obj; unsigned char buf[4096]; @@ -1091,8 +1122,10 @@ static int test_varints() { int main(int argc, const char 
*argv[]) { test_parse_content(); + test_url_parsing(); test_varints(); test_bech32_objects(); + //test_block_coding(); test_encode_decode_invoice(); test_filters(); //test_migrate();