nostrdb

an unfairly fast embedded nostr database backed by lmdb
git clone git://jb55.com/nostrdb
Log | Files | Refs | Submodules | README | LICENSE

commit dbb9cabf34f1f5dd58a4023e16a31929c0fa378e
parent d5dcaef57ca6f9bc0ab861aeecdb2e9e038816d9
Author: William Casarin <jb55@jb55.com>
Date:   Wed, 27 Dec 2023 14:55:17 -0800

content_parser: add initial db decoders

We need to pull the data out as well! Let's add some initial decoders.
We still need tests to make sure it's working.

Diffstat:
M Makefile | 2 +-
M shell.nix | 2 +-
M src/block.h | 7 +++----
M src/content_parser.c | 179 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
M src/cursor.h | 5 +++++
M src/nostrdb.h | 8 ++++++++
6 files changed, 167 insertions(+), 36 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,5 +1,5 @@ CFLAGS = -Wall -Wno-misleading-indentation -Wno-unused-function -Werror -O2 -g -Isrc -Ideps/secp256k1/include -Ideps/lmdb -Ideps/flatcc/include -HEADERS = deps/lmdb/lmdb.h deps/secp256k1/include/secp256k1.h src/sha256.h src/nostrdb.h src/cursor.h src/hex.h src/jsmn.h src/config.h src/sha256.h src/random.h src/memchr.h src/cpu.h $(C_BINDINGS) +HEADERS = deps/lmdb/lmdb.h deps/secp256k1/include/secp256k1.h src/sha256.h src/nostrdb.h src/cursor.h src/hex.h src/jsmn.h src/config.h src/sha256.h src/random.h src/memchr.h src/cpu.h src/nostr_bech32.h src/block.h src/str_block.h $(C_BINDINGS) FLATCC_SRCS=deps/flatcc/src/runtime/json_parser.c deps/flatcc/src/runtime/verifier.c deps/flatcc/src/runtime/builder.c deps/flatcc/src/runtime/emitter.c deps/flatcc/src/runtime/refmap.c BOLT11_SRCS = src/bolt11/bolt11.c src/bolt11/bech32.c src/bolt11/tal.c src/bolt11/talstr.c src/bolt11/take.c src/bolt11/list.c src/bolt11/utf8.c src/bolt11/amount.c src/bolt11/hash_u5.c SRCS = src/nostrdb.c src/sha256.c src/invoice.c src/nostr_bech32.c src/content_parser.c $(BOLT11_SRCS) $(FLATCC_SRCS) diff --git a/shell.nix b/shell.nix @@ -1,7 +1,7 @@ { pkgs ? 
import <nixpkgs> {} }: with pkgs; mkShell { - buildInputs = [ autoreconfHook flatbuffers flatcc pkg-config ]; + buildInputs = [ autoreconfHook flatbuffers flatcc gdb pkg-config ]; LIBCLANG_PATH="${llvmPackages.libclang}/lib"; } diff --git a/src/block.h b/src/block.h @@ -38,19 +38,18 @@ struct ndb_mention_bech32_block { struct nostr_bech32 bech32; }; -struct invoice_block { +struct ndb_invoice_block { struct str_block invstr; struct ndb_invoice invoice; - struct bolt11 *bolt11; }; struct note_block { enum block_type type; union { struct str_block str; - struct invoice_block invoice; + struct ndb_invoice_block invoice; struct ndb_mention_bech32_block mention_bech32; - int mention_index; + uint32_t mention_index; } block; }; diff --git a/src/content_parser.c b/src/content_parser.c @@ -103,7 +103,7 @@ static int pull_str_block(struct cursor *buf, const char *content, struct str_bl } // -// decode and push a bech32 string into our blocks output buffer. +// decode and push a bech32 mention into our blocks output buffer. // // bech32 blocks are stored as: // @@ -116,7 +116,7 @@ static int pull_str_block(struct cursor *buf, const char *content, struct str_bl // This allows us to not duplicate all of the TLV encoding and decoding code // for our on-disk nostrdb format. // -static int push_bech32_str(struct ndb_content_parser *p, struct str_block *bech32) +static int push_bech32_mention(struct ndb_content_parser *p, struct str_block *bech32) { // we decode the raw bech32 directly into the output buffer struct cursor u8, u5; @@ -132,6 +132,10 @@ static int push_bech32_str(struct ndb_content_parser *p, struct str_block *bech3 if (!parse_nostr_bech32_type(bech32->str, &type)) goto fail; + // make sure to push the str block! 
+ if (!push_str_block(&p->buffer, (const char*)p->content.start, bech32)) + goto fail; + if (!cursor_push_varint(&p->buffer, type)) goto fail; @@ -174,7 +178,7 @@ fail: return 0; } -static int push_invoice_str(struct cursor *buf, struct str_block *str) +static int push_invoice_str(struct ndb_content_parser *p, struct str_block *str) { unsigned char *start; struct bolt11 *bolt11; @@ -183,9 +187,15 @@ static int push_invoice_str(struct cursor *buf, struct str_block *str) if (!(bolt11 = bolt11_decode(NULL, str->str, &fail))) return 0; - start = buf->p; - if (!ndb_encode_invoice(buf, bolt11)) { - buf->p = start; + start = p->buffer.p; + + // push the text block just incase we don't care for the invoice + if (!push_str_block(&p->buffer, (const char*)p->content.start, str)) + return 0; + + // push decoded invoice data for quick access + if (!ndb_encode_invoice(&p->buffer, bolt11)) { + p->buffer.p = start; tal_free(bolt11); return 0; } @@ -194,45 +204,95 @@ static int push_invoice_str(struct cursor *buf, struct str_block *str) return 1; } -static int push_block(struct ndb_content_parser *p, struct note_block *block) +static int pull_nostr_bech32_type(struct cursor *cur, enum nostr_bech32_type *type) { - // push the tag - if (!cursor_push_varint(&p->buffer, block->type)) + uint64_t inttype; + if (!cursor_pull_varint(cur, &inttype)) + return 0; + + if (inttype > NOSTR_BECH32_KNOWN_TYPES) + return 0; + + *type = inttype; + return 1; +} + +static int pull_bech32_mention(const char *content, struct cursor *cur, + struct ndb_mention_bech32_block *block) { + uint16_t size; + unsigned char *start; + enum nostr_bech32_type type; + + start = cur->p; + + if (!pull_str_block(cur, content, &block->str)) + return 0; + + if (!cursor_pull_u16(cur, &size)) + return 0; + + if (!pull_nostr_bech32_type(cur, &type)) + return 0; + + if (!parse_nostr_bech32_buffer(cur, type, &block->bech32)) + return 0; + + cur->p = start + size; + return 1; +} + +static int pull_invoice(const char *content, 
struct cursor *cur, + struct ndb_invoice_block *block) +{ + if (!pull_str_block(cur, content, &block->invstr)) return 0; + return ndb_decode_invoice(cur, &block->invoice); +} + +static int pull_block(const char *content, struct cursor *cur, struct note_block *block) +{ + unsigned char *start = cur->p; + uint32_t type; + + if (!cursor_pull_varint_u32(cur, &type)) + return 0; + + block->type = type; + switch (block->type) { case BLOCK_HASHTAG: case BLOCK_TEXT: case BLOCK_URL: - if (!push_str_block(&p->buffer, (const char*)p->content.start, - &block->block.str)) - return 0; + if (!pull_str_block(cur, content, &block->block.str)) + goto fail; break; case BLOCK_MENTION_INDEX: - if (!cursor_push_varint(&p->buffer, block->block.mention_index)) - return 0; + if (!cursor_pull_varint_u32(cur, &block->block.mention_index)) + goto fail; break; + case BLOCK_MENTION_BECH32: - // we only push bech32 strs here - if (!push_bech32_str(p, &block->block.str)) - return 0; + if (!pull_bech32_mention(content, cur, &block->block.mention_bech32)) + goto fail; break; - + case BLOCK_INVOICE: // we only push invoice strs here - if (!push_invoice_str(&p->buffer, &block->block.str)) - return 0; + if (!pull_invoice(content, cur, &block->block.invoice)) + goto fail; break; } - p->blocks->num_blocks++; - return 1; +fail: + cur->p = start; + return 0; } -static int add_text_block(struct ndb_content_parser *p, - const unsigned char *start, const unsigned char *end) +int push_block(struct ndb_content_parser *p, struct note_block *block); +static int add_text_block(struct ndb_content_parser *p, const char *start, const char *end) { struct note_block b; @@ -240,12 +300,69 @@ static int add_text_block(struct ndb_content_parser *p, return 1; b.type = BLOCK_TEXT; - b.block.str.str = (const char*)start; + b.block.str.str = start; b.block.str.len = end - start; return push_block(p, &b); } + +int push_block(struct ndb_content_parser *p, struct note_block *block) +{ + unsigned char *start = p->buffer.p; + + 
// push the tag + if (!cursor_push_varint(&p->buffer, block->type)) + return 0; + + switch (block->type) { + case BLOCK_HASHTAG: + case BLOCK_TEXT: + case BLOCK_URL: + if (!push_str_block(&p->buffer, (const char*)p->content.start, + &block->block.str)) + goto fail; + break; + + case BLOCK_MENTION_INDEX: + if (!cursor_push_varint(&p->buffer, block->block.mention_index)) + goto fail; + break; + case BLOCK_MENTION_BECH32: + // we only push bech32 strs here + if (!push_bech32_mention(p, &block->block.str)) { + // if we fail for some reason, try pushing just a text block + p->buffer.p = start; + if (!add_text_block(p, block->block.str.str, + block->block.str.str + + block->block.str.len)) { + goto fail; + } + } + break; + + case BLOCK_INVOICE: + // we only push invoice strs here + if (!push_invoice_str(p, &block->block.str)) { + // if we fail for some reason, try pushing just a text block + p->buffer.p = start; + if (!add_text_block(p, block->block.str.str, + block->block.str.str + block->block.str.len)) { + goto fail; + } + } + break; + } + + p->blocks->num_blocks++; + + return 1; + +fail: + p->buffer.p = start; + return 0; +} + static int consume_url_fragment(struct cursor *cur) { int c; @@ -315,6 +432,7 @@ static int parse_url(struct cursor *cur, struct note_block *block) { unsigned char tmp[4096]; int host_len; struct cursor path_cur, tmp_cur; + enum nostr_bech32_type type; make_cursor(tmp, tmp + sizeof(tmp), &tmp_cur); if (!parse_str(cur, "http")) @@ -376,7 +494,7 @@ static int parse_url(struct cursor *cur, struct note_block *block) { // if we have a damus link, make it a mention if (host_len == 8 && !strncmp((const char *)host, "damus.io", 8) - && parse_nostr_bech32_str(&path_cur)) + && parse_nostr_bech32_str(&path_cur, &type)) { block->block.str.len = path_cur.p - path_cur.start; block->type = BLOCK_MENTION_BECH32; @@ -421,13 +539,14 @@ static int parse_invoice(struct cursor *cur, struct note_block *block) { static int parse_mention_bech32(struct cursor *cur, 
struct note_block *block) { unsigned char *start = cur->p; + enum nostr_bech32_type type; parse_char(cur, '@'); parse_str(cur, "nostr:"); block->block.str.str = (const char *)cur->p; - if (!parse_nostr_bech32_str(cur)) { + if (!parse_nostr_bech32_str(cur, &type)) { cur->p = start; return 0; } @@ -443,7 +562,7 @@ static int add_text_then_block(struct ndb_content_parser *p, unsigned char **start, const unsigned char *pre_mention) { - if (!add_text_block(p, *start, pre_mention)) + if (!add_text_block(p, (const char *)*start, (const char*)pre_mention)) return 0; *start = (unsigned char*)p->content.p; @@ -457,8 +576,8 @@ int ndb_parse_content(unsigned char *buf, int buf_size, { int cp, c; struct ndb_content_parser parser; - struct note_block block; + unsigned char *start, *pre_mention; make_cursor(buf, buf + buf_size, &parser.buffer); @@ -508,7 +627,7 @@ int ndb_parse_content(unsigned char *buf, int buf_size, } if (parser.content.p - start > 0) { - if (!add_text_block(&parser, start, parser.content.p)) + if (!add_text_block(&parser, (const char*)start, (const char *)parser.content.p)) return 0; } diff --git a/src/cursor.h b/src/cursor.h @@ -360,6 +360,11 @@ static inline int cursor_push_u16(struct cursor *cursor, unsigned short i) return cursor_push(cursor, (unsigned char*)&i, sizeof(i)); } +static inline int cursor_pull_u16(struct cursor *cursor, uint16_t *i) +{ + return cursor_pull(cursor, (unsigned char*)i, sizeof(*i)); +} + static inline void *index_cursor(struct cursor *cursor, unsigned int index, int elem_size) { unsigned char *p; diff --git a/src/nostrdb.h b/src/nostrdb.h @@ -198,6 +198,12 @@ struct ndb_builder { struct ndb_tag *current_tag; }; +/* +struct ndb_block_iterator { + struct note_block block; +}; +*/ + struct ndb_iterator { struct ndb_note *note; struct ndb_tag *tag; @@ -390,4 +396,6 @@ int ndb_parse_content(unsigned char *buf, int buf_size, const char *content, int content_len, struct ndb_note_blocks **blocks_p); +//int ndb_blocks_iterate_next(struct 
ndb_block_iterator *iter); + #endif