nostrdb

an unfairly fast embedded nostr database backed by lmdb
git clone git://jb55.com/nostrdb
Log | Files | Refs | Submodules | README | LICENSE

commit dc4795088ce1e527718bb7b56cf0d2737fb61b15
parent bfa4fad89015939e13f63fd6cc425018d0be4f34
Author: William Casarin <jb55@jb55.com>
Date:   Wed, 27 Dec 2023 12:35:54 -0800

nostr_bech32: only parse up to raw bech32 buffers

We will be storing raw nostr bech32 buffers directly into nostrdb, so
adapt our bech32 code to reflect this.

When doing our content parsing pass, we will only look for strings and we
won't allocate any intermediate buffers. Only when we write this string
block to nostrdb will we actually allocate in our nostrdb output buffer
(no mallocs!)

Diffstat:
A src/block.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/nostr_bech32.c | 153 ++++++++++++++++++++++++++++++++++++++-----------------------------------------
M src/nostr_bech32.h | 36 ++++++++++++++++++++++--------------
A src/str_block.h | 12 ++++++++++++
4 files changed, 166 insertions(+), 93 deletions(-)

diff --git a/src/block.h b/src/block.h @@ -0,0 +1,58 @@ + +#ifndef NDB_BLOCK_H +#define NDB_BLOCK_H + +#include "invoice.h" +#include "str_block.h" +#include "nostr_bech32.h" +#include <inttypes.h> + +#pragma pack(push, 1) + +struct ndb_note_blocks { + unsigned char version; + unsigned char padding[3]; + + uint32_t words; + uint32_t num_blocks; + uint32_t blocks_size; + // future expansion + uint32_t reserved[4]; + unsigned char blocks[0]; // see ndb_block definition +}; + +#pragma pack(pop) + +enum block_type { + BLOCK_HASHTAG = 1, + BLOCK_TEXT = 2, + BLOCK_MENTION_INDEX = 3, + BLOCK_MENTION_BECH32 = 4, + BLOCK_URL = 5, + BLOCK_INVOICE = 6, +}; + + +struct ndb_mention_bech32_block { + struct str_block str; + struct nostr_bech32 bech32; +}; + +struct invoice_block { + struct str_block invstr; + struct ndb_invoice invoice; + struct bolt11 *bolt11; +}; + +struct note_block { + enum block_type type; + union { + struct str_block str; + struct invoice_block invoice; + struct ndb_mention_bech32_block mention_bech32; + int mention_index; + } block; +}; + + +#endif // NDB_BLOCK_H diff --git a/src/nostr_bech32.c b/src/nostr_bech32.c @@ -8,9 +8,9 @@ #include "nostr_bech32.h" #include <stdlib.h> #include "cursor.h" -#include "bech32.h" +#include "bolt11/bech32.h" -#define MAX_TLVS 16 +#define MAX_TLVS 32 #define TLV_SPECIAL 0 #define TLV_RELAY 1 @@ -19,9 +19,9 @@ #define TLV_KNOWN_TLVS 4 struct nostr_tlv { - u8 type; - u8 len; - const u8 *value; + unsigned char type; + unsigned char len; + const unsigned char *value; }; struct nostr_tlvs { @@ -70,7 +70,7 @@ static int parse_nostr_tlvs(struct cursor *cur, struct nostr_tlvs *tlvs) { return 1; } -static int find_tlv(struct nostr_tlvs *tlvs, u8 type, struct nostr_tlv **tlv) { +static int find_tlv(struct nostr_tlvs *tlvs, unsigned char type, struct nostr_tlv **tlv) { *tlv = NULL; for (int i = 0; i < tlvs->num_tlvs; i++) { @@ -83,27 +83,27 @@ static int find_tlv(struct nostr_tlvs *tlvs, u8 type, struct nostr_tlv **tlv) { return 0; 
} -static int parse_nostr_bech32_type(const char *prefix, enum nostr_bech32_type *type) { +int parse_nostr_bech32_type(const char *prefix, enum nostr_bech32_type *type) { // Parse type - if (strcmp(prefix, "note") == 0) { + if (strncmp(prefix, "note", 4) == 0) { *type = NOSTR_BECH32_NOTE; return 1; - } else if (strcmp(prefix, "npub") == 0) { + } else if (strncmp(prefix, "npub", 4) == 0) { *type = NOSTR_BECH32_NPUB; return 1; - } else if (strcmp(prefix, "nsec") == 0) { + } else if (strncmp(prefix, "nsec", 4) == 0) { *type = NOSTR_BECH32_NSEC; return 1; - } else if (strcmp(prefix, "nprofile") == 0) { + } else if (strncmp(prefix, "nprofile", 8) == 0) { *type = NOSTR_BECH32_NPROFILE; return 1; - } else if (strcmp(prefix, "nevent") == 0) { + } else if (strncmp(prefix, "nevent", 6) == 0) { *type = NOSTR_BECH32_NEVENT; return 1; - } else if (strcmp(prefix, "nrelay") == 0) { + } else if (strncmp(prefix, "nrelay", 6) == 0) { *type = NOSTR_BECH32_NRELAY; return 1; - } else if (strcmp(prefix, "naddr") == 0) { + } else if (strncmp(prefix, "naddr", 5) == 0) { *type = NOSTR_BECH32_NADDR; return 1; } @@ -138,8 +138,8 @@ static int tlvs_to_relays(struct nostr_tlvs *tlvs, struct relays *relays) { break; str = &relays->relays[relays->num_relays++]; - str->start = (const char*)tlv->value; - str->end = (const char*)(tlv->value + tlv->len); + str->str = (const char*)tlv->value; + str->len = tlv->len; } return 1; @@ -179,8 +179,8 @@ static int parse_nostr_bech32_naddr(struct cursor *cur, struct bech32_naddr *nad if (!find_tlv(&tlvs, TLV_SPECIAL, &tlv)) return 0; - naddr->identifier.start = (const char*)tlv->value; - naddr->identifier.end = (const char*)tlv->value + tlv->len; + naddr->identifier.str = (const char*)tlv->value; + naddr->identifier.len = tlv->len; if (!find_tlv(&tlvs, TLV_AUTHOR, &tlv)) return 0; @@ -218,89 +218,84 @@ static int parse_nostr_bech32_nrelay(struct cursor *cur, struct bech32_nrelay *n if (!find_tlv(&tlvs, TLV_SPECIAL, &tlv)) return 0; - nrelay->relay.start = 
(const char*)tlv->value; - nrelay->relay.end = (const char*)tlv->value + tlv->len; + nrelay->relay.str = (const char*)tlv->value; + nrelay->relay.len = tlv->len; return 1; } -int parse_nostr_bech32(struct cursor *cur, struct nostr_bech32 *obj) { - u8 *start, *end; - - start = cur->p; - - if (!consume_until_non_alphanumeric(cur, 1)) { - cur->p = start; - return 0; - } - - end = cur->p; - - size_t data_len; - size_t input_len = end - start; - if (input_len < 10 || input_len > 10000) { - return 0; - } - - obj->buffer = malloc(input_len * 2); - if (!obj->buffer) - return 0; - - u8 data[input_len]; - char prefix[input_len]; - - if (bech32_decode_len(prefix, data, &data_len, (const char*)start, input_len) == BECH32_ENCODING_NONE) { - cur->p = start; - return 0; - } - - obj->buflen = 0; - if (!bech32_convert_bits(obj->buffer, &obj->buflen, 8, data, data_len, 5, 0)) { - goto fail; - } - - if (!parse_nostr_bech32_type(prefix, &obj->type)) { - goto fail; - } - - struct cursor bcur; - make_cursor(obj->buffer, obj->buffer + obj->buflen, &bcur); +/* +int parse_nostr_bech32_buffer(unsigned char *cur, int buflen, + enum nostr_bech32_type type, + struct nostr_bech32 *obj) +{ + obj->type = type; switch (obj->type) { case NOSTR_BECH32_NOTE: - if (!parse_nostr_bech32_note(&bcur, &obj->data.note)) - goto fail; + if (!parse_nostr_bech32_note(cur, &obj->data.note)) + return 0; break; case NOSTR_BECH32_NPUB: - if (!parse_nostr_bech32_npub(&bcur, &obj->data.npub)) - goto fail; + if (!parse_nostr_bech32_npub(cur, &obj->data.npub)) + return 0; break; case NOSTR_BECH32_NSEC: - if (!parse_nostr_bech32_nsec(&bcur, &obj->data.nsec)) - goto fail; + if (!parse_nostr_bech32_nsec(cur, &obj->data.nsec)) + return 0; break; case NOSTR_BECH32_NEVENT: - if (!parse_nostr_bech32_nevent(&bcur, &obj->data.nevent)) - goto fail; + if (!parse_nostr_bech32_nevent(cur, &obj->data.nevent)) + return 0; break; case NOSTR_BECH32_NADDR: - if (!parse_nostr_bech32_naddr(&bcur, &obj->data.naddr)) - goto fail; + if 
(!parse_nostr_bech32_naddr(cur, &obj->data.naddr)) + return 0; break; case NOSTR_BECH32_NPROFILE: - if (!parse_nostr_bech32_nprofile(&bcur, &obj->data.nprofile)) - goto fail; + if (!parse_nostr_bech32_nprofile(cur, &obj->data.nprofile)) + return 0; break; case NOSTR_BECH32_NRELAY: - if (!parse_nostr_bech32_nrelay(&bcur, &obj->data.nrelay)) - goto fail; + if (!parse_nostr_bech32_nrelay(cur, &obj->data.nrelay)) + return 0; break; } + + return 1; +} +*/ + +int parse_nostr_bech32_str(struct cursor *bech32) { + enum nostr_bech32_type type; + + if (!parse_nostr_bech32_type((const char *)bech32->p, &type)) + return 0; + if (!consume_until_non_alphanumeric(bech32, 1)) + return 0; + return 1; -fail: - free(obj->buffer); - cur->p = start; - return 0; + /* + *parsed_len = bech32->p - start; + + // some random sanity checking + if (*parsed_len < 10 || *parsed_len > 10000) + return 0; + + const char u5[*parsed_len]; + + if (bech32_decode_len(prefix, u5, &u5_out_len, (const char*)start, + *parsed_len, MAX_PREFIX) == BECH32_ENCODING_NONE) + { + return 0; + } + + if (!parse_nostr_bech32_type(prefix, type)) + return 0; + */ + + return 1; } + diff --git a/src/nostr_bech32.h b/src/nostr_bech32.h @@ -11,8 +11,7 @@ #include <stdio.h> #include "str_block.h" #include "cursor.h" -typedef unsigned char u8; -#define MAX_RELAYS 10 +#define MAX_RELAYS 24 struct relays { struct str_block relays[MAX_RELAYS]; @@ -30,42 +29,40 @@ enum nostr_bech32_type { }; struct bech32_note { - const u8 *event_id; + const unsigned char *event_id; }; struct bech32_npub { - const u8 *pubkey; + const unsigned char *pubkey; }; struct bech32_nsec { - const u8 *nsec; + const unsigned char *nsec; }; struct bech32_nevent { struct relays relays; - const u8 *event_id; - const u8 *pubkey; // optional + const unsigned char *event_id; + const unsigned char *pubkey; // optional }; struct bech32_nprofile { struct relays relays; - const u8 *pubkey; + const unsigned char *pubkey; }; struct bech32_naddr { struct relays relays; 
struct str_block identifier; - const u8 *pubkey; + const unsigned char *pubkey; }; struct bech32_nrelay { struct str_block relay; }; -typedef struct nostr_bech32 { +struct nostr_bech32 { enum nostr_bech32_type type; - u8 *buffer; // holds strings and tlv stuff - size_t buflen; union { struct bech32_note note; @@ -76,9 +73,20 @@ typedef struct nostr_bech32 { struct bech32_naddr naddr; struct bech32_nrelay nrelay; } data; -} nostr_bech32_t; +}; + + +int parse_nostr_bech32_str(struct cursor *bech32); +int parse_nostr_bech32_type(const char *prefix, enum nostr_bech32_type *type); +/* +int parse_nostr_bech32_buffer(unsigned char *buf, int buflen, + enum nostr_bech32_type type, + struct nostr_bech32 *obj); -int parse_nostr_bech32(struct cursor *cur, struct nostr_bech32 *obj); +int parse_nostr_bech32(const char *bech32, size_t input_len, + unsigned char *outbuf, size_t outlen, + enum nostr_bech32_type *type); + */ #endif /* nostr_bech32_h */ diff --git a/src/str_block.h b/src/str_block.h @@ -0,0 +1,12 @@ + +#ifndef NDB_STR_BLOCK_H +#define NDB_STR_BLOCK_H + +#include <inttypes.h> + +struct str_block { + const char *str; + uint32_t len; +}; + +#endif