commit b975cff7ba36400b33ef89efd4fb9e2bef5ebdc3
parent 5fee42e5ec9e0dbe3495ced4e0d2a4fc8c528fae
Author: William Casarin <jb55@jb55.com>
Date: Fri, 29 Dec 2023 09:40:02 -0800
blocks: write note blocks on ingest
When ingesting notes, parse text/longform contents and store them in nostrdb.
Diffstat:
2 files changed, 160 insertions(+), 9 deletions(-)
diff --git a/src/nostrdb.c b/src/nostrdb.c
@@ -10,6 +10,7 @@
#include "lmdb.h"
#include "util.h"
#include "cpu.h"
+#include "block.h"
#include "threadpool.h"
#include "protected_queue.h"
#include "memchr.h"
@@ -127,6 +128,7 @@ enum ndb_writer_msgtype {
NDB_WRITER_PROFILE, // write a profile to the db
NDB_WRITER_DBMETA, // write ndb metadata
NDB_WRITER_PROFILE_LAST_FETCH, // when profiles were last fetched
+ NDB_WRITER_BLOCKS, // write parsed note blocks
};
// keys used for storing data in the NDB metadata database (NDB_DB_NDB_META)
@@ -1141,6 +1143,12 @@ struct ndb_writer_last_fetch {
uint64_t fetched_at;
};
+// write note blocks
+struct ndb_writer_blocks {
+ struct ndb_blocks *blocks;
+ uint64_t note_key;
+};
+
// The different types of messages that the writer thread can write to the
// database
struct ndb_writer_msg {
@@ -1150,6 +1158,7 @@ struct ndb_writer_msg {
struct ndb_writer_profile profile;
struct ndb_writer_ndb_meta ndb_meta;
struct ndb_writer_last_fetch last_fetch;
+ struct ndb_writer_blocks blocks;
};
};
@@ -2651,8 +2660,48 @@ cont:
return 1;
}
+static void ndb_write_blocks(struct ndb_txn *txn, uint64_t note_key,
+ struct ndb_blocks *blocks)
+{
+ int rc;
+ MDB_val key, val;
+
+ key.mv_data = ¬e_key;
+ key.mv_size = sizeof(note_key);
+ val.mv_data = blocks;
+ val.mv_size = ndb_blocks_total_size(blocks);
+ assert((val.mv_size % 8) == 0);
+
+ if ((rc = mdb_put(txn->mdb_txn, txn->lmdb->dbs[NDB_DB_NOTE_BLOCKS], &key, &val, 0))) {
+ ndb_debug("write version to note_blocks failed: %s\n",
+ mdb_strerror(rc));
+ return;
+ }
+}
+
+static int ndb_write_new_blocks(struct ndb_txn *txn, struct ndb_note *note,
+ uint64_t note_key, unsigned char *scratch,
+ size_t scratch_size)
+{
+ size_t content_len;
+ const char *content;
+ struct ndb_blocks *blocks;
+
+ content_len = ndb_note_content_length(note);
+ content = ndb_note_content(note);
+
+ if (!ndb_parse_content(scratch, scratch_size, content, content_len, &blocks)) {
+ ndb_debug("failed to parse content '%.*s'\n", content_len, content);
+ return 0;
+ }
+
+ ndb_write_blocks(txn, note_key, blocks);
+ return 1;
+}
+
static uint64_t ndb_write_note(struct ndb_txn *txn,
- struct ndb_writer_note *note)
+ struct ndb_writer_note *note,
+ unsigned char *scratch, size_t scratch_size)
{
int rc;
uint64_t note_key;
@@ -2688,10 +2737,14 @@ static uint64_t ndb_write_note(struct ndb_txn *txn,
if (!ndb_write_note_kind_index(txn, note->note, note_key))
return 0;
- // only do fulltext index on text and longform notes
+ // only parse content and do fulltext index on text and longform notes
if (note->note->kind == 1 || note->note->kind == 30023) {
if (!ndb_write_note_fulltext_index(txn, note->note, note_key))
return 0;
+
+ // write note blocks
+ ndb_write_new_blocks(txn, note->note, note_key, scratch,
+ scratch_size);
}
if (note->note->kind == 7) {
@@ -2728,6 +2781,9 @@ static void *ndb_writer_thread(void *data)
{
struct ndb_writer *writer = data;
struct ndb_writer_msg msgs[THREAD_QUEUE_BATCH], *msg;
+ // 8mb scratch buffer for parsing note content
+ size_t scratch_size = 8 * 1024 * 1024;
+ unsigned char *scratch = malloc(scratch_size);
int i, popped, done, any_note;
uint64_t note_nkey;
MDB_txn *mdb_txn = NULL;
@@ -2748,6 +2804,7 @@ static void *ndb_writer_thread(void *data)
case NDB_WRITER_PROFILE: any_note = 1; break;
case NDB_WRITER_DBMETA: any_note = 1; break;
case NDB_WRITER_PROFILE_LAST_FETCH: any_note = 1; break;
+ case NDB_WRITER_BLOCKS: any_note = 1; break;
case NDB_WRITER_QUIT: break;
}
}
@@ -2770,7 +2827,8 @@ static void *ndb_writer_thread(void *data)
continue;
case NDB_WRITER_PROFILE:
note_nkey =
- ndb_write_note(&txn, &msg->note);
+ ndb_write_note(&txn, &msg->note,
+ scratch, scratch_size);
if (msg->profile.record.builder) {
// only write if parsing didn't fail
ndb_write_profile(&txn, &msg->profile,
@@ -2778,7 +2836,8 @@ static void *ndb_writer_thread(void *data)
}
break;
case NDB_WRITER_NOTE:
- ndb_write_note(&txn, &msg->note);
+ ndb_write_note(&txn, &msg->note, scratch,
+ scratch_size);
//printf("wrote note ");
//print_hex(msg->note.note->id, 32);
//printf("\n");
@@ -2786,6 +2845,10 @@ static void *ndb_writer_thread(void *data)
case NDB_WRITER_DBMETA:
ndb_write_version(&txn, msg->ndb_meta.version);
break;
+ case NDB_WRITER_BLOCKS:
+ ndb_write_blocks(&txn, msg->blocks.note_key,
+ msg->blocks.blocks);
+ break;
case NDB_WRITER_PROFILE_LAST_FETCH:
ndb_writer_last_profile_fetch(&txn,
msg->last_fetch.pubkey,
@@ -2804,15 +2867,18 @@ static void *ndb_writer_thread(void *data)
// free notes
for (i = 0; i < popped; i++) {
msg = &msgs[i];
- if (msg->type == NDB_WRITER_NOTE)
+ if (msg->type == NDB_WRITER_NOTE) {
free(msg->note.note);
- else if (msg->type == NDB_WRITER_PROFILE) {
+ } else if (msg->type == NDB_WRITER_PROFILE) {
free(msg->profile.note.note);
ndb_profile_record_builder_free(&msg->profile.record);
+ } else if (msg->type == NDB_WRITER_BLOCKS) {
+ ndb_blocks_free(msg->blocks.blocks);
}
}
}
+ free(scratch);
ndb_debug("quitting writer thread\n");
return NULL;
}
@@ -3060,24 +3126,30 @@ static int ndb_init_lmdb(const char *filename, struct ndb_lmdb *lmdb, size_t map
mdb_set_compare(txn, lmdb->dbs[NDB_DB_NOTE_ID], ndb_tsid_compare);
if ((rc = mdb_dbi_open(txn, "profile_pk", tsid_flags, &lmdb->dbs[NDB_DB_PROFILE_PK]))) {
- fprintf(stderr, "mdb_dbi_open id failed: %s\n", mdb_strerror(rc));
+ fprintf(stderr, "mdb_dbi_open profile_pk failed: %s\n", mdb_strerror(rc));
return 0;
}
mdb_set_compare(txn, lmdb->dbs[NDB_DB_PROFILE_PK], ndb_tsid_compare);
if ((rc = mdb_dbi_open(txn, "note_kind", tsid_flags, &lmdb->dbs[NDB_DB_NOTE_KIND]))) {
- fprintf(stderr, "mdb_dbi_open id failed: %s\n", mdb_strerror(rc));
+ fprintf(stderr, "mdb_dbi_open note_kind failed: %s\n", mdb_strerror(rc));
return 0;
}
mdb_set_compare(txn, lmdb->dbs[NDB_DB_NOTE_KIND], ndb_u64_tsid_compare);
if ((rc = mdb_dbi_open(txn, "note_text", MDB_CREATE | MDB_DUPSORT,
&lmdb->dbs[NDB_DB_NOTE_TEXT]))) {
- fprintf(stderr, "mdb_dbi_open id failed: %s\n", mdb_strerror(rc));
+ fprintf(stderr, "mdb_dbi_open note_text failed: %s\n", mdb_strerror(rc));
return 0;
}
mdb_set_compare(txn, lmdb->dbs[NDB_DB_NOTE_TEXT], ndb_text_search_key_compare);
+ if ((rc = mdb_dbi_open(txn, "note_blocks", MDB_CREATE | MDB_INTEGERKEY,
+ &lmdb->dbs[NDB_DB_NOTE_BLOCKS]))) {
+ fprintf(stderr, "mdb_dbi_open note_blocks failed: %s\n", mdb_strerror(rc));
+ return 0;
+ }
+
// Commit the transaction
if ((rc = mdb_txn_commit(txn))) {
fprintf(stderr, "mdb_txn_commit failed, error %d\n", rc);
@@ -4591,9 +4663,85 @@ const char *ndb_db_name(enum ndb_dbs db)
return "note_kind_index";
case NDB_DB_NOTE_TEXT:
return "note_fulltext";
+ case NDB_DB_NOTE_BLOCKS:
+ return "note_blocks";
case NDB_DBS:
return "count";
}
return "unknown";
}
+
+static struct ndb_blocks *ndb_note_to_blocks(struct ndb_note *note)
+{
+ const char *content;
+ size_t content_len;
+ struct ndb_blocks *blocks;
+
+ content = ndb_note_content(note);
+ content_len = ndb_note_content_length(note);
+
+ // something weird is going on
+ if (content_len >= INT32_MAX)
+ return NULL;
+
+ unsigned char *buffer = malloc(content_len);
+ if (!buffer)
+ return NULL;
+
+ if (!ndb_parse_content(buffer, content_len, content, content_len, &blocks)) {
+ free(buffer);
+ return NULL;
+ }
+
+ blocks = realloc(blocks, ndb_blocks_total_size(blocks));
+ if (blocks == NULL)
+ return NULL;
+
+ blocks->flags |= NDB_BLOCK_FLAG_OWNED;
+
+ return blocks;
+}
+
+struct ndb_blocks *ndb_get_blocks_by_key(struct ndb *ndb, struct ndb_txn *txn, uint64_t note_key)
+{
+ struct ndb_blocks *blocks, *blocks_to_writer;
+ size_t blocks_size;
+ struct ndb_note *note;
+ size_t note_len;
+
+ if ((blocks = ndb_lookup_by_key(txn, note_key, NDB_DB_NOTE_BLOCKS, ¬e_len))) {
+ return blocks;
+ }
+
+ // If we don't have note blocks, let's lazily generate them. This is
+ // migration-friendly instead of doing them all at once
+ if (!(note = ndb_get_note_by_key(txn, note_key, ¬e_len))) {
+ // no note found, can't return note blocks
+ return NULL;
+ }
+
+ if (!(blocks = ndb_note_to_blocks(note)))
+ return NULL;
+
+ // send a copy to the writer
+ blocks_size = ndb_blocks_total_size(blocks);
+ blocks_to_writer = malloc(blocks_size);
+ memcpy(blocks_to_writer, blocks, blocks_size);
+ assert(blocks->flags & NDB_BLOCK_FLAG_OWNED);
+
+ // we generated new blocks, let's store them in the DB
+ struct ndb_writer_blocks write_blocks = {
+ .blocks = blocks_to_writer,
+ .note_key = note_key
+ };
+
+ assert(write_blocks.blocks != blocks);
+
+ struct ndb_writer_msg msg = { .type = NDB_WRITER_BLOCKS };
+ msg.blocks = write_blocks;
+
+ ndb_writer_queue_msg(&ndb->writer, &msg);
+
+ return blocks;
+}
diff --git a/src/nostrdb.h b/src/nostrdb.h
@@ -168,6 +168,7 @@ enum ndb_dbs {
NDB_DB_PROFILE_LAST_FETCH,
NDB_DB_NOTE_KIND, // note kind index
NDB_DB_NOTE_TEXT, // note fulltext index
+ NDB_DB_NOTE_BLOCKS, // parsed note blocks for rendering
NDB_DBS,
};
@@ -474,6 +475,8 @@ size_t ndb_blocks_total_size(struct ndb_blocks *blocks);
/// Free blocks if they are owned, safe to call on unowned blocks as well.
void ndb_blocks_free(struct ndb_blocks *blocks);
+// BLOCK DB
+struct ndb_blocks *ndb_get_blocks_by_key(struct ndb *ndb, struct ndb_txn *txn, uint64_t note_key);
// BLOCK ITERATORS
struct ndb_block_iterator *ndb_blocks_iterate_start(const char *, struct ndb_blocks *);