nostrdb

an unfairly fast embedded nostr database backed by lmdb
git clone git://jb55.com/nostrdb
Log | Files | Refs | Submodules | README | LICENSE

commit 3fb90ccf247fba0177e5877801c1d73d11147a13
parent 2898a12f3b7fb10855b2f679ed492d7bdd130a21
Author: William Casarin <jb55@jb55.com>
Date:   Tue, 14 Nov 2023 09:45:41 -0800

stat: add ndb_stat for counting things

This adds ndb_stat for counting the number of items and the size of
these items in nostrdb.

Fixes: https://github.com/damus-io/damus/issues/1713
Changelog-Added: Added ndb_stat for counting the total size of nostrdb databases and note kinds

Diffstat:
M nostrdb.c | 118 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
M nostrdb.h | 131 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 237 insertions(+), 12 deletions(-)

diff --git a/nostrdb.c b/nostrdb.c
@@ -72,18 +72,6 @@ enum ndb_writer_msgtype {
 	NDB_WRITER_PROFILE_LAST_FETCH, // when profiles were last fetched
 };
 
-enum ndb_dbs {
-	NDB_DB_NOTE,
-	NDB_DB_META,
-	NDB_DB_PROFILE,
-	NDB_DB_NOTE_ID,
-	NDB_DB_PROFILE_PK,
-	NDB_DB_NDB_META,
-	NDB_DB_PROFILE_SEARCH,
-	NDB_DB_PROFILE_LAST_FETCH,
-	NDB_DBS,
-};
-
 // keys used for storing data in the NDB metadata database (NDB_DB_NDB_META)
 enum ndb_meta_key {
 	NDB_META_KEY_VERSION = 1
@@ -2921,6 +2909,112 @@ int ndb_builder_new_tag(struct ndb_builder *builder)
 	return cursor_push_tag(&builder->note_cur, &tag);
 }
 
+// reset one counter bucket to zero
+static void ndb_stat_counts_init(struct ndb_stat_counts *counts)
+{
+	counts->count = 0;
+	counts->key_size = 0;
+	counts->value_size = 0;
+}
+
+// zero every bucket in stat: per-kind, per-db and the other_kinds catch-all
+static void ndb_stat_init(struct ndb_stat *stat)
+{
+	int i;
+
+	for (i = 0; i < NDB_CKIND_COUNT; i++) {
+		ndb_stat_counts_init(&stat->common_kinds[i]);
+	}
+
+	for (i = 0; i < NDB_DBS; i++) {
+		ndb_stat_counts_init(&stat->dbs[i]);
+	}
+
+	ndb_stat_counts_init(&stat->other_kinds);
+}
+
+/// Count the entries and the key/value sizes of every database in ndb
+///
+/// Each dbi is walked with its own cursor inside one query transaction.
+/// Entries in the note db are also bucketed by kind: kinds known to
+/// ndb_kind_to_common_kind land in stat->common_kinds, the rest are
+/// summed into stat->other_kinds.
+///
+/// stat is zeroed before counting starts, so after a failure it only
+/// holds what was counted up to that point.
+///
+/// Returns 1 on success, 0 if the query or a cursor could not be opened.
+int ndb_stat(struct ndb *ndb, struct ndb_stat *stat)
+{
+	int rc;
+	MDB_cursor *cur;
+	MDB_val k, v;
+	MDB_dbi db;
+	struct ndb_txn txn;
+	struct ndb_note *note;
+	int i;
+	enum ndb_common_kind common_kind;
+
+	// initialize to 0
+	ndb_stat_init(stat);
+
+	if (!ndb_begin_query(ndb, &txn)) {
+		fprintf(stderr, "ndb_stat failed at ndb_begin_query\n");
+		return 0;
+	}
+
+	// stat each dbi in the database
+	for (i = 0; i < NDB_DBS; i++)
+	{
+		db = ndb->lmdb.dbs[i];
+
+		if ((rc = mdb_cursor_open(txn.mdb_txn, db, &cur))) {
+			fprintf(stderr, "ndb_stat: mdb_cursor_open failed, error '%s'\n",
+				mdb_strerror(rc));
+			// the query is still open here, end it before
+			// bailing so the transaction isn't leaked
+			ndb_end_query(&txn);
+			return 0;
+		}
+
+		// loop over every entry and count kv sizes
+		while (mdb_cursor_get(cur, &k, &v, MDB_NEXT) == 0) {
+			// we gather more detailed per-kind stats if we're in
+			// the notes db
+			if (i == NDB_DB_NOTE) {
+				note = v.mv_data;
+				common_kind = ndb_kind_to_common_kind(note->kind);
+
+				// uncommon kind? just count them in bulk.
+				// "uncommon" comes back as -1 squeezed into
+				// the enum, so range-check instead of testing
+				// for -1: this holds whatever integer type the
+				// compiler picked for the enum
+				if ((unsigned int)common_kind >= NDB_CKIND_COUNT) {
+					stat->other_kinds.count++;
+					stat->other_kinds.key_size += k.mv_size;
+					stat->other_kinds.value_size += v.mv_size;
+				} else {
+					stat->common_kinds[common_kind].count++;
+					stat->common_kinds[common_kind].key_size += k.mv_size;
+					stat->common_kinds[common_kind].value_size += v.mv_size;
+				}
+			}
+
+			stat->dbs[i].count++;
+			stat->dbs[i].key_size += k.mv_size;
+			stat->dbs[i].value_size += v.mv_size;
+		}
+
+		// close the cursor, they are per-dbi
+		mdb_cursor_close(cur);
+	}
+
+	ndb_end_query(&txn);
+
+	return 1;
+}
+
 /// Push an element to the current tag
 ///
 /// Basic idea is to call ndb_builder_new_tag
diff --git a/nostrdb.h b/nostrdb.h
@@ -32,6 +32,51 @@ struct ndb_search_key
 	uint64_t timestamp;
 };
 
+enum ndb_dbs {
+	NDB_DB_NOTE,
+	NDB_DB_META,
+	NDB_DB_PROFILE,
+	NDB_DB_NOTE_ID,
+	NDB_DB_PROFILE_PK,
+	NDB_DB_NDB_META,
+	NDB_DB_PROFILE_SEARCH,
+	NDB_DB_PROFILE_LAST_FETCH,
+	NDB_DBS, // number of dbs, sizes ndb_stat.dbs, keep last
+};
+
+// common kinds. we collect stats on these in ndb_stat. mainly because I don't
+// want to deal with including a hashtable to the project.
+enum ndb_common_kind {
+	NDB_CKIND_PROFILE,
+	NDB_CKIND_TEXT,
+	NDB_CKIND_CONTACTS,
+	NDB_CKIND_DM,
+	NDB_CKIND_DELETE,
+	NDB_CKIND_REPOST,
+	NDB_CKIND_REACTION,
+	NDB_CKIND_ZAP,
+	NDB_CKIND_ZAP_REQUEST,
+	NDB_CKIND_NWC_REQUEST,
+	NDB_CKIND_NWC_RESPONSE,
+	NDB_CKIND_HTTP_AUTH,
+	NDB_CKIND_LIST,
+	NDB_CKIND_LONGFORM,
+	NDB_CKIND_STATUS,
+	NDB_CKIND_COUNT, // should always be last
+};
+
+struct ndb_stat_counts {
+	size_t key_size;   // sum of key mv_size over the counted entries
+	size_t value_size; // sum of value mv_size over the counted entries
+	size_t count;      // number of entries counted
+};
+
+struct ndb_stat {
+	struct ndb_stat_counts dbs[NDB_DBS]; // indexed by enum ndb_dbs
+	struct ndb_stat_counts common_kinds[NDB_CKIND_COUNT]; // indexed by enum ndb_common_kind
+	struct ndb_stat_counts other_kinds; // notes whose kind has no ndb_common_kind
+};
+
 struct ndb_search {
 	struct ndb_search_key *key;
 	uint64_t profile_key;
@@ -224,6 +269,9 @@ void ndb_builder_set_kind(struct ndb_builder *builder, uint32_t kind);
 int ndb_builder_new_tag(struct ndb_builder *builder);
 int ndb_builder_push_tag_str(struct ndb_builder *builder, const char *str, int len);
 
+// stats: fills stat with per-db and per-kind totals. 1 on success, 0 on failure
+int ndb_stat(struct ndb *ndb, struct ndb_stat *stat);
+
 static inline struct ndb_str ndb_note_str(struct ndb_note *note,
 					  union ndb_packed_str *pstr)
 {
@@ -349,4 +397,87 @@ static inline int ndb_tags_iterate_next(struct ndb_iterator *iter)
 	return 0;
 }
 
+// map a note kind number to its ndb_common_kind bucket. kinds that aren't
+// tracked come back as -1 converted to the enum type, so range-check the
+// result against NDB_CKIND_COUNT before using it as an index
+static inline enum ndb_common_kind
+ndb_kind_to_common_kind(int kind)
+{
+	switch (kind)
+	{
+		case 0:     return NDB_CKIND_PROFILE;
+		case 1:     return NDB_CKIND_TEXT;
+		case 3:     return NDB_CKIND_CONTACTS;
+		case 4:     return NDB_CKIND_DM;
+		case 5:     return NDB_CKIND_DELETE;
+		case 6:     return NDB_CKIND_REPOST;
+		case 7:     return NDB_CKIND_REACTION;
+		case 9735:  return NDB_CKIND_ZAP;
+		case 9734:  return NDB_CKIND_ZAP_REQUEST;
+		case 23194: return NDB_CKIND_NWC_REQUEST;
+		case 23195: return NDB_CKIND_NWC_RESPONSE;
+		case 27235: return NDB_CKIND_HTTP_AUTH;
+		case 30000: return NDB_CKIND_LIST;
+		case 30023: return NDB_CKIND_LONGFORM;
+		case 30315: return NDB_CKIND_STATUS;
+	}
+
+	return -1;
+}
+
+// short name for a common kind, "unknown" for anything else
+static inline const char *
+ndb_kind_name(enum ndb_common_kind ck)
+{
+	switch (ck) {
+		case NDB_CKIND_PROFILE:      return "profile";
+		case NDB_CKIND_TEXT:         return "text";
+		case NDB_CKIND_CONTACTS:     return "contacts";
+		case NDB_CKIND_DM:           return "dm";
+		case NDB_CKIND_DELETE:       return "delete";
+		case NDB_CKIND_REPOST:       return "repost";
+		case NDB_CKIND_REACTION:     return "reaction";
+		case NDB_CKIND_ZAP:          return "zap";
+		case NDB_CKIND_ZAP_REQUEST:  return "zap_request";
+		case NDB_CKIND_NWC_REQUEST:  return "nwc_request";
+		case NDB_CKIND_NWC_RESPONSE: return "nwc_response";
+		case NDB_CKIND_HTTP_AUTH:    return "http_auth";
+		case NDB_CKIND_LIST:         return "list";
+		case NDB_CKIND_LONGFORM:     return "longform";
+		case NDB_CKIND_STATUS:       return "status";
+		case NDB_CKIND_COUNT:        return "unknown";
+	}
+
+	return "unknown";
+}
+
+// short name for a db. NDB_DBS itself maps to "count", out-of-range values
+// to "unknown"
+static inline const char *
+ndb_db_name(enum ndb_dbs db)
+{
+	switch (db) {
+		case NDB_DB_NOTE:
+			return "note";
+		case NDB_DB_META:
+			return "note_metadata";
+		case NDB_DB_PROFILE:
+			return "profile";
+		case NDB_DB_NOTE_ID:
+			return "note_index";
+		case NDB_DB_PROFILE_PK:
+			return "profile_pubkey_index";
+		case NDB_DB_NDB_META:
+			return "nostrdb_metadata";
+		case NDB_DB_PROFILE_SEARCH:
+			return "profile_search";
+		case NDB_DB_PROFILE_LAST_FETCH:
+			return "profile_last_fetch";
+		case NDB_DBS:
+			return "count";
+	}
+
+	return "unknown";
+}
+
 #endif