nostrdb

an unfairly fast embedded nostr database backed by lmdb
git clone git://jb55.com/nostrdb
Log | Files | Refs | Submodules | README | LICENSE

ndb.c (9984B)


      1 
      2 
#include "nostrdb.h"
#include "lmdb.h"
#include "print_util.h"
#include "hex.h"
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>
#include <fcntl.h>
     14 
     15 static int usage()
     16 {
     17 	printf("usage: ndb [--skip-verification] [-d db_dir] <command>\n\n");
     18 	printf("commands\n\n");
     19 	printf("	stat\n");
     20 	printf("	search [--oldest-first] [--limit 42] <fulltext query>\n");
     21 	printf("	query [-k 42] [-k 1337] [-l 42] [-e abcdef...] [-a abcdef... -a bcdef...]\n");
     22 	printf("	profile <pubkey>\n");
     23 	printf("	print-search-keys\n");
     24 	printf("	print-kind-keys\n");
     25 	printf("	print-tag-keys\n");
     26 	printf("	import <line-delimited json file>\n\n");
     27 	printf("settings\n\n");
     28 	printf("	--skip-verification  skip signature validation\n");
     29 	printf("	-d <db_dir>          set database directory\n");
     30 	return 1;
     31 }
     32 
     33 
     34 static int map_file(const char *filename, unsigned char **p, size_t *flen)
     35 {
     36 	struct stat st;
     37 	int des;
     38 	stat(filename, &st);
     39 	*flen = st.st_size;
     40 
     41 	des = open(filename, O_RDONLY);
     42 
     43 	*p = mmap(NULL, *flen, PROT_READ, MAP_PRIVATE, des, 0);
     44 	close(des);
     45 
     46 	return *p != MAP_FAILED;
     47 }
     48 
     49 static inline void print_stat_counts(struct ndb_stat_counts *counts)
     50 {
     51 	printf("%zu\t%zu\t%zu\t%zu\n",
     52 	       counts->count,
     53 	       counts->key_size,
     54 	       counts->value_size,
     55 	       counts->key_size + counts->value_size);
     56 }
     57 
     58 static void print_stats(struct ndb_stat *stat)
     59 {
     60 	int i;
     61 	const char *name;
     62 	struct ndb_stat_counts *c;
     63 
     64 	struct ndb_stat_counts total;
     65 	ndb_stat_counts_init(&total);
     66 
     67 	printf("name\tcount\tkey_bytes\tvalue_bytes\ttotal_bytes\n");
     68 	printf("---\ndbs\n---\n");
     69 	for (i = 0; i < NDB_DBS; i++) {
     70 		name = ndb_db_name(i);
     71 
     72 		total.count += stat->dbs[i].count;
     73 		total.key_size += stat->dbs[i].key_size;
     74 		total.value_size += stat->dbs[i].value_size;
     75 
     76 		printf("%s\t", name);
     77 		print_stat_counts(&stat->dbs[i]);
     78 	}
     79 
     80 	printf("total\t");
     81 	print_stat_counts(&total);
     82 
     83 	printf("-----\nkinds\n-----\n");
     84 	for (i = 0; i < NDB_CKIND_COUNT; i++) {
     85 		c = &stat->common_kinds[i];
     86 		if (c->count == 0)
     87 			continue;
     88 
     89 		printf("%s\t", ndb_kind_name(i));
     90 		print_stat_counts(c);
     91 	}
     92 
     93 	if (stat->other_kinds.count != 0) {
     94 		printf("other\t");
     95 		print_stat_counts(&stat->other_kinds);
     96 	}
     97 }
     98 
     99 int ndb_print_search_keys(struct ndb_txn *txn);
    100 int ndb_print_kind_keys(struct ndb_txn *txn);
    101 int ndb_print_tag_index(struct ndb_txn *txn);
    102 
    103 static void print_note(struct ndb_note *note)
    104 {
    105 	static char buf[5000000];
    106 	if (!ndb_note_json(note, buf, sizeof(buf))) {
    107 		print_hex_stream(stderr, ndb_note_id(note), 32);
    108 		fprintf(stderr, " is too big to print! >5mb");
    109 		return;
    110 	}
    111 	puts(buf);
    112 }
    113 
    114 static void ndb_print_text_search_result(struct ndb_txn *txn,
    115 		struct ndb_text_search_result *r)
    116 {
    117 	size_t len;
    118 	struct ndb_note *note;
    119 
    120 	//ndb_print_text_search_key(&r->key);
    121 
    122 	if (!(note = ndb_get_note_by_key(txn, r->key.note_id, &len))) {
    123 		fprintf(stderr,": note not found");
    124 		return;
    125 	}
    126 
    127 	//fprintf(stderr,"\n");
    128 	print_note(note);
    129 }
    130 
    131 
    132 int main(int argc, char *argv[])
    133 {
    134 	struct ndb *ndb;
    135 	int i, flags, limit, count, current_field, len, res;
    136 	long nanos;
    137 	struct ndb_stat stat;
    138 	struct ndb_txn txn;
    139 	struct ndb_text_search_results results;
    140 	struct ndb_text_search_result *result;
    141 	const char *dir;
    142 	unsigned char *data;
    143 	size_t data_len;
    144 	struct ndb_config config;
    145 	struct timespec t1, t2;
    146 	struct ndb_text_search_config search_config;
    147 	unsigned char tmp_id[32];
    148 
    149 	// profiles
    150 	const char *pk_str;
    151 	void *ptr;
    152 	size_t profile_len;
    153 	uint64_t key;
    154 
    155 	res = 0;
    156 	ndb_default_config(&config);
    157 	ndb_default_text_search_config(&search_config);
    158 	ndb_config_set_mapsize(&config, 1024ULL * 1024ULL * 1024ULL * 1024ULL /* 1 TiB */);
    159 
    160 	if (argc < 2) {
    161 		return usage();
    162 	}
    163 
    164 	dir = ".";
    165 	flags = 0;
    166 	for (i = 0; i < 2; i++)
    167 	{
    168 		if (!strcmp(argv[1], "-d") && argv[2]) {
    169 			dir = argv[2];
    170 			argv += 2;
    171 			argc -= 2;
    172 		} else if (!strcmp(argv[1], "--skip-verification")) {
    173 			flags = NDB_FLAG_SKIP_NOTE_VERIFY;
    174 			argv += 1;
    175 			argc -= 1;
    176 		}
    177 	}
    178 
    179 	ndb_config_set_flags(&config, flags);
    180 
    181 	fprintf(stderr, "using db '%s'\n", dir);
    182 
    183 	if (!ndb_init(&ndb, dir, &config)) {
    184 		return 2;
    185 	}
    186 
    187 	if (argc >= 3 && !strcmp(argv[1], "search")) {
    188 		for (i = 0; i < 2; i++) {
    189 			if (!strcmp(argv[2], "--oldest-first")) {
    190 				ndb_text_search_config_set_order(&search_config, NDB_ORDER_ASCENDING);
    191 				argv++;
    192 				argc--;
    193 			} else if (!strcmp(argv[2], "--limit") || !strcmp(argv[2], "-l")) {
    194 				limit = atoi(argv[3]);
    195 				ndb_text_search_config_set_limit(&search_config, limit);
    196 				argv += 2;
    197 				argc -= 2;
    198 			}
    199 		}
    200 
    201 		ndb_begin_query(ndb, &txn);
    202 		clock_gettime(CLOCK_MONOTONIC, &t1);
    203 		ndb_text_search(&txn, argv[2], &results, &search_config);
    204 		clock_gettime(CLOCK_MONOTONIC, &t2);
    205 
    206 		nanos = (t2.tv_sec - t1.tv_sec) * (long)1e9 + (t2.tv_nsec - t1.tv_nsec);
    207 
    208 		fprintf(stderr, "%d results in %f ms\n", results.num_results, nanos/1000000.0);
    209 
    210 		// print results for now
    211 		for (i = 0; i < results.num_results; i++) {
    212 			result = &results.results[i];
    213 			//fprintf(stderr, "[%02d] ", i+1);
    214 			ndb_print_text_search_result(&txn, result);
    215 		}
    216 
    217 		ndb_end_query(&txn);
    218 	} else if (argc == 2 && !strcmp(argv[1], "stat")) {
    219 		if (!ndb_stat(ndb, &stat)) {
    220 			res = 3;
    221 			goto cleanup;
    222 		}
    223 
    224 		print_stats(&stat);
    225 	} else if (argc >= 3 && !strcmp(argv[1], "query")) {
    226 		struct ndb_filter filter, *f = &filter;
    227 		ndb_filter_init(f);
    228 
    229 		argv += 2;
    230 		argc -= 2;
    231 		current_field = 0;
    232 
    233 		for (i = 0; argc && i < 1000; i++) {
    234 			if (!strcmp(argv[0], "-k")) {
    235 				if (current_field != NDB_FILTER_KINDS) {
    236 					ndb_filter_end_field(f);
    237 					ndb_filter_start_field(f, NDB_FILTER_KINDS);
    238 				}
    239 				current_field = NDB_FILTER_KINDS;
    240 				ndb_filter_add_int_element(f, atoll(argv[1]));
    241 				argv += 2;
    242 				argc -= 2;
    243 			} else if (!strcmp(argv[0], "-l")) {
    244 				limit = atol(argv[1]);
    245 				if (current_field) {
    246 					ndb_filter_end_field(f);
    247 					current_field = 0;
    248 				}
    249 				ndb_filter_start_field(f, NDB_FILTER_LIMIT);
    250 				current_field = NDB_FILTER_LIMIT;
    251 				ndb_filter_add_int_element(f, limit);
    252 				ndb_filter_end_field(f);
    253 				argv += 2;
    254 				argc -= 2;
    255 			} else if (!strcmp(argv[0], "--since") || !strcmp(argv[0], "-s")) {
    256 				if (current_field) {
    257 					ndb_filter_end_field(f);
    258 					current_field = 0;
    259 				}
    260 				ndb_filter_start_field(f, NDB_FILTER_SINCE);
    261 				ndb_filter_add_int_element(f, atoll(argv[1]));
    262 				ndb_filter_end_field(f);
    263 				argv += 2;
    264 				argc -= 2;
    265 			} else if (!strcmp(argv[0], "--until") || !strcmp(argv[0], "-u")) {
    266 				if (current_field) {
    267 					ndb_filter_end_field(f);
    268 					current_field = 0;
    269 				}
    270 				ndb_filter_start_field(f, NDB_FILTER_UNTIL);
    271 				ndb_filter_add_int_element(f, atoll(argv[1]));
    272 				ndb_filter_end_field(f);
    273 				argv += 2;
    274 				argc -= 2;
    275 			} else if (!strcmp(argv[0], "-t")) {
    276 				if (current_field) {
    277 					ndb_filter_end_field(f);
    278 					current_field = 0;
    279 				}
    280 				ndb_filter_start_tag_field(f, 't');
    281 				ndb_filter_add_str_element(f, argv[1]);
    282 				ndb_filter_end_field(f);
    283 				argv += 2;
    284 				argc -= 2;
    285 			} else if (!strcmp(argv[0], "-e")) {
    286 				if (current_field != 'e') {
    287 					if (!ndb_filter_start_tag_field(f, 'e')) {
    288 						fprintf(stderr, "field already started\n");
    289 						res = 44;
    290 						goto cleanup;
    291 					}
    292 				}
    293 				current_field = 'e';
    294 
    295 				if (len != 64 || !hex_decode(argv[1], 64, tmp_id, sizeof(tmp_id))) {
    296 					fprintf(stderr, "invalid hex id\n");
    297 					res = 42;
    298 					goto cleanup;
    299 				}
    300 
    301 				if (!ndb_filter_add_id_element(f, tmp_id)) {
    302 					fprintf(stderr, "too many event ids\n");
    303 					res = 43;
    304 					goto cleanup;
    305 				}
    306 
    307 				argv += 2;
    308 				argc -= 2;
    309 			} else if (!strcmp(argv[0], "-a")) {
    310 				if (current_field != NDB_FILTER_AUTHORS)
    311 					ndb_filter_start_field(f, NDB_FILTER_AUTHORS);
    312 				current_field = NDB_FILTER_AUTHORS;
    313 
    314 				len = strlen(argv[1]);
    315 				if (len != 64 || !hex_decode(argv[1], 64, tmp_id, sizeof(tmp_id))) {
    316 					fprintf(stderr, "invalid hex pubkey\n");
    317 					res = 42;
    318 					goto cleanup;
    319 				}
    320 
    321 				if (!ndb_filter_add_id_element(f, tmp_id)) {
    322 					fprintf(stderr, "too many author pubkeys\n");
    323 					res = 43;
    324 					goto cleanup;
    325 				}
    326 
    327 				argv += 2;
    328 				argc -= 2;
    329 			}
    330 		}
    331 
    332 		if (current_field) {
    333 			ndb_filter_end_field(f);
    334 			current_field = 0;
    335 		}
    336 
    337 		struct ndb_query_result results[10000];
    338 		ndb_begin_query(ndb, &txn);
    339 
    340 		clock_gettime(CLOCK_MONOTONIC, &t1);
    341 		if (!ndb_query(&txn, f, 1, results, 10000, &count)) {
    342 			fprintf(stderr, "query error\n");
    343 		}
    344 		clock_gettime(CLOCK_MONOTONIC, &t2);
    345 
    346 		nanos = (t2.tv_sec - t1.tv_sec) * (long)1e9 + (t2.tv_nsec - t1.tv_nsec);
    347 
    348 		fprintf(stderr, "%d results in %f ms\n", count, nanos/1000000.0);
    349 		for (i = 0; i < count; i++) {
    350 			print_note(results[i].note);
    351 		}
    352 
    353 		ndb_end_query(&txn);
    354 
    355 	} else if (argc == 3 && !strcmp(argv[1], "import")) {
    356 		if (!strcmp(argv[2], "-")) {
    357 			ndb_process_events_stream(ndb, stdin);
    358 		} else {
    359 			map_file(argv[2], &data, &data_len);
    360 			ndb_process_events(ndb, (const char *)data, data_len);
    361 			//ndb_process_client_events(ndb, (const char *)data, data_len);
    362 		}
    363 	} else if (argc == 2 && !strcmp(argv[1], "print-search-keys")) {
    364 		ndb_begin_query(ndb, &txn);
    365 		ndb_print_search_keys(&txn);
    366 		ndb_end_query(&txn);
    367 	} else if (argc == 2 && !strcmp(argv[1], "print-kind-keys")) {
    368 		ndb_begin_query(ndb, &txn);
    369 		ndb_print_kind_keys(&txn);
    370 		ndb_end_query(&txn);
    371 	} else if (argc == 2 && !strcmp(argv[1], "print-tag-keys")) {
    372 		ndb_begin_query(ndb, &txn);
    373 		ndb_print_tag_index(&txn);
    374 		ndb_end_query(&txn);
    375 	}  else if (argc == 3 && !strcmp(argv[1], "profile")) {
    376 		pk_str = argv[2];
    377 		if (!hex_decode(pk_str, strlen(pk_str), tmp_id, sizeof(tmp_id))) {
    378 			fprintf(stderr, "failed to decode hex pubkey '%s'\n", pk_str);
    379 			res = 88;
    380 			goto cleanup;
    381 		}
    382 		ndb_begin_query(ndb, &txn);
    383 		if (!(ptr = ndb_get_profile_by_pubkey(&txn, tmp_id, &profile_len, &key))) {
    384 			ndb_end_query(&txn);
    385 			fprintf(stderr, "profile not found\n");
    386 			res = 89;
    387 			goto cleanup;
    388 		}
    389 		ndb_end_query(&txn);
    390 		print_hex(ptr, profile_len);
    391 		printf("\n");
    392 	} else {
    393 		ndb_destroy(ndb);
    394 		return usage();
    395 	}
    396 
    397 cleanup:
    398 	ndb_destroy(ndb);
    399 	return res;
    400 }