nostrdb

an unfairly fast embedded nostr database backed by lmdb
git clone git://jb55.com/nostrdb
Log | Files | Refs | Submodules | README | LICENSE

ndb.c (12969B)


      1 
      2 
      3 #include "nostrdb.h"
      4 #include "lmdb.h"
      5 #include "print_util.h"
      6 #include "hex.h"
      7 #include <time.h>
      8 #include <stdio.h>
      9 #include <stdlib.h>
     10 #include <sys/stat.h>
     11 #include <sys/mman.h>
     12 #include <unistd.h>
     13 #include <fcntl.h>
     14 
     15 static int usage()
     16 {
     17 	printf("usage: ndb [--skip-verification] [-d db_dir] <command>\n\n");
     18 
     19 	printf("commands\n\n");
     20 
     21 	printf("	stat\n");
     22 	printf("	query [--kind 42] [--id abcdef...] [--notekey key] [--search term] [--limit 42] \n");
     23 	printf("	      [-e abcdef...] [--author abcdef... -a bcdef...] [--relay wss://relay.damus.io]\n");
     24 	printf("	profile <pubkey>                            print the raw profile data for a pubkey\n");
     25 	printf("	note-relays <note-id>                       list the relays a given note id has been seen on\n");
     26 	printf("	print-search-keys\n");
     27 	printf("	print-kind-keys\n");
     28 	printf("	print-tag-keys\n");
     29 	printf("	print-relay-kind-index-keys\n");
     30 	printf("	print-author-kind-index-keys\n");
     31 	printf("	print-note-metadata\n");
     32 	printf("	import <line-delimited json file>\n\n");
     33 
     34 	printf("settings\n\n");
     35 
     36 	printf("	--skip-verification  skip signature validation\n");
     37 	printf("	-d <db_dir>          set database directory\n");
     38 	return 1;
     39 }
     40 
     41 
     42 static int map_file(const char *filename, unsigned char **p, size_t *flen)
     43 {
     44 	struct stat st;
     45 	int des;
     46 	stat(filename, &st);
     47 	*flen = st.st_size;
     48 
     49 	des = open(filename, O_RDONLY);
     50 
     51 	*p = mmap(NULL, *flen, PROT_READ, MAP_PRIVATE, des, 0);
     52 	close(des);
     53 
     54 	return *p != MAP_FAILED;
     55 }
     56 
     57 static inline void print_stat_counts(struct ndb_stat_counts *counts)
     58 {
     59 	printf("%zu\t%zu\t%zu\t%zu\n",
     60 	       counts->count,
     61 	       counts->key_size,
     62 	       counts->value_size,
     63 	       counts->key_size + counts->value_size);
     64 }
     65 
     66 static void print_stats(struct ndb_stat *stat)
     67 {
     68 	int i;
     69 	const char *name;
     70 	struct ndb_stat_counts *c;
     71 
     72 	struct ndb_stat_counts total;
     73 	ndb_stat_counts_init(&total);
     74 
     75 	printf("name\tcount\tkey_bytes\tvalue_bytes\ttotal_bytes\n");
     76 	printf("---\ndbs\n---\n");
     77 	for (i = 0; i < NDB_DBS; i++) {
     78 		name = ndb_db_name(i);
     79 
     80 		total.count += stat->dbs[i].count;
     81 		total.key_size += stat->dbs[i].key_size;
     82 		total.value_size += stat->dbs[i].value_size;
     83 
     84 		printf("%s\t", name);
     85 		print_stat_counts(&stat->dbs[i]);
     86 	}
     87 
     88 	printf("total\t");
     89 	print_stat_counts(&total);
     90 
     91 	printf("-----\nkinds\n-----\n");
     92 	for (i = 0; i < NDB_CKIND_COUNT; i++) {
     93 		c = &stat->common_kinds[i];
     94 		if (c->count == 0)
     95 			continue;
     96 
     97 		printf("%s\t", ndb_kind_name(i));
     98 		print_stat_counts(c);
     99 	}
    100 
    101 	if (stat->other_kinds.count != 0) {
    102 		printf("other\t");
    103 		print_stat_counts(&stat->other_kinds);
    104 	}
    105 }
    106 
    /*
     * Prototypes for the key/index dump routines that main() runs for the
     * print-* commands. Each takes an open query transaction. Their
     * definitions are not in this file; they are declared here by hand,
     * presumably because the included headers do not export them -- TODO
     * confirm against nostrdb.h.
     */
    107 int ndb_print_search_keys(struct ndb_txn *txn);
    108 int ndb_print_kind_keys(struct ndb_txn *txn);
    109 int ndb_print_tag_index(struct ndb_txn *txn);
    110 int ndb_print_relay_kind_index(struct ndb_txn *txn);
    111 int ndb_print_author_kind_index(struct ndb_txn *txn);
    112 int ndb_print_note_metadata(struct ndb_txn *txn);
    113 
    114 static void print_note(struct ndb_note *note)
    115 {
    116 	static char buf[5000000];
    117 	if (!ndb_note_json(note, buf, sizeof(buf))) {
    118 		print_hex_stream(stderr, ndb_note_id(note), 32);
    119 		fprintf(stderr, " is too big to print! >5mb");
    120 		return;
    121 	}
    122 	puts(buf);
    123 }
    124 
    125 static void ndb_print_text_search_result(struct ndb_txn *txn,
    126 		struct ndb_text_search_result *r)
    127 {
    128 	size_t len;
    129 	struct ndb_note *note;
    130 
    131 	//ndb_print_text_search_key(&r->key);
    132 
    133 	if (!(note = ndb_get_note_by_key(txn, r->key.note_id, &len))) {
    134 		fprintf(stderr,": note not found");
    135 		return;
    136 	}
    137 
    138 	//fprintf(stderr,"\n");
    139 	print_note(note);
    140 }
    141 
    142 
    143 int main(int argc, char *argv[])
    144 {
    145 	struct ndb *ndb;
    146 	int i, flags, limit, count, current_field, len, res;
    147 	long nanos;
    148 	struct ndb_stat stat;
    149 	struct ndb_txn txn;
    150 	const char *dir;
    151 	unsigned char *data;
    152 	size_t data_len;
    153 	struct ndb_config config;
    154 	struct timespec t1, t2;
    155 	unsigned char tmp_id[32];
    156 	char buf[1024];
    157 	buf[0] = 0;
    158 
    159 	// profiles
    160 	const char *arg_str;
    161 	void *ptr;
    162 	size_t profile_len;
    163 	uint64_t key = 0;
    164 
    165 	res = 0;
    166 	ndb_default_config(&config);
    167 	ndb_config_set_mapsize(&config, 1024ULL * 1024ULL * 1024ULL * 1024ULL /* 1 TiB */);
    168 
    169 	if (argc < 2) {
    170 		return usage();
    171 	}
    172 
    173 	dir = ".";
    174 	flags = 0;
    175 	for (i = 0; i < 2; i++)
    176 	{
    177 		if (!strcmp(argv[1], "-d") && argv[2]) {
    178 			dir = argv[2];
    179 			argv += 2;
    180 			argc -= 2;
    181 		} else if (!strcmp(argv[1], "--skip-verification")) {
    182 			flags = NDB_FLAG_SKIP_NOTE_VERIFY;
    183 			argv += 1;
    184 			argc -= 1;
    185 		}
    186 	}
    187 
    188 	ndb_config_set_flags(&config, flags);
    189 
    190 	//fprintf(stderr, "using db '%s'\n", dir);
    191 
    192 	if (!ndb_init(&ndb, dir, &config)) {
    193 		return 2;
    194 	}
    195 
    196 	if (argc == 2 && !strcmp(argv[1], "stat")) {
    197 		if (!ndb_stat(ndb, &stat)) {
    198 			res = 3;
    199 			goto cleanup;
    200 		}
    201 
    202 		print_stats(&stat);
    203 	} else if (argc >= 3 && !strcmp(argv[1], "query")) {
    204 		struct ndb_filter filter, *f = &filter;
    205 		ndb_filter_init(f);
    206 
    207 		argv += 2;
    208 		argc -= 2;
    209 		current_field = 0;
    210 
    211 		for (i = 0; argc && i < 1000; i++) {
    212 			if (!strcmp(argv[0], "-k") || !strcmp(argv[0], "--kind")) {
    213 				if (current_field != NDB_FILTER_KINDS) {
    214 					ndb_filter_end_field(f);
    215 					ndb_filter_start_field(f, NDB_FILTER_KINDS);
    216 				}
    217 				current_field = NDB_FILTER_KINDS;
    218 				ndb_filter_add_int_element(f, atoll(argv[1]));
    219 				argv += 2;
    220 				argc -= 2;
    221 			} else if (!strcmp(argv[0], "--notekey")) {
    222 				key = atol(argv[1]);
    223 				argv += 2;
    224 				argc -= 2;
    225 			} else if (!strcmp(argv[0], "-l") || !strcmp(argv[0], "--limit")) {
    226 				limit = atol(argv[1]);
    227 				if (current_field) {
    228 					ndb_filter_end_field(f);
    229 					current_field = 0;
    230 				}
    231 				ndb_filter_start_field(f, NDB_FILTER_LIMIT);
    232 				current_field = NDB_FILTER_LIMIT;
    233 				ndb_filter_add_int_element(f, limit);
    234 				ndb_filter_end_field(f);
    235 				argv += 2;
    236 				argc -= 2;
    237 			} else if (!strcmp(argv[0], "--since") || !strcmp(argv[0], "-s")) {
    238 				if (current_field) {
    239 					ndb_filter_end_field(f);
    240 					current_field = 0;
    241 				}
    242 				ndb_filter_start_field(f, NDB_FILTER_SINCE);
    243 				ndb_filter_add_int_element(f, atoll(argv[1]));
    244 				ndb_filter_end_field(f);
    245 				argv += 2;
    246 				argc -= 2;
    247 			} else if (!strcmp(argv[0], "--until") || !strcmp(argv[0], "-u")) {
    248 				if (current_field) {
    249 					ndb_filter_end_field(f);
    250 					current_field = 0;
    251 				}
    252 				ndb_filter_start_field(f, NDB_FILTER_UNTIL);
    253 				ndb_filter_add_int_element(f, atoll(argv[1]));
    254 				ndb_filter_end_field(f);
    255 				argv += 2;
    256 				argc -= 2;
    257 			} else if (!strcmp(argv[0], "-t")) {
    258 				if (current_field) {
    259 					ndb_filter_end_field(f);
    260 					current_field = 0;
    261 				}
    262 				ndb_filter_start_tag_field(f, 't');
    263 				ndb_filter_add_str_element(f, argv[1]);
    264 				ndb_filter_end_field(f);
    265 				argv += 2;
    266 				argc -= 2;
    267 			} else if (!strcmp(argv[0], "--search") || !strcmp(argv[0], "-S")) {
    268 				if (current_field) {
    269 					ndb_filter_end_field(f);
    270 					current_field = 0;
    271 				}
    272 				ndb_filter_start_field(f, NDB_FILTER_SEARCH);
    273 				ndb_filter_add_str_element(f, argv[1]);
    274 				ndb_filter_end_field(f);
    275 				argv += 2;
    276 				argc -= 2;
    277 			} else if (!strcmp(argv[0], "--relay") || !strcmp(argv[0], "-r")) {
    278 				if (current_field) {
    279 					ndb_filter_end_field(f);
    280 					current_field = 0;
    281 				}
    282 				ndb_filter_start_field(f, NDB_FILTER_RELAYS);
    283 				ndb_filter_add_str_element(f, argv[1]);
    284 				ndb_filter_end_field(f);
    285 				argv += 2;
    286 				argc -= 2;
    287 			} else if (!strcmp(argv[0], "-i") || !strcmp(argv[0], "--id")) {
    288 				if (current_field != NDB_FILTER_IDS) {
    289 					ndb_filter_end_field(f);
    290 					ndb_filter_start_field(f, NDB_FILTER_IDS);
    291 					current_field = NDB_FILTER_IDS;
    292 				}
    293 
    294 				len = strlen(argv[1]);
    295 				if (len != 64 || !hex_decode(argv[1], 64, tmp_id, sizeof(tmp_id))) {
    296 					fprintf(stderr, "invalid hex id\n");
    297 					res = 42;
    298 					goto cleanup;
    299 				}
    300 
    301 				ndb_filter_add_id_element(f, tmp_id);
    302 				argv += 2;
    303 				argc -= 2;
    304 			} else if (!strcmp(argv[0], "-e")) {
    305 				if (current_field != 'e') {
    306 					if (!ndb_filter_start_tag_field(f, 'e')) {
    307 						fprintf(stderr, "field already started\n");
    308 						res = 44;
    309 						goto cleanup;
    310 					}
    311 				}
    312 				current_field = 'e';
    313 
    314 				if (len != 64 || !hex_decode(argv[1], 64, tmp_id, sizeof(tmp_id))) {
    315 					fprintf(stderr, "invalid hex id\n");
    316 					res = 42;
    317 					goto cleanup;
    318 				}
    319 
    320 				if (!ndb_filter_add_id_element(f, tmp_id)) {
    321 					fprintf(stderr, "too many event ids\n");
    322 					res = 43;
    323 					goto cleanup;
    324 				}
    325 
    326 				argv += 2;
    327 				argc -= 2;
    328 			} else if (!strcmp(argv[0], "-q")) {
    329 				if (current_field != 'q') {
    330 					if (!ndb_filter_start_tag_field(f, 'q')) {
    331 						fprintf(stderr, "field already started\n");
    332 						res = 44;
    333 						goto cleanup;
    334 					}
    335 				}
    336 				current_field = 'q';
    337 
    338 				if (len != 64 || !hex_decode(argv[1], 64, tmp_id, sizeof(tmp_id))) {
    339 					fprintf(stderr, "invalid hex id\n");
    340 					res = 42;
    341 					goto cleanup;
    342 				}
    343 
    344 				if (!ndb_filter_add_id_element(f, tmp_id)) {
    345 					fprintf(stderr, "too many event ids\n");
    346 					res = 43;
    347 					goto cleanup;
    348 				}
    349 
    350 				argv += 2;
    351 				argc -= 2;
    352 			} 
    353 			else if (!strcmp(argv[0], "-a") || !strcmp(argv[0], "--author")) {
    354 				if (current_field != NDB_FILTER_AUTHORS) {
    355 					ndb_filter_end_field(f);
    356 					ndb_filter_start_field(f, NDB_FILTER_AUTHORS);
    357 					current_field = NDB_FILTER_AUTHORS;
    358 				}
    359 
    360 				len = strlen(argv[1]);
    361 				if (len != 64 || !hex_decode(argv[1], 64, tmp_id, sizeof(tmp_id))) {
    362 					fprintf(stderr, "invalid hex pubkey\n");
    363 					res = 42;
    364 					goto cleanup;
    365 				}
    366 
    367 				if (!ndb_filter_add_id_element(f, tmp_id)) {
    368 					fprintf(stderr, "too many author pubkeys\n");
    369 					res = 43;
    370 					goto cleanup;
    371 				}
    372 
    373 				argv += 2;
    374 				argc -= 2;
    375 			} else {
    376 				fprintf(stderr, "unrecognized option: %s\n", argv[0]);
    377 				res = 100;
    378 				goto cleanup;
    379 			}
    380 		}
    381 
    382 		if (current_field) {
    383 			ndb_filter_end_field(f);
    384 			current_field = 0;
    385 		}
    386 
    387 		ndb_filter_end(f);
    388 
    389 		ndb_filter_json(f, buf, sizeof(buf));
    390 		fprintf(stderr, "using filter '%s'\n", buf);
    391 
    392 		int rsize = 1000000;
    393 		struct ndb_query_result *results = malloc(sizeof(struct ndb_query_result) * rsize);
    394 		assert(results);
    395 		ndb_begin_query(ndb, &txn);
    396 
    397 		clock_gettime(CLOCK_MONOTONIC, &t1);
    398 		if (key) {
    399 			results[0].note = ndb_get_note_by_key(&txn, key, NULL);
    400 			if (results[0].note != NULL)
    401 				count = 1;
    402 			else
    403 				count = 0;
    404 		} else if (!ndb_query(&txn, f, 1, results, rsize, &count)) {
    405 			fprintf(stderr, "query error\n");
    406 		}
    407 		clock_gettime(CLOCK_MONOTONIC, &t2);
    408 
    409 		nanos = (t2.tv_sec - t1.tv_sec) * (long)1e9 + (t2.tv_nsec - t1.tv_nsec);
    410 
    411 		fprintf(stderr, "%d results in %f ms\n", count, nanos/1000000.0);
    412 		for (i = 0; i < count; i++) {
    413 			print_note(results[i].note);
    414 		}
    415 
    416 		ndb_end_query(&txn);
    417 		ndb_filter_destroy(f);
    418 
    419 		free(results);
    420 	} else if (argc == 3 && !strcmp(argv[1], "import")) {
    421 		if (!strcmp(argv[2], "-")) {
    422 			ndb_process_events_stream(ndb, stdin);
    423 		} else {
    424 			map_file(argv[2], &data, &data_len);
    425 			ndb_process_events(ndb, (const char *)data, data_len);
    426 			//ndb_process_client_events(ndb, (const char *)data, data_len);
    427 		}
    428 	} else if (argc == 2 && !strcmp(argv[1], "print-search-keys")) {
    429 		ndb_begin_query(ndb, &txn);
    430 		ndb_print_search_keys(&txn);
    431 		ndb_end_query(&txn);
    432 	} else if (argc == 2 && !strcmp(argv[1], "print-kind-keys")) {
    433 		ndb_begin_query(ndb, &txn);
    434 		ndb_print_kind_keys(&txn);
    435 		ndb_end_query(&txn);
    436 	} else if (argc == 2 && !strcmp(argv[1], "print-tag-keys")) {
    437 		ndb_begin_query(ndb, &txn);
    438 		ndb_print_tag_index(&txn);
    439 		ndb_end_query(&txn);
    440 	} else if (argc == 2 && !strcmp(argv[1], "print-relay-kind-index-keys")) {
    441 		ndb_begin_query(ndb, &txn);
    442 		ndb_print_relay_kind_index(&txn);
    443 		ndb_end_query(&txn);
    444 	} else if (argc == 2 && !strcmp(argv[1], "print-author-kind-index-keys")) {
    445 		ndb_begin_query(ndb, &txn);
    446 		ndb_print_author_kind_index(&txn);
    447 		ndb_end_query(&txn);
    448 	} else if (argc == 2 && !strcmp(argv[1], "print-note-metadata")) {
    449 		ndb_begin_query(ndb, &txn);
    450 		ndb_print_note_metadata(&txn);
    451 		ndb_end_query(&txn);
    452 	} else if (argc == 3 && !strcmp(argv[1], "note-relays")) {
    453 		struct ndb_note_relay_iterator iter;
    454 		const char *relay;
    455 
    456 		ndb_begin_query(ndb, &txn);
    457 		arg_str = argv[2];
    458 
    459 		if (!hex_decode(arg_str, strlen(arg_str), tmp_id, sizeof(tmp_id))) {
    460 			fprintf(stderr, "failed to decode hex pubkey '%s'\n", arg_str);
    461 			res = 88;
    462 			goto cleanup;
    463 		}
    464 
    465 		if (!(key = ndb_get_notekey_by_id(&txn, tmp_id))) {
    466 			fprintf(stderr, "noteid '%s' not found\n", arg_str);
    467 			res = 89;
    468 			goto cleanup;
    469 		}
    470 
    471 		ndb_note_relay_iterate_start(&txn, &iter, key);
    472 
    473 		for (i = 0; (relay = ndb_note_relay_iterate_next(&iter)); i++) {
    474 			printf("%s\n", relay);
    475 		}
    476 
    477 		fprintf(stderr, "seen on %d relays\n", i);
    478 
    479 		ndb_end_query(&txn);
    480 	} else if (argc == 3 && !strcmp(argv[1], "profile")) {
    481 		arg_str = argv[2];
    482 		if (!hex_decode(arg_str, strlen(arg_str), tmp_id, sizeof(tmp_id))) {
    483 			fprintf(stderr, "failed to decode hex pubkey '%s'\n", arg_str);
    484 			res = 88;
    485 			goto cleanup;
    486 		}
    487 		ndb_begin_query(ndb, &txn);
    488 		if (!(ptr = ndb_get_profile_by_pubkey(&txn, tmp_id, &profile_len, &key))) {
    489 			ndb_end_query(&txn);
    490 			fprintf(stderr, "profile not found\n");
    491 			res = 89;
    492 			goto cleanup;
    493 		}
    494 		ndb_end_query(&txn);
    495 		print_hex(ptr, profile_len);
    496 		printf("\n");
    497 	} else {
    498 		ndb_destroy(ndb);
    499 		return usage();
    500 	}
    501 
    502 cleanup:
    503 	ndb_destroy(ndb);
    504 	return res;
    505 }