nostrdb

an unfairly fast embedded nostr database backed by lmdb
git clone git://jb55.com/nostrdb
Log | Files | Refs | Submodules | README | LICENSE

mdb_load.c (11598B)


      1 /* mdb_load.c - memory-mapped database load tool */
      2 /*
      3  * Copyright 2011-2021 Howard Chu, Symas Corp.
      4  * All rights reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted only as authorized by the OpenLDAP
      8  * Public License.
      9  *
     10  * A copy of this license is available in the file LICENSE in the
     11  * top-level directory of the distribution or, alternatively, at
     12  * <http://www.OpenLDAP.org/license.html>.
     13  */
     14 #include <stdio.h>
     15 #include <stdlib.h>
     16 #include <errno.h>
     17 #include <string.h>
     18 #include <ctype.h>
     19 #include <unistd.h>
     20 #include "lmdb.h"
     21 
     22 #define PRINT	1
     23 #define NOHDR	2
     24 static int mode;
     25 
     26 static char *subname = NULL;
     27 
     28 static size_t lineno;
     29 static int version;
     30 
     31 static int flags;
     32 
     33 static char *prog;
     34 
     35 static int Eof;
     36 
     37 static MDB_envinfo info;
     38 
     39 static MDB_val kbuf, dbuf;
     40 static MDB_val k0buf;
     41 
     42 #ifdef _WIN32
     43 #define Z	"I"
     44 #else
     45 #define Z	"z"
     46 #endif
     47 
     48 #define STRLENOF(s)	(sizeof(s)-1)
     49 
     50 typedef struct flagbit {
     51 	int bit;
     52 	char *name;
     53 	int len;
     54 } flagbit;
     55 
     56 #define S(s)	s, STRLENOF(s)
     57 
     58 flagbit dbflags[] = {
     59 	{ MDB_REVERSEKEY, S("reversekey") },
     60 	{ MDB_DUPSORT, S("dupsort") },
     61 	{ MDB_INTEGERKEY, S("integerkey") },
     62 	{ MDB_DUPFIXED, S("dupfixed") },
     63 	{ MDB_INTEGERDUP, S("integerdup") },
     64 	{ MDB_REVERSEDUP, S("reversedup") },
     65 	{ 0, NULL, 0 }
     66 };
     67 
     68 static void readhdr(void)
     69 {
     70 	char *ptr;
     71 
     72 	flags = 0;
     73 	while (fgets(dbuf.mv_data, dbuf.mv_size, stdin) != NULL) {
     74 		lineno++;
     75 		if (!strncmp(dbuf.mv_data, "VERSION=", STRLENOF("VERSION="))) {
     76 			version=atoi((char *)dbuf.mv_data+STRLENOF("VERSION="));
     77 			if (version > 3) {
     78 				fprintf(stderr, "%s: line %" Z "d: unsupported VERSION %d\n",
     79 					prog, lineno, version);
     80 				exit(EXIT_FAILURE);
     81 			}
     82 		} else if (!strncmp(dbuf.mv_data, "HEADER=END", STRLENOF("HEADER=END"))) {
     83 			break;
     84 		} else if (!strncmp(dbuf.mv_data, "format=", STRLENOF("format="))) {
     85 			if (!strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "print", STRLENOF("print")))
     86 				mode |= PRINT;
     87 			else if (strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "bytevalue", STRLENOF("bytevalue"))) {
     88 				fprintf(stderr, "%s: line %" Z "d: unsupported FORMAT %s\n",
     89 					prog, lineno, (char *)dbuf.mv_data+STRLENOF("FORMAT="));
     90 				exit(EXIT_FAILURE);
     91 			}
     92 		} else if (!strncmp(dbuf.mv_data, "database=", STRLENOF("database="))) {
     93 			ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
     94 			if (ptr) *ptr = '\0';
     95 			if (subname) free(subname);
     96 			subname = strdup((char *)dbuf.mv_data+STRLENOF("database="));
     97 		} else if (!strncmp(dbuf.mv_data, "type=", STRLENOF("type="))) {
     98 			if (strncmp((char *)dbuf.mv_data+STRLENOF("type="), "btree", STRLENOF("btree")))  {
     99 				fprintf(stderr, "%s: line %" Z "d: unsupported type %s\n",
    100 					prog, lineno, (char *)dbuf.mv_data+STRLENOF("type="));
    101 				exit(EXIT_FAILURE);
    102 			}
    103 		} else if (!strncmp(dbuf.mv_data, "mapaddr=", STRLENOF("mapaddr="))) {
    104 			int i;
    105 			ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
    106 			if (ptr) *ptr = '\0';
    107 			i = sscanf((char *)dbuf.mv_data+STRLENOF("mapaddr="), "%p", &info.me_mapaddr);
    108 			if (i != 1) {
    109 				fprintf(stderr, "%s: line %" Z "d: invalid mapaddr %s\n",
    110 					prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapaddr="));
    111 				exit(EXIT_FAILURE);
    112 			}
    113 		} else if (!strncmp(dbuf.mv_data, "mapsize=", STRLENOF("mapsize="))) {
    114 			int i;
    115 			ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
    116 			if (ptr) *ptr = '\0';
    117 			i = sscanf((char *)dbuf.mv_data+STRLENOF("mapsize="), "%" Z "u", &info.me_mapsize);
    118 			if (i != 1) {
    119 				fprintf(stderr, "%s: line %" Z "d: invalid mapsize %s\n",
    120 					prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapsize="));
    121 				exit(EXIT_FAILURE);
    122 			}
    123 		} else if (!strncmp(dbuf.mv_data, "maxreaders=", STRLENOF("maxreaders="))) {
    124 			int i;
    125 			ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
    126 			if (ptr) *ptr = '\0';
    127 			i = sscanf((char *)dbuf.mv_data+STRLENOF("maxreaders="), "%u", &info.me_maxreaders);
    128 			if (i != 1) {
    129 				fprintf(stderr, "%s: line %" Z "d: invalid maxreaders %s\n",
    130 					prog, lineno, (char *)dbuf.mv_data+STRLENOF("maxreaders="));
    131 				exit(EXIT_FAILURE);
    132 			}
    133 		} else {
    134 			int i;
    135 			for (i=0; dbflags[i].bit; i++) {
    136 				if (!strncmp(dbuf.mv_data, dbflags[i].name, dbflags[i].len) &&
    137 					((char *)dbuf.mv_data)[dbflags[i].len] == '=') {
    138 					flags |= dbflags[i].bit;
    139 					break;
    140 				}
    141 			}
    142 			if (!dbflags[i].bit) {
    143 				ptr = memchr(dbuf.mv_data, '=', dbuf.mv_size);
    144 				if (!ptr) {
    145 					fprintf(stderr, "%s: line %" Z "d: unexpected format\n",
    146 						prog, lineno);
    147 					exit(EXIT_FAILURE);
    148 				} else {
    149 					*ptr = '\0';
    150 					fprintf(stderr, "%s: line %" Z "d: unrecognized keyword ignored: %s\n",
    151 						prog, lineno, (char *)dbuf.mv_data);
    152 				}
    153 			}
    154 		}
    155 	}
    156 }
    157 
    158 static void badend(void)
    159 {
    160 	fprintf(stderr, "%s: line %" Z "d: unexpected end of input\n",
    161 		prog, lineno);
    162 }
    163 
    164 static int unhex(unsigned char *c2)
    165 {
    166 	int x, c;
    167 	x = *c2++ & 0x4f;
    168 	if (x & 0x40)
    169 		x -= 55;
    170 	c = x << 4;
    171 	x = *c2 & 0x4f;
    172 	if (x & 0x40)
    173 		x -= 55;
    174 	c |= x;
    175 	return c;
    176 }
    177 
    178 static int readline(MDB_val *out, MDB_val *buf)
    179 {
    180 	unsigned char *c1, *c2, *end;
    181 	size_t len, l2;
    182 	int c;
    183 
    184 	if (!(mode & NOHDR)) {
    185 		c = fgetc(stdin);
    186 		if (c == EOF) {
    187 			Eof = 1;
    188 			return EOF;
    189 		}
    190 		if (c != ' ') {
    191 			lineno++;
    192 			if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) {
    193 badend:
    194 				Eof = 1;
    195 				badend();
    196 				return EOF;
    197 			}
    198 			if (c == 'D' && !strncmp(buf->mv_data, "ATA=END", STRLENOF("ATA=END")))
    199 				return EOF;
    200 			goto badend;
    201 		}
    202 	}
    203 	if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) {
    204 		Eof = 1;
    205 		return EOF;
    206 	}
    207 	lineno++;
    208 
    209 	c1 = buf->mv_data;
    210 	len = strlen((char *)c1);
    211 	l2 = len;
    212 
    213 	/* Is buffer too short? */
    214 	while (c1[len-1] != '\n') {
    215 		buf->mv_data = realloc(buf->mv_data, buf->mv_size*2);
    216 		if (!buf->mv_data) {
    217 			Eof = 1;
    218 			fprintf(stderr, "%s: line %" Z "d: out of memory, line too long\n",
    219 				prog, lineno);
    220 			return EOF;
    221 		}
    222 		c1 = buf->mv_data;
    223 		c1 += l2;
    224 		if (fgets((char *)c1, buf->mv_size+1, stdin) == NULL) {
    225 			Eof = 1;
    226 			badend();
    227 			return EOF;
    228 		}
    229 		buf->mv_size *= 2;
    230 		len = strlen((char *)c1);
    231 		l2 += len;
    232 	}
    233 	c1 = c2 = buf->mv_data;
    234 	len = l2;
    235 	c1[--len] = '\0';
    236 	end = c1 + len;
    237 
    238 	if (mode & PRINT) {
    239 		while (c2 < end) {
    240 			if (*c2 == '\\') {
    241 				if (c2[1] == '\\') {
    242 					*c1++ = *c2;
    243 				} else {
    244 					if (c2+3 > end || !isxdigit(c2[1]) || !isxdigit(c2[2])) {
    245 						Eof = 1;
    246 						badend();
    247 						return EOF;
    248 					}
    249 					*c1++ = unhex(++c2);
    250 				}
    251 				c2 += 2;
    252 			} else {
    253 				/* copies are redundant when no escapes were used */
    254 				*c1++ = *c2++;
    255 			}
    256 		}
    257 	} else {
    258 		/* odd length not allowed */
    259 		if (len & 1) {
    260 			Eof = 1;
    261 			badend();
    262 			return EOF;
    263 		}
    264 		while (c2 < end) {
    265 			if (!isxdigit(*c2) || !isxdigit(c2[1])) {
    266 				Eof = 1;
    267 				badend();
    268 				return EOF;
    269 			}
    270 			*c1++ = unhex(c2);
    271 			c2 += 2;
    272 		}
    273 	}
    274 	c2 = out->mv_data = buf->mv_data;
    275 	out->mv_size = c1 - c2;
    276 
    277 	return 0;
    278 }
    279 
    280 static void usage(void)
    281 {
    282 	fprintf(stderr, "usage: %s [-V] [-a] [-f input] [-n] [-s name] [-N] [-T] dbpath\n", prog);
    283 	exit(EXIT_FAILURE);
    284 }
    285 
    286 static int greater(const MDB_val *a, const MDB_val *b)
    287 {
    288 	return 1;
    289 }
    290 
    291 int main(int argc, char *argv[])
    292 {
    293 	int i, rc;
    294 	MDB_env *env;
    295 	MDB_txn *txn;
    296 	MDB_cursor *mc;
    297 	MDB_dbi dbi;
    298 	char *envname;
    299 	int envflags = MDB_NOSYNC, putflags = 0;
    300 	int dohdr = 0, append = 0;
    301 	MDB_val prevk;
    302 
    303 	prog = argv[0];
    304 
    305 	if (argc < 2) {
    306 		usage();
    307 	}
    308 
    309 	/* -a: append records in input order
    310 	 * -f: load file instead of stdin
    311 	 * -n: use NOSUBDIR flag on env_open
    312 	 * -s: load into named subDB
    313 	 * -N: use NOOVERWRITE on puts
    314 	 * -T: read plaintext
    315 	 * -V: print version and exit
    316 	 */
    317 	while ((i = getopt(argc, argv, "af:ns:NTV")) != EOF) {
    318 		switch(i) {
    319 		case 'V':
    320 			printf("%s\n", MDB_VERSION_STRING);
    321 			exit(0);
    322 			break;
    323 		case 'a':
    324 			append = 1;
    325 			break;
    326 		case 'f':
    327 			if (freopen(optarg, "r", stdin) == NULL) {
    328 				fprintf(stderr, "%s: %s: reopen: %s\n",
    329 					prog, optarg, strerror(errno));
    330 				exit(EXIT_FAILURE);
    331 			}
    332 			break;
    333 		case 'n':
    334 			envflags |= MDB_NOSUBDIR;
    335 			break;
    336 		case 's':
    337 			subname = strdup(optarg);
    338 			break;
    339 		case 'N':
    340 			putflags = MDB_NOOVERWRITE|MDB_NODUPDATA;
    341 			break;
    342 		case 'T':
    343 			mode |= NOHDR | PRINT;
    344 			break;
    345 		default:
    346 			usage();
    347 		}
    348 	}
    349 
    350 	if (optind != argc - 1)
    351 		usage();
    352 
    353 	dbuf.mv_size = 4096;
    354 	dbuf.mv_data = malloc(dbuf.mv_size);
    355 
    356 	if (!(mode & NOHDR))
    357 		readhdr();
    358 
    359 	envname = argv[optind];
    360 	rc = mdb_env_create(&env);
    361 	if (rc) {
    362 		fprintf(stderr, "mdb_env_create failed, error %d %s\n", rc, mdb_strerror(rc));
    363 		return EXIT_FAILURE;
    364 	}
    365 
    366 	mdb_env_set_maxdbs(env, 2);
    367 
    368 	if (info.me_maxreaders)
    369 		mdb_env_set_maxreaders(env, info.me_maxreaders);
    370 
    371 	if (info.me_mapsize)
    372 		mdb_env_set_mapsize(env, info.me_mapsize);
    373 
    374 	if (info.me_mapaddr)
    375 		envflags |= MDB_FIXEDMAP;
    376 
    377 	rc = mdb_env_open(env, envname, envflags, 0664);
    378 	if (rc) {
    379 		fprintf(stderr, "mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc));
    380 		goto env_close;
    381 	}
    382 
    383 	kbuf.mv_size = mdb_env_get_maxkeysize(env) * 2 + 2;
    384 	kbuf.mv_data = malloc(kbuf.mv_size * 2);
    385 	k0buf.mv_size = kbuf.mv_size;
    386 	k0buf.mv_data = (char *)kbuf.mv_data + kbuf.mv_size;
    387 	prevk.mv_data = k0buf.mv_data;
    388 
    389 	while(!Eof) {
    390 		MDB_val key, data;
    391 		int batch = 0;
    392 		flags = 0;
    393 		int appflag;
    394 
    395 		if (!dohdr) {
    396 			dohdr = 1;
    397 		} else if (!(mode & NOHDR))
    398 			readhdr();
    399 		
    400 		rc = mdb_txn_begin(env, NULL, 0, &txn);
    401 		if (rc) {
    402 			fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc));
    403 			goto env_close;
    404 		}
    405 
    406 		rc = mdb_open(txn, subname, flags|MDB_CREATE, &dbi);
    407 		if (rc) {
    408 			fprintf(stderr, "mdb_open failed, error %d %s\n", rc, mdb_strerror(rc));
    409 			goto txn_abort;
    410 		}
    411 		prevk.mv_size = 0;
    412 		if (append) {
    413 			mdb_set_compare(txn, dbi, greater);
    414 			if (flags & MDB_DUPSORT)
    415 				mdb_set_dupsort(txn, dbi, greater);
    416 		}
    417 
    418 		rc = mdb_cursor_open(txn, dbi, &mc);
    419 		if (rc) {
    420 			fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc));
    421 			goto txn_abort;
    422 		}
    423 
    424 		while(1) {
    425 			rc = readline(&key, &kbuf);
    426 			if (rc)  /* rc == EOF */
    427 				break;
    428 
    429 			rc = readline(&data, &dbuf);
    430 			if (rc) {
    431 				fprintf(stderr, "%s: line %" Z "d: failed to read key value\n", prog, lineno);
    432 				goto txn_abort;
    433 			}
    434 
    435 			if (append) {
    436 				appflag = MDB_APPEND;
    437 				if (flags & MDB_DUPSORT) {
    438 					if (prevk.mv_size == key.mv_size && !memcmp(prevk.mv_data, key.mv_data, key.mv_size))
    439 						appflag = MDB_CURRENT|MDB_APPENDDUP;
    440 					else {
    441 						memcpy(prevk.mv_data, key.mv_data, key.mv_size);
    442 						prevk.mv_size = key.mv_size;
    443 					}
    444 				}
    445 			} else {
    446 				appflag = 0;
    447 			}
    448 			rc = mdb_cursor_put(mc, &key, &data, putflags|appflag);
    449 			if (rc == MDB_KEYEXIST && putflags)
    450 				continue;
    451 			if (rc) {
    452 				fprintf(stderr, "mdb_cursor_put failed, error %d %s\n", rc, mdb_strerror(rc));
    453 				goto txn_abort;
    454 			}
    455 			batch++;
    456 			if (batch == 100) {
    457 				rc = mdb_txn_commit(txn);
    458 				if (rc) {
    459 					fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n",
    460 						prog, lineno, mdb_strerror(rc));
    461 					goto env_close;
    462 				}
    463 				rc = mdb_txn_begin(env, NULL, 0, &txn);
    464 				if (rc) {
    465 					fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc));
    466 					goto env_close;
    467 				}
    468 				rc = mdb_cursor_open(txn, dbi, &mc);
    469 				if (rc) {
    470 					fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc));
    471 					goto txn_abort;
    472 				}
    473 				if (appflag & MDB_APPENDDUP) {
    474 					MDB_val k, d;
    475 					mdb_cursor_get(mc, &k, &d, MDB_LAST);
    476 				}
    477 				batch = 0;
    478 			}
    479 		}
    480 		rc = mdb_txn_commit(txn);
    481 		txn = NULL;
    482 		if (rc) {
    483 			fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n",
    484 				prog, lineno, mdb_strerror(rc));
    485 			goto env_close;
    486 		}
    487 		mdb_dbi_close(env, dbi);
    488 	}
    489 
    490 txn_abort:
    491 	mdb_txn_abort(txn);
    492 env_close:
    493 	mdb_env_close(env);
    494 
    495 	return rc ? EXIT_FAILURE : EXIT_SUCCESS;
    496 }