mdb_load.c (11598B)
1 /* mdb_load.c - memory-mapped database load tool */ 2 /* 3 * Copyright 2011-2021 Howard Chu, Symas Corp. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted only as authorized by the OpenLDAP 8 * Public License. 9 * 10 * A copy of this license is available in the file LICENSE in the 11 * top-level directory of the distribution or, alternatively, at 12 * <http://www.OpenLDAP.org/license.html>. 13 */ 14 #include <stdio.h> 15 #include <stdlib.h> 16 #include <errno.h> 17 #include <string.h> 18 #include <ctype.h> 19 #include <unistd.h> 20 #include "lmdb.h" 21 22 #define PRINT 1 23 #define NOHDR 2 24 static int mode; 25 26 static char *subname = NULL; 27 28 static size_t lineno; 29 static int version; 30 31 static int flags; 32 33 static char *prog; 34 35 static int Eof; 36 37 static MDB_envinfo info; 38 39 static MDB_val kbuf, dbuf; 40 static MDB_val k0buf; 41 42 #ifdef _WIN32 43 #define Z "I" 44 #else 45 #define Z "z" 46 #endif 47 48 #define STRLENOF(s) (sizeof(s)-1) 49 50 typedef struct flagbit { 51 int bit; 52 char *name; 53 int len; 54 } flagbit; 55 56 #define S(s) s, STRLENOF(s) 57 58 flagbit dbflags[] = { 59 { MDB_REVERSEKEY, S("reversekey") }, 60 { MDB_DUPSORT, S("dupsort") }, 61 { MDB_INTEGERKEY, S("integerkey") }, 62 { MDB_DUPFIXED, S("dupfixed") }, 63 { MDB_INTEGERDUP, S("integerdup") }, 64 { MDB_REVERSEDUP, S("reversedup") }, 65 { 0, NULL, 0 } 66 }; 67 68 static void readhdr(void) 69 { 70 char *ptr; 71 72 flags = 0; 73 while (fgets(dbuf.mv_data, dbuf.mv_size, stdin) != NULL) { 74 lineno++; 75 if (!strncmp(dbuf.mv_data, "VERSION=", STRLENOF("VERSION="))) { 76 version=atoi((char *)dbuf.mv_data+STRLENOF("VERSION=")); 77 if (version > 3) { 78 fprintf(stderr, "%s: line %" Z "d: unsupported VERSION %d\n", 79 prog, lineno, version); 80 exit(EXIT_FAILURE); 81 } 82 } else if (!strncmp(dbuf.mv_data, "HEADER=END", STRLENOF("HEADER=END"))) { 83 break; 84 } else if (!strncmp(dbuf.mv_data, "format=", STRLENOF("format="))) { 85 if (!strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "print", STRLENOF("print"))) 86 mode |= PRINT; 87 else if (strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "bytevalue", STRLENOF("bytevalue"))) { 88 fprintf(stderr, "%s: line %" Z "d: unsupported FORMAT %s\n", 89 prog, lineno, (char *)dbuf.mv_data+STRLENOF("FORMAT=")); 90 exit(EXIT_FAILURE); 91 } 92 } else if (!strncmp(dbuf.mv_data, "database=", STRLENOF("database="))) { 93 ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); 94 if (ptr) *ptr = '\0'; 95 if (subname) free(subname); 96 subname = strdup((char *)dbuf.mv_data+STRLENOF("database=")); 97 } else if (!strncmp(dbuf.mv_data, "type=", STRLENOF("type="))) { 98 if (strncmp((char *)dbuf.mv_data+STRLENOF("type="), "btree", STRLENOF("btree"))) { 99 fprintf(stderr, "%s: line %" Z "d: unsupported type %s\n", 100 prog, lineno, (char *)dbuf.mv_data+STRLENOF("type=")); 101 exit(EXIT_FAILURE); 102 } 103 } else if (!strncmp(dbuf.mv_data, "mapaddr=", STRLENOF("mapaddr="))) { 104 int i; 105 ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); 106 if (ptr) *ptr = '\0'; 107 i = sscanf((char *)dbuf.mv_data+STRLENOF("mapaddr="), "%p", &info.me_mapaddr); 108 if (i != 1) { 109 fprintf(stderr, "%s: line %" Z "d: invalid mapaddr %s\n", 110 prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapaddr=")); 111 exit(EXIT_FAILURE); 112 } 113 } else if (!strncmp(dbuf.mv_data, "mapsize=", STRLENOF("mapsize="))) { 114 int i; 115 ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); 116 if (ptr) *ptr = '\0'; 117 i = sscanf((char *)dbuf.mv_data+STRLENOF("mapsize="), "%" Z "u", &info.me_mapsize); 118 if (i != 1) { 119 fprintf(stderr, "%s: line %" Z "d: invalid mapsize %s\n", 120 prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapsize=")); 121 exit(EXIT_FAILURE); 122 } 123 } else if (!strncmp(dbuf.mv_data, "maxreaders=", STRLENOF("maxreaders="))) { 124 int i; 125 ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); 126 if (ptr) *ptr = '\0'; 127 i = sscanf((char *)dbuf.mv_data+STRLENOF("maxreaders="), "%u", &info.me_maxreaders); 128 if (i != 1) { 129 fprintf(stderr, "%s: line %" Z "d: invalid maxreaders %s\n", 130 prog, lineno, (char *)dbuf.mv_data+STRLENOF("maxreaders=")); 131 exit(EXIT_FAILURE); 132 } 133 } else { 134 int i; 135 for (i=0; dbflags[i].bit; i++) { 136 if (!strncmp(dbuf.mv_data, dbflags[i].name, dbflags[i].len) && 137 ((char *)dbuf.mv_data)[dbflags[i].len] == '=') { 138 flags |= dbflags[i].bit; 139 break; 140 } 141 } 142 if (!dbflags[i].bit) { 143 ptr = memchr(dbuf.mv_data, '=', dbuf.mv_size); 144 if (!ptr) { 145 fprintf(stderr, "%s: line %" Z "d: unexpected format\n", 146 prog, lineno); 147 exit(EXIT_FAILURE); 148 } else { 149 *ptr = '\0'; 150 fprintf(stderr, "%s: line %" Z "d: unrecognized keyword ignored: %s\n", 151 prog, lineno, (char *)dbuf.mv_data); 152 } 153 } 154 } 155 } 156 } 157 158 static void badend(void) 159 { 160 fprintf(stderr, "%s: line %" Z "d: unexpected end of input\n", 161 prog, lineno); 162 } 163 164 static int unhex(unsigned char *c2) 165 { 166 int x, c; 167 x = *c2++ & 0x4f; 168 if (x & 0x40) 169 x -= 55; 170 c = x << 4; 171 x = *c2 & 0x4f; 172 if (x & 0x40) 173 x -= 55; 174 c |= x; 175 return c; 176 } 177 178 static int readline(MDB_val *out, MDB_val *buf) 179 { 180 unsigned char *c1, *c2, *end; 181 size_t len, l2; 182 int c; 183 184 if (!(mode & NOHDR)) { 185 c = fgetc(stdin); 186 if (c == EOF) { 187 Eof = 1; 188 return EOF; 189 } 190 if (c != ' ') { 191 lineno++; 192 if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) { 193 badend: 194 Eof = 1; 195 badend(); 196 return EOF; 197 } 198 if (c == 'D' && !strncmp(buf->mv_data, "ATA=END", STRLENOF("ATA=END"))) 199 return EOF; 200 goto badend; 201 } 202 } 203 if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) { 204 Eof = 1; 205 return EOF; 206 } 207 lineno++; 208 209 c1 = buf->mv_data; 210 len = strlen((char *)c1); 211 l2 = len; 212 213 /* Is buffer too short? */ 214 while (c1[len-1] != '\n') { 215 buf->mv_data = realloc(buf->mv_data, buf->mv_size*2); 216 if (!buf->mv_data) { 217 Eof = 1; 218 fprintf(stderr, "%s: line %" Z "d: out of memory, line too long\n", 219 prog, lineno); 220 return EOF; 221 } 222 c1 = buf->mv_data; 223 c1 += l2; 224 if (fgets((char *)c1, buf->mv_size+1, stdin) == NULL) { 225 Eof = 1; 226 badend(); 227 return EOF; 228 } 229 buf->mv_size *= 2; 230 len = strlen((char *)c1); 231 l2 += len; 232 } 233 c1 = c2 = buf->mv_data; 234 len = l2; 235 c1[--len] = '\0'; 236 end = c1 + len; 237 238 if (mode & PRINT) { 239 while (c2 < end) { 240 if (*c2 == '\\') { 241 if (c2[1] == '\\') { 242 *c1++ = *c2; 243 } else { 244 if (c2+3 > end || !isxdigit(c2[1]) || !isxdigit(c2[2])) { 245 Eof = 1; 246 badend(); 247 return EOF; 248 } 249 *c1++ = unhex(++c2); 250 } 251 c2 += 2; 252 } else { 253 /* copies are redundant when no escapes were used */ 254 *c1++ = *c2++; 255 } 256 } 257 } else { 258 /* odd length not allowed */ 259 if (len & 1) { 260 Eof = 1; 261 badend(); 262 return EOF; 263 } 264 while (c2 < end) { 265 if (!isxdigit(*c2) || !isxdigit(c2[1])) { 266 Eof = 1; 267 badend(); 268 return EOF; 269 } 270 *c1++ = unhex(c2); 271 c2 += 2; 272 } 273 } 274 c2 = out->mv_data = buf->mv_data; 275 out->mv_size = c1 - c2; 276 277 return 0; 278 } 279 280 static void usage(void) 281 { 282 fprintf(stderr, "usage: %s [-V] [-a] [-f input] [-n] [-s name] [-N] [-T] dbpath\n", prog); 283 exit(EXIT_FAILURE); 284 } 285 286 static int greater(const MDB_val *a, const MDB_val *b) 287 { 288 return 1; 289 } 290 291 int main(int argc, char *argv[]) 292 { 293 int i, rc; 294 MDB_env *env; 295 MDB_txn *txn; 296 MDB_cursor *mc; 297 MDB_dbi dbi; 298 char *envname; 299 int envflags = MDB_NOSYNC, putflags = 0; 300 int dohdr = 0, append = 0; 301 MDB_val prevk; 302 303 prog = argv[0]; 304 305 if (argc < 2) { 306 usage(); 307 } 308 309 /* -a: append records in input order 310 * -f: load file instead of stdin 311 * -n: use NOSUBDIR flag on env_open 312 * -s: load into named subDB 313 * -N: use NOOVERWRITE on puts 314 * -T: read plaintext 315 * -V: print version and exit 316 */ 317 while ((i = getopt(argc, argv, "af:ns:NTV")) != EOF) { 318 switch(i) { 319 case 'V': 320 printf("%s\n", MDB_VERSION_STRING); 321 exit(0); 322 break; 323 case 'a': 324 append = 1; 325 break; 326 case 'f': 327 if (freopen(optarg, "r", stdin) == NULL) { 328 fprintf(stderr, "%s: %s: reopen: %s\n", 329 prog, optarg, strerror(errno)); 330 exit(EXIT_FAILURE); 331 } 332 break; 333 case 'n': 334 envflags |= MDB_NOSUBDIR; 335 break; 336 case 's': 337 subname = strdup(optarg); 338 break; 339 case 'N': 340 putflags = MDB_NOOVERWRITE|MDB_NODUPDATA; 341 break; 342 case 'T': 343 mode |= NOHDR | PRINT; 344 break; 345 default: 346 usage(); 347 } 348 } 349 350 if (optind != argc - 1) 351 usage(); 352 353 dbuf.mv_size = 4096; 354 dbuf.mv_data = malloc(dbuf.mv_size); 355 356 if (!(mode & NOHDR)) 357 readhdr(); 358 359 envname = argv[optind]; 360 rc = mdb_env_create(&env); 361 if (rc) { 362 fprintf(stderr, "mdb_env_create failed, error %d %s\n", rc, mdb_strerror(rc)); 363 return EXIT_FAILURE; 364 } 365 366 mdb_env_set_maxdbs(env, 2); 367 368 if (info.me_maxreaders) 369 mdb_env_set_maxreaders(env, info.me_maxreaders); 370 371 if (info.me_mapsize) 372 mdb_env_set_mapsize(env, info.me_mapsize); 373 374 if (info.me_mapaddr) 375 envflags |= MDB_FIXEDMAP; 376 377 rc = mdb_env_open(env, envname, envflags, 0664); 378 if (rc) { 379 fprintf(stderr, "mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); 380 goto env_close; 381 } 382 383 kbuf.mv_size = mdb_env_get_maxkeysize(env) * 2 + 2; 384 kbuf.mv_data = malloc(kbuf.mv_size * 2); 385 k0buf.mv_size = kbuf.mv_size; 386 k0buf.mv_data = (char *)kbuf.mv_data + kbuf.mv_size; 387 prevk.mv_data = k0buf.mv_data; 388 389 while(!Eof) { 390 MDB_val key, data; 391 int batch = 0; 392 flags = 0; 393 int appflag; 394 395 if (!dohdr) { 396 dohdr = 1; 397 } else if (!(mode & NOHDR)) 398 readhdr(); 399 400 rc = mdb_txn_begin(env, NULL, 0, &txn); 401 if (rc) { 402 fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); 403 goto env_close; 404 } 405 406 rc = mdb_open(txn, subname, flags|MDB_CREATE, &dbi); 407 if (rc) { 408 fprintf(stderr, "mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); 409 goto txn_abort; 410 } 411 prevk.mv_size = 0; 412 if (append) { 413 mdb_set_compare(txn, dbi, greater); 414 if (flags & MDB_DUPSORT) 415 mdb_set_dupsort(txn, dbi, greater); 416 } 417 418 rc = mdb_cursor_open(txn, dbi, &mc); 419 if (rc) { 420 fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); 421 goto txn_abort; 422 } 423 424 while(1) { 425 rc = readline(&key, &kbuf); 426 if (rc) /* rc == EOF */ 427 break; 428 429 rc = readline(&data, &dbuf); 430 if (rc) { 431 fprintf(stderr, "%s: line %" Z "d: failed to read key value\n", prog, lineno); 432 goto txn_abort; 433 } 434 435 if (append) { 436 appflag = MDB_APPEND; 437 if (flags & MDB_DUPSORT) { 438 if (prevk.mv_size == key.mv_size && !memcmp(prevk.mv_data, key.mv_data, key.mv_size)) 439 appflag = MDB_CURRENT|MDB_APPENDDUP; 440 else { 441 memcpy(prevk.mv_data, key.mv_data, key.mv_size); 442 prevk.mv_size = key.mv_size; 443 } 444 } 445 } else { 446 appflag = 0; 447 } 448 rc = mdb_cursor_put(mc, &key, &data, putflags|appflag); 449 if (rc == MDB_KEYEXIST && putflags) 450 continue; 451 if (rc) { 452 fprintf(stderr, "mdb_cursor_put failed, error %d %s\n", rc, mdb_strerror(rc)); 453 goto txn_abort; 454 } 455 batch++; 456 if (batch == 100) { 457 rc = mdb_txn_commit(txn); 458 if (rc) { 459 fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n", 460 prog, lineno, mdb_strerror(rc)); 461 goto env_close; 462 } 463 rc = mdb_txn_begin(env, NULL, 0, &txn); 464 if (rc) { 465 fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); 466 goto env_close; 467 } 468 rc = mdb_cursor_open(txn, dbi, &mc); 469 if (rc) { 470 fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); 471 goto txn_abort; 472 } 473 if (appflag & MDB_APPENDDUP) { 474 MDB_val k, d; 475 mdb_cursor_get(mc, &k, &d, MDB_LAST); 476 } 477 batch = 0; 478 } 479 } 480 rc = mdb_txn_commit(txn); 481 txn = NULL; 482 if (rc) { 483 fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n", 484 prog, lineno, mdb_strerror(rc)); 485 goto env_close; 486 } 487 mdb_dbi_close(env, dbi); 488 } 489 490 txn_abort: 491 mdb_txn_abort(txn); 492 env_close: 493 mdb_env_close(env); 494 495 return rc ? EXIT_FAILURE : EXIT_SUCCESS; 496 }