nostrdb

an unfairly fast embedded nostr database backed by lmdb
git clone git://jb55.com/nostrdb
Log | Files | Refs | Submodules | README | LICENSE

hash_table.h (16927B)


      1 /*
      2  * The MIT License (MIT)
      3  *
      4  * Copyright (c) 2015 Mikkel F. Jørgensen, dvide.com
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a copy
      7  * of this software and associated documentation files (the "Software"), to deal
      8  * in the Software without restriction, including without limitation the rights
      9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     10  * copies of the Software, and to permit persons to whom the Software is
     11  * furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included in all
     14  * copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     22  * SOFTWARE.
     23  */
     24 
     25 #ifndef HASH_TABLE_H
     26 #define HASH_TABLE_H
     27 
     28 #include "ht_portable.h"
     29 #include <stddef.h>
     30 
     31 /*
     32  * Define HT_PRIVATE to make all name wrapping interface functions static
     33  * inline when including hash implementation directly in user code. This
     34  * can increase performance significantly (3x) on small hash tables with
     35  * fast hash functions because the compiler can better optimize static
     36  * functions. Some compiler optimizations will get the same speed
     37  * with external linkage (clang 4.2 -O4 but not -O3).
     38  *
     39  * Can also be used to simple hide the interface from global
     40  * linkage to avoid name clashes.
     41  */
     42 #ifndef HT_PRIVATE
     43 #define HT_PRIV
     44 #else
     45 #define HT_PRIV static inline
     46 #endif
     47 
     48 /*
     49  * Generic hash table type. This makes it possible to use hash tables
     50  * in datastructures and header files that do not have access to
     51  * the specific hash table implementation. Call to init is optional
     52  * if the structure is zeroed.
     53  *
     54  * Offsets are only used with Robin Hood hashing to segment each chain.
     55  *
     56  * Keys and values are both stored in the same item pointer. There are
     57  * downsides to this over a key / value represention, but since we also
     58  * use less space we can afford lower the load factor and we can have a
     59  * more complex key representations. The smaller bucket size also helps
     60  * when ordering Robin Hood hash chains.
     61  */
     62 typedef struct hash_table hash_table_t;
     63 struct hash_table {
     64     void *table;
     65     char *offsets;
     66     size_t count;
     67     /* May be stored as a direct count, or log2. */
     68     size_t buckets;
     69 };
     70 
     71 enum hash_table_insert_mode {
     72     ht_replace = 0,
     73     ht_keep = 1,
     74     ht_unique = 2,
     75     ht_multi = 3,
     76 };
     77 
     78 /*
     79  * This macro defines the prototypes of the hash table that user code
     80  * needs for linkage.
     81  *
     82  * See also "hash_table_def.h" which builds wrapper functions to a
     83  * generic hash table implementation so each specialization gets its own
     84  * set of named functions.
     85  *
     86  * The HT_ITEM is normally a pointer to and the hash table does not
     87  * store any signficant information internally. Customizations map
     88  * the item value to a key. Certain values can be reserved, for
     89  * example 0 indicates missing value, and sometimes 1 is reserved for
     90  * internal tombstones and 2 may be used to return allocation failure.
     91  */
     92 #define DECLARE_HASH_TABLE(HT_NAME, HT_ITEM)                                \
     93                                                                             \
     94 typedef hash_table_t HT_NAME##_t;                                           \
     95 typedef HT_ITEM HT_NAME##_item_t;                                           \
     96                                                                             \
     97 /* Prototype for user supplied callback when visiting all elements. */      \
     98 typedef void HT_NAME##_visitor_f(void *context, HT_ITEM item);              \
     99                                                                             \
    100 extern const HT_NAME##_item_t HT_NAME##_missing;                            \
    101 extern const HT_NAME##_item_t HT_NAME##_nomem;                              \
    102 extern const HT_NAME##_item_t HT_NAME##_deleted;                            \
    103                                                                             \
    104 static inline int HT_NAME##_is_valid(HT_ITEM item)                          \
    105 {                                                                           \
    106     return                                                                  \
    107         item != HT_NAME##_missing &&                                        \
    108         item != HT_NAME##_nomem &&                                          \
    109         item != HT_NAME##_deleted;                                          \
    110 }                                                                           \
    111                                                                             \
    112 static inline int HT_NAME##_is_missing(HT_ITEM item)                        \
    113 {                                                                           \
    114     return item == HT_NAME##_missing;                                       \
    115 }                                                                           \
    116                                                                             \
    117 static inline int HT_NAME##_is_nomem(HT_ITEM item)                          \
    118 {                                                                           \
    119     return item == HT_NAME##_nomem;                                         \
    120 }                                                                           \
    121                                                                             \
    122 static inline int HT_NAME##_is_deleted(HT_ITEM item)                        \
    123 {                                                                           \
    124     return item == HT_NAME##_deleted;                                       \
    125 }                                                                           \
    126                                                                             \
    127 /*                                                                          \
    128  * Allocates enough buckets to represent count elements without resizing.   \
    129  * The actual number of allocated buckets depends on the load factor        \
    130  * given as a macro argument in the implementation. The bucket number       \
    131  * rounds up to the nearest power of 2.                                     \
    132  *                                                                          \
    133  * `ht` should not be initialized beforehand, otherwise use resize.         \
    134  * Alternatively, it is also valid to zero initialize the table by          \
    135  * other means - this will postpone allocation until needed.                \
    136  *                                                                          \
    137  * The load factor (template argument) should be positive and at most       \
    138  * 100%, otherwise insertion and resize cannot succeed. The recommended     \
    139  * load factor is between 25% and 75%.                                      \
    140  *                                                                          \
    141  * Returns 0 on success, -1 on allocation failure or invalid load factor.   \
    142  */                                                                         \
    143 HT_PRIV int HT_NAME##_init(HT_NAME##_t *ht, size_t count);                  \
    144                                                                             \
    145 /*                                                                          \
    146  * Clears the allocated memory. Optionally takes a destructor               \
    147  * that will visit all items.                                               \
    148  * The table struct may be reused after being destroyed.                    \
    149  * May also be called on a zero initialised hash table.                     \
    150  *                                                                          \
    151  * Can be called in place of clear for more control.                        \
    152  */                                                                         \
    153 HT_PRIV void HT_NAME##_destroy(HT_NAME##_t *ht,                             \
    154         HT_NAME##_visitor_f *destructor, void *context);                    \
    155                                                                             \
    156 /*                                                                          \
    157  * Clears the allocated memory, but does manage memory or state of any      \
    158  * stored items. It is a simpler version of destroy.                        \
    159  */                                                                         \
    160 HT_PRIV void HT_NAME##_clear(HT_NAME##_t *ht);                              \
    161                                                                             \
    162 /*                                                                          \
    163  * Resizes the hash table to hold at least `count` elements.                \
    164  * The actual number of allocated buckets is a strictly larger power of     \
    165  * two. If `count` is smaller than the current number of elements,          \
    166  * that number is used instead of count. Thus, resize(ht, 0) may be         \
    167  * used to reduce the table size after a spike.                             \
    168  * The function is called automatically as elements are inserted,           \
    169  * but shrinking the table should be done manually.                         \
    170  *                                                                          \
    171  * If resizing to same size, table is still reallocated but will then       \
    172  * clean up old tombstones from excessive deletion.                         \
    173  *                                                                          \
    174  * Returns 0 on success, -1 on allocation failure.                          \
    175  */                                                                         \
    176 HT_PRIV int HT_NAME##_resize(HT_NAME##_t *ht, size_t count);                \
    177                                                                             \
    178 /*                                                                          \
    179  * Inserts an item pointer in one of the following modes:                   \
    180  *                                                                          \
    181  * ht_keep:                                                                 \
    182  *   If the key exists, the stored item is kept and returned,               \
    183  *   otherwise it is inserted and null is returned.                         \
    184  *                                                                          \
    185  * ht_replace:                                                              \
    186  *   If the key exists, the stored item is replaced and the old             \
    187  *   item is returned, otherwise the item is inserted and null              \
    188  *   is returned.                                                           \
    189  *                                                                          \
    190  * ht_unique:                                                               \
    191  *   Inserts an item without checking if a key exists. Always return        \
    192  *   null. This is faster when it is known that the key does not exists.    \
    193  *                                                                          \
    194  * ht_multi:                                                                \
    195  *   Same as ht_unique but with the intention that a duplicate key          \
    196  *   might exist. This should not be abused because not all hash table      \
    197  *   implementions work well with too many collissions. Robin Hood          \
    198  *   hashing might reallocate aggressively to keep the chain length         \
    199  *   down. Linear and Quadratic probing do handle this, albeit slow.        \
    200  *                                                                          \
    201  * The inserted item cannot have the value HT_MISSING and depending on      \
    202  * implementation also not HT_DELETED and HT_NOMEM, but the                 \
    203  * definitions are type specific.                                           \
    204  */                                                                         \
    205 HT_PRIV HT_ITEM HT_NAME##_insert(HT_NAME##_t *ht,                           \
    206         const void *key, size_t len, HT_ITEM item, int mode);               \
    207                                                                             \
    208 /* Similar to insert, but derives key from item. */                         \
    209 HT_PRIV HT_ITEM HT_NAME##_insert_item(HT_NAME##_t *ht,                      \
    210         HT_ITEM item, int mode);                                            \
    211                                                                             \
    212 /*                                                                          \
    213  * Finds the first matching item if any, or returns null.                   \
    214  * If there are duplicate keys, the first inserted is returned.             \
    215  */                                                                         \
    216 HT_PRIV HT_ITEM HT_NAME##_find(HT_NAME##_t *ht,                             \
    217         const void *key, size_t len);                                       \
    218                                                                             \
    219 /*                                                                          \
    220  * Removes first inserted item that match the given key, if any.            \
    221  * Returns the removed item if any, otherwise null.                         \
    222  */                                                                         \
    223 HT_PRIV HT_ITEM HT_NAME##_remove(HT_NAME##_t *ht,                           \
    224         const void *key, size_t len);                                       \
    225                                                                             \
    226 /*                                                                          \
    227  * Finds an item that compares the same as the given item but it is         \
    228  * not necessarily the same item if it either isn't stored, or if           \
    229  * there are duplicates in the table.                                       \
    230  */                                                                         \
    231 HT_PRIV HT_ITEM HT_NAME##_find_item(HT_NAME##_t *ht, HT_ITEM item);         \
    232                                                                             \
    233 /*                                                                          \
    234  * This removes the first item that matches the given item, not             \
    235  * necessarily the item itself, and the item need not be present            \
    236  * in the table. Even if the item is in fact removed, it may still          \
    237  * be present if stored multiple times through abuse use of the             \
    238  * insert_unique function.                                                  \
    239  */                                                                         \
    240 HT_PRIV HT_ITEM HT_NAME##_remove_item(HT_NAME##_t *ht, HT_ITEM item);       \
    241                                                                             \
    242 /*                                                                          \
    243  * Calls a function for every item in the hash table. This may be           \
    244  * used for destructing items, provided the table is not accessed           \
    245  * subsequently. In fact, the hash_table_clear function takes an            \
    246  * optional visitor that does exactly that.                                 \
    247  *                                                                          \
    248  * The function is linear of the allocated hash table size, so will be      \
    249  * inefficient if the hash table was resized much larger than the number    \
    250  * of stored items. In that case it is better to store links in the         \
    251  * items. For the default resizing, the function is reasonably fast         \
    252  * because for cache reasons it is very fast to exclude empty elements.     \
    253  */                                                                         \
    254 HT_PRIV void HT_NAME##_visit(HT_NAME##_t *ht,                               \
    255         HT_NAME##_visitor_f *visitor, void *context);                       \
    256                                                                             \
    257 /*                                                                          \
    258  * Returns number of elements in the table. (Not necessarily the number of  \
    259  * unique keys.                                                             \
    260  */                                                                         \
    261 static inline size_t HT_NAME##_count(HT_NAME##_t *ht)                       \
    262 {                                                                           \
    263     return ht->count;                                                       \
    264 }                                                                           \
    265 
    266 #endif /* HASH_TABLE_H */