hash_table.h (16927B)
1 /* 2 * The MIT License (MIT) 3 * 4 * Copyright (c) 2015 Mikkel F. Jørgensen, dvide.com 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25 #ifndef HASH_TABLE_H 26 #define HASH_TABLE_H 27 28 #include "ht_portable.h" 29 #include <stddef.h> 30 31 /* 32 * Define HT_PRIVATE to make all name wrapping interface functions static 33 * inline when including hash implementation directly in user code. This 34 * can increase performance significantly (3x) on small hash tables with 35 * fast hash functions because the compiler can better optimize static 36 * functions. Some compiler optimizations will get the same speed 37 * with external linkage (clang 4.2 -O4 but not -O3). 38 * 39 * Can also be used to simple hide the interface from global 40 * linkage to avoid name clashes. 41 */ 42 #ifndef HT_PRIVATE 43 #define HT_PRIV 44 #else 45 #define HT_PRIV static inline 46 #endif 47 48 /* 49 * Generic hash table type. This makes it possible to use hash tables 50 * in datastructures and header files that do not have access to 51 * the specific hash table implementation. Call to init is optional 52 * if the structure is zeroed. 53 * 54 * Offsets are only used with Robin Hood hashing to segment each chain. 55 * 56 * Keys and values are both stored in the same item pointer. There are 57 * downsides to this over a key / value represention, but since we also 58 * use less space we can afford lower the load factor and we can have a 59 * more complex key representations. The smaller bucket size also helps 60 * when ordering Robin Hood hash chains. 61 */ 62 typedef struct hash_table hash_table_t; 63 struct hash_table { 64 void *table; 65 char *offsets; 66 size_t count; 67 /* May be stored as a direct count, or log2. */ 68 size_t buckets; 69 }; 70 71 enum hash_table_insert_mode { 72 ht_replace = 0, 73 ht_keep = 1, 74 ht_unique = 2, 75 ht_multi = 3, 76 }; 77 78 /* 79 * This macro defines the prototypes of the hash table that user code 80 * needs for linkage. 81 * 82 * See also "hash_table_def.h" which builds wrapper functions to a 83 * generic hash table implementation so each specialization gets its own 84 * set of named functions. 85 * 86 * The HT_ITEM is normally a pointer to and the hash table does not 87 * store any signficant information internally. Customizations map 88 * the item value to a key. Certain values can be reserved, for 89 * example 0 indicates missing value, and sometimes 1 is reserved for 90 * internal tombstones and 2 may be used to return allocation failure. 91 */ 92 #define DECLARE_HASH_TABLE(HT_NAME, HT_ITEM) \ 93 \ 94 typedef hash_table_t HT_NAME##_t; \ 95 typedef HT_ITEM HT_NAME##_item_t; \ 96 \ 97 /* Prototype for user supplied callback when visiting all elements. */ \ 98 typedef void HT_NAME##_visitor_f(void *context, HT_ITEM item); \ 99 \ 100 extern const HT_NAME##_item_t HT_NAME##_missing; \ 101 extern const HT_NAME##_item_t HT_NAME##_nomem; \ 102 extern const HT_NAME##_item_t HT_NAME##_deleted; \ 103 \ 104 static inline int HT_NAME##_is_valid(HT_ITEM item) \ 105 { \ 106 return \ 107 item != HT_NAME##_missing && \ 108 item != HT_NAME##_nomem && \ 109 item != HT_NAME##_deleted; \ 110 } \ 111 \ 112 static inline int HT_NAME##_is_missing(HT_ITEM item) \ 113 { \ 114 return item == HT_NAME##_missing; \ 115 } \ 116 \ 117 static inline int HT_NAME##_is_nomem(HT_ITEM item) \ 118 { \ 119 return item == HT_NAME##_nomem; \ 120 } \ 121 \ 122 static inline int HT_NAME##_is_deleted(HT_ITEM item) \ 123 { \ 124 return item == HT_NAME##_deleted; \ 125 } \ 126 \ 127 /* \ 128 * Allocates enough buckets to represent count elements without resizing. \ 129 * The actual number of allocated buckets depends on the load factor \ 130 * given as a macro argument in the implementation. The bucket number \ 131 * rounds up to the nearest power of 2. \ 132 * \ 133 * `ht` should not be initialized beforehand, otherwise use resize. \ 134 * Alternatively, it is also valid to zero initialize the table by \ 135 * other means - this will postpone allocation until needed. \ 136 * \ 137 * The load factor (template argument) should be positive and at most \ 138 * 100%, otherwise insertion and resize cannot succeed. The recommended \ 139 * load factor is between 25% and 75%. \ 140 * \ 141 * Returns 0 on success, -1 on allocation failure or invalid load factor. \ 142 */ \ 143 HT_PRIV int HT_NAME##_init(HT_NAME##_t *ht, size_t count); \ 144 \ 145 /* \ 146 * Clears the allocated memory. Optionally takes a destructor \ 147 * that will visit all items. \ 148 * The table struct may be reused after being destroyed. \ 149 * May also be called on a zero initialised hash table. \ 150 * \ 151 * Can be called in place of clear for more control. \ 152 */ \ 153 HT_PRIV void HT_NAME##_destroy(HT_NAME##_t *ht, \ 154 HT_NAME##_visitor_f *destructor, void *context); \ 155 \ 156 /* \ 157 * Clears the allocated memory, but does manage memory or state of any \ 158 * stored items. It is a simpler version of destroy. \ 159 */ \ 160 HT_PRIV void HT_NAME##_clear(HT_NAME##_t *ht); \ 161 \ 162 /* \ 163 * Resizes the hash table to hold at least `count` elements. \ 164 * The actual number of allocated buckets is a strictly larger power of \ 165 * two. If `count` is smaller than the current number of elements, \ 166 * that number is used instead of count. Thus, resize(ht, 0) may be \ 167 * used to reduce the table size after a spike. \ 168 * The function is called automatically as elements are inserted, \ 169 * but shrinking the table should be done manually. \ 170 * \ 171 * If resizing to same size, table is still reallocated but will then \ 172 * clean up old tombstones from excessive deletion. \ 173 * \ 174 * Returns 0 on success, -1 on allocation failure. \ 175 */ \ 176 HT_PRIV int HT_NAME##_resize(HT_NAME##_t *ht, size_t count); \ 177 \ 178 /* \ 179 * Inserts an item pointer in one of the following modes: \ 180 * \ 181 * ht_keep: \ 182 * If the key exists, the stored item is kept and returned, \ 183 * otherwise it is inserted and null is returned. \ 184 * \ 185 * ht_replace: \ 186 * If the key exists, the stored item is replaced and the old \ 187 * item is returned, otherwise the item is inserted and null \ 188 * is returned. \ 189 * \ 190 * ht_unique: \ 191 * Inserts an item without checking if a key exists. Always return \ 192 * null. This is faster when it is known that the key does not exists. \ 193 * \ 194 * ht_multi: \ 195 * Same as ht_unique but with the intention that a duplicate key \ 196 * might exist. This should not be abused because not all hash table \ 197 * implementions work well with too many collissions. Robin Hood \ 198 * hashing might reallocate aggressively to keep the chain length \ 199 * down. Linear and Quadratic probing do handle this, albeit slow. \ 200 * \ 201 * The inserted item cannot have the value HT_MISSING and depending on \ 202 * implementation also not HT_DELETED and HT_NOMEM, but the \ 203 * definitions are type specific. \ 204 */ \ 205 HT_PRIV HT_ITEM HT_NAME##_insert(HT_NAME##_t *ht, \ 206 const void *key, size_t len, HT_ITEM item, int mode); \ 207 \ 208 /* Similar to insert, but derives key from item. */ \ 209 HT_PRIV HT_ITEM HT_NAME##_insert_item(HT_NAME##_t *ht, \ 210 HT_ITEM item, int mode); \ 211 \ 212 /* \ 213 * Finds the first matching item if any, or returns null. \ 214 * If there are duplicate keys, the first inserted is returned. \ 215 */ \ 216 HT_PRIV HT_ITEM HT_NAME##_find(HT_NAME##_t *ht, \ 217 const void *key, size_t len); \ 218 \ 219 /* \ 220 * Removes first inserted item that match the given key, if any. \ 221 * Returns the removed item if any, otherwise null. \ 222 */ \ 223 HT_PRIV HT_ITEM HT_NAME##_remove(HT_NAME##_t *ht, \ 224 const void *key, size_t len); \ 225 \ 226 /* \ 227 * Finds an item that compares the same as the given item but it is \ 228 * not necessarily the same item if it either isn't stored, or if \ 229 * there are duplicates in the table. \ 230 */ \ 231 HT_PRIV HT_ITEM HT_NAME##_find_item(HT_NAME##_t *ht, HT_ITEM item); \ 232 \ 233 /* \ 234 * This removes the first item that matches the given item, not \ 235 * necessarily the item itself, and the item need not be present \ 236 * in the table. Even if the item is in fact removed, it may still \ 237 * be present if stored multiple times through abuse use of the \ 238 * insert_unique function. \ 239 */ \ 240 HT_PRIV HT_ITEM HT_NAME##_remove_item(HT_NAME##_t *ht, HT_ITEM item); \ 241 \ 242 /* \ 243 * Calls a function for every item in the hash table. This may be \ 244 * used for destructing items, provided the table is not accessed \ 245 * subsequently. In fact, the hash_table_clear function takes an \ 246 * optional visitor that does exactly that. \ 247 * \ 248 * The function is linear of the allocated hash table size, so will be \ 249 * inefficient if the hash table was resized much larger than the number \ 250 * of stored items. In that case it is better to store links in the \ 251 * items. For the default resizing, the function is reasonably fast \ 252 * because for cache reasons it is very fast to exclude empty elements. \ 253 */ \ 254 HT_PRIV void HT_NAME##_visit(HT_NAME##_t *ht, \ 255 HT_NAME##_visitor_f *visitor, void *context); \ 256 \ 257 /* \ 258 * Returns number of elements in the table. (Not necessarily the number of \ 259 * unique keys. \ 260 */ \ 261 static inline size_t HT_NAME##_count(HT_NAME##_t *ht) \ 262 { \ 263 return ht->count; \ 264 } \ 265 266 #endif /* HASH_TABLE_H */