stringspeed.c (6170B)
1 /* Simple speed tests for a hash of strings. */ 2 #include <ccan/htable/htable_type.h> 3 #include <ccan/htable/htable.c> 4 #include <ccan/tal/str/str.h> 5 #include <ccan/tal/grab_file/grab_file.h> 6 #include <ccan/tal/tal.h> 7 #include <ccan/hash/hash.h> 8 #include <ccan/time/time.h> 9 #include <stdio.h> 10 #include <stdlib.h> 11 #include <string.h> 12 #include <time.h> 13 #include <unistd.h> 14 #include <sys/time.h> 15 16 static size_t hashcount; 17 18 static const char *strkey(const char *str) 19 { 20 return str; 21 } 22 23 static size_t hash_str(const char *key) 24 { 25 hashcount++; 26 return hash(key, strlen(key), 0); 27 } 28 29 static bool cmp(const char *obj, const char *key) 30 { 31 return strcmp(obj, key) == 0; 32 } 33 34 HTABLE_DEFINE_TYPE(char, strkey, hash_str, cmp, htable_str); 35 36 /* Nanoseconds per operation */ 37 static size_t normalize(const struct timeabs *start, 38 const struct timeabs *stop, 39 unsigned int num) 40 { 41 return time_to_nsec(time_divide(time_between(*stop, *start), num)); 42 } 43 44 int main(int argc, char *argv[]) 45 { 46 size_t i, j, num; 47 struct timeabs start, stop; 48 struct htable_str ht; 49 char **words, **misswords; 50 51 words = tal_strsplit(NULL, grab_file(NULL, 52 argv[1] ? argv[1] : "/usr/share/dict/words"), "\n", 53 STR_NO_EMPTY); 54 htable_str_init(&ht); 55 num = tal_count(words) - 1; 56 /* Note that on my system, num is just > 98304, where we double! */ 57 printf("%zu words\n", num); 58 59 /* Append and prepend last char for miss testing. */ 60 misswords = tal_arr(words, char *, num); 61 for (i = 0; i < num; i++) { 62 char lastc; 63 if (strlen(words[i])) 64 lastc = words[i][strlen(words[i])-1]; 65 else 66 lastc = 'z'; 67 misswords[i] = tal_fmt(misswords, "%c%s%c%c", 68 lastc, words[i], lastc, lastc); 69 } 70 71 printf("#01: Initial insert: "); 72 fflush(stdout); 73 start = time_now(); 74 for (i = 0; i < num; i++) 75 htable_str_add(&ht, words[i]); 76 stop = time_now(); 77 printf(" %zu ns\n", normalize(&start, &stop, num)); 78 79 printf("Bytes allocated: %zu\n", 80 sizeof(ht.raw.table[0]) << ht.raw.bits); 81 82 printf("#02: Initial lookup (match): "); 83 fflush(stdout); 84 start = time_now(); 85 for (i = 0; i < num; i++) 86 if (htable_str_get(&ht, words[i]) != words[i]) 87 abort(); 88 stop = time_now(); 89 printf(" %zu ns\n", normalize(&start, &stop, num)); 90 91 printf("#03: Initial lookup (miss): "); 92 fflush(stdout); 93 start = time_now(); 94 for (i = 0; i < num; i++) { 95 if (htable_str_get(&ht, misswords[i])) 96 abort(); 97 } 98 stop = time_now(); 99 printf(" %zu ns\n", normalize(&start, &stop, num)); 100 101 /* Lookups in order are very cache-friendly for judy; try random */ 102 printf("#04: Initial lookup (random): "); 103 fflush(stdout); 104 start = time_now(); 105 for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num) 106 if (htable_str_get(&ht, words[j]) != words[j]) 107 abort(); 108 stop = time_now(); 109 printf(" %zu ns\n", normalize(&start, &stop, num)); 110 111 hashcount = 0; 112 printf("#05: Initial delete all: "); 113 fflush(stdout); 114 start = time_now(); 115 for (i = 0; i < num; i++) 116 if (!htable_str_del(&ht, words[i])) 117 abort(); 118 stop = time_now(); 119 printf(" %zu ns\n", normalize(&start, &stop, num)); 120 121 printf("#06: Initial re-inserting: "); 122 fflush(stdout); 123 start = time_now(); 124 for (i = 0; i < num; i++) 125 htable_str_add(&ht, words[i]); 126 stop = time_now(); 127 printf(" %zu ns\n", normalize(&start, &stop, num)); 128 129 hashcount = 0; 130 printf("#07: Deleting first half: "); 131 fflush(stdout); 132 start = time_now(); 133 for (i = 0; i < num; i+=2) 134 if (!htable_str_del(&ht, words[i])) 135 abort(); 136 stop = time_now(); 137 printf(" %zu ns\n", normalize(&start, &stop, num)); 138 139 printf("#08: Adding (a different) half: "); 140 fflush(stdout); 141 142 start = time_now(); 143 for (i = 0; i < num; i+=2) 144 htable_str_add(&ht, misswords[i]); 145 stop = time_now(); 146 printf(" %zu ns\n", normalize(&start, &stop, num)); 147 148 printf("#09: Lookup after half-change (match): "); 149 fflush(stdout); 150 start = time_now(); 151 for (i = 1; i < num; i+=2) 152 if (htable_str_get(&ht, words[i]) != words[i]) 153 abort(); 154 for (i = 0; i < num; i+=2) { 155 if (htable_str_get(&ht, misswords[i]) != misswords[i]) 156 abort(); 157 } 158 stop = time_now(); 159 printf(" %zu ns\n", normalize(&start, &stop, num)); 160 161 printf("#10: Lookup after half-change (miss): "); 162 fflush(stdout); 163 start = time_now(); 164 for (i = 0; i < num; i+=2) 165 if (htable_str_get(&ht, words[i])) 166 abort(); 167 for (i = 1; i < num; i+=2) { 168 if (htable_str_get(&ht, misswords[i])) 169 abort(); 170 } 171 stop = time_now(); 172 printf(" %zu ns\n", normalize(&start, &stop, num)); 173 174 /* Hashtables with delete markers can fill with markers over time. 175 * so do some changes to see how it operates in long-term. */ 176 printf("#11: Churn 1: "); 177 start = time_now(); 178 for (j = 0; j < num; j+=2) { 179 if (!htable_str_del(&ht, misswords[j])) 180 abort(); 181 if (!htable_str_add(&ht, words[j])) 182 abort(); 183 } 184 stop = time_now(); 185 printf(" %zu ns\n", normalize(&start, &stop, num)); 186 187 printf("#12: Churn 2: "); 188 start = time_now(); 189 for (j = 1; j < num; j+=2) { 190 if (!htable_str_del(&ht, words[j])) 191 abort(); 192 if (!htable_str_add(&ht, misswords[j])) 193 abort(); 194 } 195 stop = time_now(); 196 printf(" %zu ns\n", normalize(&start, &stop, num)); 197 198 printf("#13: Churn 3: "); 199 start = time_now(); 200 for (j = 1; j < num; j+=2) { 201 if (!htable_str_del(&ht, misswords[j])) 202 abort(); 203 if (!htable_str_add(&ht, words[j])) 204 abort(); 205 } 206 stop = time_now(); 207 printf(" %zu ns\n", normalize(&start, &stop, num)); 208 209 /* Now it's back to normal... */ 210 printf("#14: Post-Churn lookup (match): "); 211 fflush(stdout); 212 start = time_now(); 213 for (i = 0; i < num; i++) 214 if (htable_str_get(&ht, words[i]) != words[i]) 215 abort(); 216 stop = time_now(); 217 printf(" %zu ns\n", normalize(&start, &stop, num)); 218 219 printf("#15: Post-Churn lookup (miss): "); 220 fflush(stdout); 221 start = time_now(); 222 for (i = 0; i < num; i++) { 223 if (htable_str_get(&ht, misswords[i])) 224 abort(); 225 } 226 stop = time_now(); 227 printf(" %zu ns\n", normalize(&start, &stop, num)); 228 229 /* Lookups in order are very cache-friendly for judy; try random */ 230 printf("#16: Post-Churn lookup (random): "); 231 fflush(stdout); 232 start = time_now(); 233 for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num) 234 if (htable_str_get(&ht, words[j]) != words[j]) 235 abort(); 236 stop = time_now(); 237 printf(" %zu ns\n", normalize(&start, &stop, num)); 238 239 return 0; 240 }