libStatGen Software  1
khash.h
1 /* The MIT License
2 
3  Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk>
4 
5  Permission is hereby granted, free of charge, to any person obtaining
6  a copy of this software and associated documentation files (the
7  "Software"), to deal in the Software without restriction, including
8  without limitation the rights to use, copy, modify, merge, publish,
9  distribute, sublicense, and/or sell copies of the Software, and to
10  permit persons to whom the Software is furnished to do so, subject to
11  the following conditions:
12 
13  The above copyright notice and this permission notice shall be
14  included in all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  SOFTWARE.
24 */
25 
26 /*
27  An example:
28 
29 #include "khash.h"
30 KHASH_MAP_INIT_INT(32, char)
31 int main() {
32  int ret, is_missing;
33  khiter_t k;
34  khash_t(32) *h = kh_init(32);
35  k = kh_put(32, h, 5, &ret);
36  if (!ret) kh_del(32, h, k);
37  kh_value(h, k) = 10;
38  k = kh_get(32, h, 10);
39  is_missing = (k == kh_end(h));
40  k = kh_get(32, h, 5);
41  kh_del(32, h, k);
42  for (k = kh_begin(h); k != kh_end(h); ++k)
43  if (kh_exist(h, k)) kh_value(h, k) = 1;
44  kh_destroy(32, h);
45  return 0;
46 }
47 */
48 
49 /*
50  2011-02-14 (0.2.5):
51 
52  * Allow to declare global functions.
53 
54  2009-09-26 (0.2.4):
55 
56  * Improve portability
57 
58  2008-09-19 (0.2.3):
59 
60  * Corrected the example
61  * Improved interfaces
62 
63  2008-09-11 (0.2.2):
64 
65  * Improved speed a little in kh_put()
66 
67  2008-09-10 (0.2.1):
68 
69  * Added kh_clear()
70  * Fixed a compiling error
71 
72  2008-09-02 (0.2.0):
73 
74  * Changed to token concatenation which increases flexibility.
75 
76  2008-08-31 (0.1.2):
77 
78  * Fixed a bug in kh_get(), which has not been tested previously.
79 
80  2008-08-31 (0.1.1):
81 
82  * Added destructor
83 */
84 
85 
86 #ifndef __AC_KHASH_H
87 #define __AC_KHASH_H
88 
89 /*!
90  @header
91 
92  Generic hash table library.
93 
94  @copyright Heng Li
95  */
96 
97 #define AC_VERSION_KHASH_H "0.2.5"
98 
99 #include <stdlib.h>
100 #include <string.h>
101 #include <limits.h>
102 
103 /* compiler specific configuration */
104 
105 #if UINT_MAX == 0xffffffffu
106 typedef unsigned int khint32_t;
107 #elif ULONG_MAX == 0xffffffffu
108 typedef unsigned long khint32_t;
109 #endif
110 
111 #if ULONG_MAX == ULLONG_MAX
112 typedef unsigned long khint64_t;
113 #else
114 typedef unsigned long long khint64_t;
115 #endif
116 
117 #ifdef _MSC_VER
118 #define inline __inline
119 #endif
120 
121 #ifdef _WIN32
122 #define inline __inline
123 #endif
124 
125 typedef khint32_t khint_t;
126 typedef khint_t khiter_t;
127 
128 #define __ac_HASH_PRIME_SIZE 32
129 static const khint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] =
130 {
131  0ul, 3ul, 11ul, 23ul, 53ul,
132  97ul, 193ul, 389ul, 769ul, 1543ul,
133  3079ul, 6151ul, 12289ul, 24593ul, 49157ul,
134  98317ul, 196613ul, 393241ul, 786433ul, 1572869ul,
135  3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul,
136  100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul,
137  3221225473ul, 4294967291ul
138 };
139 
140 #define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
141 #define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
142 #define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
143 #define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
144 #define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
145 #define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
146 #define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
147 
148 static const double __ac_HASH_UPPER = 0.77;
149 
150 #define KHASH_DECLARE(name, khkey_t, khval_t) \
151  typedef struct { \
152  khint_t n_buckets, size, n_occupied, upper_bound; \
153  khint32_t *flags; \
154  khkey_t *keys; \
155  khval_t *vals; \
156  } kh_##name##_t; \
157  extern kh_##name##_t *kh_init_##name(); \
158  extern void kh_destroy_##name(kh_##name##_t *h); \
159  extern void kh_clear_##name(kh_##name##_t *h); \
160  extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \
161  extern void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
162  extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \
163  extern void kh_del_##name(kh_##name##_t *h, khint_t x);
164 
165 #define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
166  typedef struct { \
167  khint_t n_buckets, size, n_occupied, upper_bound; \
168  khint32_t *flags; \
169  khkey_t *keys; \
170  khval_t *vals; \
171  } kh_##name##_t; \
172  SCOPE kh_##name##_t *kh_init_##name() { \
173  return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t)); \
174  } \
175  SCOPE void kh_destroy_##name(kh_##name##_t *h) \
176  { \
177  if (h) { \
178  free(h->keys); free(h->flags); \
179  free(h->vals); \
180  free(h); \
181  } \
182  } \
183  SCOPE void kh_clear_##name(kh_##name##_t *h) \
184  { \
185  if (h && h->flags) { \
186  memset(h->flags, 0xaa, ((h->n_buckets>>4) + 1) * sizeof(khint32_t)); \
187  h->size = h->n_occupied = 0; \
188  } \
189  } \
190  SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
191  { \
192  if (h->n_buckets) { \
193  khint_t inc, k, i, last; \
194  k = __hash_func(key); i = k % h->n_buckets; \
195  inc = 1 + k % (h->n_buckets - 1); last = i; \
196  while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
197  if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
198  else i += inc; \
199  if (i == last) return h->n_buckets; \
200  } \
201  return __ac_iseither(h->flags, i)? h->n_buckets : i; \
202  } else return 0; \
203  } \
204  SCOPE void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
205  { \
206  khint32_t *new_flags = 0; \
207  khint_t j = 1; \
208  { \
209  khint_t t = __ac_HASH_PRIME_SIZE - 1; \
210  while (__ac_prime_list[t] > new_n_buckets) --t; \
211  new_n_buckets = __ac_prime_list[t+1]; \
212  if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; \
213  else { \
214  new_flags = (khint32_t*)malloc(((new_n_buckets>>4) + 1) * sizeof(khint32_t)); \
215  memset(new_flags, 0xaa, ((new_n_buckets>>4) + 1) * sizeof(khint32_t)); \
216  if (h->n_buckets < new_n_buckets) { \
217  h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
218  if (kh_is_map) \
219  h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
220  } \
221  } \
222  } \
223  if (j) { \
224  for (j = 0; j != h->n_buckets; ++j) { \
225  if (__ac_iseither(h->flags, j) == 0) { \
226  khkey_t key = h->keys[j]; \
227  khval_t val; \
228  if (kh_is_map) val = h->vals[j]; \
229  __ac_set_isdel_true(h->flags, j); \
230  while (1) { \
231  khint_t inc, k, i; \
232  k = __hash_func(key); \
233  i = k % new_n_buckets; \
234  inc = 1 + k % (new_n_buckets - 1); \
235  while (!__ac_isempty(new_flags, i)) { \
236  if (i + inc >= new_n_buckets) i = i + inc - new_n_buckets; \
237  else i += inc; \
238  } \
239  __ac_set_isempty_false(new_flags, i); \
240  if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { \
241  { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
242  if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
243  __ac_set_isdel_true(h->flags, i); \
244  } else { \
245  h->keys[i] = key; \
246  if (kh_is_map) h->vals[i] = val; \
247  break; \
248  } \
249  } \
250  } \
251  } \
252  if (h->n_buckets > new_n_buckets) { \
253  h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
254  if (kh_is_map) \
255  h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
256  } \
257  free(h->flags); \
258  h->flags = new_flags; \
259  h->n_buckets = new_n_buckets; \
260  h->n_occupied = h->size; \
261  h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
262  } \
263  } \
264  SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
265  { \
266  khint_t x; \
267  if (h->n_occupied >= h->upper_bound) { \
268  if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); \
269  else kh_resize_##name(h, h->n_buckets + 1); \
270  } \
271  { \
272  khint_t inc, k, i, site, last; \
273  x = site = h->n_buckets; k = __hash_func(key); i = k % h->n_buckets; \
274  if (__ac_isempty(h->flags, i)) x = i; \
275  else { \
276  inc = 1 + k % (h->n_buckets - 1); last = i; \
277  while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
278  if (__ac_isdel(h->flags, i)) site = i; \
279  if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
280  else i += inc; \
281  if (i == last) { x = site; break; } \
282  } \
283  if (x == h->n_buckets) { \
284  if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
285  else x = i; \
286  } \
287  } \
288  } \
289  if (__ac_isempty(h->flags, x)) { \
290  h->keys[x] = key; \
291  __ac_set_isboth_false(h->flags, x); \
292  ++h->size; ++h->n_occupied; \
293  *ret = 1; \
294  } else if (__ac_isdel(h->flags, x)) { \
295  h->keys[x] = key; \
296  __ac_set_isboth_false(h->flags, x); \
297  ++h->size; \
298  *ret = 2; \
299  } else *ret = 0; \
300  return x; \
301  } \
302  SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \
303  { \
304  if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \
305  __ac_set_isdel_true(h->flags, x); \
306  --h->size; \
307  } \
308  }
309 
310 #define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
311  KHASH_INIT2(name, static inline, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
312 
313 /* --- BEGIN OF HASH FUNCTIONS --- */
314 
315 /*! @function
316  @abstract Integer hash function
317  @param key The integer [khint32_t]
318  @return The hash value [khint_t]
319  */
320 #define kh_int_hash_func(key) (khint32_t)(key)
321 /*! @function
322  @abstract Integer comparison function
323  */
324 #define kh_int_hash_equal(a, b) ((a) == (b))
325 /*! @function
326  @abstract 64-bit integer hash function
327  @param key The integer [khint64_t]
328  @return The hash value [khint_t]
329  */
330 #define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11)
331 /*! @function
332  @abstract 64-bit integer comparison function
333  */
334 #define kh_int64_hash_equal(a, b) ((a) == (b))
335 /*! @function
336  @abstract const char* hash function
337  @param s Pointer to a null terminated string
338  @return The hash value
339  */
340 static inline khint_t __ac_X31_hash_string(const char *s)
341 {
342  khint_t h = *s;
343  if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s;
344  return h;
345 }
346 /*! @function
347  @abstract Another interface to const char* hash function
348  @param key Pointer to a null terminated string [const char*]
349  @return The hash value [khint_t]
350  */
351 #define kh_str_hash_func(key) __ac_X31_hash_string(key)
352 /*! @function
353  @abstract Const char* comparison function
354  */
355 #define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
356 
357 /* --- END OF HASH FUNCTIONS --- */
358 
359 /* Other necessary macros... */
360 
361 /*!
362  @abstract Type of the hash table.
363  @param name Name of the hash table [symbol]
364  */
365 #define khash_t(name) kh_##name##_t
366 
367 /*! @function
368  @abstract Initiate a hash table.
369  @param name Name of the hash table [symbol]
370  @return Pointer to the hash table [khash_t(name)*]
371  */
372 #define kh_init(name) kh_init_##name()
373 
374 /*! @function
375  @abstract Destroy a hash table.
376  @param name Name of the hash table [symbol]
377  @param h Pointer to the hash table [khash_t(name)*]
378  */
379 #define kh_destroy(name, h) kh_destroy_##name(h)
380 
381 /*! @function
382  @abstract Reset a hash table without deallocating memory.
383  @param name Name of the hash table [symbol]
384  @param h Pointer to the hash table [khash_t(name)*]
385  */
386 #define kh_clear(name, h) kh_clear_##name(h)
387 
388 /*! @function
389  @abstract Resize a hash table.
390  @param name Name of the hash table [symbol]
391  @param h Pointer to the hash table [khash_t(name)*]
392  @param s New size [khint_t]
393  */
394 #define kh_resize(name, h, s) kh_resize_##name(h, s)
395 
396 /*! @function
397  @abstract Insert a key to the hash table.
398  @param name Name of the hash table [symbol]
399  @param h Pointer to the hash table [khash_t(name)*]
400  @param k Key [type of keys]
401  @param r Extra return code: 0 if the key is present in the hash table;
402  1 if the bucket is empty (never used); 2 if the element in
403  the bucket has been deleted [int*]
404  @return Iterator to the inserted element [khint_t]
405  */
406 #define kh_put(name, h, k, r) kh_put_##name(h, k, r)
407 
408 /*! @function
409  @abstract Retrieve a key from the hash table.
410  @param name Name of the hash table [symbol]
411  @param h Pointer to the hash table [khash_t(name)*]
412  @param k Key [type of keys]
413  @return Iterator to the found element, or kh_end(h) is the element is absent [khint_t]
414  */
415 #define kh_get(name, h, k) kh_get_##name(h, k)
416 
417 /*! @function
418  @abstract Remove a key from the hash table.
419  @param name Name of the hash table [symbol]
420  @param h Pointer to the hash table [khash_t(name)*]
421  @param k Iterator to the element to be deleted [khint_t]
422  */
423 #define kh_del(name, h, k) kh_del_##name(h, k)
424 
425 
426 /*! @function
427  @abstract Test whether a bucket contains data.
428  @param h Pointer to the hash table [khash_t(name)*]
429  @param x Iterator to the bucket [khint_t]
430  @return 1 if containing data; 0 otherwise [int]
431  */
432 #define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))
433 
434 /*! @function
435  @abstract Get key given an iterator
436  @param h Pointer to the hash table [khash_t(name)*]
437  @param x Iterator to the bucket [khint_t]
438  @return Key [type of keys]
439  */
440 #define kh_key(h, x) ((h)->keys[x])
441 
442 /*! @function
443  @abstract Get value given an iterator
444  @param h Pointer to the hash table [khash_t(name)*]
445  @param x Iterator to the bucket [khint_t]
446  @return Value [type of values]
447  @discussion For hash sets, calling this results in segfault.
448  */
449 #define kh_val(h, x) ((h)->vals[x])
450 
451 /*! @function
452  @abstract Alias of kh_val()
453  */
454 #define kh_value(h, x) ((h)->vals[x])
455 
456 /*! @function
457  @abstract Get the start iterator
458  @param h Pointer to the hash table [khash_t(name)*]
459  @return The start iterator [khint_t]
460  */
461 #define kh_begin(h) (khint_t)(0)
462 
463 /*! @function
464  @abstract Get the end iterator
465  @param h Pointer to the hash table [khash_t(name)*]
466  @return The end iterator [khint_t]
467  */
468 #define kh_end(h) ((h)->n_buckets)
469 
470 /*! @function
471  @abstract Get the number of elements in the hash table
472  @param h Pointer to the hash table [khash_t(name)*]
473  @return Number of elements in the hash table [khint_t]
474  */
475 #define kh_size(h) ((h)->size)
476 
477 /*! @function
478  @abstract Get the number of buckets in the hash table
479  @param h Pointer to the hash table [khash_t(name)*]
480  @return Number of buckets in the hash table [khint_t]
481  */
482 #define kh_n_buckets(h) ((h)->n_buckets)
483 
484 /* More conenient interfaces */
485 
486 /*! @function
487  @abstract Instantiate a hash set containing integer keys
488  @param name Name of the hash table [symbol]
489  */
490 #define KHASH_SET_INIT_INT(name) \
491  KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
492 
493 /*! @function
494  @abstract Instantiate a hash map containing integer keys
495  @param name Name of the hash table [symbol]
496  @param khval_t Type of values [type]
497  */
498 #define KHASH_MAP_INIT_INT(name, khval_t) \
499  KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
500 
501 /*! @function
502  @abstract Instantiate a hash map containing 64-bit integer keys
503  @param name Name of the hash table [symbol]
504  */
505 #define KHASH_SET_INIT_INT64(name) \
506  KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
507 
508 /*! @function
509  @abstract Instantiate a hash map containing 64-bit integer keys
510  @param name Name of the hash table [symbol]
511  @param khval_t Type of values [type]
512  */
513 #define KHASH_MAP_INIT_INT64(name, khval_t) \
514  KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
515 
516 typedef const char *kh_cstr_t;
517 /*! @function
518  @abstract Instantiate a hash map containing const char* keys
519  @param name Name of the hash table [symbol]
520  */
521 #define KHASH_SET_INIT_STR(name) \
522  KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
523 
524 /*! @function
525  @abstract Instantiate a hash map containing const char* keys
526  @param name Name of the hash table [symbol]
527  @param khval_t Type of values [type]
528  */
529 #define KHASH_MAP_INIT_STR(name, khval_t) \
530  KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
531 
532 #endif /* __AC_KHASH_H */