author      gramanas <anastasis.gramm2@gmail.com>      2022-05-10 21:12:33 +0300
committer   gramanas <anastasis.gramm2@gmail.com>      2022-05-10 21:12:33 +0300
commit      16538abf8d1231279133508ba15376145b818518 (patch)
tree        e19079645f87163f674faa333044330c37374ea9 /src/hashmap.c
parent      e739c5dccc0faf13cbddacd1950e203305aa4bab (diff)
download    foodtools-16538abf8d1231279133508ba15376145b818518.tar.gz
            foodtools-16538abf8d1231279133508ba15376145b818518.tar.bz2
            foodtools-16538abf8d1231279133508ba15376145b818518.zip
autotools and check testing suite
Diffstat (limited to 'src/hashmap.c')
-rw-r--r--   src/hashmap.c   936
1 file changed, 936 insertions, 0 deletions
diff --git a/src/hashmap.c b/src/hashmap.c
new file mode 100644
index 0000000..b05231e
--- /dev/null
+++ b/src/hashmap.c
@@ -0,0 +1,936 @@
+// Copyright 2020 Joshua J Baker. All rights reserved.
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stddef.h>
+#include "hashmap.h"
+
+static void *(*_malloc)(size_t) = NULL;
+static void *(*_realloc)(void *, size_t) = NULL;
+static void (*_free)(void *) = NULL;
+
+// hashmap_set_allocator allows for configuring a custom allocator for
+// all hashmap library operations. This function, if needed, should be called
+// only once at startup, prior to calling hashmap_new().
+void hashmap_set_allocator(void *(*malloc)(size_t), void (*free)(void*))
+{
+    _malloc = malloc;
+    _free = free;
+}
+
+#define panic(_msg_) { \
+    fprintf(stderr, "panic: %s (%s:%d)\n", (_msg_), __FILE__, __LINE__); \
+    exit(1); \
+}
+
+struct bucket {
+    uint64_t hash:48;
+    uint64_t dib:16;
+};
+
+// hashmap is an open addressed hash map using Robin Hood hashing.
+struct hashmap {
+    void *(*malloc)(size_t);
+    void *(*realloc)(void *, size_t);
+    void (*free)(void *);
+    bool oom;
+    size_t elsize;
+    size_t cap;
+    uint64_t seed0;
+    uint64_t seed1;
+    uint64_t (*hash)(const void *item, uint64_t seed0, uint64_t seed1);
+    int (*compare)(const void *a, const void *b, void *udata);
+    void (*elfree)(void *item);
+    void *udata;
+    size_t bucketsz;
+    size_t nbuckets;
+    size_t count;
+    size_t mask;
+    size_t growat;
+    size_t shrinkat;
+    void *buckets;
+    void *spare;
+    void *edata;
+};
+
+static struct bucket *bucket_at(struct hashmap *map, size_t index) {
+    return (struct bucket*)(((char*)map->buckets)+(map->bucketsz*index));
+}
+
+static void *bucket_item(struct bucket *entry) {
+    return ((char*)entry)+sizeof(struct bucket);
+}
+
+static uint64_t get_hash(struct hashmap *map, const void *key) {
+    return map->hash(key, map->seed0, map->seed1) << 16 >> 16;
+}
+
+// hashmap_new_with_allocator returns a new hash map using a custom allocator.
+// See hashmap_new for more information.
+struct hashmap *hashmap_new_with_allocator(
+    void *(*_malloc)(size_t),
+    void *(*_realloc)(void*, size_t),
+    void (*_free)(void*),
+    size_t elsize, size_t cap,
+    uint64_t seed0, uint64_t seed1,
+    uint64_t (*hash)(const void *item,
+        uint64_t seed0, uint64_t seed1),
+    int (*compare)(const void *a, const void *b,
+        void *udata),
+    void (*elfree)(void *item),
+    void *udata)
+{
+    _malloc = _malloc ? _malloc : malloc;
+    _realloc = _realloc ? _realloc : realloc;
+    _free = _free ? _free : free;
+    int ncap = 16;
+    if (cap < ncap) {
+        cap = ncap;
+    } else {
+        while (ncap < cap) {
+            ncap *= 2;
+        }
+        cap = ncap;
+    }
+    size_t bucketsz = sizeof(struct bucket) + elsize;
+    while (bucketsz & (sizeof(uintptr_t)-1)) {
+        bucketsz++;
+    }
+    // hashmap + spare + edata
+    size_t size = sizeof(struct hashmap)+bucketsz*2;
+    struct hashmap *map = _malloc(size);
+    if (!map) {
+        return NULL;
+    }
+    memset(map, 0, sizeof(struct hashmap));
+    map->elsize = elsize;
+    map->bucketsz = bucketsz;
+    map->seed0 = seed0;
+    map->seed1 = seed1;
+    map->hash = hash;
+    map->compare = compare;
+    map->elfree = elfree;
+    map->udata = udata;
+    map->spare = ((char*)map)+sizeof(struct hashmap);
+    map->edata = (char*)map->spare+bucketsz;
+    map->cap = cap;
+    map->nbuckets = cap;
+    map->mask = map->nbuckets-1;
+    map->buckets = _malloc(map->bucketsz*map->nbuckets);
+    if (!map->buckets) {
+        _free(map);
+        return NULL;
+    }
+    memset(map->buckets, 0, map->bucketsz*map->nbuckets);
+    map->growat = map->nbuckets*0.75;
+    map->shrinkat = map->nbuckets*0.10;
+    map->malloc = _malloc;
+    map->realloc = _realloc;
+    map->free = _free;
+    return map;
+}
+
+
+// hashmap_new returns a new hash map.
+// Param `elsize` is the size of each element in the map. Every element that
+// is inserted, deleted, or retrieved will be this size.
+// Param `cap` is the default lower capacity of the hashmap. Setting this to
+// zero will default to 16.
+// Params `seed0` and `seed1` are optional seed values that are passed to the
+// following `hash` function. These can be any value you wish but it's often
+// best to use randomly generated values.
+// Param `hash` is a function that generates a hash value for an item. It's
+// important that you provide a good hash function, otherwise it will perform
+// poorly or be vulnerable to denial-of-service attacks. This implementation
+// comes with two helper functions `hashmap_sip()` and `hashmap_murmur()`.
+// Param `compare` is a function that compares items in the map. See the
+// qsort stdlib function for an example of how this function works.
+// The hashmap must be freed with hashmap_free().
+// Param `elfree` is a function that frees a specific item. This should be NULL
+// unless you're storing some kind of reference data in the hash.
+struct hashmap *hashmap_new(size_t elsize, size_t cap,
+                            uint64_t seed0, uint64_t seed1,
+                            uint64_t (*hash)(const void *item,
+                                uint64_t seed0, uint64_t seed1),
+                            int (*compare)(const void *a, const void *b,
+                                void *udata),
+                            void (*elfree)(void *item),
+                            void *udata)
+{
+    return hashmap_new_with_allocator(
+        (_malloc?_malloc:malloc),
+        (_realloc?_realloc:realloc),
+        (_free?_free:free),
+        elsize, cap, seed0, seed1, hash, compare, elfree, udata
+    );
+}
+
+static void free_elements(struct hashmap *map) {
+    if (map->elfree) {
+        for (size_t i = 0; i < map->nbuckets; i++) {
+            struct bucket *bucket = bucket_at(map, i);
+            if (bucket->dib) map->elfree(bucket_item(bucket));
+        }
+    }
+}
+
+
+// hashmap_clear quickly clears the map.
+// Every item is called with the element-freeing function given in hashmap_new,
+// if present, to free any data referenced in the elements of the hashmap.
+// When update_cap is true, the map's capacity will be updated to match
+// the current number of allocated buckets. This is an optimization to ensure
+// that this operation does not perform any allocations.
+void hashmap_clear(struct hashmap *map, bool update_cap) {
+    map->count = 0;
+    free_elements(map);
+    if (update_cap) {
+        map->cap = map->nbuckets;
+    } else if (map->nbuckets != map->cap) {
+        void *new_buckets = map->malloc(map->bucketsz*map->cap);
+        if (new_buckets) {
+            map->free(map->buckets);
+            map->buckets = new_buckets;
+        }
+        map->nbuckets = map->cap;
+    }
+    memset(map->buckets, 0, map->bucketsz*map->nbuckets);
+    map->mask = map->nbuckets-1;
+    map->growat = map->nbuckets*0.75;
+    map->shrinkat = map->nbuckets*0.10;
+}
+
+
+static bool resize(struct hashmap *map, size_t new_cap) {
+    struct hashmap *map2 = hashmap_new(map->elsize, new_cap, map->seed1,
+                                       map->seed1, map->hash, map->compare,
+                                       map->elfree, map->udata);
+    if (!map2) {
+        return false;
+    }
+    for (size_t i = 0; i < map->nbuckets; i++) {
+        struct bucket *entry = bucket_at(map, i);
+        if (!entry->dib) {
+            continue;
+        }
+        entry->dib = 1;
+        size_t j = entry->hash & map2->mask;
+        for (;;) {
+            struct bucket *bucket = bucket_at(map2, j);
+            if (bucket->dib == 0) {
+                memcpy(bucket, entry, map->bucketsz);
+                break;
+            }
+            if (bucket->dib < entry->dib) {
+                memcpy(map2->spare, bucket, map->bucketsz);
+                memcpy(bucket, entry, map->bucketsz);
+                memcpy(entry, map2->spare, map->bucketsz);
+            }
+            j = (j + 1) & map2->mask;
+            entry->dib += 1;
+        }
+    }
+    map->free(map->buckets);
+    map->buckets = map2->buckets;
+    map->nbuckets = map2->nbuckets;
+    map->mask = map2->mask;
+    map->growat = map2->growat;
+    map->shrinkat = map2->shrinkat;
+    map->free(map2);
+    return true;
+}
+
+// hashmap_set inserts or replaces an item in the hash map. If an item is
+// replaced then it is returned, otherwise NULL is returned. This operation
+// may allocate memory. If the system is unable to allocate additional
+// memory then NULL is returned and hashmap_oom() returns true.
+void *hashmap_set(struct hashmap *map, void *item) {
+    if (!item) {
+        panic("item is null");
+    }
+    map->oom = false;
+    if (map->count == map->growat) {
+        if (!resize(map, map->nbuckets*2)) {
+            map->oom = true;
+            return NULL;
+        }
+    }
+
+
+    struct bucket *entry = map->edata;
+    entry->hash = get_hash(map, item);
+    entry->dib = 1;
+    memcpy(bucket_item(entry), item, map->elsize);
+
+    size_t i = entry->hash & map->mask;
+    for (;;) {
+        struct bucket *bucket = bucket_at(map, i);
+        if (bucket->dib == 0) {
+            memcpy(bucket, entry, map->bucketsz);
+            map->count++;
+            return NULL;
+        }
+        if (entry->hash == bucket->hash &&
+            map->compare(bucket_item(entry), bucket_item(bucket),
+                         map->udata) == 0)
+        {
+            memcpy(map->spare, bucket_item(bucket), map->elsize);
+            memcpy(bucket_item(bucket), bucket_item(entry), map->elsize);
+            return map->spare;
+        }
+        if (bucket->dib < entry->dib) {
+            memcpy(map->spare, bucket, map->bucketsz);
+            memcpy(bucket, entry, map->bucketsz);
+            memcpy(entry, map->spare, map->bucketsz);
+        }
+        i = (i + 1) & map->mask;
+        entry->dib += 1;
+    }
+}
+
+// hashmap_get returns the item based on the provided key. If the item is not
+// found then NULL is returned.
+void *hashmap_get(struct hashmap *map, const void *key) {
+    if (!key) {
+        panic("key is null");
+    }
+    uint64_t hash = get_hash(map, key);
+    size_t i = hash & map->mask;
+    for (;;) {
+        struct bucket *bucket = bucket_at(map, i);
+        if (!bucket->dib) {
+            return NULL;
+        }
+        if (bucket->hash == hash &&
+            map->compare(key, bucket_item(bucket), map->udata) == 0)
+        {
+            return bucket_item(bucket);
+        }
+        i = (i + 1) & map->mask;
+    }
+}
+
+// hashmap_probe returns the item in the bucket at position or NULL if an item
+// is not set for that bucket. The position is taken modulo the number of
+// buckets in the hashmap.
+void *hashmap_probe(struct hashmap *map, uint64_t position) {
+    size_t i = position & map->mask;
+    struct bucket *bucket = bucket_at(map, i);
+    if (!bucket->dib) {
+        return NULL;
+    }
+    return bucket_item(bucket);
+}
+
+
+// hashmap_delete removes an item from the hash map and returns it. If the
+// item is not found then NULL is returned.
+void *hashmap_delete(struct hashmap *map, void *key) {
+    if (!key) {
+        panic("key is null");
+    }
+    map->oom = false;
+    uint64_t hash = get_hash(map, key);
+    size_t i = hash & map->mask;
+    for (;;) {
+        struct bucket *bucket = bucket_at(map, i);
+        if (!bucket->dib) {
+            return NULL;
+        }
+        if (bucket->hash == hash &&
+            map->compare(key, bucket_item(bucket), map->udata) == 0)
+        {
+            memcpy(map->spare, bucket_item(bucket), map->elsize);
+            bucket->dib = 0;
+            for (;;) {
+                struct bucket *prev = bucket;
+                i = (i + 1) & map->mask;
+                bucket = bucket_at(map, i);
+                if (bucket->dib <= 1) {
+                    prev->dib = 0;
+                    break;
+                }
+                memcpy(prev, bucket, map->bucketsz);
+                prev->dib--;
+            }
+            map->count--;
+            if (map->nbuckets > map->cap && map->count <= map->shrinkat) {
+                // Ignore the return value. It's ok for the resize operation to
+                // fail to allocate enough memory because a shrink operation
+                // does not change the integrity of the data.
+                resize(map, map->nbuckets/2);
+            }
+            return map->spare;
+        }
+        i = (i + 1) & map->mask;
+    }
+}
+
+// hashmap_count returns the number of items in the hash map.
+size_t hashmap_count(struct hashmap *map) {
+    return map->count;
+}
+
+// hashmap_free frees the hash map.
+// Every item is called with the element-freeing function given in hashmap_new,
+// if present, to free any data referenced in the elements of the hashmap.
+void hashmap_free(struct hashmap *map) {
+    if (!map) return;
+    free_elements(map);
+    map->free(map->buckets);
+    map->free(map);
+}
+
+// hashmap_oom returns true if the last hashmap_set() call failed due to the
+// system being out of memory.
+bool hashmap_oom(struct hashmap *map) {
+    return map->oom;
+}
+
+// hashmap_scan iterates over all items in the hash map.
+// Param `iter` can return false to stop iteration early.
+// Returns false if the iteration has been stopped early.
+bool hashmap_scan(struct hashmap *map,
+                  bool (*iter)(const void *item, void *udata), void *udata)
+{
+    for (size_t i = 0; i < map->nbuckets; i++) {
+        struct bucket *bucket = bucket_at(map, i);
+        if (bucket->dib) {
+            if (!iter(bucket_item(bucket), udata)) {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+//-----------------------------------------------------------------------------
+// SipHash reference C implementation
+//
+// Copyright (c) 2012-2016 Jean-Philippe Aumasson
+// <jeanphilippe.aumasson@gmail.com>
+// Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to>
+//
+// To the extent possible under law, the author(s) have dedicated all copyright
+// and related and neighboring rights to this software to the public domain
+// worldwide. This software is distributed without any warranty.
+//
+// You should have received a copy of the CC0 Public Domain Dedication along
+// with this software. If not, see
+// <http://creativecommons.org/publicdomain/zero/1.0/>.
+//
+// default: SipHash-2-4
+//-----------------------------------------------------------------------------
+static uint64_t SIP64(const uint8_t *in, const size_t inlen,
+                      uint64_t seed0, uint64_t seed1)
+{
+#define U8TO64_LE(p) \
+    { (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \
+        ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \
+        ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \
+        ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56)) }
+#define U64TO8_LE(p, v) \
+    { U32TO8_LE((p), (uint32_t)((v))); \
+      U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); }
+#define U32TO8_LE(p, v) \
+    { (p)[0] = (uint8_t)((v)); \
+      (p)[1] = (uint8_t)((v) >> 8); \
+      (p)[2] = (uint8_t)((v) >> 16); \
+      (p)[3] = (uint8_t)((v) >> 24); }
+#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
+#define SIPROUND \
+    { v0 += v1; v1 = ROTL(v1, 13); \
+      v1 ^= v0; v0 = ROTL(v0, 32); \
+      v2 += v3; v3 = ROTL(v3, 16); \
+      v3 ^= v2; \
+      v0 += v3; v3 = ROTL(v3, 21); \
+      v3 ^= v0; \
+      v2 += v1; v1 = ROTL(v1, 17); \
+      v1 ^= v2; v2 = ROTL(v2, 32); }
+    uint64_t k0 = U8TO64_LE((uint8_t*)&seed0);
+    uint64_t k1 = U8TO64_LE((uint8_t*)&seed1);
+    uint64_t v3 = UINT64_C(0x7465646279746573) ^ k1;
+    uint64_t v2 = UINT64_C(0x6c7967656e657261) ^ k0;
+    uint64_t v1 = UINT64_C(0x646f72616e646f6d) ^ k1;
+    uint64_t v0 = UINT64_C(0x736f6d6570736575) ^ k0;
+    const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t));
+    for (; in != end; in += 8) {
+        uint64_t m = U8TO64_LE(in);
+        v3 ^= m;
+        SIPROUND; SIPROUND;
+        v0 ^= m;
+    }
+    const int left = inlen & 7;
+    uint64_t b = ((uint64_t)inlen) << 56;
+    switch (left) {
+    case 7: b |= ((uint64_t)in[6]) << 48;
+    case 6: b |= ((uint64_t)in[5]) << 40;
+    case 5: b |= ((uint64_t)in[4]) << 32;
+    case 4: b |= ((uint64_t)in[3]) << 24;
+    case 3: b |= ((uint64_t)in[2]) << 16;
+    case 2: b |= ((uint64_t)in[1]) << 8;
+    case 1: b |= ((uint64_t)in[0]); break;
+    case 0: break;
+    }
+    v3 ^= b;
+    SIPROUND; SIPROUND;
+    v0 ^= b;
+    v2 ^= 0xff;
+    SIPROUND; SIPROUND; SIPROUND; SIPROUND;
+    b = v0 ^ v1 ^ v2 ^ v3;
+    uint64_t out = 0;
+    U64TO8_LE((uint8_t*)&out, b);
+    return out;
+}
+
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+//
+// Murmur3_86_128
+//-----------------------------------------------------------------------------
+static void MM86128(const void *key, const int len, uint32_t seed, void *out) {
+#define ROTL32(x, r) ((x << r) | (x >> (32 - r)))
+#define FMIX32(h) h^=h>>16; h*=0x85ebca6b; h^=h>>13; h*=0xc2b2ae35; h^=h>>16;
+    const uint8_t * data = (const uint8_t*)key;
+    const int nblocks = len / 16;
+    uint32_t h1 = seed;
+    uint32_t h2 = seed;
+    uint32_t h3 = seed;
+    uint32_t h4 = seed;
+    uint32_t c1 = 0x239b961b;
+    uint32_t c2 = 0xab0e9789;
+    uint32_t c3 = 0x38b34ae5;
+    uint32_t c4 = 0xa1e38b93;
+    const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
+    for (int i = -nblocks; i; i++) {
+        uint32_t k1 = blocks[i*4+0];
+        uint32_t k2 = blocks[i*4+1];
+        uint32_t k3 = blocks[i*4+2];
+        uint32_t k4 = blocks[i*4+3];
+        k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+        h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
+        k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
+        h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
+        k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
+        h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
+        k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
+        h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
+    }
+    const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+    uint32_t k1 = 0;
+    uint32_t k2 = 0;
+    uint32_t k3 = 0;
+    uint32_t k4 = 0;
+    switch(len & 15) {
+    case 15: k4 ^= tail[14] << 16;
+    case 14: k4 ^= tail[13] << 8;
+    case 13: k4 ^= tail[12] << 0;
+             k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
+    case 12: k3 ^= tail[11] << 24;
+    case 11: k3 ^= tail[10] << 16;
+    case 10: k3 ^= tail[ 9] << 8;
+    case  9: k3 ^= tail[ 8] << 0;
+             k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
+    case  8: k2 ^= tail[ 7] << 24;
+    case  7: k2 ^= tail[ 6] << 16;
+    case  6: k2 ^= tail[ 5] << 8;
+    case  5: k2 ^= tail[ 4] << 0;
+             k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
+    case  4: k1 ^= tail[ 3] << 24;
+    case  3: k1 ^= tail[ 2] << 16;
+    case  2: k1 ^= tail[ 1] << 8;
+    case  1: k1 ^= tail[ 0] << 0;
+             k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+    };
+    h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
+    h1 += h2; h1 += h3; h1 += h4;
+    h2 += h1; h3 += h1; h4 += h1;
+    FMIX32(h1); FMIX32(h2); FMIX32(h3); FMIX32(h4);
+    h1 += h2; h1 += h3; h1 += h4;
+    h2 += h1; h3 += h1; h4 += h1;
+    ((uint32_t*)out)[0] = h1;
+    ((uint32_t*)out)[1] = h2;
+    ((uint32_t*)out)[2] = h3;
+    ((uint32_t*)out)[3] = h4;
+}
+
+// hashmap_sip returns a hash value for `data` using SipHash-2-4.
+uint64_t hashmap_sip(const void *data, size_t len,
+                     uint64_t seed0, uint64_t seed1)
+{
+    return SIP64((uint8_t*)data, len, seed0, seed1);
+}
+
+// hashmap_murmur returns a hash value for `data` using Murmur3_86_128.
+uint64_t hashmap_murmur(const void *data, size_t len,
+                        uint64_t seed0, uint64_t seed1)
+{
+    char out[16];
+    MM86128(data, len, seed0, &out);
+    return *(uint64_t*)out;
+}
+
+//==============================================================================
+// TESTS AND BENCHMARKS
+// $ cc -DHASHMAP_TEST hashmap.c && ./a.out              # run tests
+// $ cc -DHASHMAP_TEST -O3 hashmap.c && BENCH=1 ./a.out  # run benchmarks
+//==============================================================================
+#ifdef HASHMAP_TEST
+
+static size_t deepcount(struct hashmap *map) {
+    size_t count = 0;
+    for (size_t i = 0; i < map->nbuckets; i++) {
+        if (bucket_at(map, i)->dib) {
+            count++;
+        }
+    }
+    return count;
+}
+
+
+#pragma GCC diagnostic ignored "-Wextra"
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <assert.h>
+#include <stdio.h>
+#include "hashmap.h"
+
+static bool rand_alloc_fail = false;
+static int rand_alloc_fail_odds = 3; // 1 in 3 chance malloc will fail.
+static uintptr_t total_allocs = 0;
+static uintptr_t total_mem = 0;
+
+static void *xmalloc(size_t size) {
+    if (rand_alloc_fail && rand()%rand_alloc_fail_odds == 0) {
+        return NULL;
+    }
+    void *mem = malloc(sizeof(uintptr_t)+size);
+    assert(mem);
+    *(uintptr_t*)mem = size;
+    total_allocs++;
+    total_mem += size;
+    return (char*)mem+sizeof(uintptr_t);
+}
+
+static void xfree(void *ptr) {
+    if (ptr) {
+        total_mem -= *(uintptr_t*)((char*)ptr-sizeof(uintptr_t));
+        free((char*)ptr-sizeof(uintptr_t));
+        total_allocs--;
+    }
+}
+
+static void shuffle(void *array, size_t numels, size_t elsize) {
+    char tmp[elsize];
+    char *arr = array;
+    for (size_t i = 0; i < numels - 1; i++) {
+        int j = i + rand() / (RAND_MAX / (numels - i) + 1);
+        memcpy(tmp, arr + j * elsize, elsize);
+        memcpy(arr + j * elsize, arr + i * elsize, elsize);
+        memcpy(arr + i * elsize, tmp, elsize);
+    }
+}
+
+static bool iter_ints(const void *item, void *udata) {
+    int *vals = *(int**)udata;
+    vals[*(int*)item] = 1;
+    return true;
+}
+
+static int compare_ints(const void *a, const void *b) {
+    return *(int*)a - *(int*)b;
+}
+
+static int compare_ints_udata(const void *a, const void *b, void *udata) {
+    return *(int*)a - *(int*)b;
+}
+
+static int compare_strs(const void *a, const void *b, void *udata) {
+    return strcmp(*(char**)a, *(char**)b);
+}
+
+static uint64_t hash_int(const void *item, uint64_t seed0, uint64_t seed1) {
+    return hashmap_murmur(item, sizeof(int), seed0, seed1);
+}
+
+static uint64_t hash_str(const void *item, uint64_t seed0, uint64_t seed1) {
+    return hashmap_murmur(*(char**)item, strlen(*(char**)item), seed0, seed1);
+}
+
+static void free_str(void *item) {
+    xfree(*(char**)item);
+}
+
+static void all() {
+    int seed = getenv("SEED")?atoi(getenv("SEED")):time(NULL);
+    int N = getenv("N")?atoi(getenv("N")):2000;
+    printf("seed=%d, count=%d, item_size=%zu\n", seed, N, sizeof(int));
+    srand(seed);
+
+    rand_alloc_fail = true;
+
+    // test sip and murmur hashes
+    assert(hashmap_sip("hello", 5, 1, 2) == 2957200328589801622);
+    assert(hashmap_murmur("hello", 5, 1, 2) == 1682575153221130884);
+
+    int *vals;
+    while (!(vals = xmalloc(N * sizeof(int)))) {}
+    for (int i = 0; i < N; i++) {
+        vals[i] = i;
+    }
+
+    struct hashmap *map;
+
+    while (!(map = hashmap_new(sizeof(int), 0, seed, seed,
+                               hash_int, compare_ints_udata, NULL, NULL))) {}
+    shuffle(vals, N, sizeof(int));
+    for (int i = 0; i < N; i++) {
+        // printf("== %d ==\n", vals[i]);
+        assert(map->count == i);
+        assert(map->count == hashmap_count(map));
+        assert(map->count == deepcount(map));
+        int *v;
+        assert(!hashmap_get(map, &vals[i]));
+        assert(!hashmap_delete(map, &vals[i]));
+        while (true) {
+            assert(!hashmap_set(map, &vals[i]));
+            if (!hashmap_oom(map)) {
+                break;
+            }
+        }
+
+        for (int j = 0; j < i; j++) {
+            v = hashmap_get(map, &vals[j]);
+            assert(v && *v == vals[j]);
+        }
+        while (true) {
+            v = hashmap_set(map, &vals[i]);
+            if (!v) {
+                assert(hashmap_oom(map));
+                continue;
+            } else {
+                assert(!hashmap_oom(map));
+                assert(v && *v == vals[i]);
+                break;
+            }
+        }
+        v = hashmap_get(map, &vals[i]);
+        assert(v && *v == vals[i]);
+        v = hashmap_delete(map, &vals[i]);
+        assert(v && *v == vals[i]);
+        assert(!hashmap_get(map, &vals[i]));
+        assert(!hashmap_delete(map, &vals[i]));
+        assert(!hashmap_set(map, &vals[i]));
+        assert(map->count == i+1);
+        assert(map->count == hashmap_count(map));
+        assert(map->count == deepcount(map));
+    }
+
+    int *vals2;
+    while (!(vals2 = xmalloc(N * sizeof(int)))) {}
+    memset(vals2, 0, N * sizeof(int));
+    assert(hashmap_scan(map, iter_ints, &vals2));
+    for (int i = 0; i < N; i++) {
+        assert(vals2[i] == 1);
+    }
+    xfree(vals2);
+
+    shuffle(vals, N, sizeof(int));
+    for (int i = 0; i < N; i++) {
+        int *v;
+        v = hashmap_delete(map, &vals[i]);
+        assert(v && *v == vals[i]);
+        assert(!hashmap_get(map, &vals[i]));
+        assert(map->count == N-i-1);
+        assert(map->count == hashmap_count(map));
+        assert(map->count == deepcount(map));
+        for (int j = N-1; j > i; j--) {
+            v = hashmap_get(map, &vals[j]);
+            assert(v && *v == vals[j]);
+        }
+    }
+
+    for (int i = 0; i < N; i++) {
+        while (true) {
+            assert(!hashmap_set(map, &vals[i]));
+            if (!hashmap_oom(map)) {
+                break;
+            }
+        }
+    }
+
+    assert(map->count != 0);
+    size_t prev_cap = map->cap;
+    hashmap_clear(map, true);
+    assert(prev_cap < map->cap);
+    assert(map->count == 0);
+
+
+    for (int i = 0; i < N; i++) {
+        while (true) {
+            assert(!hashmap_set(map, &vals[i]));
+            if (!hashmap_oom(map)) {
+                break;
+            }
+        }
+    }
+
+    prev_cap = map->cap;
+    hashmap_clear(map, false);
+    assert(prev_cap == map->cap);
+
+    hashmap_free(map);
+
+    xfree(vals);
+
+
+    while (!(map = hashmap_new(sizeof(char*), 0, seed, seed,
+                               hash_str, compare_strs, free_str, NULL)));
+
+    for (int i = 0; i < N; i++) {
+        char *str;
+        while (!(str = xmalloc(16)));
+        sprintf(str, "s%i", i);
+        while(!hashmap_set(map, &str));
+    }
+
+    hashmap_clear(map, false);
+    assert(hashmap_count(map) == 0);
+
+    for (int i = 0; i < N; i++) {
+        char *str;
+        while (!(str = xmalloc(16)));
+        sprintf(str, "s%i", i);
+        while(!hashmap_set(map, &str));
+    }
+
+    hashmap_free(map);
+
+    if (total_allocs != 0) {
+        fprintf(stderr, "total_allocs: expected 0, got %lu\n", total_allocs);
+        exit(1);
+    }
+}
+
+#define bench(name, N, code) {{ \
+    if (strlen(name) > 0) { \
+        printf("%-14s ", name); \
+    } \
+    size_t tmem = total_mem; \
+    size_t tallocs = total_allocs; \
+    uint64_t bytes = 0; \
+    clock_t begin = clock(); \
+    for (int i = 0; i < N; i++) { \
+        (code); \
+    } \
+    clock_t end = clock(); \
+    double elapsed_secs = (double)(end - begin) / CLOCKS_PER_SEC; \
+    double bytes_sec = (double)bytes/elapsed_secs; \
+    printf("%d ops in %.3f secs, %.0f ns/op, %.0f op/sec", \
+        N, elapsed_secs, \
+        elapsed_secs/(double)N*1e9, \
+        (double)N/elapsed_secs \
+    ); \
+    if (bytes > 0) { \
+        printf(", %.1f GB/sec", bytes_sec/1024/1024/1024); \
+    } \
+    if (total_mem > tmem) { \
+        size_t used_mem = total_mem-tmem; \
+        printf(", %.2f bytes/op", (double)used_mem/N); \
+    } \
+    if (total_allocs > tallocs) { \
+        size_t used_allocs = total_allocs-tallocs; \
+        printf(", %.2f allocs/op", (double)used_allocs/N); \
+    } \
+    printf("\n"); \
+}}
+
+static void benchmarks() {
+    int seed = getenv("SEED")?atoi(getenv("SEED")):time(NULL);
+    int N = getenv("N")?atoi(getenv("N")):5000000;
+    printf("seed=%d, count=%d, item_size=%zu\n", seed, N, sizeof(int));
+    srand(seed);
+
+
+    int *vals = xmalloc(N * sizeof(int));
+    for (int i = 0; i < N; i++) {
+        vals[i] = i;
+    }
+
+    shuffle(vals, N, sizeof(int));
+
+    struct hashmap *map;
+    shuffle(vals, N, sizeof(int));
+
+    map = hashmap_new(sizeof(int), 0, seed, seed, hash_int, compare_ints_udata,
+                      NULL, NULL);
+    bench("set", N, {
+        int *v = hashmap_set(map, &vals[i]);
+        assert(!v);
+    })
+    shuffle(vals, N, sizeof(int));
+    bench("get", N, {
+        int *v = hashmap_get(map, &vals[i]);
+        assert(v && *v == vals[i]);
+    })
+    shuffle(vals, N, sizeof(int));
+    bench("delete", N, {
+        int *v = hashmap_delete(map, &vals[i]);
+        assert(v && *v == vals[i]);
+    })
+    hashmap_free(map);
+
+    map = hashmap_new(sizeof(int), N, seed, seed, hash_int, compare_ints_udata,
+                      NULL, NULL);
+    bench("set (cap)", N, {
+        int *v = hashmap_set(map, &vals[i]);
+        assert(!v);
+    })
+    shuffle(vals, N, sizeof(int));
+    bench("get (cap)", N, {
+        int *v = hashmap_get(map, &vals[i]);
+        assert(v && *v == vals[i]);
+    })
+    shuffle(vals, N, sizeof(int));
+    bench("delete (cap)" , N, {
+        int *v = hashmap_delete(map, &vals[i]);
+        assert(v && *v == vals[i]);
+    })
+
+    hashmap_free(map);
+
+
+    xfree(vals);
+
+    if (total_allocs != 0) {
+        fprintf(stderr, "total_allocs: expected 0, got %lu\n", total_allocs);
+        exit(1);
+    }
+}
+
+int main() {
+    hashmap_set_allocator(xmalloc, xfree);
+
+    if (getenv("BENCH")) {
+        printf("Running hashmap.c benchmarks...\n");
+        benchmarks();
+    } else {
+        printf("Running hashmap.c tests...\n");
+        all();
+        printf("PASSED\n");
+    }
+}
+
+
+#endif
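
For reference, below is a minimal usage sketch of the hashmap API added by this commit, based on the doc comments above. The struct user type, its fields, and the calling code are illustrative only and not part of the commit; the calls (hashmap_new, hashmap_set, hashmap_get, hashmap_free, hashmap_sip) are the ones defined in src/hashmap.c.

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>
    #include "hashmap.h"

    // Illustrative element type; elements are copied into the map by value.
    struct user {
        char name[32];
        int age;
    };

    // Compare two elements by name, as required by hashmap_new.
    static int user_compare(const void *a, const void *b, void *udata) {
        const struct user *ua = a;
        const struct user *ub = b;
        return strcmp(ua->name, ub->name);
    }

    // Hash an element by name using the bundled SipHash helper.
    static uint64_t user_hash(const void *item, uint64_t seed0, uint64_t seed1) {
        const struct user *u = item;
        return hashmap_sip(u->name, strlen(u->name), seed0, seed1);
    }

    int main(void) {
        // cap=0 defaults to 16; seeds are zero here for brevity.
        struct hashmap *map = hashmap_new(sizeof(struct user), 0, 0, 0,
                                          user_hash, user_compare, NULL, NULL);
        if (!map) return 1;

        hashmap_set(map, &(struct user){ .name = "dale", .age = 44 });
        hashmap_set(map, &(struct user){ .name = "roger", .age = 68 });

        // Lookup uses a key element that only needs the fields the
        // compare/hash callbacks read (here, name).
        struct user *u = hashmap_get(map, &(struct user){ .name = "dale" });
        if (u) printf("%s is %d\n", u->name, u->age);

        hashmap_free(map);
        return 0;
    }

Because hashmap_set copies elsize bytes into the map, passing stack compound literals as above is safe; the map never retains pointers to the caller's element unless the element itself stores reference data (in which case an elfree callback should be supplied).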