247 lines
7.0 KiB
C++
247 lines
7.0 KiB
C++
|
/*
|
||
|
* Copyright (c) 2010-2013 BitTorrent, Inc.
|
||
|
*
|
||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||
|
* of this software and associated documentation files (the "Software"), to deal
|
||
|
* in the Software without restriction, including without limitation the rights
|
||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||
|
* copies of the Software, and to permit persons to whom the Software is
|
||
|
* furnished to do so, subject to the following conditions:
|
||
|
*
|
||
|
* The above copyright notice and this permission notice shall be included in
|
||
|
* all copies or substantial portions of the Software.
|
||
|
*
|
||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||
|
* THE SOFTWARE.
|
||
|
*/
|
||
|
|
||
|
#include "utp_hash.h"
|
||
|
#include "utp_types.h"
|
||
|
|
||
|
#define LIBUTP_HASH_UNUSED ((utp_link_t)-1)
|
||
|
|
||
|
#ifdef STRICT_ALIGN
|
||
|
inline uint32 Read32(const void *p)
|
||
|
{
|
||
|
uint32 tmp;
|
||
|
memcpy(&tmp, p, sizeof tmp);
|
||
|
return tmp;
|
||
|
}
|
||
|
|
||
|
#else
|
||
|
inline uint32 Read32(const void *p) { return *(uint32*)p; }
|
||
|
#endif
|
||
|
|
||
|
|
||
|
// Get the amount of memory required for the hash parameters and the bucket set
|
||
|
// Waste a space for an unused bucket in order to ensure the following managed memory have 32-bit aligned addresses
|
||
|
// TODO: make this 64-bit clean
|
||
|
#define BASE_SIZE(bc) (sizeof(utp_hash_t) + sizeof(utp_link_t) * ((bc) + 1))
|
||
|
|
||
|
// Get a pointer to the base of the structure array managed by the hash table
|
||
|
#define get_bep(h) ((byte*)(h)) + BASE_SIZE((h)->N)
|
||
|
|
||
|
// Get the address of the information associated with a specific structure in the array,
|
||
|
// given the address of the base of the structure.
|
||
|
// This assumes a utp_link_t link member is at the end of the structure.
|
||
|
// Given compilers filling out the memory to a 32-bit clean value, this may mean that
|
||
|
// the location named in the structure may not be the location actually used by the hash table,
|
||
|
// since the compiler may have padded the end of the structure with 2 bytes after the utp_link_t member.
|
||
|
// TODO: this macro should not require that the variable pointing at the hash table be named 'hash'
|
||
|
#define ptr_to_link(p) (utp_link_t *) (((byte *) (p)) + hash->E - sizeof(utp_link_t))
|
||
|
|
||
|
// Calculate how much to allocate for a hash table with bucket count, total size, and structure count
|
||
|
// TODO: make this 64-bit clean
|
||
|
#define ALLOCATION_SIZE(bc, ts, sc) (BASE_SIZE((bc)) + (ts) * (sc))
|
||
|
|
||
|
utp_hash_t *utp_hash_create(int N, int key_size, int total_size, int initial, utp_hash_compute_t hashfun, utp_hash_equal_t compfun)
|
||
|
{
|
||
|
// Must have odd number of hash buckets (prime number is best)
|
||
|
assert(N % 2);
|
||
|
// Ensure structures will be at aligned memory addresses
|
||
|
// TODO: make this 64-bit clean
|
||
|
assert(0 == (total_size % 4));
|
||
|
|
||
|
int size = ALLOCATION_SIZE(N, total_size, initial);
|
||
|
utp_hash_t *hash = (utp_hash_t *) malloc( size );
|
||
|
memset( hash, 0, size );
|
||
|
|
||
|
for (int i = 0; i < N + 1; ++i)
|
||
|
hash->inits[i] = LIBUTP_HASH_UNUSED;
|
||
|
hash->N = N;
|
||
|
hash->K = key_size;
|
||
|
hash->E = total_size;
|
||
|
hash->hash_compute = hashfun;
|
||
|
hash->hash_equal = compfun;
|
||
|
hash->allocated = initial;
|
||
|
hash->count = 0;
|
||
|
hash->used = 0;
|
||
|
hash->free = LIBUTP_HASH_UNUSED;
|
||
|
return hash;
|
||
|
}
|
||
|
|
||
|
uint utp_hash_mem(const void *keyp, size_t keysize)
|
||
|
{
|
||
|
uint hash = 0;
|
||
|
uint n = keysize;
|
||
|
while (n >= 4) {
|
||
|
hash ^= Read32(keyp);
|
||
|
keyp = (byte*)keyp + sizeof(uint32);
|
||
|
hash = (hash << 13) | (hash >> 19);
|
||
|
n -= 4;
|
||
|
}
|
||
|
while (n != 0) {
|
||
|
hash ^= *(byte*)keyp;
|
||
|
keyp = (byte*)keyp + sizeof(byte);
|
||
|
hash = (hash << 8) | (hash >> 24);
|
||
|
n--;
|
||
|
}
|
||
|
return hash;
|
||
|
}
|
||
|
|
||
|
uint utp_hash_mkidx(utp_hash_t *hash, const void *keyp)
|
||
|
{
|
||
|
// Generate a key from the hash
|
||
|
return hash->hash_compute(keyp, hash->K) % hash->N;
|
||
|
}
|
||
|
|
||
|
static inline bool compare(byte *a, byte *b,int n)
|
||
|
{
|
||
|
assert(n >= 4);
|
||
|
if (Read32(a) != Read32(b)) return false;
|
||
|
return memcmp(a+4, b+4, n-4) == 0;
|
||
|
}
|
||
|
|
||
|
#define COMPARE(h,k1,k2,ks) (((h)->hash_equal) ? (h)->hash_equal((void*)k1,(void*)k2,ks) : compare(k1,k2,ks))
|
||
|
|
||
|
// Look-up a key in the hash table.
|
||
|
// Returns NULL if not found
|
||
|
void *utp_hash_lookup(utp_hash_t *hash, const void *key)
|
||
|
{
|
||
|
utp_link_t idx = utp_hash_mkidx(hash, key);
|
||
|
|
||
|
// base pointer
|
||
|
byte *bep = get_bep(hash);
|
||
|
|
||
|
utp_link_t cur = hash->inits[idx];
|
||
|
while (cur != LIBUTP_HASH_UNUSED) {
|
||
|
byte *key2 = bep + (cur * hash->E);
|
||
|
if (COMPARE(hash, (byte*)key, key2, hash->K))
|
||
|
return key2;
|
||
|
cur = *ptr_to_link(key2);
|
||
|
}
|
||
|
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
// Add a new element to the hash table.
|
||
|
// Returns a pointer to the new element.
|
||
|
// This assumes the element is not already present!
|
||
|
void *utp_hash_add(utp_hash_t **hashp, const void *key)
|
||
|
{
|
||
|
//Allocate a new entry
|
||
|
byte *elemp;
|
||
|
utp_link_t elem;
|
||
|
utp_hash_t *hash = *hashp;
|
||
|
utp_link_t idx = utp_hash_mkidx(hash, key);
|
||
|
|
||
|
if ((elem=hash->free) == LIBUTP_HASH_UNUSED) {
|
||
|
utp_link_t all = hash->allocated;
|
||
|
if (hash->used == all) {
|
||
|
utp_hash_t *nhash;
|
||
|
if (all <= (LIBUTP_HASH_UNUSED/2)) {
|
||
|
all *= 2;
|
||
|
} else if (all != LIBUTP_HASH_UNUSED) {
|
||
|
all = LIBUTP_HASH_UNUSED;
|
||
|
} else {
|
||
|
// too many items! can't grow!
|
||
|
assert(0);
|
||
|
return NULL;
|
||
|
}
|
||
|
// otherwise need to allocate.
|
||
|
nhash = (utp_hash_t*)realloc(hash, ALLOCATION_SIZE(hash->N, hash->E, all));
|
||
|
if (!nhash) {
|
||
|
// out of memory (or too big to allocate)
|
||
|
assert(nhash);
|
||
|
return NULL;
|
||
|
}
|
||
|
hash = *hashp = nhash;
|
||
|
hash->allocated = all;
|
||
|
}
|
||
|
|
||
|
elem = hash->used++;
|
||
|
elemp = get_bep(hash) + elem * hash->E;
|
||
|
} else {
|
||
|
elemp = get_bep(hash) + elem * hash->E;
|
||
|
hash->free = *ptr_to_link(elemp);
|
||
|
}
|
||
|
|
||
|
*ptr_to_link(elemp) = hash->inits[idx];
|
||
|
hash->inits[idx] = elem;
|
||
|
hash->count++;
|
||
|
|
||
|
// copy key into it
|
||
|
memcpy(elemp, key, hash->K);
|
||
|
return elemp;
|
||
|
}
|
||
|
|
||
|
// Delete an element from the utp_hash_t
|
||
|
// Returns a pointer to the already deleted element.
|
||
|
void *utp_hash_del(utp_hash_t *hash, const void *key)
|
||
|
{
|
||
|
utp_link_t idx = utp_hash_mkidx(hash, key);
|
||
|
|
||
|
// base pointer
|
||
|
byte *bep = get_bep(hash);
|
||
|
|
||
|
utp_link_t *curp = &hash->inits[idx];
|
||
|
utp_link_t cur;
|
||
|
while ((cur=*curp) != LIBUTP_HASH_UNUSED) {
|
||
|
byte *key2 = bep + (cur * hash->E);
|
||
|
if (COMPARE(hash,(byte*)key,(byte*)key2, hash->K )) {
|
||
|
// found an item that matched. unlink it
|
||
|
*curp = *ptr_to_link(key2);
|
||
|
// Insert into freelist
|
||
|
*ptr_to_link(key2) = hash->free;
|
||
|
hash->free = cur;
|
||
|
hash->count--;
|
||
|
return key2;
|
||
|
}
|
||
|
curp = ptr_to_link(key2);
|
||
|
}
|
||
|
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
void *utp_hash_iterate(utp_hash_t *hash, utp_hash_iterator_t *iter)
|
||
|
{
|
||
|
utp_link_t elem;
|
||
|
|
||
|
if ((elem=iter->elem) == LIBUTP_HASH_UNUSED) {
|
||
|
// Find a bucket with an element
|
||
|
utp_link_t buck = iter->bucket + 1;
|
||
|
for(;;) {
|
||
|
if (buck >= hash->N)
|
||
|
return NULL;
|
||
|
if ((elem = hash->inits[buck]) != LIBUTP_HASH_UNUSED)
|
||
|
break;
|
||
|
buck++;
|
||
|
}
|
||
|
iter->bucket = buck;
|
||
|
}
|
||
|
|
||
|
byte *elemp = get_bep(hash) + (elem * hash->E);
|
||
|
iter->elem = *ptr_to_link(elemp);
|
||
|
return elemp;
|
||
|
}
|
||
|
|
||
|
void utp_hash_free_mem(utp_hash_t* hash)
|
||
|
{
|
||
|
free(hash);
|
||
|
}
|