一個hash表的實現

simpleman7210發表於2013-07-28

按照先前的設想，寫了一個一般性的hash表。為了支援各種型別，這個hash表寫為模板。

Hashtable模板類

#pragma once

#include "util.h"

// By default MyComparer and MyHasher work for ordinary types, in particular
// the built-in data types.
// Consistency rule that must be honoured: if two keys compare equal, their
// hash codes must be equal as well.
template<class T>
class MyComparer
{
public:
    // Returns true when key1 and key2 compare equal through operator==.
    // Declared const: the comparison reads no comparer state, so it must also
    // be callable on a const comparer.
    bool equals(const T& key1, const T& key2) const
    {
        return key1 == key2;
    }
};

// Default hashing policy: the hash code is the key converted to int.
// Only usable for key types that can be cast to int.
template <class T>
class MyHasher
{
public:
    // Returns the key cast to int.
    // Declared const: hashing reads no hasher state, so it must also be
    // callable on a const hasher.
    int hashCode(const T& t) const
    {
        return (int)t;
    }
};

// One node of a bucket's singly linked chain: a key/value pair, the hash code
// stored alongside it, and the link to the next node.
template<class K, class V>
struct HashtableEntry
{
    K key;                  // the stored key
    V value;                // the value associated with the key
    int hash;               // hash code kept with the entry
    HashtableEntry *next;   // following node in the same chain, or null
};

// The implementation borrows from java.util.HashMap and a few other
// implementations (such as MFC's CMap).
// The goal is a general-purpose table whose key may be of any type, provided a
// matching hash function is implemented for it.
// Allocators and optimisations such as allocating entries in batches or reusing
// removed entries are not considered for the time being.
template <class K, class V,
        class Comparer = MyComparer<K>,
        class Hasher = MyHasher<K> >
class Hashtable
{
public:
    // Default number of buckets.
    static const int DEFAULT_TABLE_SIZE = 16;
    // Default load factor (loadfactor = n/m, where n is the number of (K,V)
    // pairs and m is the table size).
    //static const float DEFAULT_LOAD_FACTOR = 0.75f;   //VC can not compile!

    Hashtable();
    Hashtable(int tableSize);
    virtual ~Hashtable();

    // Reports whether the given key is present.
    bool find(K key);
    // Looks up the value for key; returns false when the key is not found.
    bool get(K key, V& value);
    // Stores a key/value pair in the table.
    void put(K key, V value);
    // Overload that additionally takes an oldValue output parameter.
    // NOTE(review): no definition of this overload is visible in this file --
    // confirm it is defined elsewhere before calling it.
    bool put(K key, V value, V& oldValue);
    // Removes the entry for key; returns true when an entry was removed.
    bool remove(K key);
    // Removes every key/value pair (the table size stays the same).
    void clear();
    // Re-hashes all entries into a table of the given size.
    void rehash(int newTableSize);
    // Chooses whether the table may grow automatically (default: true). This
    // call does not itself trigger a rehash.
    // When enabled, the table is enlarged (its size doubled) once the load
    // exceeds the given threshold.
    void setAutogrow(bool autogrow, float loadFactor=0.75f);
    int size() const { return _size; }
    int getTableSize() const { return _tableSize; }
    void print();

protected:
    // With Comparer and Hasher in place, the virtual-function approach is no
    // longer used.
    //virtual int getHashCode(const K& key) = 0;
    //virtual bool keyEquals(const K& key1, const K& key2) = 0;

private:
    // Copying is disabled: the table owns its bucket array and every entry
    // through raw pointers, so a member-wise copy would make two tables delete
    // the same memory. Declared private and intentionally left undefined.
    Hashtable(const Hashtable&);
    Hashtable& operator=(const Hashtable&);

    int _tableSize;     // number of buckets
    float _loadFactor;
    bool _autogrow;
    int _size;          // number of key/value pairs
    int _threshold;     // entry count at which an auto-growing table is enlarged
    HashtableEntry<K, V> **_table;
    Comparer _comparer;
    Hasher _hasher;

    void initHashtable(int tableSize);
    int hashIndex(int hash, int tableSize);
};

// Default constructor: builds a table with DEFAULT_TABLE_SIZE buckets.
template<class K, class V, class Comparer, class Hasher>
Hashtable<K,V,Comparer,Hasher>::Hashtable()
{
    // The other constructor cannot simply be called from here: a statement
    // such as Hashtable(16); would create a temporary object and construct
    // that one instead. The shared initialiser is used for both constructors.
    this->initHashtable(DEFAULT_TABLE_SIZE);
}

// Constructs a table with the requested number of buckets; all set-up is
// delegated to initHashtable.
template<class K, class V, class Comparer, class Hasher>
Hashtable<K,V,Comparer,Hasher>::Hashtable(int tableSize)
{
    this->initHashtable(tableSize);
}

// Shared constructor body: checks the requested size, resets the bookkeeping
// fields to their defaults and allocates a bucket array in which every slot
// is empty.
template<class K, class V, class Comparer, class Hasher>
void Hashtable<K,V,Comparer,Hasher>::initHashtable(int tableSize)
{
    assert_exception(tableSize > 0, "bad table size");

    _size = 0;
    _autogrow = true;
    _loadFactor = 0.75f;    // stands in for DEFAULT_LOAD_FACTOR
    _tableSize = tableSize;
    _threshold = (int)(_tableSize * _loadFactor);

    _table = new HashtableEntry<K, V> * [_tableSize];
    assert_exception(_table != NULL, "out of memory");

    // memset would be faster, but an explicit loop is easier to read.
    HashtableEntry<K, V> ** const end = _table + _tableSize;
    for (HashtableEntry<K, V> **slot = _table; slot != end; ++slot) {
        *slot = NULL;
    }
}

// Turns automatic growth on or off and records the load factor.
//   autogrow   - whether put() may enlarge the table.
//   loadFactor - ratio of entries to buckets used to compute the growth
//                threshold; values above 1 are accepted.
// The threshold is recomputed only when automatic growth is enabled; this
// call never rehashes by itself.
template<class K, class V, class Comparer, class Hasher>
void Hashtable<K,V,Comparer,Hasher>::setAutogrow(bool autogrow, float loadFactor)
{
    if (autogrow) {
        // A zero, negative or NaN factor would pin the threshold at 0 (or
        // below), so every insertion would double the table without end.
        // The comparison is false for NaN, so NaN is rejected as well.
        // Reported the same way as the size checks elsewhere in this class.
        assert_exception(loadFactor > 0.0f, "bad load factor");
    }
    _autogrow = autogrow;
    _loadFactor = loadFactor;
    if (_autogrow) {
        _threshold = (int)(_tableSize * _loadFactor);
    }
}

// Destructor: deletes every entry through clear(), then releases the bucket
// array itself.
template<class K, class V, class Comparer, class Hasher>
Hashtable<K,V,Comparer,Hasher>::~Hashtable()
{
    this->clear();
    delete [] _table;
}

// Maps a hash code onto a bucket index for a table of tableSize buckets.
template<class K, class V, class Comparer, class Hasher>
int Hashtable<K,V,Comparer,Hasher>::hashIndex(int hash, int tableSize)
{
    // The index must not be negative, so the bits outside the 0x7fffffff mask
    // are dropped before the modulo is taken.
    const int nonNegative = hash & 0x7fffffff;
    return nonNegative % tableSize;
}

// Returns true when an entry for key exists in the table.
template<class K, class V, class Comparer, class Hasher>
bool Hashtable<K,V,Comparer,Hasher>::find(K key)
{
    const int hash = _hasher.hashCode(key);
    HashtableEntry<K,V> *node = _table[hashIndex(hash, _tableSize)];

    // Walk the bucket's chain; the stored hash is compared first so that the
    // key comparison only runs for entries whose hash matches.
    while (node != NULL)
    {
        if (node->hash == hash && _comparer.equals(key, node->key))
        {
            return true;
        }
        node = node->next;
    }
    return false;
}

// Looks up key. On a hit the stored value is copied into `value` and true is
// returned; on a miss `value` is left untouched and false is returned.
template<class K, class V, class Comparer, class Hasher>
bool Hashtable<K,V,Comparer,Hasher>::get(K key, V& value)
{
    const int hash = _hasher.hashCode(key);
    HashtableEntry<K,V> *node = _table[hashIndex(hash, _tableSize)];

    while (node != NULL)
    {
        // Compare the stored hash before running the key comparison.
        const bool matches = (node->hash == hash) && _comparer.equals(key, node->key);
        if (matches)
        {
            value = node->value;
            return true;
        }
        node = node->next;
    }
    return false;
}

// Stores value under key. If the key is already present its value is replaced
// and the entry count is unchanged; otherwise a new entry is linked in at the
// head of the key's bucket. With automatic growth enabled, the table size is
// doubled once the entry count reaches the threshold.
template<class K, class V, class Comparer, class Hasher>
void Hashtable<K,V,Comparer,Hasher>::put(K key, V value)
{
    int hash = _hasher.hashCode(key);
    int index = hashIndex(hash, _tableSize);
    HashtableEntry<K,V> * pEntry;
    for (pEntry = _table[index]; pEntry != NULL; pEntry = pEntry->next)
    {
        if (pEntry->hash == hash && _comparer.equals(key, pEntry->key))
        {
            // Existing key: overwrite in place.
            pEntry->value = value;
            return;
        }
    }
    pEntry = new HashtableEntry<K,V> ();
    assert_exception(pEntry != NULL, "new failed(out of memory?)");
    pEntry->key = key;
    pEntry->value = value;
    pEntry->hash = hash;
    pEntry->next = _table[index];
    _table[index] = pEntry;
    _size++;
    // Grow only while the doubled size still fits in an int: without the last
    // test, _tableSize * 2 would overflow (undefined behaviour) for a table of
    // more than 0x3fffffff buckets. Such a table simply stops growing.
    if (_autogrow && _size >= _threshold && _tableSize <= 0x7fffffff / 2) {
        rehash(_tableSize * 2);
    }
}

// Moves every entry into a freshly allocated bucket array of newTableSize
// slots, then replaces the old array and recomputes the growth threshold.
// Entries are re-linked, not copied; the hash stored in each entry is reused.
template<class K, class V, class Comparer, class Hasher>
void Hashtable<K,V,Comparer,Hasher>::rehash(int newTableSize)
{
    assert_exception(newTableSize > 0, "bad rehash size");
    HashtableEntry<K,V> ** buckets = new HashtableEntry<K,V> * [newTableSize];
    assert_exception(buckets != NULL, "out of memory");

    for (int slot = 0; slot < newTableSize; ++slot) {
        buckets[slot] = NULL;
    }

    // Transfer the entries from the old table to the new one.
    for (int oldSlot = 0; oldSlot < _tableSize; ++oldSlot)
    {
        HashtableEntry<K, V> *node = _table[oldSlot];
        while (node != NULL)
        {
            // Remember the successor before the link is overwritten.
            HashtableEntry<K, V> * const following = node->next;
            HashtableEntry<K, V> *& head = buckets[hashIndex(node->hash, newTableSize)];
            node->next = head;
            head = node;
            node = following;
        }
    }

    delete [] _table;
    _table = buckets;
    _tableSize = newTableSize;
    _threshold = (int)(_tableSize * _loadFactor);
}

// Deletes the entry stored under key. Returns true when an entry was found and
// removed, false when the key is not in the table.
template<class K, class V, class Comparer, class Hasher>
bool Hashtable<K,V,Comparer,Hasher>::remove(K key)
{
    const int hash = _hasher.hashCode(key);

    // `link` always addresses the pointer that refers to the entry under
    // inspection (the bucket slot first, then each predecessor's `next`), so
    // unlinking is a single assignment whatever the entry's position.
    HashtableEntry<K,V> **link = &_table[hashIndex(hash, _tableSize)];
    while (*link != NULL)
    {
        HashtableEntry<K,V> *candidate = *link;
        if (candidate->hash == hash && _comparer.equals(key, candidate->key))
        {
            *link = candidate->next;
            delete candidate;
            --_size;
            return true;
        }
        link = &candidate->next;
    }
    return false;
}

// Deletes every entry and empties every bucket. The bucket array itself, and
// therefore the table size, is kept.
template<class K, class V, class Comparer, class Hasher>
void Hashtable<K,V,Comparer,Hasher>::clear()
{
    for (int slot = 0; slot < _tableSize; ++slot)
    {
        HashtableEntry<K, V> *node = _table[slot];
        while (node != NULL)
        {
            // Read the successor before the node is destroyed.
            HashtableEntry<K, V> * const following = node->next;
            delete node;
            node = following;
        }
        _table[slot] = NULL;
    }
    _size = 0;
}

// Prints a one-line summary (table size, entry count, load factor) with printf.
template<class K, class V, class Comparer, class Hasher>
void Hashtable<K,V,Comparer,Hasher>::print()
{
    const int buckets = _tableSize;
    const int entries = _size;
    const float factor = _loadFactor;
    printf("Hashtable tableSize=%d, size=%d, loadFactor=%f\n", buckets, entries, factor);
}

可以寫一些程式碼來測試它。比如：

// Demo/smoke test for Hashtable with int keys.
// Part 1 inserts a few pairs, including a repeated key, and prints a summary.
// Part 2 inserts 100000 pairs with a load factor of 60 and reads each one back.
void testHashtable()
{
    Hashtable<int,float> mapInt2Float;
    int k1=1,k2=2;
    float f1=0.5f,f2=0.6f;
    mapInt2Float.put(k1,f1);
    mapInt2Float.put(k2,f2);
    // With the default 16 buckets and the default int hasher, key 17 shares a
    // bucket with key 1, so this exercises chaining.
    mapInt2Float.put(17,f1);
    // Same key again: replaces the stored value instead of adding an entry.
    mapInt2Float.put(17,0.8f);
    mapInt2Float.print();

    Hashtable<int,int> mapInt2Int;
    mapInt2Int.setAutogrow(true, 60);
    for (int i = 0; i< 100000; i++)
    {
        mapInt2Int.put(i,i);
    }
    for (int i = 0; i < 100000;i++)
    {
        int x = 0;
        if (!mapInt2Int.get(i,x) || x != i) {
            // The original stored "error" in an unused char* local (ill-formed
            // in C++11 and invisible at run time); report the failure instead
            // and stop at the first one.
            printf("error: key %d was not read back correctly\n", i);
            break;
        }
    }
}

關於符號表：可以實現為Key為String型別的hash表。我也寫了一個String類，如下。

String.h

#pragma once

// Once exceptions are supported, the exception type needs a String.

class String
{
public:
    // Construction, copying and destruction.
    String();
    String(const char *str);
    String(const char *str, int len);
    String(const String& strObj);
    ~String();

    // Assignment from another String or from a C string.
    String& operator = (const String& strObj);
    String& operator = (const char *str);

    // Comparison and size.
    bool operator == (const String& strObj) const;
    int length() const;

    // Caution: do not take the const char * of a temporary String. Once the
    // temporary is destroyed, the character data the pointer refers to has
    // already been released.
    const char *cstr() const;
    // Conversion operator to const char *.
    operator const char *() const;

private:
    // The internal string data, may be shared between String objects.
    // The internal data layout is as follows:
    // struct {
    //     int refCount;   // reference count
    //     char data[];
    // };
    char *_refData;
    int _length;
    static char _empty;

    void initCopyString(const char *str, int len);
};

String.cpp

#include "String.h"
#include <stdio.h>
#include <string.h>

char String::_empty = 0;

// Default constructor: an empty string that owns no buffer.
String::String()
    : _refData(NULL), _length(0)
{
}

// Constructs a String holding a private copy of the NUL-terminated string str.
// A NULL pointer yields an empty string instead of being passed to strlen.
String::String(const char *str)
{
    if (str == NULL) {
        _refData = NULL;
        _length = 0;
        return;
    }
    int len = (int)strlen(str);
    initCopyString(str, len);
}

// Constructs a String from the first len characters of str.
// A NULL pointer or a negative length yields an empty string: passing either
// on would hand an invalid source or size to the copy in initCopyString.
String::String(const char *str, int len)
{
    if (str == NULL || len < 0) {
        _refData = NULL;
        _length = 0;
        return;
    }
    initCopyString(str, len);
}

void String::initCopyString(const char *str, int len)
{
    //allocates memory to hold the string, include terminal null character.
    _refData = new char [sizeof(int) + len + 1];
    if (_refData != NULL)
    {
        int *pRefCount = (int *)_refData;
        *pRefCount = 1;
        memcpy(_refData + sizeof(int), str, len);
        _refData[sizeof(int) + len]='\0';
        _length = len;
    }
}

// Copy constructor: shares the source's buffer and, when there is one, adds a
// reference to it. No character data is copied.
String::String(const String& strObj)
    : _refData(strObj._refData), _length(strObj._length)
{
    if (_refData == NULL) {
        return;
    }
    int *sharedCount = (int *)_refData;
    ++(*sharedCount);
}

// Copy assignment: drops this object's reference to its current buffer
// (freeing the buffer when it was the last reference) and shares strObj's.
String& String::operator = (const String& strObj)
{
    if (this != &strObj)
    {
        // Let go of the buffer held so far.
        if (_refData != NULL) {
            int *ownCount = (int *)_refData;
            if (--(*ownCount) == 0) {
                delete [] _refData;
            }
        }

        // Adopt the source's buffer and register the new reference.
        _length = strObj._length;
        _refData = strObj._refData;
        if (_refData != NULL) {
            int *sharedCount = (int *)_refData;
            ++(*sharedCount);
        }
    }
    return *this;
}

// Assigns a private copy of the NUL-terminated string str.
// A NULL pointer makes this String empty.
//
// The new buffer is built BEFORE the old one is released. The previous order
// (release first, copy second) had three defects:
//  - the "assign to self" shortcut returned after the reference count had
//    already been decremented to 0, leaving a live buffer with a count of 0;
//  - when str pointed into this String's own buffer, the buffer was deleted
//    and then read by strlen and the copy;
//  - a NULL str was passed to strlen.
String& String::operator = (const char *str)
{
    // Assigning the String's own character data: nothing changes, and the
    // reference count must stay untouched.
    if (_refData != NULL && str == _refData + sizeof(int)) {
        return *this;
    }

    char *oldData = _refData;
    if (str == NULL) {
        _refData = NULL;
        _length = 0;
    } else {
        // initCopyString stores into _refData only after its allocation has
        // produced a buffer, so oldData is still the sole owner record if the
        // allocation throws.
        int len = (int)strlen(str);
        initCopyString(str, len);
    }

    // Release the reference to the previous buffer last, so str could safely
    // refer to it during the copy.
    if (oldData != NULL) {
        int *pRefCount = (int *)oldData;
        (*pRefCount)--;
        if (*pRefCount == 0) {
            delete [] oldData;
        }
    }
    return (*this);
}

// Destructor: gives up this object's reference and frees the buffer when it
// was the last one.
String::~String()
{
    if (_refData == NULL) {
        return;     // empty string, nothing is owned
    }
    int *sharedCount = (int *)_refData;
    if (--(*sharedCount) == 0) {
        delete [] _refData;
    }
}

// Returns the characters as a NUL-terminated C string. A String without a
// buffer yields a pointer to the shared static empty string; otherwise the
// characters start right after the leading reference count.
const char * String::cstr() const
{
    return (_refData == NULL) ? &_empty : (_refData + sizeof(int));
}

int String::length() const {
    return _length;
}

// Implicit conversion to a C string; forwards to cstr().
String::operator const char * () const
{
    return this->cstr();
}

// Equality: two Strings are equal when they have the same length and the same
// characters. The cheap checks (same object, different length, both empty,
// same buffer) run before any character is compared.
bool String::operator == (const String& strObj) const
{
    if (this == &strObj) {
        return true;
    }
    if (_length != strObj._length) {
        return false;
    }
    if (_length == 0) {
        return true;
    }

    const char *lhs = cstr();
    const char *rhs = strObj.cstr();
    if (lhs == rhs) {
        return true;    // both objects refer to the same character data
    }

    for (int remaining = _length; remaining > 0; --remaining, ++lhs, ++rhs) {
        if (*lhs != *rhs) {
            return false;
        }
    }
    return true;
}

有了String物件之後，我們可以把符號表實現為Hashtable<String,X>，其中X為其它型別。對於String型別，需要實現Comparer和Hasher。例如：

// Key-equality policy for String keys; compares through String::operator==.
class StringComparer
{
public:
    // Returns true when the two keys are equal.
    // Declared const: the comparison reads no comparer state, so it must also
    // be callable on a const comparer.
    bool equals(const String& key1, const String& key2) const
    {
        return (key1 == key2);
    }
};

// Hashing policy for String keys.
class StringHasher
{
public:
    // Modelled on the hash computation of java.lang.String:
    // hash = 31*hash + character, over all characters of the string.
    //
    // The sum is accumulated in unsigned arithmetic. In a signed int,
    // 31*hash + c overflows after only a handful of characters, which is
    // undefined behaviour; unsigned arithmetic wraps around instead. Each
    // character goes through the same char-to-int conversion as before, so
    // strings whose hash never overflowed keep their previous value.
    int hashCode(const String& t) const
    {
        unsigned int hash = 0;
        const char * str = t.cstr();
        const int length = t.length();
        for (int i = 0; i < length; i++)
        {
            hash = 31u * hash + (unsigned int)str[i];
        }
        return (int)hash;
    }
};

// Demo/smoke test for a Hashtable keyed by String: inserts three pairs, then
// looks up two keys that are present and one ("kk") that was never inserted.
void testHashtable()
{
    Hashtable<String,int,StringComparer,StringHasher> mapStr2Int;
    mapStr2Int.put("abc",1);
    mapStr2Int.put("def",2);
    mapStr2Int.put("kkk",3);

    // Initialised because get() leaves its output untouched on a miss.
    int x = 0, y = 0, z = 0;
    const bool foundAbc = mapStr2Int.get("abc",x);
    const bool foundDef = mapStr2Int.get("def",y);
    const bool foundKk = mapStr2Int.get("kk",z);

    // The original discarded every result; report anything unexpected.
    if (!foundAbc || x != 1 || !foundDef || y != 2 || foundKk) {
        printf("error: unexpected lookup result\n");
    }
}

（未盡之處）符號表與hash表：

1. 符號是否儲存在一個永久區內？
假如符號不會被刪除，就可以考慮儲存於永久區內。
虛擬機器的永久區可以用一個大陣列或者陣列的連結串列來實現。

2. Allocator、Hash元素空間的批量申請，以及元素空間的重複利用
Allocator，就是使用特定的記憶體分配器，通常是為了效能自定義的記憶體分配器，而不用預設的new/delete。
當往Hash表插入一個元素的時候，可以考慮一次批量申請元素空間，這樣不必每次插入元素的時候都申請空間。
元素空間的重複利用，是指被刪除的元素，其空間不要立即釋放，而是放回一個freelist中，下次插入元素的時候，可以從freelist中重新拿來使用，這樣避免了新申請記憶體。
這些做法通常都是為了提高效能。

[CareerCup] 8.10 Implement a Hash Table 實現一個雜湊表
2015-09-16
hash 表在 go 語言中的實現
2021-04-16
Go
MYSQL INNODB中hash查詢表的實現
2017-07-05
MySql
C語言實現一個簡易的Hash table(7)
2019-02-03
C語言
自己實現一個一致性 Hash 演算法
2019-03-04
演算法
基於list_head實現的通用核心Hash表
2016-01-01
一致性Hash的原理與實現
2022-04-10
flinkSql join redis的hash結構維表簡單實現
2020-11-20
SQLRedis
仿 ElmentUI 實現一個 Form 表單
2019-04-22
UIORM
【策略】一致性Hash演算法（Hash環）的java程式碼實現
2017-05-16
演算法Java
一致性hash的c++簡單實現
2017-01-10
C++
一文搞懂一致性 hash 的原理和實現
2021-07-20
一文搞懂一致性hash的原理和實現
2021-07-20
前端 JS 原生 javascript 和 location.hash 實現一個單頁應用的路由 router
2021-04-17
前端JSJavaScript路由
【閱讀筆記：雜湊表】Javascript任何物件都是一個雜湊表（hash表）！
2019-07-04
筆記JavaScript物件
強一致性hash實現java版本及強一致性hash原理
2018-08-14
Java
7、域滲透——Pass The Hash的實現
2018-06-11
【NinGoo】用Perl的hash陣列實現個性化監控
2008-06-07
Go陣列
【江楓】用Perl的hash陣列實現個性化監控
2008-06-07
陣列
perl 陣列的hash表
2013-05-10
陣列
如何在 web 端實現一個有日曆的報表
2020-06-22
Web
用hash cluster表提高查詢效能 (一)
2019-05-05
基於react的hash路由簡易實現
2019-04-09
React路由
手動實現一致性 Hash 演算法
2019-03-21
演算法
雜湊表(Hash)的應用
2014-12-16
實現一個自己的mvvm
2019-09-04
MVVM
實現一個jQuery的API
2018-07-16
jQueryAPI
實現一個完整的promise
2018-06-21
Promise
從零實現一個Vue表單驗證外掛
2019-03-04
Vue
如何實現一個跨庫連表SQL生成器？
2020-09-08
SQL
一致性hash演算法原理及go實現
2018-04-25
演算法Go
論如何用Vue實現一個彈窗-一個簡單的元件實現
2019-02-16
Vue元件
資料庫實現原理#4（Hash Join）
2020-04-13
資料庫
redis 雙寫實現策略 && hash取模
2019-03-02
Redis
consistent hash 原理，優化及實現
2018-11-26
優化
基於BKDRhash實現Hash演算法
2014-09-12
演算法
實現一個promise
2018-12-11
Promise
實現一個 Swiper
2017-07-17

一個hash表的實現

相關文章