演算法學習-雜湊表

HPhone發表於2014-01-26

    之前在大學學習資料結構的時候,學過陣列和連結串列。陣列的優點是可以直接定位,速度快,缺點是插入、刪除的效率慢了很多。連結串列可以快速地插入、刪除,但是不能直接定位,需要遍歷才可以。它們適用於不同的場景。

    有時候,又想快速的插入,又想快速的定位怎麼辦?這就需要把這兩者優點糅合起來,雜湊表就這麼產生了。

    雜湊表的原理:一個動態陣列,儲存著元素的地址。通過一個公式,把元素的key(唯一),算出一個數值index。陣列index位置就儲存這個元素的指標。

    這樣存資料以及取資料,都可以通過這個公式得到同一個位置。這個公式的目的,就是將key和陣列的位置對映起來。當然,沒有任何一個公式能夠保證key和陣列位置是一一對應的,也就是說,多個key算出來的index可能是同一個,這就是雜湊衝突,稍後再具體說明怎麼解決它。

    整個雜湊表,最重要的就是這個對映公式:它不但要能夠將字串變成數值,而且要讓結果平均分佈。最常用的就是Times33演算法(每讀入一個字元,就把目前的雜湊值乘以33再加上該字元):

// Times33 string hash: starts from the seed 5381 and, for every character up
// to the terminating zero, replaces the hash with hash * 33 + character
// (the multiplication is written as a shift plus an add).
inline UINT CMyMap::HashKey(LPCTSTR key) const
{
    UINT nHash = 5381;
    for (LPCTSTR pChar = key; *pChar; ++pChar)
    {
        nHash = (nHash << 5) + nHash + *pChar;
    }
    return nHash;
}

5381是一個經驗值,php就是使用這個值。

下面就是我寫的程式碼。

 

  1 #pragma once
  2 #include "Bucket.h"
  3 class HashTable
  4 {
  5 public:
  6     HashTable(void);
  7     ~HashTable(void);
  8     static unsigned long hash_inline(const char* arKey,unsigned int nKeyLength);
  9     bool add(const char* arKey,void* value);
 10     void* getData(const char* arKey);
 11     bool remove(const char* arKey);
 12     bool resize(unsigned int nSize);
 13     unsigned int nTableSize;
 14     unsigned int nTableMask;
 15     unsigned int nNumOfElements;
 16     Bucket *pBucketCursor;
 17     Bucket **arBuckets;
 18     Bucket *pListHead;
 19     Bucket *pListTail;
 20 private:
 21     void init();
 22 };
 23 #include "stdafx.h"
 24 #include "HashTable.h"
 25 #include <iostream>
 26 #include <string.h>
 27 using namespace std;
 28 typedef unsigned long ulong;
 29 typedef unsigned int uint;
 30 
 31 HashTable::HashTable(void)
 32 {
 33     init();
 34 }
 35 
 36 HashTable::~HashTable(void)
 37 {
 38     delete [] arBuckets;
 39 }
 40 
 41 ulong HashTable::hash_inline( const char* arKey,uint nKeyLength )
 42 {
 43     register ulong hash=5381;
 44 //     for (; nKeyLength >= 8; nKeyLength -= 8)
 45 //     {
 46 //         const char *arKeyTemp=arKey;
 47 //         for(int i=0;i!=8;++i){
 48 //             hash = ((hash << 5) + hash) + *arKey++;
 49 //             if(*arKey){
 50 //                 arKey=arKeyTemp;
 51 //             }
 52 //         }    
 53 // 
 54 //     }
 55 //     switch (nKeyLength) 
 56 //     {
 57 //         case 7: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */
 58 //         case 6: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */
 59 //         case 5: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */
 60 //         case 4: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */
 61 //         case 3: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */
 62 //         case 2: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */
 63 //         case 1: hash = ((hash << 5) + hash) + *arKey++; break;
 64 //         case 0: break;
 65 //     }
 66     while (*arKey)
 67     {
 68         hash=(hash<<5)+hash+*arKey++;
 69     }
 70     return hash;
 71 }
 72 
 73 void HashTable::init()
 74 {
 75     nTableSize=8;
 76     nTableMask=nTableSize-1;
 77     arBuckets=new Bucket*[nTableSize]();
 78     nNumOfElements=0;
 79 }
 80 
 81 bool HashTable::add(const char* arKey,void* value )
 82 {
 83     ulong h=hash_inline(arKey,nTableSize);
 84     uint nIndex=nTableMask&h;
 85     Bucket *pbucket=arBuckets[nIndex];
 86     if (pbucket==NULL)
 87     {
 88         pbucket=new Bucket(arKey,value,h);
 89         arBuckets[nIndex]=pbucket;
 90         ++nNumOfElements;
 91         if(nNumOfElements==1)
 92         {
 93             pListHead=pbucket;
 94             pListTail=pbucket;
 95         }else{
 96             pbucket->pListPre=pListTail;
 97             pListTail->pListNext=pbucket;
 98             pListTail=pbucket;
 99         }
100         //cout<<"add key "<<pbucket->arKey<<endl;
101     }else{
102         if(strcmp(pbucket->arKey,arKey)==0){
103             //cout<<"key "<<arKey<<" is existed"<<endl;
104             return false;
105         }else{
106             Bucket *pNewBucket=new Bucket(arKey,value,h);
107             while (pbucket->pNext!=NULL)
108             {
109                 pbucket=pbucket->pNext;
110             }
111             pbucket->pNext=pNewBucket;
112             pNewBucket->pPre=pbucket;
113             pNewBucket->pListPre=pListTail;
114             pListTail->pListNext=pNewBucket;
115             pListTail=pNewBucket;
116             //cout<<"key "<<pbucket->arKey<<" next key is "<<pNewBucket->arKey<<endl;
117             ++nNumOfElements;
118         }
119     }
120     if(nNumOfElements>=nTableSize){
121         resize(nTableSize*2);
122     }
123     return true;
124 }
125 
126 bool HashTable::resize( unsigned int nSize )
127 {
128     Bucket **arPBucketTemp=new Bucket*[nSize]();
129     Bucket *pListHeadTemp=NULL;
130     Bucket *pListTailTemp=NULL;
131     nTableSize=nSize;
132     nTableMask=nTableSize-1;
133     Bucket *pBucketCursorTemp=pListHead;
134     uint nNumOfElementsTemp=0;
135     //cout<<"--------------rehash-----------------"<<endl;
136     //cout<<"resize size:"<<nSize<<endl;
137     while (pBucketCursorTemp!=NULL)
138     {
139         /*//cout<<"resize pBucket key:"<<pBucketCursorTemp->arKey<<endl;*/
140         Bucket *pbucket=pBucketCursorTemp;
141         pBucketCursorTemp=pBucketCursorTemp->pListNext;
142         ulong h=hash_inline(pbucket->arKey,nTableSize);
143         pbucket->nKeyLength=nTableSize;
144         pbucket->h=h;
145         uint nIndex=h&nTableMask;
146         Bucket *pbucketindex=arPBucketTemp[nIndex];
147         if (pbucketindex==NULL)
148         {    
149             arPBucketTemp[nIndex]=pbucket;
150             ++nNumOfElementsTemp;
151             if(nNumOfElementsTemp==1)
152             {
153                 pListHeadTemp=pbucket;
154                 pListTailTemp=pbucket;
155                 pbucket->pListPre=NULL;
156                 pbucket->pListNext=NULL;
157                 pbucket->pNext=NULL;
158                 pbucket->pPre=NULL;
159             }else{
160                 pbucket->pListPre=pListTailTemp;
161                 pbucket->pListNext=NULL;
162                 pbucket->pPre=NULL;
163                 pbucket->pNext=NULL;
164                 pListTailTemp->pListNext=pbucket;
165                 pListTailTemp=pbucket;
166             }
167         }else{
168             if(strcmp(pbucket->arKey,pbucketindex->arKey)==0){
169                 //cout<<"key "<<pbucket->arKey<<" is existed"<<endl;
170                 return false;
171             }else{
172                 while (pbucketindex->pNext!=NULL)
173                 {
174                     pbucketindex=pbucketindex->pNext;
175                 }
176                 pbucketindex->pNext=pbucket;
177                 pbucket->pPre=pbucketindex;
178                 pbucket->pNext=NULL;
179                 pbucket->pListPre=pListTailTemp;
180                 pbucket->pListNext=NULL;
181                 pListTailTemp->pListNext=pbucket;
182                 pListTailTemp=pbucket;
183                 /*//cout<<"key "<<pbucketindex->arKey<<" next key is "<<pbucketindex->pNext->arKey<<endl;*/
184                 ++nNumOfElementsTemp;
185             }
186         }
187         
188     }
189     delete [] arBuckets;
190     arBuckets=arPBucketTemp;
191     pListTail=pListTailTemp;
192     pListHead=pListHeadTemp;
193     nNumOfElements=nNumOfElementsTemp;
194     //cout<<"--------------rehash end-----------------"<<endl;
195     return false;
196 }
197 
198 void* HashTable::getData( const char* arKey )
199 {
200     ulong h=hash_inline(arKey,nTableSize);
201     ulong nIndex=h&nTableMask;
202     Bucket *pbucket=arBuckets[nIndex];
203     if(pbucket==NULL){
204         return NULL;
205     }else{
206         while (pbucket!=NULL)
207         {
208             if(strcmp(pbucket->arKey,arKey)==0){
209                 return pbucket->pData;
210             }else{
211                 pbucket=pbucket->pNext;
212             }
213         }
214         return NULL;
215     }
216 }
217 
218 bool HashTable::remove( const char* arKey )
219 {
220     ulong h=hash_inline(arKey,nTableSize);
221     ulong nIndex=h&nTableMask;
222     Bucket *pbucket=arBuckets[nIndex];
223     if(pbucket==NULL){
224         return false;
225     }else{
226         while (pbucket!=NULL)
227         {
228             if(strcmp(pbucket->arKey,arKey)==0){
229                 if(pbucket==pListHead){
230                     pListHead=pbucket->pListNext;
231                     if(pListHead!=NULL){
232                         pbucket->pListNext->pListPre=NULL;
233                     }
234                 }
235                 if(pbucket==pListTail){
236                     pListTail=pbucket->pListPre;
237                     if(pListTail!=NULL){
238                         pListTail->pListNext=NULL;
239                     }
240                 }
241                 if(pbucket->pListPre!=NULL){
242                     pbucket->pListPre->pListNext=pbucket->pListNext;
243                 }
244                 if(pbucket->pListNext!=NULL){
245                     pbucket->pListNext->pListPre=pbucket->pListPre;
246                 }
247                 if(pbucket->pPre!=NULL){
248                     pbucket->pPre->pNext=pbucket->pNext;
249                 }else{
250                     arBuckets[nIndex]=pbucket->pNext;
251                 }
252                 if(pbucket->pNext!=NULL){
253                     pbucket->pNext->pPre=pbucket->pPre;
254                 }
255                 --nNumOfElements;
256                 delete pbucket;
257                 return true;
258             }else{
259                 pbucket=pbucket->pNext;
260             }
261         }
262         return false;
263     }
264 }
HashTable.cpp

 

 1 #pragma once
 2 class Bucket
 3 {
 4 public:
 5     Bucket(const char* arKey,void *value,unsigned long h);
 6     ~Bucket(void);
 7     unsigned long h;
 8     unsigned int nKeyLength;
 9     void *pData;
10     const char *arKey;
11     Bucket *pNext;
12     Bucket *pPre;
13     Bucket *pListNext;
14     Bucket *pListPre;
15 };
16 #include "stdafx.h"
17 #include "Bucket.h"
18 
19 
20 Bucket::Bucket(const char* _arKey,void *value,unsigned long h):arKey(_arKey)
21 {
22     this->pData=value;
23     this->h=h;
24     this->pNext=NULL;
25     this->pPre=NULL;
26     this->pListNext=NULL;
27     this->pListPre=NULL;
28 }
29 
30 
31 Bucket::~Bucket(void)
32 {
33 }
Bucket.cpp

 

 

  1 // hashtables.cpp : 定義控制檯應用程式的入口點。
  2 //
  3 
  4 #include "stdafx.h"
  5 #include <iostream>
  6 #include "HashTable.h"
  7 using namespace std;
  8 
  9 int _tmain(int argc, _TCHAR* argv[])
 10 {
 11     HashTable ht;
 12     ht.add("name","max");
 13     ht.add("name","max");
 14     ht.add("name2","max2");
 15     ht.add("name3","max3");
 16     ht.add("name4","max4");
 17     ht.add("name5","max5");
 18     ht.add("name6","max6");
 19     ht.add("name7","max7");
 20     ht.add("name8","max8");
 21     ht.add("name9","max9");
 22     ht.add("name10","max10");
 23     ht.add("name11","max11");
 24     ht.add("name12","max12");
 25     ht.add("name13","max13");
 26     ht.add("name14","max14");
 27     ht.add("name15","max15");
 28     ht.add("name16","max16");
 29     ht.add("name17","max17");
 30     ht.add("name18","max18");
 31     ht.add("name19","max19");
 32     ht.add("name20","max20");
 33     ht.add("name21","max21");
 34     ht.add("name22","max22");
 35     ht.add("name23","max23");
 36     ht.add("name24","max24");
 37     ht.add("name25","max25");
 38     ht.add("name26","max26");
 39     ht.add("name27","max27");
 40     ht.add("name28","max28");
 41     ht.add("name29","max29");
 42     cout<<"------------------------------"<<endl;
 43     Bucket *bucket=ht.pListHead;
 44     while(bucket!=NULL)
 45     {
 46         cout<<"bucket key:"<<bucket->arKey<<";bucket value:"<<(char*)bucket->pData<<";"<<endl;
 47         bucket=bucket->pListNext;
 48     }
 49     cout<<"------------------------------"<<endl;
 50     bucket=ht.pListTail;
 51     while(bucket!=NULL)
 52     {
 53         cout<<"bucket key:"<<bucket->arKey<<";bucket value:"<<(char*)bucket->pData<<";"<<endl;
 54         bucket=bucket->pListPre;
 55     }
 56     
 57     cout<<"------------------------------"<<endl;
 58     for(int i=0;i!=32;++i){
 59         Bucket *pbucket=ht.arBuckets[i];
 60         while(pbucket!=NULL){
 61             cout<<"bucket key:"<<pbucket->arKey<<";index:"<<i<<endl;
 62             pbucket=pbucket->pNext;
 63         }
 64     }
 65     cout<<"------------------------------"<<endl;
 66     cout<<"bucket key "<<"name getData:"<<(char*)ht.getData("name")<<endl;
 67     cout<<"bucket key "<<"name1 getData:"<<ht.getData("name1")<<endl;
 68     cout<<"bucket key "<<"name5 getData:"<<(char*)ht.getData("name5")<<endl;
 69     cout<<"bucket key "<<"name10 getData:"<<(char*)ht.getData("name10")<<endl;
 70     cout<<"bucket key "<<"name15 getData:"<<(char*)ht.getData("name15")<<endl;
 71     cout<<"bucket key "<<"name20 getData:"<<(char*)ht.getData("name20")<<endl;
 72     cout<<"bucket key "<<"name29 getData:"<<(char*)ht.getData("name29")<<endl;
 73     cout<<"------------------------------"<<endl;
 74     cout<<"bucket key "<<"name delete:"<<ht.remove("name")<<endl;
 75     cout<<"bucket key "<<"name1 delete:"<<ht.remove("name1")<<endl;
 76     cout<<"bucket key "<<"name15 delete:"<<ht.remove("name15")<<endl;
 77     cout<<"bucket key "<<"name20 delete:"<<ht.remove("name20")<<endl;
 78     cout<<"bucket key "<<"name10 delete:"<<ht.remove("name10")<<endl;
 79     cout<<"bucket key "<<"name5 delete:"<<ht.remove("name5")<<endl;
 80     cout<<"bucket key "<<"name29 delete:"<<ht.remove("name29")<<endl;
 81     cout<<"------------------------------"<<endl;
 82     bucket=ht.pListHead;
 83     while(bucket!=NULL)
 84     {
 85         cout<<"bucket key:"<<bucket->arKey<<";bucket value:"<<(char*)bucket->pData<<";"<<endl;
 86         bucket=bucket->pListNext;
 87     }
 88     cout<<"------------------------------"<<endl;
 89     for(int i=0;i!=32;++i){
 90         Bucket *pbucket=ht.arBuckets[i];
 91         while(pbucket!=NULL){
 92             cout<<"bucket key:"<<pbucket->arKey<<";index:"<<i<<endl;
 93             pbucket=pbucket->pNext;
 94         }
 95     }
 96     cout<<"elemetes num "<<ht.nNumOfElements<<endl;
 97     cout<<"------------------------------"<<endl;
 98     int num=10000500;
 99     for(int i=0;i!=num;++i){
100         char *key=new char[10]();
101         for(int j=0;j!=9;++j){
102             int c=rand()%26+97;
103             key[j]=c;
104         }
105         key[9]=0;
106         ht.add(key,key);
107     }
108     cout<<"------------------------------"<<endl;
109 //     ht.add("name30","max30");
110 //     ht.add("name31","max31");
111 //     ht.add("name32","max32");
112 //     ht.add("name33","max33");
113 //     ht.add("name34","max34");
114 //     ht.add("name35","max35");
115 //     ht.add("name36","max3");
116 //     for(int i=0;i!=32;++i){
117 //         Bucket *pbucket=ht.arBuckets[i];
118 //         while(pbucket!=NULL){
119 //             cout<<"bucket key:"<<pbucket->arKey<<";index:"<<i<<endl;
120 //             pbucket=pbucket->pNext;
121 //         }
122 //     }
123 //     cout<<"elemetes num "<<ht.nNumOfElements<<endl;
124 //     cout<<"------------------------------"<<endl;
125 //     num=5000;
126 //     for(int i=0;i!=num;++i){
127 //         char *key=new char[10]();
128 //         for(int j=0;j!=9;++j){
129 //             int c=rand()%26+97;
130 //             key[j]=c;
131 //         }
132 //         key[9]=0;
133 //         cout<<"main key:"<<key<<endl;
134 //         ht.add(key,key);
135 //         cout<<"main end key:"<<key<<endl;
136 //     }
137 //     cout<<"elemetes num "<<ht.nNumOfElements<<endl;
138 //     cout<<"------------------------------"<<endl;
139 //     return 0;
140 }
main.cpp

 

相關文章