雜湊表 ADT 分離連結法【資料結構與演算法分析 c 語言描述】

前面看了

連結串列 ADT
棧 ADT
佇列 ADT
樹 ADT

每種 ADT 都有自己特有的優勢、劣勢以及應用場景，雜湊表也不例外。
雜湊表
常數平均時間執行插入、查詢、刪除操作。不支援像二叉查詢樹的 find_min、find_max、以及排序等等，相對來說它在插入、查詢、刪除上面的時間複雜度是相當優異的（常數）。

2.1 雜湊表的基本概念

關鍵字
雜湊函式
雜湊表
衝突

一個關鍵字通過雜湊函式將之對映到一個固定長度的雜湊表裡面的一個單元上。
例如：關鍵字 x 經雜湊函式計算得到 f(x) = 0，此時就把 x 對映到雜湊表的 0 號單元上。
如果有兩個不同的關鍵字 a、b，且 f(a) = 0、f(b) = 0，也就是兩個關鍵字通過雜湊函式處理後得到相同的值，這叫衝突。

雜湊函式的設計跟編寫尤為重要，它決定了關鍵字對映到雜湊表的規則：衝突是否儘量少、能否均勻分佈到雜湊表中。通常會把表長設計為素數，同時對表長取模來計算雜湊值。

這樣一來，雜湊表的主要時間就消耗在雜湊函式的計算上面。雜湊函式的設計原則：

複雜度低。
雜湊的結果接近均勻分佈。

關鍵字：1 <= 長度 <= 8 的字串。
雜湊函式：根據 Horner 法則計算以 32 為基數的多項式，然後對表長取模（表長取素數）。
衝突解決：分離連結法。
對分佈在相同單元的關鍵字採用單連結串列來儲存以解決衝突。
hash_sep.h 標頭檔案

/* Bucket index type returned by hash(). */
typedef unsigned int index;
/* Keys stored in the table are C strings. */
typedef char* element_type;

/* Forward declarations; both structures are defined further down. */
struct list_node;
/* A position is a pointer to one node of a bucket's chain. */
typedef struct list_node *position;
struct hash_table_node;
/* A hash_table is a pointer to the table descriptor. */
typedef struct hash_table_node *hash_table;

/* Maps the NUL-terminated string key to a bucket index in a table of
 * table_size buckets. */
index hash(const char *key, int table_size);
/* Allocates a table whose bucket count is next_prime(table_size) and gives
 * every bucket a header node. Terminates the program if allocation fails. */
hash_table initialize_table(int table_size);
/* Releases the storage of table h. */
void destory_table(hash_table h);
/* Removes key from h if present, freeing the stored key string and its node. */
void delete(element_type key, hash_table h);
/* Returns the node of h that holds key, or NULL when there is none. */
position find(element_type key, hash_table h);
/* Appends key to the end of its bucket's chain unless find() already locates
 * it. The pointer itself is stored; the string is not copied. */
void insert(element_type key, hash_table h);
/* Returns the key stored at p; a NULL p reports an error and exits. */
element_type retrieve(position p);
/* Returns the bucket count to use for a requested table_size (intended to be
 * a prime number). */
int next_prime(int table_size);

/* Prints every bucket index followed by the keys chained in that bucket. */
void print_hash_table(hash_table h);
/* Inserts len randomly generated strings of 1 to 8 letters into h. */
void random_hash_table(hash_table h, int len);

/* Small demonstration driver that exercises the functions above. */
void test();

/* One node of a bucket's singly linked chain. The same structure is also
 * allocated as the header node at the front of every bucket. */
struct list_node
{
    element_type element; /* key string held by this node */
    position next;        /* following node, or NULL at the end of the chain */
};

/* A list is referred to through a pointer to its header node. */
typedef position list;

/* Table descriptor: the number of buckets and the array of bucket lists. */
struct hash_table_node
{
    int table_size;  /* number of entries in list_arr */
    list *list_arr;  /* array of table_size header-node pointers */
};

hash_sep.c 實現

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include "hash_sep.h"

/*
 * error(str) / fatal_error(str): print str followed by a newline to stderr
 * and terminate the program with exit status 1.
 *
 * The expansion is a single parenthesised comma expression so that it stays
 * one unit wherever it is used (statement, branch of ?:, operand of another
 * expression); without the outer parentheses only the fprintf() half would
 * bind to a surrounding operator and exit(1) would run unconditionally.
 */
#define error(str) fatal_error(str)
#define fatal_error(str) (fprintf(stderr, "%s\n", (str)), exit(1))

/*
 * Computes the bucket index for a NUL-terminated string.
 *
 * The characters are folded with Horner's rule using 32 as the base
 * (a left shift by 5), and the result is reduced modulo table_size.
 * Arithmetic is done in unsigned int, so overflow wraps instead of being
 * undefined.
 *
 * key        - NUL-terminated string to hash; must not be NULL.
 * table_size - number of buckets; expected to be positive.
 *
 * Returns a value in [0, table_size). For a non-positive table_size the
 * modulo would be undefined or meaningless, so 0 is returned instead.
 */
index hash(const char *key, int table_size)
{
    unsigned int hash_value = 0;

    if (table_size <= 0)
        return 0;

    while (*key != '\0') {
        // Go through unsigned char so bytes >= 0x80 contribute the same
        // value whether plain char is signed or unsigned on this platform.
        hash_value = (hash_value << 5) + (unsigned char)*key++;
    }
    return hash_value % (unsigned int)table_size;
}

hash_table initialize_table(int table_size)
{
    hash_table h;
    int i;

    h = (hash_table)malloc(sizeof(struct hash_table_node));
    if (NULL == h)
        fatal_error("Out of space");

    h->table_size = next_prime(table_size); // 保證是素數

    // 陣列分配空間
    h->list_arr = malloc(sizeof(list) * h->table_size);
    if (NULL == h->list_arr)
        fatal_error("Out of space");

    // 為 list_arr 的每一項分配表頭
    for (int i = 0; i < h->table_size; i++) {
        h->list_arr[i] = (list)malloc(sizeof(struct list_node));
        if (NULL == h->list_arr[i])
            fatal_error("out of space");
        else
            h->list_arr[i]->next = NULL;
    }

    return h;

}

/*
 * Releases a table created by initialize_table().
 *
 * For every bucket the chained nodes are freed together with the key strings
 * they hold (delete() frees stored keys the same way, so the table is the
 * owner of inserted keys), then the header node, and finally the bucket
 * array and the table descriptor. Passing NULL is a no-op.
 */
void destory_table(hash_table h)
{
    if (NULL == h)
        return;

    for (int i = 0; i < h->table_size; i++) {
        position node = h->list_arr[i]->next;

        while (NULL != node) {
            position following = node->next; // read before the node is freed
            free(node->element);
            free(node);
            node = following;
        }
        free(h->list_arr[i]); // header node; it owns no key
    }

    free(h->list_arr);

    free(h);
}

/*
 * Returns non-zero when a and b are the same key: either the same pointer or
 * two NUL-terminated strings with identical contents. A NULL pointer only
 * equals another NULL pointer. Written locally so that no additional
 * #include is required by this file.
 */
static int hash_key_equal(const char *a, const char *b)
{
    if (a == b)
        return 1;
    if (NULL == a || NULL == b)
        return 0;
    while (*a != '\0' && *a == *b) {
        a++;
        b++;
    }
    return *a == *b;
}

/*
 * Removes key from table h.
 *
 * The bucket is walked once while remembering the predecessor, so the node
 * can be unlinked without a second search. Keys are compared by string
 * contents, consistent with hash(), which hashes contents. When a match is
 * found the stored key string and the node are freed; if the caller passed
 * that very pointer as key, it must not be used afterwards. Nothing happens
 * when the key is absent.
 */
void delete(element_type key, hash_table h)
{
    position prev, pos;

    prev = h->list_arr[hash(key, h->table_size)]; // start at the header node
    while (NULL != prev->next && !hash_key_equal(key, prev->next->element))
        prev = prev->next;

    pos = prev->next;
    if (NULL == pos)
        return;

    prev->next = pos->next;
    free(pos->element);
    free(pos);
}

/*
 * Looks key up in table h.
 *
 * Keys are compared by string contents rather than by address, so any string
 * equal to a stored key locates it.
 *
 * Returns the node holding the key, or NULL when the key is not present.
 */
position find(element_type key, hash_table h)
{
    position p;
    list l;

    l = h->list_arr[hash(key, h->table_size)];
    p = l->next; // skip the header node
    while (NULL != p && !hash_key_equal(key, p->element))
        p = p->next;

    return p;
}

/*
 * Adds key to table h unless find() already locates it.
 *
 * A new node is allocated (allocation failure is fatal), the key pointer is
 * stored in it as-is without copying the string, and the node is linked
 * after the last node of the key's bucket.
 */
void insert(element_type key, hash_table h)
{
    position new_node, tail;

    // Already present: leave the table untouched.
    if (find(key, h) != NULL)
        return;

    new_node = (position)malloc(sizeof(struct list_node));
    if (new_node == NULL)
        fatal_error("out of space");
    new_node->element = key;
    new_node->next = NULL;

    // Walk from the bucket's header node to the current last node.
    tail = h->list_arr[hash(key, h->table_size)];
    for (; tail->next != NULL; tail = tail->next)
        ;
    tail->next = new_node;
}

/*
 * Returns the key stored in node p.
 * A NULL p is reported through error(), which prints the message and exits.
 */
element_type retrieve(position p)
{
    if (!p) {
        error("NULL position");
    }
    return p->element;
}

/*
 * Chooses the prime bucket count for a requested table size.
 *
 * Despite its name the search runs downwards: the result is the largest
 * prime that does not exceed table_size. Requests below 2 (including zero
 * and negative values) yield 2, the smallest prime, so the result is always
 * a usable, positive table size.
 *
 * Primality is checked by trial division. The divisor restarts at 2 for
 * every candidate; carrying it over from the previous candidate would skip
 * small factors and accept composite numbers. The bound is written as
 * divisor <= candidate / divisor to avoid floating point and overflow.
 */
int next_prime(int table_size)
{
    int candidate, divisor;

    for (candidate = table_size; candidate > 2; candidate--) {
        for (divisor = 2; divisor <= candidate / divisor; divisor++) {
            if (candidate % divisor == 0)
                break;
        }
        // The loop ran to completion, so no divisor was found.
        if (divisor > candidate / divisor)
            return candidate;
    }

    return 2;
}

/*
 * Dumps table h to stdout, one line per bucket: the bucket index, "=>",
 * then each key of that bucket's chain preceded by a tab.
 */
void print_hash_table(hash_table h)
{
    for (int bucket = 0; bucket < h->table_size; bucket++) {
        position node;

        printf("%d =>", bucket);
        // Start after the header node, which holds no key.
        for (node = h->list_arr[bucket]->next; node != NULL; node = node->next)
            printf("\t%s", node->element);
        printf("\n");
    }
}

/*
 * Inserts len randomly generated keys into table h.
 *
 * Each key is a freshly allocated string of 1 to 8 characters drawn from the
 * 52 ASCII letters. The generator is seeded from the current time on every
 * call. A key accepted by insert() becomes the table's property; a key that
 * insert() did not store (because find() already located an equal one) is
 * freed here so it does not leak. Allocation failure is fatal.
 */
void random_hash_table(hash_table h, int len)
{
    static const char dictionary[] =
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    const int dictionary_len = (int)(sizeof dictionary - 1); // without the NUL
    int str_len, i, j;

    srand((unsigned)time(NULL));
    for (i = 0; i < len; i++) {
        char *str;
        position stored;

        str_len = rand() % 8 + 1; // 1-8
        str = malloc((size_t)str_len + 1);
        if (NULL == str)
            fatal_error("out of space");
        for (j = 0; j < str_len; j++) {
            str[j] = dictionary[rand() % dictionary_len];
        }
        str[str_len] = '\0';

        insert(str, h);

        // Keep str only when the table really holds this very pointer.
        stored = find(str, h);
        if (NULL == stored || stored->element != str)
            free(str);
    }
}

/*
 * Demonstration driver: builds a table, inserts and deletes the key "abc",
 * fills the table with 7 random keys, printing the table after each step,
 * and finally releases the table.
 */
void test()
{
    hash_table h;
    char *str;

    h = initialize_table(9);
    printf("\t\tinsert abc into hash table.\n");
    str = malloc(4);
    if (NULL == str)
        fatal_error("out of space");
    str[0] = 'a';
    str[1] = 'b';
    str[2] = 'c';
    str[3] = '\0';
    insert(str, h);
    print_hash_table(h);
    printf("\t\tdelete abc.\n");
    delete(str, h);
    str = NULL; // delete() freed the stored key, which was this pointer
    print_hash_table(h);
    random_hash_table(h, 7);
    printf("\t\ta random hash table\n");
    print_hash_table(h);
    destory_table(h);
}

/* Program entry point: runs the demonstration and reports success. */
int main(int argc, char const *argv[])
{
    // Command-line arguments are not used.
    (void)argc;
    (void)argv;

    test();

    return 0;
}

file

雜湊表
雜湊函式
關鍵字
衝突
分離連結法解決衝突
如何設計優秀的雜湊函式
c 語言 char 跟字串
素數
以上是涉及到的知識點

高度自律，深度思考，以勤補拙

雜湊表 ADT 分離連結法【資料結構與演算法分析 c 語言描述】

2.1 雜湊表的基本概念

相關文章