【字串演算法】字典樹詳解

RioTian發表於2020-12-01

原文網址 : https://www.cnblogs.com/RioTian/p/14070740.html

字典樹

　　字典樹，又稱單詞查詢樹，Trie樹，是一種樹形結構，是一種雜湊樹的變種。典型應用是用於統計，排序和儲存大量的字串（但不僅限於字串），所以經常被搜尋引擎系統用於文字詞頻統計。它的優點是：利用字串的公共字首來節約儲存空間，最大限度地減少無謂的字串比較，查詢效率比雜湊表高。
　　字典樹與字典很相似,當你要查一個單詞是不是在字典樹中,首先看單詞的第一個字母是不是在字典的第一層,如果不在,說明字典樹裡沒有該單詞,如果在就在該字母的孩子節點裡找是不是有單詞的第二個字母,沒有說明沒有該單詞,有的話用同樣的方法繼續查詢.字典樹不僅可以用來儲存字母,也可以儲存數字等其它資料。

Trie的資料結構定義：

#define MAX 26 /* branching factor: number of child slots in every node */

/*
 * One node of the trie.
 *
 * next[i] is the child reached through symbol number i, or NULL when no
 * stored string continues with that symbol.
 * v is a per-node payload whose meaning is up to the user; the insertion
 * template below uses it as a pass-through counter and stores -1 in the node
 * where a string ends.
 *
 * The struct tag is used for the self-reference because the typedef name only
 * becomes visible after the closing brace.
 */
typedef struct Trie {
    struct Trie* next[MAX];
    int v; /* adapt as needed */
} Trie;

/*
 * Root of the trie. It is not allocated here: before inserting, the caller
 * must make it point to a node whose next[] entries are all NULL.
 */
Trie* root;

　　next 陣列的大小表示每個節點最多有多少種不同的子節點（即字符集的大小）：如果只有小寫字母，26 即可；若包含大小寫字母，則是 52；若再加上數字，則是 62，需根據題意來確定。v 可以用來記錄經過該節點的字串數量（即以「從根到該節點的路徑」為字首的字串個數），其具體含義應根據需要靈活調整。

　　Trie的查詢（最主要的操作）：

　　(1) 每次都從根結點開始搜尋；
　　(2) 取得要查詢關鍵詞的第一個字母，並根據該字母選擇對應的子樹，轉到該子樹繼續進行檢索；
　　(3) 在相應的子樹上，取得要查詢關鍵詞的第二個字母，並進一步選擇對應的子樹進行檢索；
　　(4) 迭代上述過程……
　　(5) 在某個結點處，關鍵詞的所有字母已被取出，則讀取附在該結點上的資訊，即完成查詢。

　　這裡給出生成字典樹和查詢的模版：

生成字典樹：

/*
 * Insert str into the trie rooted at the global `root`.
 *
 * Every character is mapped to a child slot by subtracting '0', so the
 * accepted alphabet is the MAX consecutive characters starting at '0'.
 * A string containing any other character is rejected as a whole and the
 * trie is left untouched (indexing next[] with it would be out of bounds).
 *
 * Node payload convention:
 *   - a freshly created node starts with v == 1;
 *   - v is incremented each time a later insertion passes through the node;
 *   - the node where a string ends gets v == -1. That marker is never
 *     incremented afterwards, so a longer string inserted later cannot erase
 *     the fact that a shorter one ends there.
 *
 * Does nothing when root or str is NULL. If an allocation fails the insertion
 * stops where it is; the nodes created so far stay in the trie.
 */
void createTrie(char* str) {
    if (root == NULL || str == NULL)
        return;

    size_t len = strlen(str);

    /* Validate up front so a rejected string does not leave a partial path. */
    for (size_t i = 0; i < len; ++i) {
        int id = str[i] - '0';
        if (id < 0 || id >= MAX)
            return;
    }

    Trie* p = root;
    for (size_t i = 0; i < len; ++i) {
        int id = str[i] - '0';
        Trie* child = p->next[id];
        if (child == NULL) {
            child = (Trie*)malloc(sizeof *child);
            if (child == NULL)
                return; /* out of memory */
            child->v = 1; /* first string passing through this node */
            for (int j = 0; j < MAX; ++j)
                child->next[j] = NULL;
            p->next[id] = child;
        } else if (child->v != -1) {
            child->v++; /* keep an existing end marker intact */
        }
        p = child;
    }
    p->v = -1; /* mark the node where the string ends */
}

查詢:

int findTrie(char* str) {
    int len = strlen(str);
    Trie* p = root;
    for (int i = 0; i < len; ++i) {
        int id = str[i] - '0';
        p = p->next[id];
        if (p == NULL)  //若為空集，表示不存以此為字首的串
            return 0;
        if (p->v == -1)  //字符集中已有串是此串的字首
            return -1;
    }
    return -1;  //此串是字符集中某串的字首
}

例題

hdu 1251 統計難題

　　題意：先給出一批單詞，再給出若干個查詢字串，對每個查詢輸出以該字串為字首的單詞數量。
　　字典樹的模板題：先建字典樹，然後再查詢每個給定的字串。

程式碼如下:

#include <string.h>
#include <iostream>
using namespace std;

// Alphabet of the trie: `sonsum` consecutive characters starting at `base`.
const int sonsum = 26;
const int base = 'a';

// Input buffers used by main(): s1 receives one dictionary line, ss one query.
char s1[12];
char ss[12];

// One node of the trie.
struct Trie {
    int num;                   // pass-through counter; a new node starts at 1
    bool flag;                 // true when an inserted word ends at this node
    struct Trie* son[sonsum];  // one child slot per letter, NULL when absent

    // A fresh node has been reached once, ends no word and has no children.
    Trie() : num(1), flag(false) {
        // Null the slots one by one: memset's fill argument is an int, so
        // handing it the pointer constant NULL is ill-typed.
        for (int i = 0; i < sonsum; ++i)
            son[i] = NULL;
    }
};

// Return a heap-allocated, default-constructed trie node.
// The function itself never frees it.
Trie* NewTrie() {
    return new Trie();
}

// Insert the NUL-terminated word s into the trie rooted at `root`.
//
// For every character the matching child is either created (a new node
// starts with num == 1) or, if it already exists, has its num incremented.
// The node reached after the last character is flagged as a word end; for an
// empty s that node is `root` itself.
//
// NOTE(review): characters are not validated; this assumes every character
// of s lies in base .. base + sonsum - 1 -- confirm against the caller.
void Inset(Trie* root, char* s) {
    Trie* node = root;
    for (; *s; ++s) {
        Trie*& slot = node->son[*s - base];
        if (slot != NULL)
            slot->num++;
        else
            slot = NewTrie();
        node = slot;
    }
    node->flag = true;
}

// Follow the characters of s from `root` and return the num stored in the
// node reached after the last character.
//
// Returns 0 as soon as the path does not exist. A character outside
// base .. base + sonsum - 1 cannot be on any path, so it also yields 0
// instead of indexing outside son[]. For an empty s the result is root->num.
int search(Trie* root, char* s) {
    Trie* node = root;
    for (; *s; ++s) {
        const int idx = *s - base;
        if (idx < 0 || idx >= sonsum)
            return 0;
        node = node->son[idx];
        if (node == NULL)
            return 0;
    }
    return node->num;
}

// Reads a dictionary (one word per line, terminated by an empty line or end
// of input), then answers every following whitespace-separated query with
// the value search() returns for it, one result per line.
int main() {
    Trie* root = NewTrie();
    root->num = 0;  // the root starts at 0, not at the constructor's 1

    // getline() never writes past the buffer, unlike gets(). A line that
    // does not fit sets failbit, which ends this loop and, because the
    // stream stays failed, the query loop as well.
    while (cin.getline(s1, sizeof(s1)) && strcmp(s1, "") != 0) {
        Inset(root, s1);
    }

    for (;;) {
        // Limit the next extraction to the buffer size; the stream resets
        // the width to 0 after every extraction, so set it each time.
        cin.width(sizeof(ss));
        if (!(cin >> ss))
            break;
        cout << search(root, ss) << endl;
    }

    return 0;
}

poj 2001 Shortest Prefixes

　　題意：找出能唯一標示一個字串的最短字首，如果找不出，就輸出該字串。
　　用字典樹即可
　　
程式碼如下：

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
using namespace std;

// Word storage filled by main(): 1005 rows of 25 bytes each, i.e. room for
// 24 characters plus the terminating NUL per word.
char list[1005][25];

// One trie node: a counter plus 26 child slots.
struct node {
    int count;         // starts at 0
    node* childs[26];  // all NULL in a fresh node

    node() : count(0) {
        for (int k = 0; k != 26; ++k)
            childs[k] = NULL;
    }
};

// Root of the trie, allocated during static initialisation.
node* root = new node;

// Scratch pointers written by insert() and search(): the node currently
// being visited and the most recently allocated node.
node* current;
node* newnode;

// Insert str into the trie rooted at the global `root`, creating missing
// nodes and incrementing the count of every node on the path (the root's
// own count is not touched).
//
// Side effects: leaves the global `current` on the node of the last
// character, and `newnode` on the most recently allocated node.
//
// NOTE(review): characters are not validated; this assumes str consists of
// 'a'..'z' only -- confirm against the caller.
void insert(char* str) {
    // Measure once: calling strlen in the loop condition rescans the string
    // on every iteration and compares a signed index with size_t.
    const size_t len = strlen(str);

    current = root;
    for (size_t i = 0; i < len; i++) {
        const int m = str[i] - 'a';
        if (current->childs[m] == NULL) {
            newnode = new node;
            current->childs[m] = newnode;
        }
        current = current->childs[m];
        ++(current->count);
    }
}

// Print "<str> <prefix>\n", where <prefix> is the shortest leading part of
// str that ends on a node whose count is 1.
//
// The whole of str is printed as the prefix when no such node is found,
// which also covers an empty str, a str whose path is missing from the trie,
// and a str containing a character outside 'a'..'z'.
//
// The prefix is printed straight from str with a length-limited %s, so no
// fixed-size scratch buffer is needed and a long str cannot overflow one.
//
// Side effect: uses the global `current` as its cursor.
void search(char* str) {
    const size_t len = strlen(str);
    size_t shown = len;  // fall back to the whole word

    current = root;
    for (size_t i = 0; i < len; i++) {
        const int m = str[i] - 'a';
        if (m < 0 || m >= 26)
            break;  // not a letter the trie can hold
        current = current->childs[m];
        if (current == NULL)
            break;  // path missing: str was never inserted
        if (current->count == 1) {  // this prefix identifies str uniquely
            shown = i + 1;
            break;
        }
    }
    printf("%s %.*s\n", str, (int)shown, str);
}

// Read whitespace-separated words until end of input (or until the table is
// full), insert each into the trie, then print for every word, in input
// order, the line produced by search().
int main() {
    const int capacity = sizeof(list) / sizeof(list[0]);
    int t = 0;

    // "%24s" keeps each word inside its 25-byte row (a longer token is split
    // into several words); the row bound keeps t inside the table.
    while (t < capacity && scanf("%24s", list[t]) == 1) {
        insert(list[t]);
        t++;
    }
    for (int i = 0; i < t; i++)
        search(list[i]);
    return 0;
}

hdu 4825 Xor Sum

　　題意：先給出一些數字，再詢問 Q 個問題；每個問題給出一個數字，要求從之前給出的數字中找出與它異或結果最大的那個數並輸出。
　　構造字典樹，高位在前，低位在後，然後從字典樹的根向深處逐位查詢：每一位優先走與查詢數字該位相反的分支。

程式碼如下：

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <functional>
#include <iostream>
#include <map>
#include <queue>
#include <set>
#include <string>
#include <vector>

using namespace std;
// Shorthand for long long.
typedef long long LL;
// Shorthand for a (long long, int) pair.
typedef pair<LL, int> PLI;

// 200005. NOTE(review): not referenced by the code visible below -- confirm
// whether it is still needed.
const int MX = 2e5 + 5;
// 1061109567. NOTE(review): likewise not referenced by the code visible below.
const int INF = 0x3f3f3f3f;

// Node of a binary trie: Next[0] and Next[1] are the children for a 0 bit
// and a 1 bit; both are NULL in a fresh node.
struct Node {
    Node* Next[2];

    Node() {
        Next[0] = NULL;
        Next[1] = NULL;
    }
};

// Insert the 32 bits of S into the binary trie rooted at `root`, most
// significant bit first, creating missing nodes on the way. Every inserted
// value therefore produces a path of exactly 32 edges.
//
// The bits are read from an unsigned copy of S: building the mask with
// `1 << 31` on a signed int shifts into the sign bit.
// NOTE(review): assumes int is 32 bits wide, as the loop bound already does.
void trie_add(Node* root, int S) {
    const unsigned bits = static_cast<unsigned>(S);
    Node* p = root;
    for (int i = 31; i >= 0; i--) {
        const int id = static_cast<int>((bits >> i) & 1u);
        if (p->Next[id] == NULL) {
            p->Next[id] = new Node();
        }
        p = p->Next[id];
    }
}

// Return the value stored in the trie whose XOR with S is largest.
//
// Walks from the most significant bit down. At each level it prefers the
// child for the bit opposite to S's bit (that makes the XOR bit 1) and falls
// back to the matching child otherwise. The bits of the chosen path form the
// result.
//
// Returns 0 when the trie holds no values (root has no children) instead of
// dereferencing a NULL child.
//
// All bit work is done on unsigned values so that bit 31 is handled without
// shifting into the sign bit of an int.
// NOTE(review): assumes int is 32 bits wide, as the loop bound already does.
int trie_query(Node* root, int S) {
    const unsigned bits = static_cast<unsigned>(S);
    unsigned best = 0;
    Node* p = root;
    for (int i = 31; i >= 0; i--) {
        const unsigned id = (bits >> i) & 1u;
        unsigned take = id ^ 1u;  // preferred: the opposite bit
        if (p->Next[take] == NULL)
            take = id;  // only the matching branch can exist
        if (p->Next[take] == NULL)
            break;  // no branch at all: the trie is empty
        best |= take << i;
        p = p->Next[take];
    }
    // Values with bit 31 set map back to the negative int with the same bits.
    return static_cast<int>(best);
}

int main() {
    // freopen("input.txt", "r", stdin);
    int T, n, Q, t, ansk = 0;
    scanf("%d", &T);
    while (T--) {
        scanf("%d%d", &n, &Q);
        Node* root = new Node();

        for (int i = 1; i <= n; i++) {
            scanf("%d", &t);
            trie_add(root, t);
        }

        printf("Case #%d:\n", ++ansk);
        while (Q--) {
            scanf("%d", &t);
            printf("%d\n", trie_query(root, t));
        }
    }
    return 0;
}

字串匹配演算法(二)-BM演算法詳解
2021-07-31
字串匹配演算法
Trie樹，字典樹
2024-08-17
codevs 4189 字典【字典樹】
2018-09-03
dev
字典樹
2024-08-22
Python中字典使用詳解
2019-04-02
Python
trie字典樹
2023-09-25
字典樹Trie
2024-11-12
字典樹(Trie)
2024-07-09
高效字串匹配演算法——BM 演算法詳解（C++）
2023-02-14
字串匹配演算法C++
字串形式的列表，字典轉列表，字典
2024-03-26
字串
字串演算法--$\mathcal{KMP，Trie}$樹
2023-03-29
字串演算法KMP
字典樹學習
2018-09-02
字典樹專題
2024-04-20
貓樹詳解
2023-11-03
LSM 樹詳解
2020-10-30
（IOS）JSON字串轉字典
2018-07-31
iOSJSON字串
字典樹(字首樹)簡單實現
2020-11-15
紅黑樹詳解
2021-02-21
InnoDB資料字典詳解-系統表
2018-04-05
字串、列表、字典內建方法
2024-05-14
字串
字串列表字典互相轉換
2020-09-27
字串
演算法之樹（一，B-樹原理詳解）(Java版)-持續更新補充
2018-08-11
演算法Java
Javascript之字串拼接詳解
2018-05-25
JavaScript字串
KMP字串模式匹配詳解
2020-04-07
KMP字串模式
jQuery字串擷取詳解
2018-12-31
jQuery字串
資料結構之樹（線段樹，字典樹）
2020-10-28
資料結構
Android程式設計師會遇到的演算法(part 5 字典樹)
2019-04-07
Android程式設計師演算法
P8306 【模板】字典樹
2024-03-30
一些“字典樹”典
2024-05-16
資料結構-字典樹
2020-01-31
資料結構
一文搞懂字典樹
2021-10-12
樹狀陣列詳解
2020-08-01
陣列
線段樹（超詳解）
2024-10-11
快速生成樹原理詳解
2022-03-23
Python 列表、元組、字典及集合操作詳解
2018-09-04
Python
字典樹及其C++實現
2019-04-08
C++
演算法導論學習--紅黑樹詳解之刪除(含完整紅黑樹程式碼)
2020-04-04
演算法
樹莓派wiringPi庫詳解
2024-04-19
樹莓派

【字串演算法】字典樹詳解

字典樹

例題

相關文章