Trie tree實踐

weixin_33816946發表於2019-01-08

1、Trie樹

Trie樹即字典樹或字首樹,是一種利用字串的公共字首來儲存與查詢字串集合的樹形結構:從根節點到某個節點的路徑對應一個字首。

2、實踐

程式碼實踐如下:

  1 package cn.edu.buaa.trie;
  2 
  3 import java.util.HashSet;
  4 
  5 /**
  6  * @author zsm
  7  * @date 2016年10月25日 上午11:03:13
  8  * @version 1.0
  9  * @parameter
 10  * @return
 11  */
 12 public class Trie {
 13     private TrieNode trieRoot;
 14     private int treeSize;
 15 
 16     public Trie() {
 17         trieRoot = new TrieNode();
 18         treeSize = 0;
 19     }
 20 
 21     public TrieNode getRoot() {
 22         return trieRoot;
 23     }
 24 
 25     public int getTreeSize() {
 26         return treeSize;
 27     }
 28 
 29     /**
 30      * 新增單詞
 31      */
 32     public void addWord(String word, int wordId) {
 33         addWord(trieRoot, word, wordId);
 34     }
 35 
 36     public void addWord(TrieNode root, String word, int wordId) {
 37         // 輸入單詞為空
 38         if (null == word || word.length() == 0) {
 39             return;
 40         }
 41 
 42         // 確定第一個字元在放在哪個孩子節點中
 43         int k = TrieNode.getCharPosition(word.charAt(0));
 44 
 45         // 該孩子為空,初始化
 46         if (root.childNodes[k] == null) {
 47             root.childNodes[k] = new TrieNode();
 48             treeSize++;
 49             root.childNodes[k].nodeChar = word.charAt(0);
 50         }
 51         // 單詞出現在該孩子節點中
 52         root.childNodes[k].wordSet.add(wordId);
 53 
 54         word = word.substring(1);
 55         // 說明是最後一個字元,該詞詞頻數加1
 56         if (word.length() == 0) {
 57             root.childNodes[k].freq++;
 58         } else {
 59             addWord(root.childNodes[k], word, wordId);
 60         }
 61     }
 62 
 63     /**
 64      * 刪除單詞
 65      */
 66     public void deleteWord(String word, int wordId) {
 67         deleteWord(trieRoot, word, wordId);
 68     }
 69 
 70     enum DELETERES {
 71         FAIL_EMPTYWORLD, FAIL_WORLD_NOT_EXIST, SUCCESS
 72     };
 73 
 74     public DELETERES deleteWord(TrieNode root, String word, int wordId) {
 75         // 輸入單詞為空
 76         if (null == word || word.length() == 0) {
 77             return DELETERES.FAIL_EMPTYWORLD;
 78         }
 79 
 80         int k = TrieNode.getCharPosition(word.charAt(0));
 81 
 82         // 第一個字元不在樹中,說明沒有要刪除的單詞
 83         if (root.childNodes[k] == null) {
 84             return DELETERES.FAIL_WORLD_NOT_EXIST;
 85         }
 86 
 87         // 第一個字元在樹中
 88         DELETERES res;
 89         {
 90             word = word.substring(1);
 91             // 找到該單詞
 92             if (word.length() == 0 && root.childNodes[k].freq > 0) {
 93                 root.childNodes[k].freq--;
 94                 res = DELETERES.SUCCESS;
 95             } else {
 96                 res = deleteWord(root.childNodes[k], word, wordId);
 97             }
 98 
 99             if (res == DELETERES.SUCCESS) {
100                 // 從沿途節點移除該單詞
101                 root.childNodes[k].wordSet.remove(wordId);
102                 // 沒單詞了,釋放節點
103                 if (root.childNodes[k].wordSet.size() == 0) {
104                     root.childNodes[k] = null;
105                     treeSize--;
106                 }
107             }
108             return res;
109         }
110     }
111 
112     /**
113      * 修改單詞
114      */
115     public void updateWord(String newWord, String oldWord, int wordId) {
116         updateWord(trieRoot, newWord, oldWord, wordId);
117     }
118 
119     public void updateWord(TrieNode root, String newWord, String oldWord, int wordId) {
120         if (deleteWord(root, oldWord, wordId) == DELETERES.SUCCESS) {
121             addWord(root, newWord, wordId);
122         }
123     }
124 
125     /**
126      * 找以給定單詞為字首的所有單詞的id
127      */
128     public HashSet<Integer> searchPrefixWord(String word) {
129         return searchPrefixWord(trieRoot, word);
130     }
131 
132     public HashSet<Integer> searchPrefixWord(TrieNode root, String word) {
133 
134         HashSet<Integer> wordSet = new HashSet<Integer>();
135 
136         // 輸入單詞為空
137         if (null == word || word.length() == 0) {
138             return wordSet;
139         }
140 
141         int k = TrieNode.getCharPosition(word.charAt(0));
142         // 單詞裡某個字元在樹種不存在,說明沒有該單詞
143         if (root.childNodes[k] == null) {
144             return wordSet;
145         }
146 
147         word = word.substring(1);
148 
149         if (word.length() == 0) {
150             wordSet = root.childNodes[k].wordSet;
151         } else {
152             wordSet = searchPrefixWord(root.childNodes[k], word);
153         }
154         return wordSet;
155     }
156 
157     /**
158      * 統計給定單詞出現的次數
159      */
160     public int wordCount(String word) {
161         return wordCount(trieRoot, word);
162     }
163 
164     public int wordCount(TrieNode root, String word) {
165 
166         // 輸入單詞為空
167         if (null == word || word.length() == 0) {
168             return 0;
169         }
170 
171         int k = TrieNode.getCharPosition(word.charAt(0));
172         // 單詞裡某個字元在樹種不存在,說明沒有該單詞
173         if (root.childNodes[k] == null) {
174             return 0;
175         }
176 
177         int count = 0;
178         word = word.substring(1);
179 
180         if (word.length() == 0) {
181             count = root.childNodes[k].freq;
182         } else {
183             count = wordCount(root.childNodes[k], word);
184         }
185 
186         return count;
187     }
188 }
189 
190 /**
191  * Trie樹的節點<br>
192  * 假定單詞都由26個英文字母組成,Trie樹根節點不存字元
193  */
194 class TrieNode {
195     // 孩子節點
196     public TrieNode[] childNodes;
197     // 該節點的字元
198     public char nodeChar;
199 
200     // 以該節點為結束的單詞的詞頻
201     public int freq;
202     // 包含該節點的單詞的id
203     public HashSet<Integer> wordSet;
204 
205     // 初始化
206     public TrieNode() {
207         childNodes = new TrieNode[CHILD_NUM];
208         freq = 0;
209         wordSet = new HashSet<Integer>();
210     }
211 
212     private static final int CHILD_NUM = 26;
213 
214     public static int getCharPosition(char ch) {
215         return (ch - 'a');
216     }
217 }
View Code

測試:

 1 package cn.edu.buaa.trie;
 2 
 3 /**
 4  * @author zsm
 5  * @date 2016年10月25日 下午3:12:02
 6  * @version 1.0
 7  * @parameter
 8  * @return
 9  */
10 public class Main_Trie {
11 
12     public static void main(String[] args) {
13         // TODO Auto-generated method stub
14         Trie trie = new Trie();
15         String wd1 = "ab";
16         String wd2 = "ac";
17         String wd3 = "acd";
18 
19         String wd4 = "add";
20 
21         trie.addWord(wd1, 1);
22         trie.addWord(wd2, 2);
23         trie.addWord(wd2, 3);
24         trie.addWord(wd3, 4);
25 
26         // wd1,wd2,wd2,wd3
27         System.out.println(trie.wordCount(wd2));// 2
28         System.out.println(trie.wordCount(wd3));// 1
29         System.out.println(trie.getTreeSize());// 4
30         System.out.println();
31 
32         trie.deleteWord(wd3, 4);
33         // wd1,wd2,wd2
34         System.out.println(trie.wordCount(wd2));// 2
35         System.out.println(trie.wordCount(wd3));// 0
36         System.out.println(trie.getTreeSize());// 3
37         System.out.println();
38 
39         trie.addWord(wd3, 4);
40         // wd1,wd2,wd2,wd3
41         System.out.println(trie.wordCount(wd2));// 2
42         System.out.println(trie.wordCount(wd3));// 1
43         System.out.println(trie.getTreeSize());// 4
44         System.out.println();
45 
46         trie.deleteWord(wd2, 2);
47         trie.deleteWord(wd2, 3);
48         // wd1,wd3
49         System.out.println(trie.wordCount(wd2));// 0
50         System.out.println(trie.wordCount(wd3));// 1
51         System.out.println(trie.getTreeSize());// 4
52         System.out.println(trie.searchPrefixWord("a"));// [1,4]
53         System.out.println();
54 
55         trie.updateWord(wd3, wd4, 4);
56         // wd1,wd3
57         System.out.println(trie.searchPrefixWord("a"));// [1,4]
58         System.out.println(trie.wordCount(wd2));// 0
59         System.out.println(trie.wordCount(wd3));// 1
60         System.out.println(trie.wordCount(wd4));// 0
61         System.out.println(trie.getTreeSize());// 4
62         System.out.println();
63 
64         trie.updateWord(wd4, wd3, 4);
65         // wd1,wd4
66         System.out.println(trie.searchPrefixWord("a"));// [1,4]
67         System.out.println(trie.wordCount(wd2));// 0
68         System.out.println(trie.wordCount(wd3));// 0
69         System.out.println(trie.wordCount(wd4));// 1
70         System.out.println(trie.getTreeSize());// 4
71         System.out.println();
72     }
73 }
View Code

3、參考資料

http://www.cnblogs.com/huangxincheng/archive/2012/11/25/2788268.html

相關文章