1、Trie樹
Trie樹即字典樹或字首樹。
2、實踐
程式碼實踐如下:
// NOTE: the original project declares `package cn.edu.buaa.trie;` for this file.
// It is left out here so the listing compiles as a standalone source file;
// add it back as the first line when placing the file in that source tree.
import java.util.HashSet;

/**
 * Prefix tree (trie) that stores words made of the 26 lowercase letters
 * {@code 'a'..'z'} together with the ids of the words that were inserted.
 *
 * <p>Every node on the path of an inserted word records the word's id in its
 * {@code wordSet}; the node of the last character additionally counts how many
 * words end there in {@code freq}. The root node holds no character.
 *
 * <p>The bookkeeping assumes that each inserted occurrence of a word uses its
 * own id (the same word may be inserted several times with different ids).
 * Re-using one id for several insertions is not tracked separately: deleting
 * one of them removes the id from every node on the path.
 *
 * <p>This class is not synchronized.
 *
 * @author zsm
 * @version 1.0
 */
public class Trie {
    private final TrieNode trieRoot;
    /** Number of nodes currently in the tree, not counting the root. */
    private int treeSize;

    /** Creates an empty trie consisting of the root node only. */
    public Trie() {
        trieRoot = new TrieNode();
        treeSize = 0;
    }

    /**
     * @return the root node (it stores no character)
     */
    public TrieNode getRoot() {
        return trieRoot;
    }

    /**
     * @return the number of nodes in the tree, excluding the root
     */
    public int getTreeSize() {
        return treeSize;
    }

    /**
     * Adds a word to the trie, starting at the trie's own root.
     *
     * @param word   word to add; {@code null} or empty is ignored
     * @param wordId id to record on every node of the word's path
     * @throws IllegalArgumentException if the word contains a character outside {@code 'a'..'z'}
     */
    public void addWord(String word, int wordId) {
        addWord(trieRoot, word, wordId);
    }

    /**
     * Adds a word below the given node.
     *
     * <p>The word is validated before anything is changed, so a rejected word
     * never leaves a partially inserted path behind.
     *
     * @param root   node to start from
     * @param word   word to add; {@code null} or empty is ignored
     * @param wordId id to record on every node of the word's path
     * @throws IllegalArgumentException if the word contains a character outside {@code 'a'..'z'}
     */
    public void addWord(TrieNode root, String word, int wordId) {
        // Nothing to insert for a missing or empty word.
        if (null == word || word.length() == 0) {
            return;
        }
        requireSupportedChars(word);

        TrieNode node = root;
        for (int i = 0; i < word.length(); i++) {
            char ch = word.charAt(i);
            int k = TrieNode.getCharPosition(ch);

            // Create the child for this character on first use.
            if (node.childNodes[k] == null) {
                TrieNode created = new TrieNode();
                created.nodeChar = ch;
                node.childNodes[k] = created;
                treeSize++;
            }
            node = node.childNodes[k];
            // The word passes through this node.
            node.wordSet.add(wordId);
        }
        // The last character's node counts one more word ending here.
        node.freq++;
    }

    /**
     * Deletes one occurrence of a word, starting at the trie's own root.
     * The outcome is discarded; use
     * {@link #deleteWord(TrieNode, String, int)} to obtain it.
     *
     * @param word   word to delete
     * @param wordId id that was used when the word was added
     */
    public void deleteWord(String word, int wordId) {
        deleteWord(trieRoot, word, wordId);
    }

    /** Outcome of a delete operation. */
    enum DELETERES {
        FAIL_EMPTYWORLD, FAIL_WORLD_NOT_EXIST, SUCCESS
    }

    /**
     * Deletes one occurrence of a word below the given node.
     *
     * <p>The deletion only takes place when the complete path exists, at least
     * one word ends at the last node, and {@code wordId} is recorded on that
     * node. Otherwise the trie is left untouched. On success the id is removed
     * from every node of the path and nodes that no longer carry any id are
     * released.
     *
     * @param root   node to start from
     * @param word   word to delete
     * @param wordId id that was used when the word was added
     * @return {@code FAIL_EMPTYWORLD} for a {@code null} or empty word,
     *         {@code FAIL_WORLD_NOT_EXIST} if the word (with that id) is not stored,
     *         {@code SUCCESS} otherwise
     */
    public DELETERES deleteWord(TrieNode root, String word, int wordId) {
        if (null == word || word.length() == 0) {
            return DELETERES.FAIL_EMPTYWORLD;
        }

        // Walk down first and remember the path; nothing is modified until
        // the word is known to exist.
        int length = word.length();
        TrieNode[] parents = new TrieNode[length];
        int[] slots = new int[length];
        TrieNode node = root;
        for (int i = 0; i < length; i++) {
            char ch = word.charAt(i);
            if (!TrieNode.isSupportedChar(ch)) {
                // Such a word can never have been stored.
                return DELETERES.FAIL_WORLD_NOT_EXIST;
            }
            int k = TrieNode.getCharPosition(ch);
            if (node.childNodes[k] == null) {
                return DELETERES.FAIL_WORLD_NOT_EXIST;
            }
            parents[i] = node;
            slots[i] = k;
            node = node.childNodes[k];
        }

        // The path exists, but a word must actually end here and the id must
        // be known on this node; otherwise a wrong id would silently consume
        // somebody else's occurrence.
        if (node.freq <= 0 || !node.wordSet.contains(wordId)) {
            return DELETERES.FAIL_WORLD_NOT_EXIST;
        }
        node.freq--;

        // Walk back up: drop the id along the path and release empty nodes.
        for (int i = length - 1; i >= 0; i--) {
            TrieNode child = parents[i].childNodes[slots[i]];
            child.wordSet.remove(wordId);
            if (child.wordSet.size() == 0) {
                parents[i].childNodes[slots[i]] = null;
                treeSize--;
            }
        }
        return DELETERES.SUCCESS;
    }

    /**
     * Replaces a word, starting at the trie's own root.
     *
     * @param newWord word to store instead
     * @param oldWord word to remove
     * @param wordId  id of the old word, re-used for the new word
     * @throws IllegalArgumentException if {@code newWord} contains a character outside {@code 'a'..'z'}
     */
    public void updateWord(String newWord, String oldWord, int wordId) {
        updateWord(trieRoot, newWord, oldWord, wordId);
    }

    /**
     * Replaces a word below the given node: the old word is deleted and, only
     * if that succeeded, the new word is added with the same id.
     *
     * @param root    node to start from
     * @param newWord word to store instead; {@code null} or empty only deletes the old word
     * @param oldWord word to remove
     * @param wordId  id of the old word, re-used for the new word
     * @throws IllegalArgumentException if {@code newWord} contains a character outside {@code 'a'..'z'}
     */
    public void updateWord(TrieNode root, String newWord, String oldWord, int wordId) {
        // Validate the replacement up front so that an invalid new word cannot
        // cause the old word to be deleted and then lost.
        requireSupportedChars(newWord);
        if (deleteWord(root, oldWord, wordId) == DELETERES.SUCCESS) {
            addWord(root, newWord, wordId);
        }
    }

    /**
     * Finds the ids of all words that start with the given prefix, starting at
     * the trie's own root.
     *
     * @param word prefix to look up
     * @return a new set with the matching ids; empty if there is none
     */
    public HashSet<Integer> searchPrefixWord(String word) {
        return searchPrefixWord(trieRoot, word);
    }

    /**
     * Finds the ids of all words below the given node that start with the
     * given prefix.
     *
     * @param root node to start from
     * @param word prefix to look up; {@code null} or empty yields an empty set
     * @return a new set with the matching ids (a copy, so changing it does not
     *         affect the trie); empty if there is none
     */
    public HashSet<Integer> searchPrefixWord(TrieNode root, String word) {
        if (null == word || word.length() == 0) {
            return new HashSet<Integer>();
        }
        TrieNode node = findNode(root, word);
        if (node == null) {
            return new HashSet<Integer>();
        }
        // Copy: handing out the node's own set would let callers corrupt the trie.
        return new HashSet<Integer>(node.wordSet);
    }

    /**
     * Counts how many times the given word is stored, starting at the trie's
     * own root.
     *
     * @param word word to count
     * @return number of stored occurrences, 0 if the word is not stored
     */
    public int wordCount(String word) {
        return wordCount(trieRoot, word);
    }

    /**
     * Counts how many times the given word is stored below the given node.
     *
     * @param root node to start from
     * @param word word to count; {@code null} or empty yields 0
     * @return number of stored occurrences, 0 if the word is not stored
     */
    public int wordCount(TrieNode root, String word) {
        if (null == word || word.length() == 0) {
            return 0;
        }
        TrieNode node = findNode(root, word);
        return node == null ? 0 : node.freq;
    }

    /**
     * Follows the characters of a non-empty word downwards from the given node.
     *
     * @return the node of the last character, or {@code null} if the path does
     *         not exist or the word contains an unsupported character
     */
    private static TrieNode findNode(TrieNode root, String word) {
        TrieNode node = root;
        for (int i = 0; i < word.length(); i++) {
            char ch = word.charAt(i);
            if (!TrieNode.isSupportedChar(ch)) {
                return null;
            }
            node = node.childNodes[TrieNode.getCharPosition(ch)];
            if (node == null) {
                return null;
            }
        }
        return node;
    }

    /** Rejects words with characters the trie cannot store; null and empty words pass. */
    private static void requireSupportedChars(String word) {
        if (word == null) {
            return;
        }
        for (int i = 0; i < word.length(); i++) {
            char ch = word.charAt(i);
            if (!TrieNode.isSupportedChar(ch)) {
                throw new IllegalArgumentException("Unsupported character '" + ch + "' at index " + i
                        + " in word \"" + word + "\"; only 'a'..'z' can be stored");
            }
        }
    }
}

/**
 * Node of the trie.<br>
 * Words are assumed to consist of the 26 lowercase English letters; the root
 * node of the trie stores no character.
 */
class TrieNode {
    // Child nodes, one slot per letter.
    public TrieNode[] childNodes;
    // Character stored in this node.
    public char nodeChar;

    // Number of stored words that end at this node.
    public int freq;
    // Ids of the words whose path contains this node.
    public HashSet<Integer> wordSet;

    /** Creates a node without children, with frequency 0 and no word ids. */
    public TrieNode() {
        childNodes = new TrieNode[CHILD_NUM];
        freq = 0;
        wordSet = new HashSet<Integer>();
    }

    private static final int CHILD_NUM = 26;

    /**
     * Maps a character to its child slot by its distance from {@code 'a'}.
     * The result is only a valid index for characters accepted by
     * {@link #isSupportedChar(char)}.
     */
    public static int getCharPosition(char ch) {
        return (ch - 'a');
    }

    /** Tells whether the character maps to one of the child slots. */
    static boolean isSupportedChar(char ch) {
        int position = getCharPosition(ch);
        return position >= 0 && position < CHILD_NUM;
    }
}
測試:
1 package cn.edu.buaa.trie; 2 3 /** 4 * @author zsm 5 * @date 2016年10月25日 下午3:12:02 6 * @version 1.0 7 * @parameter 8 * @return 9 */ 10 public class Main_Trie { 11 12 public static void main(String[] args) { 13 // TODO Auto-generated method stub 14 Trie trie = new Trie(); 15 String wd1 = "ab"; 16 String wd2 = "ac"; 17 String wd3 = "acd"; 18 19 String wd4 = "add"; 20 21 trie.addWord(wd1, 1); 22 trie.addWord(wd2, 2); 23 trie.addWord(wd2, 3); 24 trie.addWord(wd3, 4); 25 26 // wd1,wd2,wd2,wd3 27 System.out.println(trie.wordCount(wd2));// 2 28 System.out.println(trie.wordCount(wd3));// 1 29 System.out.println(trie.getTreeSize());// 4 30 System.out.println(); 31 32 trie.deleteWord(wd3, 4); 33 // wd1,wd2,wd2 34 System.out.println(trie.wordCount(wd2));// 2 35 System.out.println(trie.wordCount(wd3));// 0 36 System.out.println(trie.getTreeSize());// 3 37 System.out.println(); 38 39 trie.addWord(wd3, 4); 40 // wd1,wd2,wd2,wd3 41 System.out.println(trie.wordCount(wd2));// 2 42 System.out.println(trie.wordCount(wd3));// 1 43 System.out.println(trie.getTreeSize());// 4 44 System.out.println(); 45 46 trie.deleteWord(wd2, 2); 47 trie.deleteWord(wd2, 3); 48 // wd1,wd3 49 System.out.println(trie.wordCount(wd2));// 0 50 System.out.println(trie.wordCount(wd3));// 1 51 System.out.println(trie.getTreeSize());// 4 52 System.out.println(trie.searchPrefixWord("a"));// [1,4] 53 System.out.println(); 54 55 trie.updateWord(wd3, wd4, 4); 56 // wd1,wd3 57 System.out.println(trie.searchPrefixWord("a"));// [1,4] 58 System.out.println(trie.wordCount(wd2));// 0 59 System.out.println(trie.wordCount(wd3));// 1 60 System.out.println(trie.wordCount(wd4));// 0 61 System.out.println(trie.getTreeSize());// 4 62 System.out.println(); 63 64 trie.updateWord(wd4, wd3, 4); 65 // wd1,wd4 66 System.out.println(trie.searchPrefixWord("a"));// [1,4] 67 System.out.println(trie.wordCount(wd2));// 0 68 System.out.println(trie.wordCount(wd3));// 0 69 System.out.println(trie.wordCount(wd4));// 1 70 
System.out.println(trie.getTreeSize());// 4 71 System.out.println(); 72 } 73 }
3、參考資料
http://www.cnblogs.com/huangxincheng/archive/2012/11/25/2788268.html