Java敏感詞過濾

541732025發表於2014-04-18

點選(此處)摺疊或開啟

  1. package test.java.lang;

  2. import java.io.IOException;
  3. import java.util.ArrayList;
  4. import java.util.HashMap;
  5. import java.util.List;

  /**
   * Sensitive-word filter backed by a character trie.
   *
   * <p>Keywords are registered with {@link #addKeywords(List)}. Each keyword is stored as a path
   * of characters starting at the trie root; the node reached by the last character of a keyword
   * is flagged as a terminal node. Text is scanned position by position, and at every position
   * the <em>shortest</em> registered keyword starting there is the one that is reported.
   *
   * <p>Instances are mutable and perform no synchronization.
   *
   * <p>Created 2014-4-17.
   *
   * @version 1.0
   */
  public class KeywordFilter {

      /** One trie node: outgoing edges keyed by character, plus an end-of-keyword flag. */
      private static final class Node {
          /** Child nodes keyed by the next character of a keyword. */
          private final HashMap<Character, Node> children = new HashMap<Character, Node>();
          /** True when the path from the root to this node spells a complete keyword. */
          private boolean end;
      }

      /** Root of the keyword trie; it stands for the empty prefix and is never terminal. */
      private final Node root = new Node();

      /**
       * Adds sensitive words to the filter.
       *
       * <p>Each keyword is trimmed before insertion. A {@code null} list, {@code null} entries
       * and entries that are empty after trimming are ignored.
       *
       * @param keywords the keywords to register; may be {@code null}
       */
      public void addKeywords(List<String> keywords) {
          if (keywords == null) {
              return;
          }
          for (String keyword : keywords) {
              if (keyword == null) {
                  continue;
              }
              String key = keyword.trim();
              if (key.length() == 0) {
                  // Nothing to insert; also keeps the root from ever being marked terminal.
                  continue;
              }
              Node node = root; // every keyword is inserted starting from the root
              for (int j = 0; j < key.length(); j++) {
                  Character ch = Character.valueOf(key.charAt(j));
                  Node next = node.children.get(ch);
                  if (next == null) {
                      next = new Node();
                      node.children.put(ch, next);
                  }
                  node = next;
              }
              node.end = true; // the last character closes a complete keyword
          }
      }

      /**
       * Checks whether a keyword starts at position {@code begin} of {@code txt}.
       *
       * <p>The trie is walked character by character and the walk stops at the first terminal
       * node, so the shortest keyword starting at {@code begin} wins.
       *
       * @param txt the text to inspect; must not be {@code null}
       * @param begin index of the first character to match
       * @return the length of the matched keyword, or 0 when no complete keyword starts at
       *     {@code begin}
       */
      private int checkKeyWords(String txt, int begin) {
          Node node = root;
          for (int i = begin; i < txt.length(); i++) {
              node = node.children.get(Character.valueOf(txt.charAt(i)));
              if (node == null) {
                  return 0; // no keyword continues with this character
              }
              if (node.end) {
                  return i - begin + 1; // reached the end of a keyword
              }
          }
          // The text ended in the middle of a keyword: a bare prefix is not a match.
          return 0;
      }

      /**
       * Tells whether the text contains at least one registered keyword.
       *
       * @param txt the text to inspect; {@code null} is treated as empty text
       * @return {@code true} if any keyword occurs in {@code txt}, {@code false} otherwise
       */
      public boolean isContentKeyWords(String txt) {
          if (txt == null) {
              return false;
          }
          for (int i = 0; i < txt.length(); i++) {
              if (checkKeyWords(txt, i) > 0) {
                  return true;
              }
          }
          return false;
      }

      /**
       * Returns the keywords found in the text, in order of appearance.
       *
       * <p>Matches do not overlap: after a match the scan resumes right behind it. A keyword
       * that occurs several times is listed once per occurrence.
       *
       * @param txt the text to inspect; {@code null} is treated as empty text
       * @return a new mutable list of the matched keywords; empty when there is no match
       */
      public List<String> getTxtKeyWords(String txt) {
          List<String> found = new ArrayList<String>();
          if (txt == null) {
              return found;
          }
          int length = txt.length();
          int i = 0;
          while (i < length) {
              int len = checkKeyWords(txt, i);
              if (len > 0) {
                  found.add(txt.substring(i, i + len));
                  i += len; // skip past the matched keyword
              } else {
                  i++;
              }
          }
          return found;
      }

      /**
       * Initializes the filter with a built-in demo list of sensitive words.
       */
      public void initfiltercode() {
          List<String> keywords = new ArrayList<String>();
          keywords.add("中國人");
          keywords.add("中國男人");
          keywords.add("法輪");
          this.addKeywords(keywords);
      }

      /**
       * Demo entry point: loads the built-in keywords, then prints whether the sample text
       * contains a keyword and which keywords were found.
       *
       * @param args ignored
       * @throws IOException never thrown by this implementation; kept in the signature for
       *     compatibility
       */
      public static void main(String[] args) throws IOException {
          KeywordFilter filter = new KeywordFilter();
          filter.initfiltercode();
          String txt = "哈哈,反倒是 法輪熱舞功,中國人,";
          boolean boo = filter.isContentKeyWords(txt);
          System.out.println(boo);
          List<String> set = filter.getTxtKeyWords(txt);
          System.out.println("包含的敏感詞如下:" + set);
      }
  }


來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/28912557/viewspace-1144669/,如需轉載,請註明出處,否則將追究法律責任。

相關文章