前一段時間被大佬問到如何實現一個敏感詞的匹配,所以寫了一個簡版的敏感詞匹配結構,還待後期優化!
程式碼地址:sensitive_words
github.com/tianye/sensitive_words
注:暫時不可以投放到生產環境使用
有喜歡的可以提交程式碼給我,或者拿走自己維護
希望各位大佬指點,謝謝!關鍵詞是從網上找到的,所以不是很全,望大家海涵。
使用方法:
#### 1. 先構建敏感關鍵詞樹(因為樹在初始化時就建立完成,後期可以直接使用,減少後期構建過程)
// tree is the package-level sensitive-word tree that init fills in.
var tree = decision.CreateTree()

// init inserts each sensitive word into tree.
func init() {
	for _, word := range []string{"考前答案", "答案", "前答"} {
		decision.BuildTrue(word, tree)
	}
}
#### 2. 匹配敏感關鍵詞
//等待匹配的資料
str := "啊前答我是一個文字啦啦啦考前答案答案前答我也是文答案字啦"
//匹配關鍵詞
res, loc := decision.MatchingSensitiveWords(tree, str)
fmt.Println("\n結果:", "是否存在敏感詞:", res, "敏感詞每個字所在位置:", loc)
輸出結果:
結果: 是否存在敏感詞: true 敏感詞每個字所在位置: [2 3 13 14 15 16 17 18 19 20 25 26]
使用例項:請檢視 build_decision.go 檔案,謝謝
原始碼實現方式:
package decision
import (
"fmt"
"strings"
)
// Node is one character of a sensitive word stored in the word tree.
type Node struct {
	// Word is the single character held by this node, stored as a string.
	Word string
	// Node holds the child nodes: the characters that may follow this one.
	Node []*Node
	// Location is the value BuildTrue passes when it creates the node:
	// the 1-based position of the character within the word being inserted.
	Location int
	// IsSensitive marks a node at which a sensitive word ends.
	IsSensitive bool
}
// Tree is the root of the sensitive-word tree.
type Tree struct {
	// TreeNode holds the first-level nodes, one per distinct first character.
	TreeNode []*Node
}
// CreateNode returns a pointer to a new Node that carries the given word,
// location and sensitivity flag. The new node has no children.
func CreateNode(word string, location int, isSensitive bool) *Node {
	created := new(Node)
	created.Word = word
	created.Location = location
	created.IsSensitive = isSensitive
	return created
}
// CreateTree returns a pointer to a new, empty Tree.
func CreateTree() *Tree {
	return new(Tree)
}
// SearchNode scans nodeList, a single level of the tree, and returns the
// first node whose Word equals str. It returns nil when no node matches.
func SearchNode(str string, nodeList []*Node) *Node {
	for idx := 0; idx < len(nodeList); idx++ {
		if candidate := nodeList[idx]; candidate.Word == str {
			return candidate
		}
	}
	return nil
}
// AppendNode adds newNode to the end of nowNode's children and returns
// newNode, so the caller can continue from the freshly attached child.
func AppendNode(nowNode, newNode *Node) *Node {
	children := append(nowNode.Node, newNode)
	nowNode.Node = children
	return newNode
}
// BuildTrue inserts str into tree one character (rune) at a time and returns
// the tree.
//
// Each character becomes a node one level below the previous character.
// Nodes that already exist for a shared prefix are reused rather than
// duplicated, and the node reached by the last character is flagged
// IsSensitive, including when that node was created by an earlier, longer
// word. Location is set to the 1-based position of the character within str
// when a node is created.
//
// An empty str leaves the tree unchanged. A nil tree is replaced by a new
// empty Tree, which is the one returned.
func BuildTrue(str string, tree *Tree) *Tree {
	if tree == nil {
		tree = &Tree{}
	}

	// strings.Count(s, "") is the number of runes in s plus one.
	end := strings.Count(str, "") - 1

	// nowNode is the node of the previous character; nil means we are still
	// at the root level of the tree.
	var nowNode *Node
	i := 0
	for _, val := range str {
		i++
		word := string(val)
		isLast := i == end

		var next *Node
		if nowNode == nil {
			next = SearchNode(word, tree.TreeNode)
		} else {
			next = SearchNode(word, nowNode.Node)
		}

		switch {
		case next == nil:
			next = CreateNode(word, i, isLast)
			if nowNode == nil {
				tree.TreeNode = append(tree.TreeNode, next)
			} else {
				AppendNode(nowNode, next)
			}
		case isLast:
			// The word ends on a node created by another word: mark it.
			next.IsSensitive = true
		}

		// Always descend, whether the node was found or just created.
		nowNode = next
	}
	return tree
}
// MatchingSensitiveWords scans str for the sensitive words stored in tree.
//
// It returns whether at least one word was found, together with the 1-based
// character (rune) positions of every character that belongs to a match, in
// ascending order. The returned slice is never nil.
//
// The scan tries every start position from left to right. From a start
// position it walks down the tree and stops at the first node flagged as
// sensitive, so the shortest word wins. After a match the scan resumes right
// behind it, so reported matches never overlap. When a walk fails, the scan
// moves forward by a single character, so a word that begins inside a failed
// partial match is still found.
//
// A nil tree or an empty str yields (false, empty slice).
func MatchingSensitiveWords(tree *Tree, str string) (isSensitive bool, allLocationStr []int) {
	allLocationStr = make([]int, 0)
	if tree == nil {
		return false, allLocationStr
	}

	runes := []rune(str)
	for start := 0; start < len(runes); {
		level := tree.TreeNode
		matchEnd := -1
		for pos := start; pos < len(runes); pos++ {
			children, sensitive := SearchLeavesNode(string(runes[pos]), level)
			if sensitive {
				matchEnd = pos
				break
			}
			// No children and not sensitive: the character did not match
			// at this level, or the branch is a dead end.
			if len(children) == 0 {
				break
			}
			level = children
		}

		if matchEnd < 0 {
			start++
			continue
		}
		for pos := start; pos <= matchEnd; pos++ {
			allLocationStr = append(allLocationStr, pos+1)
		}
		start = matchEnd + 1
	}
	return len(allLocationStr) > 0, allLocationStr
}
// SearchLeavesNode looks through params, a single level of the tree, for the
// first node whose Word equals str. On a hit it returns that node's children
// and its IsSensitive flag; otherwise it returns (nil, false).
func SearchLeavesNode(str string, params []*Node) (node []*Node, isSensitive bool) {
	for idx := range params {
		if current := params[idx]; current.Word == str {
			node, isSensitive = current.Node, current.IsSensitive
			return node, isSensitive
		}
	}
	return nil, false
}
// WatchPrint writes the tree below params to standard output for inspection.
// Each node is printed on its own line (Word, Location, IsSensitive and the
// node itself) before its children, so the output is in depth-first order.
func WatchPrint(params []*Node) {
	for idx := 0; idx < len(params); idx++ {
		current := params[idx]
		fmt.Print("watch.Word:", " ", current.Word, " ", current.Location, current.IsSensitive, "-----", current, "\n")
		if current.Node == nil {
			continue
		}
		WatchPrint(current.Node)
	}
}
本作品採用《CC 協議》,轉載必須註明作者和本文連結