匹配關鍵詞和敏感詞

不忘初心發表於2020-07-05

前一段時間被大佬問到如何實現一個敏感詞的匹配,所以寫了一個簡版的敏感詞匹配結構,還待後期優化!

程式碼地址:sensitive_words

github.com/tianye/sensitive_words

注:暫時不可以投放到生產環境使用

有喜歡的可以提交程式碼給我,或者拿走自己維護

希望各位大佬指點,謝謝!關鍵詞是從網上找到的,所以不是很全,望大家海涵。

使用方法:

##### 1. 先構建敏感關鍵詞樹(因為樹在初始化時就建立完成,後期可以直接使用,減少後期構建過程)

var tree = decision.CreateTree()

// init fills the package-level tree with the sample sensitive words so that
// the tree is ready before any matching is performed.
func init() {
    words := []string{"考前答案", "答案", "前答"}

    // Insert every sample word into the shared tree.
    for idx := range words {
        decision.BuildTrue(words[idx], tree)
    }
}

##### 2. 匹配敏感關鍵詞

    //等待匹配的資料
    str := "啊前答我是一個文字啦啦啦考前答案答案前答我也是文答案字啦"

    //匹配關鍵詞
    res, loc := decision.MatchingSensitiveWords(tree, str)

    fmt.Println("\n結果:", "是否存在敏感詞:", res, "敏感詞每個字所在位置:", loc)

輸出結果:

結果: 是否存在敏感詞: true 敏感詞每個字所在位置: [2 3 13 14 15 16 17 18 19 20 25 26]

使用例項:請檢視 build_decision.go 檔案,謝謝

原始碼實現方式:

package decision

import (
    "fmt"
    "strings"
)

// Node is one element of the sensitive-word tree.
type Node struct {
    // Word is the text compared against the input when a level is searched.
    // BuildTrue stores a single character of the inserted word here.
    Word string
    // Node lists the children, i.e. the nodes that may follow this one.
    Node []*Node
    // Location is the value handed to CreateNode. BuildTrue passes the
    // 1-based character index of Word within the inserted word.
    Location int

    // IsSensitive marks a node at which a stored sensitive word ends.
    IsSensitive bool
}

// Tree is the entry point of the sensitive-word tree.
type Tree struct {
    // TreeNode holds the first-level nodes; matching starts from this slice.
    TreeNode []*Node
}

// CreateNode allocates a childless node.
//
// word, location and isSensitive are copied into the Word, Location and
// IsSensitive fields of the returned node.
func CreateNode(word string, location int, isSensitive bool) *Node {
    node := new(Node)
    node.Word = word
    node.Location = location
    node.IsSensitive = isSensitive

    return node
}

// CreateTree returns a pointer to a new, empty Tree.
func CreateTree() *Tree {
    return new(Tree)
}

// SearchNode looks through one level of the tree.
//
// It returns the first node in nodeList whose Word equals str, or nil when
// no node on that level matches.
func SearchNode(str string, nodeList []*Node) *Node {
    for idx := 0; idx < len(nodeList); idx++ {
        if candidate := nodeList[idx]; candidate.Word == str {
            return candidate
        }
    }

    return nil
}

// AppendNode adds newNode to the end of nowNode's children and returns
// newNode, so the caller can keep descending from the freshly added child.
func AppendNode(nowNode, newNode *Node) *Node {
    children := append(nowNode.Node, newNode)
    nowNode.Node = children

    return newNode
}

// BuildTrue inserts the word str into tree and returns the tree.
//
// The word is stored one character per level: the first character lives in
// tree.TreeNode and every following character is a child of the previous
// one. Nodes that already exist are reused, so words sharing a prefix share
// the corresponding nodes. The node holding the last character is flagged
// IsSensitive, whether it was just created or already existed (for example
// when "ab" is inserted after "abc"). Each created node records its 1-based
// character index within str as Location.
//
// An empty str leaves the tree unchanged. A nil tree is replaced by a new
// empty one, which is then filled and returned.
//
// NOTE(review): the name looks like a typo of "BuildTree"; it is kept so
// existing callers keep compiling.
func BuildTrue(str string, tree *Tree) *Tree {
    if tree == nil {
        tree = &Tree{}
    }

    // With an empty separator strings.Count reports the number of characters
    // plus one, so end is the 1-based index of the last character.
    end := strings.Count(str, "") - 1

    // level always points at the slice of siblings in which the current
    // character has to be found or created; it starts at the tree roots.
    level := &tree.TreeNode

    i := 0
    for _, val := range str {
        i++
        word := string(val)

        node := SearchNode(word, *level)
        if node == nil {
            node = CreateNode(word, i, false)
            *level = append(*level, node)
        }

        // Flag the end of the word even on a reused node; otherwise a word
        // that is a prefix of an earlier one would never be stored.
        if i == end {
            node.IsSensitive = true
        }

        // Always descend into the matched or created node so the next
        // character is attached below it and not below an ancestor.
        level = &node.Node
    }

    return tree
}

// MatchingSensitiveWords scans str for the words stored in tree.
//
// Every character position of str is tried as a possible word start. From
// that start the tree is descended one character at a time through
// SearchLeavesNode, and the walk stops at the first node flagged as
// sensitive, so when one stored word is a prefix of another the shorter one
// is reported. A reported word is skipped as a whole. A failed attempt only
// moves the start forward by one character, so a word that begins inside an
// abandoned partial match is still found (with "abcd" and "bc" stored, the
// text "abc" reports "bc").
//
// isSensitive is true when at least one word was found. allLocationStr holds
// the 1-based character positions of every character belonging to a reported
// word, in ascending order; it is empty but non-nil when nothing matches.
// A nil tree is treated as an empty tree.
func MatchingSensitiveWords(tree *Tree, str string) (isSensitive bool, allLocationStr []int) {
    allLocationStr = make([]int, 0)
    if tree == nil {
        return false, allLocationStr
    }

    // Work on runes so that positions count characters rather than bytes.
    runes := []rune(str)

    for start := 0; start < len(runes); {
        level := tree.TreeNode
        end := -1

        // Descend from the roots until a word ends or the path breaks off.
        for pos := start; pos < len(runes) && len(level) > 0; pos++ {
            var hit bool
            level, hit = SearchLeavesNode(string(runes[pos]), level)
            if hit {
                end = pos
                break
            }
        }

        // No word starts here: retry from the very next character, not from
        // the character where the walk failed.
        if end < 0 {
            start++
            continue
        }

        for pos := start; pos <= end; pos++ {
            allLocationStr = append(allLocationStr, pos+1)
        }
        start = end + 1
    }

    return len(allLocationStr) > 0, allLocationStr
}

// SearchLeavesNode looks for str on one level of the tree.
//
// For the first node in params whose Word equals str it returns that node's
// children together with its IsSensitive flag. When no node matches it
// returns nil and false.
func SearchLeavesNode(str string, params []*Node) (node []*Node, isSensitive bool) {
    for idx := range params {
        candidate := params[idx]
        if candidate.Word != str {
            continue
        }

        return candidate.Node, candidate.IsSensitive
    }

    return nil, false
}

// WatchPrint dumps the tree for inspection.
//
// For every node in params it prints one line with the node's Word, Location,
// IsSensitive flag and the node itself, then prints that node's children
// before moving on to the next sibling.
func WatchPrint(params []*Node) {
    for idx := range params {
        current := params[idx]
        fmt.Print("watch.Word:", " ", current.Word, " ", current.Location, current.IsSensitive, "-----", current, "\n")

        if current.Node == nil {
            continue
        }
        WatchPrint(current.Node)
    }
}
本作品採用《CC 協議》,轉載必須註明作者和本文連結

相關文章