- 公司新專案的素材編輯功能需要提供敏感詞過濾,於是上網查了一下:現有的實現大多基於 Trie 樹(字典樹)演算法,但用 PHP 寫的卻很少,其中一部分還存在 bug。因此,我在別人程式碼的基礎上做了修正與完善。
敏感詞過濾演算法實現
/**
 * One node of the trie used for sensitive-word matching.
 *
 * The node is a plain data holder: it stores the value it was created with,
 * a map of child nodes, and a flag telling whether a word ends here.
 */
class TreeMap
{
    /**
     * Value stored in this node (TrieTree passes a single character,
     * or '/' for the root).
     */
    public $data;

    /**
     * Child nodes. A node may have any number of children, hence an array;
     * TrieTree keys the entries by character.
     *
     * @var array
     */
    public $children = [];

    /**
     * True when the path from the root down to this node spells a complete word.
     *
     * @var bool
     */
    public $isEndingChar = false;

    /**
     * Create a node holding the given value, with no children and the
     * end-of-word flag cleared.
     *
     * @param mixed $data Value to store in the node.
     */
    public function __construct($data)
    {
        $this->data = $data;
    }
}
/**
 * Trie-based sensitive-word matcher.
 *
 * Words are stored character by character (multibyte-aware) in a tree of
 * TreeMap nodes. search() then scans a text and returns every listed word
 * it finds.
 *
 * Matching is "shortest match": the scan of a candidate stops at the first
 * node flagged as a word end, so when one listed word is a prefix of another
 * (e.g. '白粉' and '白粉人') the shorter word is the one reported.
 */
class TrieTree
{
    /**
     * Root node of the trie. It carries the placeholder value '/' and never
     * represents a character of any word.
     *
     * @var TreeMap
     * @author qpf
     */
    public $trieTreeMap;

    /**
     * Create an empty trie consisting of the root node only.
     */
    public function __construct()
    {
        $this->trieTreeMap = new TreeMap('/');
    }

    /**
     * Get the root node of the sensitive-word trie.
     *
     * @return TreeMap
     * @author qpf
     */
    public function getTreeMap()
    {
        return $this->trieTreeMap;
    }

    /**
     * Add sensitive words to the trie.
     *
     * Empty entries are ignored; without that guard an empty string would
     * flag the root node itself as the end of a word.
     *
     * @param array $wordsList List of words (strings) to register.
     * @author qpf
     */
    public function addWords(array $wordsList)
    {
        foreach ($wordsList as $words) {
            $len = mb_strlen($words);
            if ($len === 0) {
                continue;
            }

            // Walk down from the root, creating missing nodes on the way.
            $node = $this->trieTreeMap;
            for ($i = 0; $i < $len; $i++) {
                $word = mb_substr($words, $i, 1);
                if (!isset($node->children[$word])) {
                    $node->children[$word] = new TreeMap($word);
                }
                $node = $node->children[$word];
            }

            // The node reached by the last character marks a complete word.
            $node->isEndingChar = true;
        }
    }

    /**
     * Find the sensitive words contained in a text.
     *
     * The text is scanned left to right. After a hit, scanning resumes right
     * behind the matched word, so reported matches never overlap. The same
     * word is returned once per occurrence.
     *
     * @param string $txt Text to scan.
     * @return array List of matched words, in order of appearance.
     * @author qpf
     */
    public function search($txt)
    {
        $wordsList = [];
        $txtLength = mb_strlen($txt);

        $i = 0;
        while ($i < $txtLength) {
            $wordLength = $this->checkWord($txt, $i, $txtLength);
            if ($wordLength > 0) {
                $wordsList[] = mb_substr($txt, $i, $wordLength);
                $i += $wordLength; // continue right after the matched word
            } else {
                $i++;
            }
        }

        return $wordsList;
    }

    /**
     * Check whether a sensitive word starts at the given position.
     *
     * @param string $txt        Text being scanned.
     * @param int    $beginIndex Character offset at which to start matching.
     * @param int    $length     Length of $txt in characters.
     * @return int Length in characters of the word found at $beginIndex,
     *             or 0 when no complete word starts there.
     */
    private function checkWord($txt, $beginIndex, $length)
    {
        $wordLength = 0;
        $trieTree = $this->trieTreeMap; // start at the root of the word tree

        for ($i = $beginIndex; $i < $length; $i++) {
            $word = mb_substr($txt, $i, 1); // examine one character at a time
            if (!isset($trieTree->children[$word])) {
                // The path breaks before a word end was reached: no match.
                return 0;
            }

            $wordLength++;
            $trieTree = $trieTree->children[$word];
            if ($trieTree->isEndingChar === true) {
                // Shortest match: report as soon as a complete word is seen.
                return $wordLength;
            }
        }

        // The text ended while only a prefix of some word had been matched.
        // This must count as "no match" at every offset, including offset 0.
        return 0;
    }
}
// Demo: register a few sensitive words, scan a sample sentence and dump
// the list of words that search() returns.
$sensitiveWords = ['白粉', '白粉人', '白粉人嫩', '不該大'];

$filter = new TrieTree();
$filter->addWords($sensitiveWords);

$sample = '白粉啊,白粉人,我不該大啊';
$matches = $filter->search($sample);

var_dump($matches);
exit; // stop here so nothing after the demo is executed
複製程式碼