SQL Server中檢索語句中Like的演算法實現（轉）

本文主要說明字串匹配Like的演算法實現。在SQL Server中，Like的匹配主要表現為對兩個萬用字元的處理：“_”代表一個字元，“%”代表任意個字元。由於“%”在模式串中的位置任意，完全匹配與萬用字元“_”的匹配不應該和它一起參與匹配運算，所以我們決定在匹配前先將模式串按“%”分段，再逐段進行匹配，這樣顯然降低了匹配演算法的難度。下面講解演算法的實現過程（後附實現原始碼）：

１. 確定第一個“%”的位置，

目的：確定匹配的方式

a> 是以“%”開頭則不需要左匹配（左匹配即要求子串的第一個字元必須與原串的第一個字元相一致）

b> 不是以“%”開頭則需要進行左匹配

２. 使用ＫＭＰ*演算法進行模式匹配

標準的ＫＭＰ演算法不能直接實現本文提到的匹配演算法，必須加以修正，主要是原有的模式回退值（next值）不適用：這裡計算回退值時，把“_”視為與任意字元一致，所以“_”越多，匹配時回退的可能性越小，匹配速度也比教科書上的樸素模式查詢快。需要注意的是，加入萬用字元後，字元的“相等”關係不再具有傳遞性，這種回退值可能使搜尋漏掉合法的匹配，例如在“xaab”中查詢“_ab”時，位置1的匹配會被跳過。

３. 繼續下一個子串段的匹配工作。

下面提供演算法的原始碼，分為兩個，

１. _strat

為ＫＭＰ*模式串查詢函式，函式前有其使用說明。

２. _strlike

為Like的實現過程，內部用到_strat模式串匹配函式，實現的關鍵是對模式串的分段，來降低匹配的難度。

:namespace prefix = o ns = "urn:schemas--com::office" />

////////////////////////////////////////////////////////////////////////////////
// Function: int _strat(char * chText, char * chPattern,
//                      int nbegpos, int nlen, bool bleft)
//
// Purpose:  Locate the first occurrence of a pattern segment inside chText.
//           Inside chPattern the character '_' matches any single character;
//           every other character is compared literally.
//
// Effect on global state: none.
//
// Parameters:
//   chText     NUL-terminated text to search.
//   chPattern  NUL-terminated pattern segment.
//   nbegpos    Index in chText at which the search starts.
//   nlen       Number of characters, counted from nbegpos, that may be
//              examined; a negative value means "up to the end of chText".
//   bleft      When true the match must begin exactly at nbegpos.
//   (The commented-out "= 0 / = -1 / = false" in the signature look like the
//   defaults of a prototype declared elsewhere - confirm against the header.)
//
// Returns:  Index in chText where the match begins; nbegpos for an empty
//           pattern; -1 when nothing matches, the pattern does not fit in the
//           search window, or nbegpos lies outside [0, strlen(chText)].
//
// Implementation notes:
//   * Every candidate start position is tested directly. A KMP failure table
//     is deliberately not used: with '_' in the pattern "matches" is no longer
//     a transitive relation, so table-driven shifts can skip valid matches
//     (e.g. "_ab" inside "xaab" must be found at index 1).
//   * Candidate starts stop at (window end - pattern length), so no position
//     that cannot possibly yield a full match is ever examined.
////////////////////////////////////////////////////////////////////////////////

int _strat(char * chText , char * chPattern , int nbegpos /* = 0 */ , int nlen /* = -1 */ , bool bleft /* = false */)
{
    int nPatternLen = (int)strlen(chPattern);
    int nTextLen = (int)strlen(chText);

    // Reject start positions that would index outside the text.
    if(nbegpos < 0 || nbegpos > nTextLen)
    {
        return -1;
    }

    // Shrink the search window to nlen characters after nbegpos when requested.
    // Written as a subtraction so a huge nlen cannot overflow the addition.
    if(nlen >= 0 && nlen < nTextLen - nbegpos)
    {
        nTextLen = nbegpos + nlen;
    }

    // The pattern must fit into what is left of the window.
    if(nPatternLen > nTextLen - nbegpos)
    {
        return -1;
    }

    if(nPatternLen == 0)
    {
        return nbegpos;
    }

    // A left-aligned search has exactly one candidate start: nbegpos itself.
    int nLastStart = bleft ? nbegpos : nTextLen - nPatternLen;

    for(int nStart = nbegpos; nStart <= nLastStart; nStart++)
    {
        int nMatched = 0;

        while(nMatched < nPatternLen
              && (chPattern[nMatched] == '_' || chPattern[nMatched] == chText[nStart + nMatched]))
        {
            nMatched++;
        }

        if(nMatched == nPatternLen)
        {
            return nStart;
        }
    }

    return -1;
}

////////////////////////////////////////////////////////////////////////////////
// Function: bool _strlike(char * chText, char * chPattern, int nbegpos)
//
// Purpose:  Wildcard comparison of chText (from nbegpos to its end) against
//           chPattern. '%' stands for any run of characters, including none.
//           The pattern is cut into segments at each '%' and every segment is
//           located with _strat, which is where single-character '_'
//           wildcards are interpreted.
//
// Effect on global state: none.
//
// Parameters:
//   chText     NUL-terminated text.
//   chPattern  NUL-terminated pattern.
//   nbegpos    Index in chText where matching starts. (The commented-out
//              "= 0" looks like the default of a prototype declared
//              elsewhere - confirm against the header.)
//
// Returns:
//   true   the text from nbegpos onwards matches the whole pattern
//   false  it does not, or nbegpos lies outside [0, strlen(chText)]
////////////////////////////////////////////////////////////////////////////////

bool _strlike(char * chText , char * chPattern , int nbegpos /* = 0 */)
{
    int nTextLen = (int)strlen(chText);

    // A start position outside the text can never match (and must not be
    // used as an index).
    if(nbegpos < 0 || nbegpos > nTextLen)
    {
        return false;
    }

    // An empty pattern matches only when nothing is left of the text.
    if(chPattern[0] == '\0')
    {
        return nbegpos == nTextLen;
    }

    for(;;)
    {
        // A segment is anchored at nbegpos unless a '%' precedes it.
        bool bLeftMatch = true;

        if(chPattern[0] == '%')
        {
            // Consecutive '%' are equivalent to a single one.
            while(chPattern[0] == '%')
            {
                chPattern++;
            }

            // A trailing '%' swallows whatever is left of the text.
            if(chPattern[0] == '\0')
            {
                return true;
            }

            bLeftMatch = false;
        }

        // The current segment runs up to the next '%' or to the end of the pattern.
        char * chNextPercent = strchr(chPattern , '%');
        bool bLast = (chNextPercent == NULL);
        int nPatternLen = bLast ? (int)strlen(chPattern) : (int)(chNextPercent - chPattern);

        // The segment cannot be longer than the text that is still unconsumed.
        if(nPatternLen > nTextLen - nbegpos)
        {
            return false;
        }

        if(bLast)
        {
            if(bLeftMatch)
            {
                // No '%' in front of the final segment: it has to cover the
                // rest of the text exactly.
                if(nbegpos + nPatternLen != nTextLen)
                {
                    return false;
                }
            }
            else
            {
                // "%tail": the final segment must sit at the very end of the
                // text, so turn it into an anchored match at that position.
                nbegpos = nTextLen - nPatternLen;
                bLeftMatch = true;
            }
        }

        // _strat expects a NUL-terminated pattern, so copy the segment out.
        char * chSegment = new char[nPatternLen + 1];
        memcpy(chSegment , chPattern , nPatternLen);
        chSegment[nPatternLen] = 0;

        int npos = _strat(chText , chSegment , nbegpos , bLeftMatch ? nPatternLen : nTextLen - nbegpos , bLeftMatch);

        // Allocated with new[], so it must be released with delete[].
        delete [] chSegment;

        if(npos < 0)
        {
            return false;
        }

        // An anchored segment is only acceptable exactly at nbegpos.
        if(bLeftMatch && npos != nbegpos)
        {
            return false;
        }

        if(bLast)
        {
            // The final segment must end where the text ends.
            return npos + nPatternLen == nTextLen;
        }

        // Continue after the matched segment; chNextPercent points at the '%'
        // that opens the next segment.
        nbegpos = npos + nPatternLen;
        chPattern = chNextPercent;
    }
}

SQL Server中檢索語句中Like的演算法實現 (轉)

相關文章