前言
技術棧
Elasticsearch 7.17.2 python 3.8 httpx 0.22.0 loguru 0.6.0
hao 分詞器:https://github.com/tenlee2012...
有時更新 ES 分詞器或遠端詞典後,不確定每個節點是否都已更新到位,沒找到直接的命令來校驗,故寫了一份 Python 指令碼來做校驗
原理是利用 index.routing.allocation.include._ip 將索引的分片分配到指定的某個節點上
程式碼建立了 test_{nodeName} 的索引,測試完後需手動刪除:DELETE test_*
程式碼
# encoding: utf8
# author: qbit
# date: 202-06-16
# summary: 遍歷 ES 資料節點校驗分詞結果
import pprint
import httpx
from loguru import logger
# Connection settings for the Elasticsearch cluster.
coordnode: str = "http://192.168.2.67:9200"  # address of the ES coordinating node
esuser: str = "elastic"  # ES cluster account
espwd: str = "xxxx"  # ES cluster password

# Analyzer under test, the sample text sent to it, and the token line
# (tokens joined with ';') that every node is expected to produce.
analyzer: str = "hao_index_mode"
intext: str = "燕雀安知鴻鵠之志"
outtext: str = "燕雀;安;知;鴻鵠之志;鴻鵠"
def GetNodeList():
    r"""Fetch the list of nodes in the ES cluster.

    Queries ``_cat/nodes`` on the coordinating node, asking for the
    ``name``, ``ip``, ``master`` and ``node.role`` columns sorted by name,
    in JSON format.

    Returns:
        The decoded JSON body of the response (one entry per node), each
        entry also being logged at debug level.

    Raises:
        httpx.HTTPStatusError: if the cluster answers with a 4xx/5xx status
            (e.g. wrong credentials), instead of silently returning the
            error payload as if it were a node list.
    """
    url = f'{coordnode}/_cat/nodes?v=true&h=name,ip,master,node.role&s=name&format=json'
    r = httpx.get(url, auth=(esuser, espwd))
    # Fail fast: an error response is a JSON object, not a list of nodes,
    # and would otherwise break the caller in a confusing way.
    r.raise_for_status()
    result = r.json()
    for dic in result:
        logger.debug(dic)
    return result
def CheckOneNodeAnalyzer(nodeDict: dict, expected: str) -> list:
    r"""Create a test index pinned to one node and check its analyzer output.

    An index named ``test_{nodeName}`` is created with a single shard, no
    replicas and ``routing.allocation.include._ip`` set to the node's IP.
    The module-level ``intext`` is then analyzed through that index with
    the module-level ``analyzer`` and the resulting tokens, joined with
    ``';'``, are compared with ``expected``.

    The test index is NOT deleted here.

    Args:
        nodeDict: node description; the ``'name'`` and ``'ip'`` keys are read.
        expected: expected token line (tokens joined with ``';'``).

    Returns:
        ``[status, nodeName, nodeIP, tokenLine]`` where ``status`` is
        ``'ok'`` when the token line equals ``expected`` and ``'no'``
        otherwise. When the analyze request itself fails, ``status`` is
        ``'no'`` and ``tokenLine`` is an empty string.
    """
    nodeName = nodeDict['name']
    nodeIP = nodeDict['ip']
    indexName = f"test_{nodeName}"
    logger.info(f"{nodeName}, {nodeIP}, {indexName}")

    auth = (esuser, espwd)
    indexUrl = f"{coordnode}/{indexName}"

    createBody = {
        "settings": {
            "index": {
                "number_of_shards": 1,
                "number_of_replicas": 0,
                "routing.allocation.include._ip": nodeIP,
            }
        }
    }
    r = httpx.put(indexUrl, auth=auth, json=createBody)  # create the index
    logger.debug(r)
    if r.is_error:
        # Best effort: the index may already exist from an earlier run, so
        # report the failure but still attempt the analyze request.
        logger.warning("create index {} failed: HTTP {} {}",
                       indexName, r.status_code, r.text)

    analyzeBody = {
        "analyzer": analyzer,
        "text": intext,
    }
    r = httpx.post(f"{indexUrl}/_analyze", auth=auth, json=analyzeBody)  # run the analyzer
    logger.debug(r)
    if r.is_error:
        # An error body has no 'tokens' key; report this node as failed
        # rather than aborting the whole scan with a KeyError.
        logger.error("analyze on {} failed: HTTP {} {}",
                     indexName, r.status_code, r.text)
        return ['no', nodeName, nodeIP, '']

    tokenLine = ';'.join(item['token'] for item in r.json().get('tokens', []))
    logger.info(tokenLine)

    status = 'ok' if tokenLine == expected else 'no'
    return [status, nodeName, nodeIP, tokenLine]
if __name__ == '__main__':
    # Check every data node and sort the results into passed / failed.
    okList = []
    noList = []
    for node in GetNodeList():
        # Only nodes whose role string contains 'd' (data role) are tested.
        if 'd' not in node['node.role']:
            continue
        outcome = CheckOneNodeAnalyzer(node, outtext)
        bucket = okList if outcome[0] == 'ok' else noList
        bucket.append(outcome)

    # Summary: passed nodes first, then the failed ones.
    print('------')
    logger.info(f"okList size: {len(okList)}")
    pprint.pprint(okList)
    logger.info(f"noList size: {len(noList)}")
    pprint.pprint(noList)
qbit snap