遍歷 ES 節點校驗分詞(qbit)

qbit發表於2022-06-22

前言

  • 技術棧

    Elasticsearch 7.17.2
    python 3.8
    httpx  0.22.0
    loguru 0.6.0
  • hao 分詞器:https://github.com/tenlee2012/elasticsearch-analysis-hao
  • 有時更新 ES 分詞器或遠端詞典後,不確定每個節點是否都已更新到位,沒找到直接的命令來校驗,故寫了一份 Python 指令碼來做校驗
  • 原理是利用 index.routing.allocation.include._ip 將索引的分片分配到指定的某個節點上
  • 程式碼建立了 test_{nodeName} 的索引,測試完後手動刪除

    DELETE test_*

程式碼

# encoding: utf8
# author: qbit
# date: 2022-06-16
# summary:  遍歷 ES 資料節點校驗分詞結果
import pprint
import httpx
from loguru import logger

# Address of the ES coordinating node that every request is sent to.
coordnode = 'http://192.168.2.67:9200'
# ES cluster account.
esuser = 'elastic'
# ES cluster password.
espwd = 'xxxx'
# Name of the analyzer under test.
analyzer = "hao_index_mode"
# Text fed to the analyzer.
intext = "燕雀安知鴻鵠之志"
# Expected analysis result: the produced tokens joined with ';'.
outtext = "燕雀;安;知;鴻鵠之志;鴻鵠"
 
def GetNodeList():
    r"""Fetch the list of ES cluster nodes from the ``_cat/nodes`` API.

    The request asks for the ``name``, ``ip``, ``master`` and ``node.role``
    columns, sorted by name, in JSON format. Every returned entry is logged
    at debug level.

    Returns:
        The decoded JSON body of the response (one entry per node).

    Raises:
        httpx.HTTPStatusError: if the server answers with a 4xx/5xx status
            (e.g. wrong credentials), instead of returning the error body
            as if it were a node list.
    """
    url = f'{coordnode}/_cat/nodes?v=true&h=name,ip,master,node.role&s=name&format=json'
    r = httpx.get(url, auth=(esuser, espwd))
    # Fail here with a clear HTTP error rather than letting callers trip
    # over an error document that is not a list of nodes.
    r.raise_for_status()
    result = r.json()
    for dic in result:
        logger.debug(dic)

    return result

def CheckOneNodeAnalyzer(nodeDict: dict, expected: str):
    r"""Create a test index pinned to one node and verify the analyzer output.

    An index named ``test_{nodeName}`` is created with a single shard, no
    replicas and ``index.routing.allocation.include._ip`` set to the node's
    IP. The module-level ``intext`` is then run through the module-level
    ``analyzer`` via that index's ``_analyze`` endpoint, and the resulting
    tokens (joined with ';') are compared with ``expected``.

    The test index is not deleted here.

    Args:
        nodeDict: Node entry providing at least the ``name`` and ``ip`` keys.
        expected: Expected tokens joined with ';'.

    Returns:
        ``[status, nodeName, nodeIP, tokenLine]`` where ``status`` is ``'ok'``
        when the token line equals ``expected`` and ``'no'`` otherwise. When
        the ``_analyze`` request itself fails, ``status`` is ``'no'`` and
        ``tokenLine`` holds a short failure marker with the HTTP status.
    """
    nodeName = nodeDict['name']
    nodeIP = nodeDict['ip']
    indexName = f"test_{nodeName}"
    auth = (esuser, espwd)
    logger.info(f"{nodeName}, {nodeIP}, {indexName}")

    indexSettings = {
        "settings": {
            "index": {
                "number_of_shards": 1,
                "number_of_replicas": 0,
                "routing.allocation.include._ip": nodeIP,
            }
        }
    }
    # Create the index.
    r = httpx.put(f"{coordnode}/{indexName}", auth=auth, json=indexSettings)
    logger.debug(r)
    if r.status_code >= 400:
        # Not fatal: the index may simply be left over from a previous run.
        # Any real problem surfaces in the analyze step below.
        logger.warning(f"create index {indexName} returned {r.status_code}: {r.text}")

    # Run the analyzer through the node-pinned index.
    analyzeBody = {
        "analyzer": analyzer,
        "text": intext,
    }
    r = httpx.post(f"{coordnode}/{indexName}/_analyze", auth=auth, json=analyzeBody)
    logger.debug(r)
    if r.status_code >= 400:
        # A failing analyze call (e.g. analyzer unavailable) is exactly the
        # kind of problem this check should report, so record the node as
        # 'no' instead of crashing on a missing 'tokens' key.
        logger.error(f"analyze on {indexName} returned {r.status_code}: {r.text}")
        return ['no', nodeName, nodeIP, f"<analyze failed: HTTP {r.status_code}>"]

    tokenLine = ';'.join(item['token'] for item in r.json()['tokens'])
    logger.info(tokenLine)
    status = 'ok' if tokenLine == expected else 'no'
    return [status, nodeName, nodeIP, tokenLine]
if __name__ == '__main__':
    # Check every data node and sort the per-node results into two lists:
    # nodes whose token line matched the expectation, and nodes that did not.
    nodeList = GetNodeList()
    okList = []
    noList = []
    for node in nodeList:
        # Only nodes whose role string contains 'd' (data nodes) are checked.
        if 'd' not in node['node.role']:
            continue
        result = CheckOneNodeAnalyzer(node, outtext)
        target = okList if result[0] == 'ok' else noList
        target.append(result)
        print('------')

    # Summary: size and content of each list.
    logger.info(f"okList size: {len(okList)}")
    pprint.pprint(okList)
    logger.info(f"noList size: {len(noList)}")
    pprint.pprint(noList)
qbit snap

相關文章