python 小指令碼 (實現 elasticsearch 匯出匯入)

肖軍發表於2020-07-23

好久沒來了。前段時間專案測試時，需要把現網的 ES 資料匯出，再匯入測試環境以方便測試，於是寫了一個小指令碼。拿出來分享，說不定以後有童鞋用得上。直接上乾貨……
#匯出指令碼

import json
import os
import time
import requests


class exportEsData():
    """Dump every document of one Elasticsearch index/type to a JSON-lines file.

    The output file is named ``<index>_<type>.json`` in the current working
    directory and holds one ``_source`` object per line.
    """

    # Number of hits requested per search page.
    size = 10000
    # Text encoding of the output file; None means the platform default.
    encoding = None

    def __init__(self, url, index, type, encoding=None):
        """Store the target index/type and pre-build the REST endpoints.

        Args:
            url: Base URL of the cluster, e.g. ``http://host:9200``.
            index: Name of the index to export.
            type: Mapping type inside the index.
            encoding: Encoding passed to ``open`` for the output file.
                ``None`` (the default) keeps the platform default encoding.
        """
        self.url = url + "/" + index + "/" + type + "/_search"
        self.urlput = url + "/" + index + "/_settings"
        self.index = index
        self.type = type
        self.encoding = encoding

    def _path(self):
        """Return the name of the JSON-lines output file."""
        return self.index + "_" + self.type + ".json"

    @staticmethod
    def _total_hits(obj):
        """Return the hit count from a parsed search response.

        ``hits.total`` is accepted either as a plain integer or as an object
        carrying the count under ``"value"``.
        NOTE(review): in the object form the count may only be a lower bound
        when ``relation`` is not ``"eq"`` - confirm against the server version.
        """
        total = obj["hits"]["total"]
        if isinstance(total, dict):
            total = total["value"]
        return int(total)

    @staticmethod
    def _page_count(total, size):
        """Return how many pages of ``size`` hits are needed for ``total`` hits."""
        # Integer ceiling division: no float arithmetic, no trailing empty page.
        return -(-total // size)

    def exportData(self):
        """Page through the whole index and append every hit to the output file.

        The output file is truncated first, so a re-run starts from scratch and
        the file exists even when the index is empty.
        """
        print("export data begin...")
        headers = {"Content-Type": "application/json"}
        # Raise index.max_result_window to 1,000,000 so that deep from+size
        # paging is accepted; adjust the value to the real data volume.
        settings = {"index.max_result_window": "1000000"}
        resp = requests.put(url=self.urlput, data=json.dumps(settings), headers=headers)
        if not resp.ok:
            # Best effort only: keep going, but do not hide the failure.
            print("warning: updating index.max_result_window failed: " + resp.text)
        begin = time.time()
        # Start from an empty file instead of silently swallowing remove errors.
        with open(self._path(), "w", encoding=self.encoding):
            pass
        msg = requests.get(self.url).text
        print(msg)
        num = self._total_hits(json.loads(msg))
        print(num)
        for page in range(self._page_count(num, self.size)):
            msg = requests.get(
                self.url + "?from=" + str(page * self.size) + "&size=" + str(self.size)
            ).text
            self.writeFile(msg)
        print("export data end!!!\n\t total consuming time:" + str(time.time() - begin) + "s")

    def writeFile(self, msg):
        """Append the ``_source`` of every hit in a search response to the file.

        Args:
            msg: Raw JSON text of one search response.

        Raises:
            KeyError: If the response has no ``hits.hits`` list or a hit has
                no ``_source``.
        """
        hits = json.loads(msg)["hits"]["hits"]
        lines = [json.dumps(hit["_source"], ensure_ascii=False) + "\n" for hit in hits]
        # The context manager closes the file even if writing fails, and an
        # error from open() itself is no longer masked by a NameError.
        with open(self._path(), "a", encoding=self.encoding) as f:
            f.writelines(lines)


if __name__ == "__main__":
    # Replace ip, port, index and type with the values of the real cluster.
    exporter = exportEsData("http://ip:port", "index", "type")
    exporter.exportData()

#匯入指令碼

# coding: utf-8

from elasticsearch import Elasticsearch
import json
import requests
from elasticsearch import helpers

class importEsData():
    """Load a JSON-lines dump into an Elasticsearch index via the bulk helper.

    The input file is ``<index>_<type>.json`` in the current working
    directory, with one JSON document per line.
    """

    # Number of actions collected before each bulk request.
    batch_size = 10000

    def __init__(self, url, index, type):
        """Store the target index/type and pre-build the REST endpoints.

        Args:
            url: Base URL of the cluster, e.g. ``http://host:9200``.
            index: Name of the index to create and fill.
            type: Mapping type the documents are indexed under.
        """
        self.url = url
        self.urlputindex = url + "/" + index
        self.urlputmapping = url + "/" + index + "/" + type + "/_mapping"
        self.index = index
        self.type = type

    def _action_for(self, doc, fields):
        """Build one bulk action for a parsed document.

        Args:
            doc: One parsed line of the dump file.
            fields: Top-level keys to copy into ``_source``; ``None`` copies
                the whole document.

        Raises:
            KeyError: If a requested field is missing from ``doc``.
        """
        source = doc if fields is None else {key: doc[key] for key in fields}
        return {
            "_index": self.index,
            "_type": self.type,
            "_source": source,
        }

    def importData(self, mappings=None, fields=("properties",), encoding="gbk"):
        """Create the index, optionally set its mapping, and bulk-index the dump.

        Args:
            mappings: Mapping body to PUT to the ``_mapping`` endpoint (it can
                be copied from the response of ``http://ip:port/index``).
                ``None`` skips the mapping request.
            fields: Top-level keys of each line to index; adapt to the real
                data, or pass ``None`` to index each line unchanged.
            encoding: Encoding used to read the dump file.
        """
        es = Elasticsearch(self.url)
        requests.put(self.urlputindex)  # create the index
        if mappings is not None:
            # Declare the body as JSON, as the request carries a JSON document.
            requests.put(
                self.urlputmapping,
                data=json.dumps(mappings),
                headers={"Content-Type": "application/json"},
            )  # create the mappings

        actions = []  # pending bulk actions
        # The context manager closes the file even if parsing or indexing fails.
        with open(self.index + "_" + self.type + ".json", encoding=encoding) as f:
            for line in f:
                if not line.strip():
                    # Tolerate blank lines instead of failing in json.loads.
                    continue
                actions.append(self._action_for(json.loads(line), fields))
                if len(actions) >= self.batch_size:
                    helpers.bulk(es, actions)
                    actions = []
        if actions:
            helpers.bulk(es, actions)

if __name__ == "__main__":
    # Replace ip, port, index and type with the values of the real cluster.
    importer = importEsData("http://ip:port", "index", "type")
    importer.importData()

相關文章