再次強調,我安裝的Elasticsearch 版本是 7.8.0 ,C# 操作 Elasticsearch 的驅動有 NEST、Elasticsearch.net 、PlainElastic.Net 等,當然要注意版本是否匹配,
PlainElastic.Net 是比較舊的操作 Elasticsearch 的方式 ,但是看到不少園友用這個,所以本篇也用 PlainElastic.Net ,稍後有時間會給出 NEST 、 Elasticsearch.net 的 Demo
PlainElastic.Net 的參考文件、使用方法可以看 https://github.com/Yegoroff/PlainElastic.Net
完整的 Demo 見 https://github.com/fhrddx/ES_Query ,前端 UI 框架是 aceadmin ,見 http://ace.jeka.by/
先看一下效果圖
1、首先是引用 PlainElastic.Net ,然後封裝 ElasticSearchHelper
/// <summary>
/// Helper exposing one shared PlainElastic.Net connection to the
/// Elasticsearch node at localhost:9200.
/// </summary>
public class ElasticSearchHelper
{
    /// <summary>The only instance; the constructor is private.</summary>
    public static readonly ElasticSearchHelper Intance = new ElasticSearchHelper();

    // Connection used by the helper's methods.
    private ElasticConnection Client = new ElasticConnection("localhost", 9200);

    private ElasticSearchHelper()
    {
    }
}
2、在 ElasticSearchHelper 中設定欄位 mapping,並建立索引庫 db_student_test1
/// <summary>
/// Creates the index <c>db_student_test1</c> together with its field mapping.
/// </summary>
/// <returns>
/// The <c>acknowledged</c> flag of the parsed response, or <c>false</c> when
/// the parsed result is null.
/// </returns>
public bool BuildStudentMapping()
{
    var serializer = new JsonNetSerializer();

    // name is analyzed with "standard"; school and desc with "ik_max_word";
    // the remaining fields are integers.
    var indexDefinition = new
    {
        mappings = new
        {
            properties = new
            {
                name = new { type = "text", analyzer = "standard" },
                school = new { type = "text", analyzer = "ik_max_word" },
                desc = new { type = "text", analyzer = "ik_max_word" },
                @class = new { type = "integer" },
                chinese = new { type = "integer" },
                english = new { type = "integer" },
                math = new { type = "integer" }
            }
        }
    };

    string body = serializer.Serialize(indexDefinition);
    OperationResult response = Client.Put("db_student_test1", body);
    CommandResult ack = serializer.ToCommandResult(response.Result);

    return ack?.acknowledged ?? false;
}
3、在 ElasticSearchHelper 中,為索引庫 db_student_test1 設定一個別名 student_test1
/// <summary>
/// Adds the alias <c>student_test1</c> to the index <c>db_student_test1</c>.
/// </summary>
/// <returns>
/// The <c>acknowledged</c> flag of the parsed response, or <c>false</c> when
/// the parsed result is null.
/// </returns>
public bool Alias()
{
    OperationResult response = Client.Put("db_student_test1/_alias/student_test1");
    CommandResult ack = new JsonNetSerializer().ToCommandResult(response.Result);

    return ack?.acknowledged ?? false;
}
4、建立索引文件
/// <summary>
/// Writes an already-serialized JSON document into <paramref name="indexName"/>
/// under the given <paramref name="id"/>.
/// </summary>
/// <param name="indexName">Target index name.</param>
/// <param name="id">Document id.</param>
/// <param name="jsonDocument">Document body as JSON text.</param>
/// <returns>The response parsed into an <c>IndexResult</c>.</returns>
public IndexResult CreateIndex(string indexName, string id, string jsonDocument)
{
    // With the Elasticsearch version used in this article (7.8.0) the type
    // must be the single default one, "_doc".
    string command = new IndexCommand(indexName, "_doc", id);

    // The timeout is set on the shared connection before every request.
    Client.Timeout = 30000;
    OperationResult response = Client.Put(command, jsonDocument);

    return new JsonNetSerializer().ToIndexResult(response.Result);
}

/// <summary>
/// Serializes <paramref name="document"/> to JSON and forwards to the
/// string-based overload.
/// </summary>
/// <param name="indexName">Target index name.</param>
/// <param name="id">Document id.</param>
/// <param name="document">Object to serialize and index.</param>
/// <returns>The response parsed into an <c>IndexResult</c>.</returns>
public IndexResult CreateIndex(string indexName, string id, object document)
{
    string json = new JsonNetSerializer().Serialize(document);
    return CreateIndex(indexName, id, json);
}
5、隨機生成測試資料:從網上下載一篇 txt 格式的長篇小說,隨機擷取其中的文字片段,然後寫入 Elasticsearch
測試資料如下:
// Static seed data used to generate random student records.
//   xing    - pool of surname characters; the generator picks one character by index.
//   name    - pool of given-name characters; the generator takes two consecutive characters.
//   school  - list of university names.
//   content - novel text (abridged here, see the "...... ......" marker) from which
//             short excerpts are cut for the desc field.
public class TestData { public static string xing = @"王李張劉陳楊趙黃周吳徐孫胡朱高林何郭馬羅樑宋鄭謝韓唐馮於董蕭程曹袁鄧許傅沈曾彭呂蘇盧蔣蔡賈丁魏薛葉閻餘潘杜戴夏鍾汪田任姜範方石姚譚廖鄒熊金陸郝孔白崔康毛邱秦江史顧侯邵孟龍萬段雷錢湯尹黎易常武喬賀賴龔文龐樊蘭殷施陶洪翟安顏倪嚴牛溫蘆季俞章魯葛伍韋申尤畢聶叢焦向柳邢路嶽齊沿梅莫莊辛管祝左塗谷祁時舒耿牟卜肖詹關苗凌費紀靳盛童歐甄項曲成遊陽裴席衛查屈鮑位覃霍翁隋植甘景薄單包司柏寧柯阮桂閔歐陽解強柴華車冉房邊淨陰閆佘練駱付代麥容悲初瞿褚班全名井米談宮虞奚佟符蒲穆漆卞東儲黨從艾苻厲岑燕吉冷仇伊首鬱婁楚鄺歷狄簡胥連帥封危支原滕苑信索慄官沙池藏師國鞏刁茅杭巫居竇皮戈麻饒習巴曠宗荊榮孝藺廉員西寇刃見底區酈卓琚續樸蒙敖花應喻冀尚頓菅嵇雒弓忻權諶卿扈海冼倫鹿宿山桑裘達麼智宣尉遲東方么郎農戚屠樓步鞠仲尉藍招攀欒籍壽鄔莢稅逄加勾由福緱欽鮮于但邸逢況鄢古樂斯鈕蓋旦毅邰哈鄂商英遲仝亓玄黑騰晏禹諸苟湛殳亢奉佔聞粟種匡賓勞申屠伏過水真宇巢計羌相辜展醜銀豐矯上昝繩臧舍郅布糜烏衣來恆那滿門司徒皋旺公言藤釋堯繆幹闞靖渠契晉六束良鶚貝邴沃竺揚勵歸上官荃焉多都果郜隆諸葛令狐慕禮祖翦力朗撖修呼富明站虢冶茹禚笪雲肇平弋候爾姬寶暢冒邾延禪浦敬頡南巍補"; public static string name = @"帆棟祜權錕坤允騫諦初盛炳初澤榮喆恆鶴禮華帝宇中鑫彬槐禧允翱鵬皓中偉炳皓槐帆芃欣鑫振杰誠錕濰吉軒福宇初柏芃翰浩峰延帆欣帆奇鬱爍卓仕吉帝濰釗傑鑫星諦鑫銘鋒沛芃澤祿勇峰欣延鶴鬱信俠翰邦寅軒澤哲佑福翱恆文楓澄棟翰中震杞斌凱錦升逸延騰諦權盛弘爍俊強博祿中欣權浩陽裕延盛平暢沛吉強駿起華炳騰柏佑暢傑凱鴻斌加振晨沛祥祜盛濡彬成弘天福錦穎嘉茜芸格美漫慧漫妍鈺琪玥沛玥鑫潔嵐採曼珍雪昕婷碧弦雪潔馨昕香弦帆芳菲楠俊月珊函蔚帆靈靈蓮優蔚碧文蕾婭林婧妮婷薇馨淑惠杉美梔怡薇琪曦雲漫瑤韻楠妮穎妮杉媛詩芳菲錦錦蕾芸歡珍嵐鶴莉優雲舒舒璇慧依菡雅妍楠雅慧靈陽漫珠帆媛可雅欣鑫妮雯霞柔芳芝琳彩冰林媛柔初倩玉冰薇潔妍潔璐採彩穎呈雪雲歡琪璟紫靜蓓薇歡薇柔晨萱雲歆鑫月陽婭媛露露琳"; public static string[] school = new string[] { "中山大學", "暨南大學", "汕頭大學", "華南理工大學", "華南農業大學", "廣東海洋大學", "廣州醫科大學", "廣州中醫藥大學", "華南師範大學", "韶關學院", "深圳大學", "廣東財經大學", "廣東工業大學", "東莞理工學院", "南方科技大學", "香港中文大學", "廣州商學院", "上海交通大學", "同濟大學", "復旦大學", "上海大學", "上海財經大學", "北京大學", "清華大學", "北京郵電大學", "中國人民大學", "北京理工大學" }; public static string content = @"第一回 甄士隱夢幻識通靈 賈雨村風塵懷閨秀() 此開卷第一回也.作者自雲:因曾歷過一番夢幻之後,故將真事隱去,而借”通靈”之說,撰此《石頭記》一書也.故曰”甄士隱”云云.但書所記何事何人?自又云:“今風塵碌碌,一事無成,忽念及當日所有之女子,一一細考較去,覺其行止見識,皆出於我之上.何我堂堂鬚眉,誠不若彼裙釵哉?實愧則有餘,悔又無益之大無可如何之日也!當此,則自欲將已往所賴天恩祖德,錦衣紈絝之時,飫甘饜肥之日,背父兄教育之恩,負師友規談之德,以至今日一技無成,半生潦倒之罪,編述一集,以告天下人:我之罪固不免,然閨閣本自歷歷有人,萬不可因我之不肖,自護己短,一併使其泯滅也.雖今日之茅椽蓬牖,瓦灶繩床,其晨夕風露,階柳庭花,亦未有妨我之襟懷筆墨者.雖我未學,下筆無,又何妨用假語村言,敷演出一段故事來,亦可使閨閣昭傳,復可悅世之目,破人愁悶,不亦宜乎?”故曰”賈雨村”云云. ...... ......
由來同一夢,休笑世人痴!"; }
隨機生成學生記錄的程式碼如下:
// Generates random Student documents in parallel and indexes each one into
// db_student_test1 under a fresh GUID id.
int xing_length = TestData.xing.Length;
int name_length = TestData.name.Length;
int school_length = TestData.school.Length;
int content_length = TestData.content.Length;

// Run at most four loop bodies concurrently.
ParallelOptions _po = new ParallelOptions();
_po.MaxDegreeOfParallelism = 4;

Parallel.For(0, 100000000, _po, c =>
{
    // r is seeded with the loop index; r2 uses the default seed.
    // NOTE(review): depending on the runtime, default-seeded Random instances
    // created close together may yield identical sequences - confirm if the
    // extra variation from r2 matters.
    Random r = new Random(c);
    Random r2 = new Random();
    try
    {
        // Cut a 20-character excerpt and remove line breaks. Real escape
        // sequences are required here: the literal text "/r/n" (forward
        // slashes) never matches a carriage return or line feed.
        string desc = TestData.content
            .Substring(r.Next(0, content_length - 700), 20)
            .Trim()
            .Replace("\r", string.Empty)
            .Replace("\n", string.Empty);

        Student model = new Student()
        {
            // One surname character plus two consecutive given-name characters
            // taken at an even offset.
            name = TestData.xing[r.Next(0, xing_length)].ToString()
                   + TestData.name.Substring(r.Next(0, name_length / 2) * 2, 2),
            school = TestData.school[r.Next(0, school_length)],
            chinese = r.Next(25, 80) + r2.Next(0, 20),
            math = r.Next(15, 60) + r2.Next(0, 40),
            english = r.Next(21, 70) + r2.Next(0, 30),
            @class = c,
            // Append a random school name to the excerpt.
            desc = desc + TestData.school[r2.Next(0, school_length)]
        };

        ElasticSearchHelper.Intance.CreateIndex("db_student_test1", Guid.NewGuid().ToString(), model);
    }
    catch (Exception ex)
    {
        // Best effort: report the failure and continue with the next record.
        Console.Write(ex.ToString());
    }
});
我總共跑了大概是5千萬條資料,可以開啟 head 外掛或者是 Kibana 看到資料總共有多少
Kibana 如下
6、單個詞語查詢,例如查詢滿足以下條件的文件:(1) desc 包含 “黛玉” ;(2)chinese、math、english 都大於90;(3)分頁,取前10條記錄;(4)關鍵詞高亮;(5)排序按照語文、數學、英語倒序
controller 程式碼如下
/// <summary>
/// Runs the term search for <paramref name="key"/> (first 10 hits) and renders
/// the result together with the elapsed time.
/// </summary>
/// <param name="key">Search keyword; defaults to "黛玉".</param>
/// <returns>The default view bound to the search result.</returns>
public ActionResult Index(string key = "黛玉")
{
    Stopwatch sw = Stopwatch.StartNew();

    // key?.Trim() avoids a NullReferenceException when key is null;
    // Term itself returns null for a null or empty key.
    var model = ElasticSearchHelper.Intance.Term(key?.Trim(), 0, 10);

    sw.Stop();
    ViewBag.Message = $"共耗時{sw.ElapsedMilliseconds}毫秒";
    return View(model);
}
ElasticsearchHelper 程式碼如下
/// <summary>
/// Searches the alias <c>student_test1</c> for documents whose <c>desc</c>
/// contains the exact term <paramref name="key"/>, restricted by score ranges,
/// sorted by chinese, math and english descending, with highlighting on desc.
/// </summary>
/// <param name="key">Term to look for; surrounding whitespace is trimmed.</param>
/// <param name="from">Offset of the first hit to return.</param>
/// <param name="size">Number of hits to return.</param>
/// <returns>The deserialized search response, or <c>null</c> when key is null or empty.</returns>
public ElasticsearchResult<Student> Term(string key, int from = 0, int size = 10)
{
    if (string.IsNullOrEmpty(key))
    {
        return null;
    }

    string term = key.Trim();
    string searchCommand = new SearchCommand("student_test1", "_doc");

    var requestBody = new QueryBuilder<Student>()
        .Query(q => q
            .Bool(b => b
                .Must(must => must
                    // Exact term on desc, plus one range filter per subject.
                    .Term(f => f.Field("desc").Value(term))
                    .Range(f => f.Field("chinese").From("90").To("100"))
                    .Range(f => f.Field("math").Gt("90"))
                    .Range(f => f.Field("english").Gt("90")))))
        .From(from)
        .Size(size)
        .Sort(order => order
            .Field("chinese", SortDirection.desc)
            .Field("math", SortDirection.desc)
            .Field("english", SortDirection.desc))
        .Highlight(hl => hl
            // Matched text is wrapped in a red label span.
            .PreTags("<span class=\"label label-sm label-danger\">")
            .PostTags("</span>")
            .Fields(f => f.FieldName("desc").Order(HighlightOrder.score)))
        .Build();

    string responseJson = Client.Post(searchCommand, requestBody);
    return new JsonNetSerializer().Deserialize<ElasticsearchResult<Student>>(responseJson);
}
效果是
7、語句匹配查詢,關鍵詞是“黛玉”,當然是可以查出來,但是如果使用者知道紅樓夢有個情節,是關於大觀園裡眾人舉辦螃蟹宴,作詩玩樂的,想把相關文段查詢出來,這時使用者輸入的關鍵詞是 “寶玉黛玉螃蟹宴作詩” ,這時候會查詢出什麼呢?
首先改一下邏輯,controller 程式碼為
/// <summary>
/// Runs the query-string search for <paramref name="key"/> (first 10 hits) and
/// renders the result, with the elapsed time, in the shared ES index view.
/// </summary>
/// <param name="key">Search phrase; defaults to "寶玉黛玉螃蟹宴作詩".</param>
/// <returns>The <c>~/Views/ES/Index.cshtml</c> view bound to the search result.</returns>
public ActionResult Query(string key = "寶玉黛玉螃蟹宴作詩")
{
    Stopwatch sw = Stopwatch.StartNew();

    // key?.Trim() avoids a NullReferenceException when key is null;
    // the helper's Query returns null for a null or empty key.
    var model = ElasticSearchHelper.Intance.Query(key?.Trim(), 0, 10);

    sw.Stop();
    ViewBag.Message = $"共耗時{sw.ElapsedMilliseconds}毫秒";
    return View("~/Views/ES/Index.cshtml", model);
}
ElasticsearchHelper 程式碼如下
/// <summary>
/// Searches the alias <c>student_test1</c> with a query-string query on
/// <c>desc</c>, restricted by score ranges, with highlighting on desc.
/// No explicit sort is applied.
/// </summary>
/// <param name="key">Search phrase; surrounding whitespace is trimmed.</param>
/// <param name="from">Offset of the first hit to return.</param>
/// <param name="size">Number of hits to return.</param>
/// <returns>The deserialized search response, or <c>null</c> when key is null or empty.</returns>
public ElasticsearchResult<Student> Query(string key, int from = 0, int size = 10)
{
    if (string.IsNullOrEmpty(key))
    {
        return null;
    }

    string phrase = key.Trim();
    string searchCommand = new SearchCommand("student_test1", "_doc");

    var requestBody = new QueryBuilder<Student>()
        .Query(q => q
            .Bool(b => b
                .Must(must => must
                    // A match query could be tried here instead of query_string.
                    .QueryString(f => f.DefaultField("desc").Query(phrase))
                    .Range(f => f.Field("chinese").From("90").To("100"))
                    .Range(f => f.Field("math").Gt("90"))
                    .Range(f => f.Field("english").Gt("90")))))
        .From(from)
        .Size(size)
        // Deliberately no Sort on chinese/math/english here: without it ES
        // orders hits by how well they match the keywords, best match first.
        .Highlight(hl => hl
            .PreTags("<span class=\"label label-sm label-danger\">")
            .PostTags("</span>")
            .Fields(f => f.FieldName("desc").Order(HighlightOrder.score)))
        .Build();

    string responseJson = Client.Post(searchCommand, requestBody);
    return new JsonNetSerializer().Deserialize<ElasticsearchResult<Student>>(responseJson);
}
查詢結果是
可以看到,ES 會把 “寶玉黛玉螃蟹宴作詩” 進行分詞,然後進行文字匹配。
PlainElastic.Net 是比較舊的驅動了,不是很適合 ES 7.8.0,建議改用 NEST 或者 Elasticsearch.net;我這裡是參考一些園友的做法,寫個 Demo 出來測試一下。
(未完,ES 聚合統計的,待續)