版本
PHP 操作 ElasticSearch 的索引,文件
- PHP版本: 7.1.9
- ElasticSearch版本:7.8.1
- ElasticSearch-PHP
根據 PHP 版本選擇對應的 ElasticSearch-PHP 版本
PHP 安裝 ElasticSearch-PHP
使用 composer 安裝 ElasticSearch-PHP
composer require elasticsearch/elasticsearch
建立 ElasticSearch 客戶端
// Elasticsearch nodes to connect to, given as a plain list of "host:port" strings.
$hosts = ['127.0.0.1:9200'];

// Configure the host list and the retry count, then build the client.
// NOTE(review): ClientBuilder is presumably Elasticsearch\ClientBuilder from the
// elasticsearch/elasticsearch package installed above - confirm the `use` statement.
$client = ClientBuilder::create()
    ->setHosts($hosts)
    ->setRetries(2)
    ->build();
使用
PHP 操作索引(增刪改查)
建立索引
建立一個 users 索引,並新增欄位對映
// Elasticsearch nodes to connect to, given as a plain list of "host:port" strings.
$hosts = ['127.0.0.1:9200'];

// Configure the host list and the retry count, then build the client.
$client = ClientBuilder::create()
    ->setHosts($hosts)
    ->setRetries(2)
    ->build();

$indexCreateParams = [
    'index' => 'users', // name of the index to create
    'body' => [
        'settings' => [
            'number_of_shards' => 3,   // number of primary shards
            'number_of_replicas' => 2, // number of replicas
        ],
        'mappings' => [
            'properties' => [
                'name' => [
                    'type' => 'keyword',
                    'index' => true, // field is searchable
                ],
                'age' => [
                    'type' => 'integer',
                ],
                'mobile' => [
                    'type' => 'text',
                    'index' => true, // boolean, consistent with the other fields
                ],
                'email' => [
                    'type' => 'text',
                    'index' => true,
                ],
                'address' => [
                    'type' => 'text',
                    'index' => true,
                    'analyzer' => 'ik_max_word', // tokenize with the ik analyzer
                ],
                'desc' => [
                    'type' => 'text',
                    'index' => true,
                    'analyzer' => 'ik_max_word',
                ],
            ],
        ],
    ],
];

// Create the index and dump the response.
$res = $client->indices()->create($indexCreateParams);
dd($res);
檢視索引
// Fetch the field mappings of the "users" index and dump them.
$mappingRequest = ['index' => 'users'];
$res = $client->indices()->getMapping($mappingRequest);
dd($res);
------------------------------返回結果-----------------------------------
array:1 [
"users" => array:1 [
"mappings" => array:1 [
"properties" => array:6 [
"address" => array:2 [
"type" => "text"
"analyzer" => "ik_max_word"
]
"age" => array:1 [
"type" => "integer"
]
"desc" => array:2 [
"type" => "text"
"analyzer" => "ik_max_word"
]
"email" => array:1 [
"type" => "text"
]
"mobile" => array:1 [
"type" => "text"
]
"name" => array:1 [
"type" => "keyword"
]
]
]
]
]
獲取索引設定資訊
// Read the settings of the "users" index and dump them.
$settingsRequest = ['index' => 'users'];
$setting = $client->indices()->getSettings($settingsRequest);
dd($setting);
---------------------------------返回結果------------------------------------
array:1 [
"users" => array:1 [
"settings" => array:1 [
"index" => array:6 [
"creation_date" => "1630484859730"
"number_of_shards" => "3"
"number_of_replicas" => "2"
"uuid" => "IbvJ_CgtT3monuB8IyEPTQ"
"version" => array:1 [
"created" => "7080199"
]
"provided_name" => "users"
]
]
]
]
更改索引
// New settings for the "users" index: lower the replica count to 1.
$newSettings = ['number_of_replicas' => 1];

$params = [
    'index' => 'users',
    'body' => ['settings' => $newSettings],
];
$res = $client->indices()->putSettings($params);
dd($res);
ElasticSearch 是不支援索引欄位型別變更的,原因是一個欄位的型別進行修改之後,ES 會重新建立對這個欄位的索引資訊,影響到ES對該欄位分詞方式,相關度,TF/IDF倒排建立等
刪除索引
// Delete the "users" index and dump the response.
$param = ['index' => 'users'];
$res = $client->indices()->delete($param);
dd($res);
PHP 操作文件
建立一條資料的文件
// Field values of the document to store.
$user = [
    'name' => '張三',
    'age' => 21,
    'mobile' => '16621111111',
    'email' => "16621111111@qq.com",
    'address' => '北京-西二旗',
    'desc' => '一個技術宅男,強迫症,愛好美食,電影',
];

$params = [
    'index' => 'users',
    'id' => 1, // explicit document id; Elasticsearch generates one when omitted
    'body' => $user,
];
$res = $client->index($params);
dd($res);
-------------------------返回結果----------------------------
array:8 [
"_index" => "users"
"_type" => "_doc"
"_id" => "1"
"_version" => 1
"result" => "created"
"_shards" => array:3 [
"total" => 3
"successful" => 1
"failed" => 0
]
"_seq_no" => 0
"_primary_term" => 1
]
建立多條資料的文件(bulk)
// Documents to index in one bulk request.
$data = [
    ['name' => '李四', 'age' => '22', 'mobile' => '16622222222','email' => '16622222222@qq.com', 'address' => '上海-閔行', 'desc' => '運動,動漫,遊戲,電影'],
    ['name' => '王五', 'age' => '22', 'mobile' => '16622222223','email' => '16622222223@qq.com', 'address' => '上海-浦東', 'desc' => '運動,日漫,電影,技術控'],
    ['name' => '趙六', 'age' => '20', 'mobile' => '16622222224','email' => '16622222224@qq.com', 'address' => '上海-長寧', 'desc' => '宅男,小說,遊戲,睡覺'],
    ['name' => '李華', 'age' => '23', 'mobile' => '16622222225','email' => '16622222225@qq.com', 'address' => '上海-寶山', 'desc' => '運動,小說,睡覺'],
];

// Start from a fresh request. Appending to a $params left over from an earlier
// snippet would carry its 'index'/'id'/'body' entries into the bulk payload.
$params = ['body' => []];

foreach ($data as $k => $document) {
    // Each document is sent as two entries: the action line, then its source.
    $params['body'][] = [
        'index' => [
            '_index' => 'users',
            '_id' => $k + 1, // ids 1..n, derived from the array position
        ],
    ];
    $params['body'][] = $document;
}

$res = $client->bulk($params);
dd($res);
獲取文件
Elasticsearch 提供實時獲取文件的方法。這意味著只要文件被索引且客戶端收到訊息確認後,你就可以立即在任何的分片中檢索文件。Get 操作透過 index/id(ES 7.x 已棄用 type,回傳的 _type 固定為 _doc)
方式請求一個文件資訊:
// Fetch the document with id 1 from the "users" index and dump it.
$params = ['index' => 'users', 'id' => 1];
$res = $client->get($params);
dd($res);
-------------------------------返回結果--------------------------------
array:8 [
"_index" => "users"
"_type" => "_doc"
"_id" => "1"
"_version" => 2
"_seq_no" => 1
"_primary_term" => 1
"found" => true
"_source" => array:6 [
"name" => "李四"
"age" => "22"
"mobile" => "16622222222"
"email" => "16622222222@qq.com"
"address" => "上海-閔行"
"desc" => "運動,動漫,遊戲,電影"
]
]
更改文件
如果你要部分更新文件(如更改現存欄位
或 新增新欄位
),你可以在 body 引數中指定一個 doc 引數。這樣 doc 引數內的欄位會與現存欄位進行合併。
// Fields to merge into the existing document (partial update).
$changes = [
    'age' => 19,               // set age to 19
    'mobile' => '16633333334', // set the new mobile number
];

$params = [
    'index' => 'users',
    'id' => 1, // update the document whose id is 1
    'body' => ['doc' => $changes],
];
$res = $client->update($params);
dd($res);
----------------------------再次查詢id=1記錄--------------------------------
array:8 [
"_index" => "users"
"_type" => "_doc"
"_id" => "1"
"_version" => 4
"_seq_no" => 3
"_primary_term" => 1
"found" => true
"_source" => array:6 [
"name" => "李四"
"age" => 19
"mobile" => "16633333334"
"email" => "16622222222@qq.com"
"address" => "上海-閔行"
"desc" => "運動,動漫,遊戲,電影"
]
]
刪除文件
可以透過id進行刪除
// Delete the document with id 1 from the "users" index.
$params = ['index' => 'users', 'id' => 1];
$res = $client->delete($params);
dd($res);
搜尋查詢
獲取所有資料
// Search the "users" index without a query body, i.e. return all documents.
$params = ['index' => 'users'];
$res = $client->search($params);
dd($res);
--------------------------------返回結果-------------------------------
array:4 [
"took" => 0 //查詢花費時間,單位毫秒
"timed_out" => false //是否超時
"_shards" => array:4 [ //分片資訊
"total" => 3 //分片總數
"successful" => 3 //成功
"skipped" => 0 //忽略
"failed" => 0 //失敗
]
"hits" => array:3 [ //搜尋命中結果
"total" => array:2 [ //搜尋條件匹配的文件總數
"value" => 3 //總命中計數的值
"relation" => "eq" //計數規則 eq 表示計數準確,gte 表示 value 只是下限(實際命中數大於等於該值)
]
"max_score" => 1.0 //匹配度分值
"hits" => array:3 [ //命中結果集合
0 => array:5 [
"_index" => "users"
"_type" => "_doc"
"_id" => "2"
"_score" => 1.0
"_source" => array:6 [
"name" => "王五"
"age" => "22"
"mobile" => "16622222223"
"email" => "16622222223@qq.com"
"address" => "上海-浦東"
"desc" => "運動,日漫,電影,技術控"
]
]
1 => array:5 [
"_index" => "users"
"_type" => "_doc"
"_id" => "3"
"_score" => 1.0
"_source" => array:6 [
"name" => "趙六"
"age" => "20"
"mobile" => "16622222224"
"email" => "16622222224@qq.com"
"address" => "上海-長寧"
"desc" => "宅男,小說,遊戲,睡覺"
]
]
2 => array:5 [
"_index" => "users"
"_type" => "_doc"
"_id" => "4"
"_score" => 1.0
"_source" => array:6 [
"name" => "李華"
"age" => "23"
"mobile" => "16622222225"
"email" => "16622222225@qq.com"
"address" => "上海-寶山"
"desc" => "運動,小說,睡覺"
]
]
]
]
]
匹配查詢
match
匹配型別查詢,Es 先把
查詢條件進行分詞
,然後依據分詞進行查詢
,多個詞條之間是 or 的關係
// match analyzes the query text before searching.
// "desc" is a text field, so the query string is first split into terms such as
// 運動, 電影, 技術控, and documents are then matched against those terms.
$query = [
    'match' => [
        'desc' => '運動,日漫,電影,技術控',
    ],
];

$params = [
    'index' => 'users',
    'body' => ['query' => $query],
];
$res = $client->search($params);
dd($res);
--------------------------返回結果----------------------------
array:4 [
"took" => 3
"timed_out" => false
"_shards" => array:4 [
"total" => 3
"successful" => 3
"skipped" => 0
"failed" => 0
]
"hits" => array:3 [
"total" => array:2 [
"value" => 2
"relation" => "eq"
]
"max_score" => 4.811739
"hits" => array:2 [
0 => array:5 [
"_index" => "users"
"_type" => "_doc"
"_id" => "2"
"_score" => 4.811739
"_source" => array:6 [
"name" => "王五"
"age" => "22"
"mobile" => "16622222223"
"email" => "16622222223@qq.com"
"address" => "上海-浦東"
"desc" => "運動,日漫,電影,技術控" //es在儲存的時候會進行分詞,此次查詢匹配上了運動這個片語
]
]
1 => array:5 [
"_index" => "users"
"_type" => "_doc"
"_id" => "4"
"_score" => 0.5504225
"_source" => array:6 [
"name" => "李華"
"age" => "23"
"mobile" => "16622222225"
"email" => "16622222225@qq.com"
"address" => "上海-寶山"
"desc" => "運動,小說,睡覺"
]
]
]
]
]
精準查詢
- term 查詢,精確的關鍵詞匹配查詢,
不對查詢條件進行分詞
// term does not analyze the query value: 運動 is matched as-is.
$query = [
    'term' => [
        'desc' => '運動',
    ],
];

$params = [
    'index' => 'users',
    'body' => ['query' => $query],
];
$res = $client->search($params);
dd($res);
--------------------------------結果---------------------------------
array:4 [
"took" => 0
"timed_out" => false
"_shards" => array:4 [
"total" => 3
"successful" => 3
"skipped" => 0
"failed" => 0
]
"hits" => array:3 [
"total" => array:2 [
"value" => 2
"relation" => "eq"
]
"max_score" => 0.5504225
"hits" => array:2 [
0 => array:5 [
"_index" => "users"
"_type" => "_doc"
"_id" => "4"
"_score" => 0.5504225
"_source" => array:6 [
"name" => "李華"
"age" => "23"
"mobile" => "16622222225"
"email" => "16622222225@qq.com"
"address" => "上海-寶山"
"desc" => "運動,小說,睡覺" //es儲存的倒排索引中有運動這個分詞片語,所以可以匹配上
]
]
1 => array:5 [
"_index" => "users"
"_type" => "_doc"
"_id" => "2"
"_score" => 0.42081726
"_source" => array:6 [
"name" => "王五"
"age" => "22"
"mobile" => "16622222223"
"email" => "16622222223@qq.com"
"address" => "上海-浦東"
"desc" => "運動,日漫,電影,技術控"
]
]
]
]
]
- 多關鍵字精確查詢
terms
查詢和 term 查詢一樣,但它允許你指定多值進行匹配。
如果這個欄位包含了指定值中的任何一個值,那麼這個文件滿足條件,類似於 mysql 的 in
// terms takes several values, none of which is analyzed; each one is matched as-is.
$query = [
    'terms' => [
        'desc' => ['運動', '遊戲'],
    ],
];

$params = [
    'index' => 'users',
    'body' => ['query' => $query],
];
$res = $client->search($params);
dd($res);
---------------------------------結果-----------------------------
array:4 [
"took" => 2
"timed_out" => false
"_shards" => array:4 [
"total" => 3
"successful" => 3
"skipped" => 0
"failed" => 0
]
"hits" => array:3 [
"total" => array:2 [
"value" => 3
"relation" => "eq"
]
"max_score" => 1.0
"hits" => array:3 [
0 => array:5 [
"_index" => "users"
"_type" => "_doc"
"_id" => "2"
"_score" => 1.0
"_source" => array:6 [
"name" => "王五"
"age" => "22"
"mobile" => "16622222223"
"email" => "16622222223@qq.com"
"address" => "上海-浦東"
"desc" => "運動,日漫,電影,技術控" //包含 運動
]
]
1 => array:5 [
"_index" => "users"
"_type" => "_doc"
"_id" => "3"
"_score" => 1.0
"_source" => array:6 [
"name" => "趙六"
"age" => "20"
"mobile" => "16622222224"
"email" => "16622222224@qq.com"
"address" => "上海-長寧"
"desc" => "宅男,小說,遊戲,睡覺" //包含 遊戲
]
]
2 => array:5 [
"_index" => "users"
"_type" => "_doc"
"_id" => "4"
"_score" => 1.0
"_source" => array:6 [
"name" => "李華"
"age" => "23"
"mobile" => "16622222225"
"email" => "16622222225@qq.com"
"address" => "上海-寶山"
"desc" => "運動,小說,睡覺" //包含 運動
]
]
]
]
]
組合查詢
bool
把各種其它查詢透過must
(必須 )、must_not
(必須不)、should
(應該)的方式進行組合
語句 | 情況 |
---|---|
多個 must | 查詢 must 的交集 |
must + should | 查詢 must 交集,如果交集裡面包含 should 的部分,則增加打分 |
must + must_not | 查詢 must 交集,但是會排除 must_not 條件 |
多個 should | 查詢 should 的並集,也就是 a = 1 or a = 2 |
should + must_not | should 的並集,並排除 must_not 的部分 |
must + must_not + should | must 交集,並排除 must_not 部分,返回結果裡面有 should 部分,會增加打分 |
// Find documents whose desc contains both 小說 and 運動, excluding those with age 20.
// Among the remaining documents, one whose name is 李華 gets a higher _score.
$must = [
    ['match' => ['desc' => '小說']],
    ['term' => ['desc' => '運動']],
];
$mustNot = [
    ['term' => ['age' => '20']],
];
$should = [
    ['term' => ['name' => '李華']],
];

$params = [
    'index' => 'users',
    'body' => [
        'query' => [
            'bool' => [
                'must' => $must,
                'must_not' => $mustNot,
                'should' => $should,
            ],
        ],
    ],
];
$res = $client->search($params);
dd($res);
------------------------------------返回結果---------------------------------
array:4 [
"took" => 8
"timed_out" => false
"_shards" => array:4 [
"total" => 3
"successful" => 3
"skipped" => 0
"failed" => 0
]
"hits" => array:3 [
"total" => array:2 [
"value" => 1
"relation" => "eq"
]
"max_score" => 2.081674
"hits" => array:1 [
0 => array:5 [
"_index" => "users"
"_type" => "_doc"
"_id" => "4"
"_score" => 2.081674 //沒有加 should 的分數 = 1.100845,加完 should 分值直接提高 1
"_source" => array:6 [
"name" => "李華"
"age" => "23" //年齡不等於20
"mobile" => "16622222225"
"email" => "16622222225@qq.com"
"address" => "上海-寶山"
"desc" => "運動,小說,睡覺" //包含小說,運動 片語
]
]
]
]
]
返回指定欄位
預設情況下,Elasticsearch 在搜尋的結果中,會把文件中儲存在 _source 的所有欄位都返回。
如果我們只想獲取其中的部分欄位,我們可以新增_source 的過濾
// The terms values are not analyzed; each one is matched as-is.
$query = [
    'terms' => [
        'desc' => ['運動', '遊戲'],
    ],
];

$params = [
    'index' => 'users',
    '_source' => ['name', 'desc'], // return only the name and desc fields of _source
    'body' => ['query' => $query],
];
$res = $client->search($params);
dd($res);
------------------------------------結果-----------------------------------
array:4 [
"took" => 0
"timed_out" => false
"_shards" => array:4 [
"total" => 3
"successful" => 3
"skipped" => 0
"failed" => 0
]
"hits" => array:3 [
"total" => array:2 [
"value" => 3
"relation" => "eq"
]
"max_score" => 1.0
"hits" => array:3 [
0 => array:5 [
"_index" => "users"
"_type" => "_doc"
"_id" => "2"
"_score" => 1.0
"_source" => array:2 [
"name" => "王五"
"desc" => "運動,日漫,電影,技術控"
]
]
1 => array:5 [
"_index" => "users"
"_type" => "_doc"
"_id" => "3"
"_score" => 1.0
"_source" => array:2 [
"name" => "趙六"
"desc" => "宅男,小說,遊戲,睡覺"
]
]
2 => array:5 [
"_index" => "users"
"_type" => "_doc"
"_id" => "4"
"_score" => 1.0
"_source" => array:2 [
"name" => "李華"
"desc" => "運動,小說,睡覺"
]
]
]
]
]
範圍查詢
range 查詢找出那些落在指定區間內的數字或者時間。range 查詢允許以下字元
符號 | 說明 |
---|---|
gt | > |
gte | >= |
lt | < |
lte | <= |
// Find documents whose age is greater than 20 and less than 30.
$ageBounds = [
    'gt' => 20,
    'lt' => 30,
];

$params = [
    'index' => 'users',
    'body' => [
        'query' => [
            'range' => ['age' => $ageBounds],
        ],
    ],
];
$res = $client->search($params);
dd($res);
----------------------------------結果---------------------------------
array:4 [
"took" => 0
"timed_out" => false
"_shards" => array:4 [
"total" => 3
"successful" => 3
"skipped" => 0
"failed" => 0
]
"hits" => array:3 [
"total" => array:2 [
"value" => 2
"relation" => "eq"
]
"max_score" => 1.0
"hits" => array:2 [
0 => array:5 [
"_index" => "users"
"_type" => "_doc"
"_id" => "2"
"_score" => 1.0
"_source" => array:6 [
"name" => "王五"
"age" => "22"
"mobile" => "16622222223"
"email" => "16622222223@qq.com"
"address" => "上海-浦東"
"desc" => "運動,日漫,電影,技術控"
]
]
1 => array:5 [
"_index" => "users"
"_type" => "_doc"
"_id" => "4"
"_score" => 1.0
"_source" => array:6 [
"name" => "李華"
"age" => "23"
"mobile" => "16622222225"
"email" => "16622222225@qq.com"
"address" => "上海-寶山"
"desc" => "運動,小說,睡覺"
]
]
]
]
]
排序
sort 可以讓我們按照不同的欄位進行排序,並且透過 order 指定排序的方式。desc 降序,asc升序。
- 根據年齡進行排序
// Match on desc and order the hits by age, highest first.
$params = [
    'index' => 'users',
    'body' => [
        'query' => [
            'match' => [
                'desc' => '運動,小說',
            ],
        ],
        'sort' => [
            [
                'age' => [
                    'order' => 'desc',
                ],
            ],
        ],
    ],
];

// Run the search and dump the response; this produces the result shown below.
$res = $client->search($params);
dd($res);
----------------------------------------結果--------------------------------
array:4 [
"took" => 9
"timed_out" => false
"_shards" => array:4 [
"total" => 3
"successful" => 3
"skipped" => 0
"failed" => 0
]
"hits" => array:3 [
"total" => array:2 [
"value" => 3
"relation" => "eq"
]
"max_score" => null
"hits" => array:3 [
0 => array:6 [
"_index" => "users"
"_type" => "_doc"
"_id" => "4"
"_score" => null
"_source" => array:6 [
"name" => "李華"
"age" => "23"
"mobile" => "16622222225"
"email" => "16622222225@qq.com"
"address" => "上海-寶山"
"desc" => "運動,小說,睡覺"
]
"sort" => array:1 [
0 => 23
]
]
1 => array:6 [
"_index" => "users"
"_type" => "_doc"
"_id" => "2"
"_score" => null
"_source" => array:6 [
"name" => "王五"
"age" => "22"
"mobile" => "16622222223"
"email" => "16622222223@qq.com"
"address" => "上海-浦東"
"desc" => "運動,日漫,電影,技術控"
]
"sort" => array:1 [
0 => 22
]
]
2 => array:6 [
"_index" => "users"
"_type" => "_doc"
"_id" => "3"
"_score" => null
"_source" => array:6 [
"name" => "趙六"
"age" => "20"
"mobile" => "16622222224"
"email" => "16622222224@qq.com"
"address" => "上海-長寧"
"desc" => "宅男,小說,遊戲,睡覺"
]
"sort" => array:1 [
0 => 20
]
]
]
]
]
- 多個欄位排序,先根據年齡進行排序,如果年齡相等,再根據
_score
分數排序
// Sort keys in priority order: age descending first, then _score descending.
$sort = [
    ['age' => ['order' => 'desc']],
    ['_score' => ['order' => "desc"]],
];

$params = [
    'index' => 'users',
    'body' => [
        'query' => [
            'match' => ['desc' => '運動,小說'],
        ],
        'sort' => $sort,
    ],
];
$res = $client->search($params);
dd($res);
--------------------------------結果-------------------------------------
array:4 [
"took" => 1
"timed_out" => false
"_shards" => array:4 [
"total" => 3
"successful" => 3
"skipped" => 0
"failed" => 0
]
"hits" => array:3 [
"total" => array:2 [
"value" => 4
"relation" => "eq"
]
"max_score" => null
"hits" => array:4 [
0 => array:6 [
"_index" => "users"
"_type" => "_doc"
"_id" => "4"
"_score" => 1.100845
"_source" => array:6 [
"name" => "李華"
"age" => "23"
"mobile" => "16622222225"
"email" => "16622222225@qq.com"
"address" => "上海-寶山"
"desc" => "運動,小說,睡覺"
]
"sort" => array:2 [
0 => 23
1 => 1.100845
]
]
1 => array:6 [
"_index" => "users"
"_type" => "_doc"
"_id" => "2"
"_score" => 0.42081726 //和韓梅梅的年齡相等,依據 分數進行排序
"_source" => array:6 [
"name" => "王五"
"age" => "22"
"mobile" => "16622222223"
"email" => "16622222223@qq.com"
"address" => "上海-浦東"
"desc" => "運動,日漫,電影,技術控"
]
"sort" => array:2 [
0 => 22
1 => 0.42081726
]
]
2 => array:6 [
"_index" => "users"
"_type" => "_doc"
"_id" => "1"
"_score" => 0.2876821
"_source" => array:6 [
"name" => "韓梅梅"
"age" => "22"
"mobile" => "16622222278"
"email" => "16622222278@qq.com"
"address" => "上海-閔行"
"desc" => "運動,美食,遊戲,電影"
]
"sort" => array:2 [
0 => 22
1 => 0.2876821
]
]
3 => array:6 [
"_index" => "users"
"_type" => "_doc"
"_id" => "3"
"_score" => 0.45665967
"_source" => array:6 [
"name" => "趙六"
"age" => "20"
"mobile" => "16622222224"
"email" => "16622222224@qq.com"
"address" => "上海-長寧"
"desc" => "宅男,小說,遊戲,睡覺"
]
"sort" => array:2 [
0 => 20
1 => 0.45665967
]
]
]
]
]
分頁查詢
from:當前頁的起始索引,預設從 0 開始。 from = (pageNum - 1) * size
size:每頁顯示多少條
// Paging window: start at offset 0 and return a single document.
$page = [
    'from' => 0, // start offset
    'size' => 1, // number of documents to return
];

$params = [
    'index' => 'users',
    'body' => $page,
];
$res = $client->search($params);
dd($res);
---------------------------結果---------------------------------------
array:4 [
"took" => 7
"timed_out" => false
"_shards" => array:4 [
"total" => 3
"successful" => 3
"skipped" => 0
"failed" => 0
]
"hits" => array:3 [
"total" => array:2 [
"value" => 4
"relation" => "eq"
]
"max_score" => 1.0
"hits" => array:1 [
0 => array:5 [
"_index" => "users"
"_type" => "_doc"
"_id" => "2"
"_score" => 1.0
"_source" => array:6 [
"name" => "王五"
"age" => "22"
"mobile" => "16622222223"
"email" => "16622222223@qq.com"
"address" => "上海-浦東"
"desc" => "運動,日漫,電影,技術控"
]
]
]
]
]
PHP 聚合操作
ElasticSearch除了致力於搜尋之外,也提供了聚合實時分析資料的功能,它的實時性高,所有的計算結果都是即時返回。
聚合的兩個主要的概念,分別是 桶
和 指標
桶(Buckets): 簡單來說就是滿足特定條件的文件的集合。類似SQL中的GROUP BY語法
當聚合開始被執行,每個文件會決定符合哪個桶的條件,如果匹配到,文件將放入相應的桶並接著進行聚合操作
桶可以被巢狀在其他桶裡面,像是北京能放在中國桶裡,而中國桶能放在亞洲桶裡
指標(Metrics) : 對桶內的文件進行統計計算(如計算最大值、最小值、平均值等等)
桶能讓我們劃分文件到有意義的集合, 但是最終我們需要的是對這些桶內的文件進行一些指標的計算
指標通常是簡單的數學運算(像是min、max、avg、sum),而這些是透過當前桶中的文件的值來計算的,利用指標能讓你計算像平均薪資、最高出售價格、95%的查詢延遲這樣的資料
PHP 中聚合的格式大致為:
// Example aggregation: find the largest age.
$aggregations = [
    'age_max' => [             // name of the result entry, chosen freely
        "max" => [             // aggregation type keyword
            'field' => 'age',  // field to aggregate on
        ],
    ],
];

$params = [
    'index' => 'employees',
    'body' => [
        'aggs' => $aggregations, // keyword is either "aggs" or "aggregations"
    ],
];
建立案例資料
- 建立
employees
索引
$indexCreateParams = [
    'index' => 'employees', // index holding the employee records
    'body' => [
        'mappings' => [
            'properties' => [
                'name' => [
                    'type' => 'keyword',
                ],
                'age' => [
                    'type' => 'integer',
                ],
                'gender' => [
                    'type' => 'keyword',
                ],
                'job' => [
                    'type' => 'text',
                    'fields' => [
                        // keyword sub-field so job can be used for exact-term
                        // search and for aggregations (as job.keyword)
                        'keyword' => [
                            'type' => 'keyword',
                            'ignore_above' => 50,
                        ],
                    ],
                ],
                'salary' => [
                    'type' => 'integer',
                ],
            ],
        ],
    ],
];

// Create the index and dump the response.
$res = $client->indices()->create($indexCreateParams);
dd($res);
- 新增文件資料
// Sample employee documents used by the aggregation examples.
$data = [
    ['name' => '張三', 'age' => 20, 'gender' => '男', 'job' => 'PHP', 'salary' => '1000'],
    ['name' => '李四', 'age' => 25, 'gender' => '男', 'job' => 'PHP', 'salary' => '2500'],
    ['name' => '王五', 'age' => 26, 'gender' => '男', 'job' => 'PHP', 'salary' => '2600'],
    ['name' => '趙六', 'age' => 29, 'gender' => '男', 'job' => 'PHP', 'salary' => '4500'],
    ['name' => '韓梅梅', 'age' => 24, 'gender' => '女', 'job' => 'UI', 'salary' => '3500'],
    ['name' => '李華', 'age' => 27, 'gender' => '男', 'job' => '產品', 'salary' => '3600'],
    ['name' => '李銳', 'age' => 25, 'gender' => '男', 'job' => '產品', 'salary' => '2800'],
    ['name' => '趙雲', 'age' => 28, 'gender' => '男', 'job' => '產品', 'salary' => '5000'],
    ['name' => '李雷', 'age' => 27, 'gender' => '男', 'job' => '測試', 'salary' => '3600'],
    ['name' => '李想', 'age' => 23, 'gender' => '男', 'job' => '測試', 'salary' => '2500'],
    ['name' => '趙雷', 'age' => 30, 'gender' => '男', 'job' => 'PHP', 'salary' => '5500'],
    ['name' => '張宇', 'age' => 27, 'gender' => '男', 'job' => 'JAVA', 'salary' => '4600'],
    ['name' => '楊建', 'age' => 24, 'gender' => '男', 'job' => 'JAVA', 'salary' => '3500'],
];

// Start from a fresh request. Appending to a $params left over from an earlier
// snippet would carry its 'index'/'aggs' entries into the bulk payload.
$params = ['body' => []];

foreach ($data as $k => $document) {
    // Each document is sent as two entries: the action line, then its source.
    $params['body'][] = [
        'index' => [
            '_index' => 'employees',
            '_id' => $k + 1, // ids 1..n, derived from the array position
        ],
    ];
    $params['body'][] = $document;
}

$res = $client->bulk($params);
dd($res);
avg 平均值
計算員工的平均薪資
// Average salary over all employees.
$aggregations = [
    'avg_salary' => [
        'avg' => ['field' => 'salary'],
    ],
];

$params = [
    'index' => 'employees',
    'body' => [
        'aggs' => $aggregations,
        'size' => 0, // no hits returned, aggregation result only
    ],
];
$res = $client->search($params);
dd($res);
----------------------------------返回結果-------------------------------
array:5 [
"took" => 14
"timed_out" => false
"_shards" => array:4 [
"total" => 1
"successful" => 1
"skipped" => 0
"failed" => 0
]
"hits" => array:3 [
"total" => array:2 [
"value" => 13
"relation" => "eq"
]
"max_score" => null
"hits" => []
]
"aggregations" => array:1 [
"avg_salary" => array:1 [
"value" => 3476.9230769231
]
]
]
min,max 最小,最大
// Highest salary among all employees (use 'min' instead of 'max' for the lowest).
$aggregations = [
    'max_salary' => [
        'max' => ['field' => 'salary'],
    ],
];

$params = [
    'index' => 'employees',
    'body' => [
        'aggs' => $aggregations,
        'size' => 0, // no hits returned, aggregation result only
    ],
];
$res = $client->search($params);
dd($res);
----------------------------------結果--------------------------------
array:5 [
"took" => 3
"timed_out" => false
"_shards" => array:4 [
"total" => 1
"successful" => 1
"skipped" => 0
"failed" => 0
]
"hits" => array:3 [
"total" => array:2 [
"value" => 13
"relation" => "eq"
]
"max_score" => null
"hits" => []
]
"aggregations" => array:1 [
"max_salary" => array:1 [
"value" => 5500.0
]
]
]
sum 求和
// Total salary over all employees.
$params = [
    'index' => 'employees',
    'body' => [
        'aggs' => [
            'sum_salary' => [
                'sum' => [
                    'field' => 'salary',
                ],
            ],
        ],
        'size' => 0, // no hits returned, aggregation result only
    ],
];

// Run the search and dump the response; this produces the result shown below.
$res = $client->search($params);
dd($res);
----------------------------------結果---------------------------------
array:5 [
"took" => 2
"timed_out" => false
"_shards" => array:4 [
"total" => 1
"successful" => 1
"skipped" => 0
"failed" => 0
]
"hits" => array:3 [
"total" => array:2 [
"value" => 13
"relation" => "eq"
]
"max_score" => null
"hits" => []
]
"aggregations" => array:1 [
"sum_salary" => array:1 [
"value" => 45200.0
]
]
]
cardinality 去重
cardinality 計算不重複的欄位有多少(相當於mysql中的distinct)
// cardinality de-duplicates the values and then counts them: number of distinct jobs.
$aggregations = [
    'cardinality_info' => [
        'cardinality' => ['field' => 'job.keyword'],
    ],
];

$params = [
    'index' => 'employees',
    'body' => [
        'aggs' => $aggregations,
        'size' => 0, // no hits returned, aggregation result only
    ],
];
$res = $client->search($params);
dd($res);
------------------------------------結果---------------------------------
array:5 [
"took" => 1009
"timed_out" => false
"_shards" => array:4 [
"total" => 1
"successful" => 1
"skipped" => 0
"failed" => 0
]
"hits" => array:3 [
"total" => array:2 [
"value" => 13
"relation" => "eq"
]
"max_score" => null
"hits" => []
]
"aggregations" => array:1 [
"cardinality_info" => array:1 [
"value" => 5
]
]
]
stats 計算各種聚合
透過 stats,可以同時返回 count,max,min,sum,avg 的結果
// Count, minimum, maximum, average and sum of salary in one aggregation.
$aggregations = [
    'salary_stats' => [
        'stats' => ['field' => 'salary'],
    ],
];

$params = [
    'index' => 'employees',
    'body' => [
        'aggs' => $aggregations,
        'size' => 0, // no hits returned, aggregation result only
    ],
];
$res = $client->search($params);
dd($res);
-------------------------------------結果-----------------------------------
array:5 [
"took" => 1
"timed_out" => false
"_shards" => array:4 [
"total" => 1
"successful" => 1
"skipped" => 0
"failed" => 0
]
"hits" => array:3 [
"total" => array:2 [
"value" => 13
"relation" => "eq"
]
"max_score" => null
"hits" => []
]
"aggregations" => array:1 [
"salary_stats" => array:5 [
"count" => 13
"min" => 1000.0
"max" => 5500.0
"avg" => 3476.9230769231
"sum" => 45200.0
]
]
]
percentiles 百分比統計
對指定欄位的值按從小到大累計每個值對應的文件數的佔比,返回 指定佔比比例對應的值
。
// Percentiles of salary; no explicit percents are given here.
$aggregations = [
    'percentiles' => [
        'percentiles' => ['field' => 'salary'],
    ],
];

$params = [
    'index' => 'employees',
    'body' => [
        'aggs' => $aggregations,
        'size' => 0, // no hits returned, aggregation result only
    ],
];
$res = $client->search($params);
dd($res);
------------------------------結果--------------------------------------
array:5 [
"took" => 2
"timed_out" => false
"_shards" => array:4 [
"total" => 1
"successful" => 1
"skipped" => 0
"failed" => 0
]
"hits" => array:3 [
"total" => array:2 [
"value" => 13
"relation" => "eq"
]
"max_score" => null
"hits" => []
]
"aggregations" => array:1 [
"percentiles" => array:1 [
"values" => array:7 [
"1.0" => 1000.0
"5.0" => 1225.0
"25.0" => 2575.0
"50.0" => 3500.0
"75.0" => 4525.0
"95.0" => 5425.0
"99.0" => 5500.0
]
]
]
]
上面的查詢可以理解為 1% 的人薪資在1000以下,5% 的人薪資在1225 以下……
薪資範圍 | 佔比 |
---|---|
0 ~ 1000 | 1% |
0 ~ 1225.0 | 5% |
0 ~ 2575.0 | 25% |
0 ~ 3500.0 | 50% |
0 ~ 4525.0 | 75% |
0 ~ 5425.0 | 95% |
0 ~ 5500.0 | 99% |
預設返回的是 1,5,25,50,75,95,99 對應的文件的值,可以使用 percents
指定某一處的分值
// Percentiles of salary at explicitly chosen points.
$percentileSpec = [
    'field' => 'salary',
    "percents" => [46, 70, 80], // salary values at the 46th, 70th and 80th percentiles
];

$params = [
    'index' => 'employees',
    'body' => [
        'aggs' => [
            'percentiles' => ['percentiles' => $percentileSpec],
        ],
        'size' => 0, // no hits returned, aggregation result only
    ],
];
$res = $client->search($params);
dd($res);
---------------------------結果------------------------------------
array:5 [
"took" => 10
"timed_out" => false
"_shards" => array:4 [
"total" => 1
"successful" => 1
"skipped" => 0
"failed" => 0
]
"hits" => array:3 [
"total" => array:2 [
"value" => 13
"relation" => "eq"
]
"max_score" => null
"hits" => []
]
"aggregations" => array:1 [
"percentiles" => array:1 [
"values" => array:3 [
"46.0" => 3500.0
"70.0" => 4140.0
"80.0" => 4590.0
]
]
]
]
percentile_ranks
使用 percentile_ranks 可以反過來,計算某一值在總數中的佔比
統計年齡小於22和年齡小於25的文件的佔比
// Inverse of percentiles: for each given age, the share of documents below it.
$rankSpec = [
    'field' => 'age',
    "values" => [22, 25], // share of employees younger than 22 and younger than 25
];

$params = [
    'index' => 'employees',
    'body' => [
        'aggs' => [
            'percentile_ranks_example' => ['percentile_ranks' => $rankSpec],
        ],
        'size' => 0, // no hits returned, aggregation result only
    ],
];
$res = $client->search($params);
dd($res);
-------------------------------返回結果--------------------------------
array:5 [
"took" => 9
"timed_out" => false
"_shards" => array:4 [
"total" => 1
"successful" => 1
"skipped" => 0
"failed" => 0
]
"hits" => array:3 [
"total" => array:2 [
"value" => 13
"relation" => "eq"
]
"max_score" => null
"hits" => []
]
"aggregations" => array:1 [
"percentile_ranks_example" => array:1 [
"values" => array:2 [
"22.0" => 9.6153846153846
"25.0" => 38.461538461538
]
]
]
]
top_hits 取分桶後的前n條
獲取到每組前n條資料,相當於sql 中Top(group by 後取出前n條)。
// Group by job, sort the documents of each group by age in descending order,
// then use top_hits to take the first document of every group (the oldest one).
$params = [
    'index' => 'employees',
    'body' => [
        'aggs' => [
            'job_info' => [ // aggregation name, chosen freely
                'terms' => [ // terms buckets the documents by job (like GROUP BY job)
                    'field' => 'job.keyword',
                ],
                // sub-aggregation applied to each job bucket
                'aggs' => [
                    'top_job' => [
                        'top_hits' => [
                            'sort' => [ // order each bucket by age, descending
                                [
                                    'age' => [
                                        'order' => 'desc',
                                    ],
                                ],
                            ],
                            "size" => 1, // keep one document per bucket
                        ],
                    ],
                ],
            ],
        ],
        'size' => 0, // no top-level hits returned, aggregation result only
    ],
];
$res = $client->search($params);
dd($res);
---------------------------------結果-------------------------------------
array:5 [
"took" => 15
"timed_out" => false
"_shards" => array:4 [
"total" => 1
"successful" => 1
"skipped" => 0
"failed" => 0
]
"hits" => array:3 [
"total" => array:2 [
"value" => 13
"relation" => "eq"
]
"max_score" => null
"hits" => []
]
"aggregations" => array:1 [
"job_info" => array:3 [
"doc_count_error_upper_bound" => 0
"sum_other_doc_count" => 0
"buckets" => array:5 [
0 => array:3 [
"key" => "PHP"
"doc_count" => 5
"top_job" => array:1 [
"hits" => array:3 [
"total" => array:2 [
"value" => 5
"relation" => "eq"
]
"max_score" => null
"hits" => array:1 [
0 => array:6 [
"_index" => "employees"
"_type" => "_doc"
"_id" => "11"
"_score" => null
"_source" => array:5 [
"name" => "趙雷"
"age" => 30
"gender" => "男"
"job" => "PHP"
"salary" => "5500"
]
"sort" => array:1 [
0 => 30
]
]
]
]
]
]
1 => array:3 [
"key" => "產品"
"doc_count" => 3
"top_job" => array:1 [
"hits" => array:3 [
"total" => array:2 [
"value" => 3
"relation" => "eq"
]
"max_score" => null
"hits" => array:1 [
0 => array:6 [
"_index" => "employees"
"_type" => "_doc"
"_id" => "8"
"_score" => null
"_source" => array:5 [
"name" => "趙雲"
"age" => 28
"gender" => "男"
"job" => "產品"
"salary" => "5000"
]
"sort" => array:1 [
0 => 28
]
]
]
]
]
]
2 => array:3 [
"key" => "JAVA"
"doc_count" => 2
"top_job" => array:1 [
"hits" => array:3 [
"total" => array:2 [
"value" => 2
"relation" => "eq"
]
"max_score" => null
"hits" => array:1 [
0 => array:6 [
"_index" => "employees"
"_type" => "_doc"
"_id" => "12"
"_score" => null
"_source" => array:5 [
"name" => "張宇"
"age" => 27
"gender" => "男"
"job" => "JAVA"
"salary" => "4600"
]
"sort" => array:1 [
0 => 27
]
]
]
]
]
]
3 => array:3 [
"key" => "測試"
"doc_count" => 2
"top_job" => array:1 [
"hits" => array:3 [
"total" => array:2 [
"value" => 2
"relation" => "eq"
]
"max_score" => null
"hits" => array:1 [
0 => array:6 [
"_index" => "employees"
"_type" => "_doc"
"_id" => "9"
"_score" => null
"_source" => array:5 [
"name" => "李雷"
"age" => 27
"gender" => "男"
"job" => "測試"
"salary" => "3600"
]
"sort" => array:1 [
0 => 27
]
]
]
]
]
]
4 => array:3 [
"key" => "UI"
"doc_count" => 1
"top_job" => array:1 [
"hits" => array:3 [
"total" => array:2 [
"value" => 1
"relation" => "eq"
]
"max_score" => null
"hits" => array:1 [
0 => array:6 [
"_index" => "employees"
"_type" => "_doc"
"_id" => "5"
"_score" => null
"_source" => array:5 [
"name" => "韓梅梅"
"age" => 24
"gender" => "女"
"job" => "UI"
"salary" => "3500"
]
"sort" => array:1 [
0 => 24
]
]
]
]
]
]
]
]
]
]
terms
terms 相當於 group by field
// terms buckets the documents by age (like GROUP BY age).
$aggregations = [
    'gorup_by_age' => [
        'terms' => ['field' => 'age'],
    ],
];

$params = [
    'index' => 'employees',
    'body' => [
        'aggs' => $aggregations,
        'size' => 0, // no hits returned, aggregation result only
    ],
];
$res = $client->search($params);
dd($res);
---------------------------------結果----------------------------------------
array:5 [
"took" => 23
"timed_out" => false
"_shards" => array:4 [
"total" => 1
"successful" => 1
"skipped" => 0
"failed" => 0
]
"hits" => array:3 [
"total" => array:2 [
"value" => 13
"relation" => "eq"
]
"max_score" => null
"hits" => []
]
"aggregations" => array:1 [
"gorup_by_age" => array:3 [
"doc_count_error_upper_bound" => 0
"sum_other_doc_count" => 0
"buckets" => array:9 [
0 => array:2 [
"key" => 27
"doc_count" => 3
]
1 => array:2 [
"key" => 24
"doc_count" => 2
]
2 => array:2 [
"key" => 25
"doc_count" => 2
]
3 => array:2 [
"key" => 20
"doc_count" => 1
]
4 => array:2 [
"key" => 23
"doc_count" => 1
]
5 => array:2 [
"key" => 26
"doc_count" => 1
]
6 => array:2 [
"key" => 28
"doc_count" => 1
]
7 => array:2 [
"key" => 29
"doc_count" => 1
]
8 => array:2 [
"key" => 30
"doc_count" => 1
]
]
]
]
]
本作品採用《CC 協議》,轉載必須註明作者和本文連結