PHP 操作 ElasticSearch7.8.1

善良的死神發表於2021-09-03

版本

PHP 操作 ElasticSearch 的索引,文件

根據 PHP 版本選擇對應的 ElasticSearch-PHP 版本
ElasticSearch7.8.1 Docker 安裝,介紹與PHP操作

PHP 安裝 ElasticSearch-PHP

  1. composer 安裝 ElasticSearch-PHP

     composer require elasticsearch/elasticsearch
  2. 建立 ElasticSearch 客戶端

         // Elasticsearch nodes to connect to, one "host:port" string per node.
         // setHosts() takes a plain list of hosts, so no 'host' key is used here.
         $hosts = [
             '127.0.0.1:9200',
         ];
         // ClientBuilder is Elasticsearch\ClientBuilder in elasticsearch-php 7.x.
         // Configure the hosts and set the retry count to 2.
         $client = ClientBuilder::create()->setHosts($hosts)->setRetries(2)->build();

使用

PHP 操作索引(增刪改查)

建立索引

建立一個 users 索引,並新增欄位對映

        // Elasticsearch nodes to connect to, one "host:port" string per node.
        $hosts = [
            '127.0.0.1:9200',
        ];
        // Configure the hosts and set the retry count to 2.
        $client = ClientBuilder::create()->setHosts($hosts)->setRetries(2)->build();

        // Create the "users" index together with its settings and field mappings.
        $indexCreateParams = [
            'index' => 'users', // name of the index to create
            'body' => [
                'settings' => [
                    'number_of_shards' => 3, // shard count for the index
                    'number_of_replicas' => 2, // replica count for the index
                ],
                'mappings' => [
                    'properties' => [
                        'name' => [
                            'type' => 'keyword',
                            'index' => true, // field is searchable
                        ],
                        'age' => [
                            'type' => 'integer',
                        ],
                        'mobile' => [
                            'type' => 'text',
                            'index' => true, // boolean, consistent with the other fields
                        ],
                        'email' => [
                            'type' => 'text',
                            'index' => true,
                        ],
                        'address' => [
                            'type' => 'text',
                            'index' => true,
                            'analyzer' => 'ik_max_word', // tokenize with the IK analyzer
                        ],
                        'desc' => [
                            'type' => 'text',
                            'index' => true,
                            'analyzer' => 'ik_max_word',
                        ],
                    ],
                ],
            ],
        ];
        $res = $client->indices()->create($indexCreateParams);
        dd($res);

檢視索引

        // Fetch the field mappings of the "users" index and dump them.
        $mappingParams = ['index' => 'users'];
        $res = $client->indices()->getMapping($mappingParams);
        dd($res);

------------------------------返回結果-----------------------------------
        array:1 [
          "users" => array:1 [
            "mappings" => array:1 [
              "properties" => array:6 [
                "address" => array:2 [
                  "type" => "text"
                  "analyzer" => "ik_max_word"
                ]
                "age" => array:1 [
                  "type" => "integer"
                ]
                "desc" => array:2 [
                  "type" => "text"
                  "analyzer" => "ik_max_word"
                ]
                "email" => array:1 [
                  "type" => "text"
                ]
                "mobile" => array:1 [
                  "type" => "text"
                ]
                "name" => array:1 [
                  "type" => "keyword"
                ]
              ]
            ]
          ]
        ]

獲取索引設定資訊

        // Fetch the settings of the "users" index and dump them.
        $settingParams = ['index' => 'users'];
        $setting = $client->indices()->getSettings($settingParams);
        dd($setting);


---------------------------------返回結果------------------------------------
        array:1 [
          "users" => array:1 [
            "settings" => array:1 [
              "index" => array:6 [
                "creation_date" => "1630484859730"
                "number_of_shards" => "3"
                "number_of_replicas" => "2"
                "uuid" => "IbvJ_CgtT3monuB8IyEPTQ"
                "version" => array:1 [
                  "created" => "7080199"
                ]
                "provided_name" => "users"
              ]
            ]
          ]
        ]

更改索引

        // New settings to apply: change the replica count of the index to 1.
        $newSettings = [
            'number_of_replicas' => 1,
        ];
        $params = [
            'index' => 'users',
            'body' => ['settings' => $newSettings],
        ];

        $res = $client->indices()->putSettings($params);
        dd($res);

ElasticSearch 是不支援索引欄位型別變更的,原因是一個欄位的型別進行修改之後,ES 會重新建立對這個欄位的索引資訊,影響到ES對該欄位分詞方式,相關度,TF/IDF倒排建立等

刪除索引

        // Delete the whole "users" index and dump the response.
        $param = ['index' => 'users'];
        $indices = $client->indices();
        $res = $indices->delete($param);
        dd($res);

PHP 操作文件

建立一條資料的文件

        // The document to store in the "users" index.
        $document = [
            'name' => '張三',
            'age' => 21,
            'mobile' => '16621111111',
            'email' => "16621111111@qq.com",
            'address' => '北京-西二旗',
            'desc' => '一個技術宅男,強迫症,愛好美食,電影',
        ];
        $params = [
            'index' => 'users',
            // Explicit document id; when omitted, Elasticsearch generates one.
            'id' => 1,
            'body' => $document,
        ];
        $res = $client->index($params);
        dd($res);

-------------------------返回結果----------------------------
array:8 [
  "_index" => "users"
  "_type" => "_doc"
  "_id" => "1"
  "_version" => 1
  "result" => "created"
  "_shards" => array:3 [
    "total" => 3
    "successful" => 1
    "failed" => 0
  ]
  "_seq_no" => 0
  "_primary_term" => 1
]

建立多條資料的文件(bulk)

        // Documents to index in one bulk request.
        $data = [
            ['name' => '李四', 'age' => '22', 'mobile' => '16622222222','email' => '16622222222@qq.com', 'address' => '上海-閔行', 'desc' => '運動,動漫,遊戲,電影'],
            ['name' => '王五', 'age' => '22', 'mobile' => '16622222223','email' => '16622222223@qq.com', 'address' => '上海-浦東', 'desc' => '運動,日漫,電影,技術控'],
            ['name' => '趙六', 'age' => '20', 'mobile' => '16622222224','email' => '16622222224@qq.com', 'address' => '上海-長寧', 'desc' => '宅男,小說,遊戲,睡覺'],
            ['name' => '李華', 'age' => '23', 'mobile' => '16622222225','email' => '16622222225@qq.com', 'address' => '上海-寶山', 'desc' => '運動,小說,睡覺'],
        ];
        // Start from a fresh request. A $params left over from an earlier call would
        // otherwise keep its old 'index'/'id'/'body' entries and corrupt the bulk payload.
        $params = ['body' => []];
        foreach ($data as $k => $document) {
            // Action line: index into "users" with ids 1, 2, 3, ... (array offset + 1).
            $params['body'][] = [
                'index' => [
                    '_index' => 'users',
                    '_id' => $k+1
                ]
            ];
            // Source line: the document that belongs to the action above.
            $params['body'][] = $document;
        }
        $res = $client->bulk($params);
        dd($res);

獲取文件

Elasticsearch 提供實時獲取文件的方法。這意味著只要文件被索引且客戶端收到訊息確認後,你就可以立即在任何的分片中檢索文件。Get 操作透過 index/id 方式請求一個文件資訊(ES 7.x 已棄用 type,統一為 _doc,因此無需再指定 type):

        // Fetch the document with id 1 from the "users" index.
        $params = ['index' => 'users', 'id' => 1];
        $res = $client->get($params);
        dd($res);

-------------------------------返回結果--------------------------------
array:8 [
  "_index" => "users"
  "_type" => "_doc"
  "_id" => "1"
  "_version" => 2
  "_seq_no" => 1
  "_primary_term" => 1
  "found" => true
  "_source" => array:6 [
    "name" => "李四"
    "age" => "22"
    "mobile" => "16622222222"
    "email" => "16622222222@qq.com"
    "address" => "上海-閔行"
    "desc" => "運動,動漫,遊戲,電影"
  ]
]

更改文件

如果你要部分更新文件(如更改現存欄位或新增新欄位),你可以在 body 引數中指定一個 doc 引數。這樣 doc 引數內的欄位會與現存欄位進行合併。

        // Fields to merge into the existing document.
        $changes = [
            'age' => 19, // set age to 19
            'mobile' => '16633333334', // new mobile number
        ];
        $params = [
            'index' => 'users',
            'id' => 1, // update the record whose id is 1
            'body' => ['doc' => $changes],
        ];
        $res = $client->update($params);
        dd($res);

----------------------------再次查詢id=1記錄--------------------------------
array:8 [
  "_index" => "users"
  "_type" => "_doc"
  "_id" => "1"
  "_version" => 4
  "_seq_no" => 3
  "_primary_term" => 1
  "found" => true
  "_source" => array:6 [
    "name" => "李四"
    "age" => 19
    "mobile" => "16633333334"
    "email" => "16622222222@qq.com"
    "address" => "上海-閔行"
    "desc" => "運動,動漫,遊戲,電影"
  ]
]

刪除文件

可以透過id進行刪除

        // Delete the document with id 1 from the "users" index.
        $params = ['index' => 'users', 'id' => 1];
        $res = $client->delete($params);
        dd($res);

搜尋查詢

獲取所有資料
        // No query body is given, so the search runs against the "users" index as-is.
        $params = ['index' => 'users'];
        $res = $client->search($params);
        dd($res);

--------------------------------返回結果-------------------------------
array:4 [
  "took" => 0 //查詢花費時間,單位毫秒
  "timed_out" => false //是否超時
  "_shards" => array:4 [ //分片資訊
    "total" => 3 //分片總數
    "successful" => 3 //成功
    "skipped" => 0 //忽略
    "failed" => 0 //失敗
  ]
  "hits" => array:3 [ //搜尋命中結果
    "total" => array:2 [ //搜尋條件匹配的文件總數
      "value" => 3 //總命中計數的值
      "relation" => "eq" //計數規則 eq 標識計數準確,gte 標識計數不準確
    ]
    "max_score" => 1.0 //匹配度分支
    "hits" => array:3 [ //命中結果集合
      0 => array:5 [
        "_index" => "users"
        "_type" => "_doc"
        "_id" => "2"
        "_score" => 1.0
        "_source" => array:6 [
          "name" => "王五"
          "age" => "22"
          "mobile" => "16622222223"
          "email" => "16622222223@qq.com"
          "address" => "上海-浦東"
          "desc" => "運動,日漫,電影,技術控"
        ]
      ]
      1 => array:5 [
        "_index" => "users"
        "_type" => "_doc"
        "_id" => "3"
        "_score" => 1.0
        "_source" => array:6 [
          "name" => "趙六"
          "age" => "20"
          "mobile" => "16622222224"
          "email" => "16622222224@qq.com"
          "address" => "上海-長寧"
          "desc" => "宅男,小說,遊戲,睡覺"
        ]
      ]
      2 => array:5 [
        "_index" => "users"
        "_type" => "_doc"
        "_id" => "4"
        "_score" => 1.0
        "_source" => array:6 [
          "name" => "李華"
          "age" => "23"
          "mobile" => "16622222225"
          "email" => "16622222225@qq.com"
          "address" => "上海-寶山"
          "desc" => "運動,小說,睡覺"
        ]
      ]
    ]
  ]
]
匹配查詢

match 匹配型別查詢,Es 先把查詢條件進行分詞,然後依據分詞進行查詢,多個詞條之間是 or 的關係

        // match analyzes the query text first. desc is a text field, so the input is
        // tokenized into terms such as 運動, 電影, 技術控 and matched term by term.
        $matchQuery = [
            'match' => [
                'desc' => '運動,日漫,電影,技術控',
            ],
        ];
        $params = [
            'index' => 'users',
            'body' => ['query' => $matchQuery],
        ];
        $res = $client->search($params);
        dd($res);

--------------------------返回結果----------------------------
array:4 [
  "took" => 3
  "timed_out" => false
  "_shards" => array:4 [
    "total" => 3
    "successful" => 3
    "skipped" => 0
    "failed" => 0
  ]
  "hits" => array:3 [
    "total" => array:2 [
      "value" => 2
      "relation" => "eq"
    ]
    "max_score" => 4.811739
    "hits" => array:2 [
      0 => array:5 [
        "_index" => "users"
        "_type" => "_doc"
        "_id" => "2"
        "_score" => 4.811739
        "_source" => array:6 [
          "name" => "王五"
          "age" => "22"
          "mobile" => "16622222223"
          "email" => "16622222223@qq.com"
          "address" => "上海-浦東"
          "desc" => "運動,日漫,電影,技術控" //es在儲存的時候會進行分詞,此次查詢匹配上了運動這個片語
        ]
      ]
      1 => array:5 [
        "_index" => "users"
        "_type" => "_doc"
        "_id" => "4"
        "_score" => 0.5504225
        "_source" => array:6 [
          "name" => "李華"
          "age" => "23"
          "mobile" => "16622222225"
          "email" => "16622222225@qq.com"
          "address" => "上海-寶山"
          "desc" => "運動,小說,睡覺"
        ]
      ]
    ]
  ]
]
精準查詢
  1. term 查詢,精確的關鍵詞匹配查詢,不對查詢條件進行分詞
        // term does not analyze the query text: 運動 is looked up verbatim.
        $termQuery = [
            'term' => [
                'desc' => '運動',
            ],
        ];
        $params = [
            'index' => 'users',
            'body' => ['query' => $termQuery],
        ];
        $res = $client->search($params);
        dd($res);

--------------------------------結果---------------------------------
array:4 [
  "took" => 0
  "timed_out" => false
  "_shards" => array:4 [
    "total" => 3
    "successful" => 3
    "skipped" => 0
    "failed" => 0
  ]
  "hits" => array:3 [
    "total" => array:2 [
      "value" => 2
      "relation" => "eq"
    ]
    "max_score" => 0.5504225
    "hits" => array:2 [
      0 => array:5 [
        "_index" => "users"
        "_type" => "_doc"
        "_id" => "4"
        "_score" => 0.5504225
        "_source" => array:6 [
          "name" => "李華"
          "age" => "23"
          "mobile" => "16622222225"
          "email" => "16622222225@qq.com"
          "address" => "上海-寶山"
          "desc" => "運動,小說,睡覺" //es儲存的倒排索引中有運動這個分詞片語,所以可以匹配上
        ]
      ]
      1 => array:5 [
        "_index" => "users"
        "_type" => "_doc"
        "_id" => "2"
        "_score" => 0.42081726
        "_source" => array:6 [
          "name" => "王五"
          "age" => "22"
          "mobile" => "16622222223"
          "email" => "16622222223@qq.com"
          "address" => "上海-浦東"
          "desc" => "運動,日漫,電影,技術控"
        ]
      ]
    ]
  ]
]
  2. 多關鍵字精確查詢
    terms 查詢和 term 查詢一樣,但它允許你指定多值進行匹配。
    如果這個欄位包含了指定值中的任何一個值,那麼這個文件滿足條件,類似於 mysql 的 in
        // terms does not analyze the values: each one is matched verbatim, and a
        // document qualifies when desc contains any of them.
        $wanted = ['運動', '遊戲'];
        $params = [
            'index' => 'users',
            'body' => [
                'query' => [
                    'terms' => ['desc' => $wanted],
                ],
            ],
        ];
        $res = $client->search($params);
        dd($res);
---------------------------------結果-----------------------------
array:4 [
  "took" => 2
  "timed_out" => false
  "_shards" => array:4 [
    "total" => 3
    "successful" => 3
    "skipped" => 0
    "failed" => 0
  ]
  "hits" => array:3 [
    "total" => array:2 [
      "value" => 3
      "relation" => "eq"
    ]
    "max_score" => 1.0
    "hits" => array:3 [
      0 => array:5 [
        "_index" => "users"
        "_type" => "_doc"
        "_id" => "2"
        "_score" => 1.0
        "_source" => array:6 [
          "name" => "王五"
          "age" => "22"
          "mobile" => "16622222223"
          "email" => "16622222223@qq.com"
          "address" => "上海-浦東"
          "desc" => "運動,日漫,電影,技術控" //包含 運動
        ]
      ]
      1 => array:5 [
        "_index" => "users"
        "_type" => "_doc"
        "_id" => "3"
        "_score" => 1.0
        "_source" => array:6 [
          "name" => "趙六"
          "age" => "20"
          "mobile" => "16622222224"
          "email" => "16622222224@qq.com"
          "address" => "上海-長寧"
          "desc" => "宅男,小說,遊戲,睡覺" //包含 遊戲
        ]
      ]
      2 => array:5 [
        "_index" => "users"
        "_type" => "_doc"
        "_id" => "4"
        "_score" => 1.0
        "_source" => array:6 [
          "name" => "李華"
          "age" => "23"
          "mobile" => "16622222225"
          "email" => "16622222225@qq.com"
          "address" => "上海-寶山"
          "desc" => "運動,小說,睡覺" //包含 運動
        ]
      ]
    ]
  ]
]
組合查詢

bool把各種其它查詢透過must(必須 )、must_not(必須不)、should(應該)的方式進行組合

| 語句 | 情況 |
| --- | --- |
| 多個 must | 查詢 must 的交集 |
| must + should | 查詢 must 交集,如果交集裡面包含 should 的部分,則增加打分 |
| must + must_not | 查詢 must 交集,但是會排除 must_not 條件 |
| 多個 should | 查詢 should 的並集,也就是 a = 1 or a = 2 |
| should + must_not | should 的並集,並排除 must_not 的部分 |
| must + must_not + should | must 交集,並排除 must_not 部分,返回結果裡面有 should 部分,會增加打分 |

        // Find documents whose desc matches 小說 and contains the term 運動, excluding
        // age 20; among the remaining hits, name = 李華 raises the _score.
        $must = [
            ['match' => ['desc' => '小說']],
            ['term' => ['desc' => '運動']],
        ];
        $mustNot = [
            ['term' => ['age' => '20']],
        ];
        $should = [
            ['term' => ['name' => '李華']],
        ];
        $params = [
            'index' => 'users',
            'body' => [
                'query' => [
                    'bool' => [
                        'must' => $must,
                        'must_not' => $mustNot,
                        'should' => $should,
                    ],
                ],
            ],
        ];
        $res = $client->search($params);
        dd($res);

------------------------------------返回結果---------------------------------
array:4 [
  "took" => 8
  "timed_out" => false
  "_shards" => array:4 [
    "total" => 3
    "successful" => 3
    "skipped" => 0
    "failed" => 0
  ]
  "hits" => array:3 [
    "total" => array:2 [
      "value" => 1
      "relation" => "eq"
    ]
    "max_score" => 2.081674
    "hits" => array:1 [
      0 => array:5 [
        "_index" => "users"
        "_type" => "_doc"
        "_id" => "4"
        "_score" => 2.081674 //沒有加 should 的分數 = 1.100845,加完 should 分值直接提高 1
        "_source" => array:6 [
          "name" => "李華"
          "age" => "23" //年齡不等於20
          "mobile" => "16622222225"
          "email" => "16622222225@qq.com"
          "address" => "上海-寶山"
          "desc" => "運動,小說,睡覺" //包含小說,運動 片語
        ]
      ]
    ]
  ]
]
返回指定欄位

預設情況下,Elasticsearch 在搜尋的結果中,會把文件中儲存在 _source 的所有欄位都返回。
如果我們只想獲取其中的部分欄位,我們可以新增_source 的過濾

        // Only these fields stored in _source are returned for each hit.
        $returnedFields = ['name', 'desc'];
        // terms does not analyze the values; each one is matched verbatim.
        $termsQuery = [
            'terms' => [
                'desc' => ['運動', '遊戲'],
            ],
        ];
        $params = [
            'index' => 'users',
            '_source' => $returnedFields,
            'body' => ['query' => $termsQuery],
        ];
        $res = $client->search($params);
        dd($res);
------------------------------------結果-----------------------------------
array:4 [
  "took" => 0
  "timed_out" => false
  "_shards" => array:4 [
    "total" => 3
    "successful" => 3
    "skipped" => 0
    "failed" => 0
  ]
  "hits" => array:3 [
    "total" => array:2 [
      "value" => 3
      "relation" => "eq"
    ]
    "max_score" => 1.0
    "hits" => array:3 [
      0 => array:5 [
        "_index" => "users"
        "_type" => "_doc"
        "_id" => "2"
        "_score" => 1.0
        "_source" => array:2 [
          "name" => "王五"
          "desc" => "運動,日漫,電影,技術控"
        ]
      ]
      1 => array:5 [
        "_index" => "users"
        "_type" => "_doc"
        "_id" => "3"
        "_score" => 1.0
        "_source" => array:2 [
          "name" => "趙六"
          "desc" => "宅男,小說,遊戲,睡覺"
        ]
      ]
      2 => array:5 [
        "_index" => "users"
        "_type" => "_doc"
        "_id" => "4"
        "_score" => 1.0
        "_source" => array:2 [
          "name" => "李華"
          "desc" => "運動,小說,睡覺"
        ]
      ]
    ]
  ]
]
範圍查詢

range 查詢找出那些落在指定區間內的數字或者時間。range 查詢允許以下字元

| 符號 | 說明 |
| --- | --- |
| gt | > |
| gte | >= |
| lt | < |
| lte | <= |

        // Find documents with age greater than 20 and less than 30 (both exclusive).
        $ageBounds = [
            'gt' => 20,
            'lt' => 30,
        ];
        $params = [
            'index' => 'users',
            'body' => [
                'query' => [
                    'range' => ['age' => $ageBounds],
                ],
            ],
        ];
        $res = $client->search($params);
        dd($res);

----------------------------------結果---------------------------------
array:4 [
  "took" => 0
  "timed_out" => false
  "_shards" => array:4 [
    "total" => 3
    "successful" => 3
    "skipped" => 0
    "failed" => 0
  ]
  "hits" => array:3 [
    "total" => array:2 [
      "value" => 2
      "relation" => "eq"
    ]
    "max_score" => 1.0
    "hits" => array:2 [
      0 => array:5 [
        "_index" => "users"
        "_type" => "_doc"
        "_id" => "2"
        "_score" => 1.0
        "_source" => array:6 [
          "name" => "王五"
          "age" => "22"
          "mobile" => "16622222223"
          "email" => "16622222223@qq.com"
          "address" => "上海-浦東"
          "desc" => "運動,日漫,電影,技術控"
        ]
      ]
      1 => array:5 [
        "_index" => "users"
        "_type" => "_doc"
        "_id" => "4"
        "_score" => 1.0
        "_source" => array:6 [
          "name" => "李華"
          "age" => "23"
          "mobile" => "16622222225"
          "email" => "16622222225@qq.com"
          "address" => "上海-寶山"
          "desc" => "運動,小說,睡覺"
        ]
      ]
    ]
  ]
]
排序

sort 可以讓我們按照不同的欄位進行排序,並且透過 order 指定排序的方式。desc 降序,asc升序。

  1. 根據年齡進行排序
        // Match on desc, then order the hits by age, highest first.
        $params = [
            'index' => 'users',
            'body' => [
                'query' => [
                    'match' => [
                        'desc' => '運動,小說'
                    ]
                ],
                'sort' => [
                    [
                        'age' => [
                            'order' => 'desc'
                        ]
                    ]
                ]
            ],
        ];
        // Run the search; without this call the request is built but never sent.
        $res = $client->search($params);
        dd($res);

----------------------------------------結果--------------------------------
array:4 [
  "took" => 9
  "timed_out" => false
  "_shards" => array:4 [
    "total" => 3
    "successful" => 3
    "skipped" => 0
    "failed" => 0
  ]
  "hits" => array:3 [
    "total" => array:2 [
      "value" => 3
      "relation" => "eq"
    ]
    "max_score" => null
    "hits" => array:3 [
      0 => array:6 [
        "_index" => "users"
        "_type" => "_doc"
        "_id" => "4"
        "_score" => null
        "_source" => array:6 [
          "name" => "李華"
          "age" => "23"
          "mobile" => "16622222225"
          "email" => "16622222225@qq.com"
          "address" => "上海-寶山"
          "desc" => "運動,小說,睡覺"
        ]
        "sort" => array:1 [
          0 => 23
        ]
      ]
      1 => array:6 [
        "_index" => "users"
        "_type" => "_doc"
        "_id" => "2"
        "_score" => null
        "_source" => array:6 [
          "name" => "王五"
          "age" => "22"
          "mobile" => "16622222223"
          "email" => "16622222223@qq.com"
          "address" => "上海-浦東"
          "desc" => "運動,日漫,電影,技術控"
        ]
        "sort" => array:1 [
          0 => 22
        ]
      ]
      2 => array:6 [
        "_index" => "users"
        "_type" => "_doc"
        "_id" => "3"
        "_score" => null
        "_source" => array:6 [
          "name" => "趙六"
          "age" => "20"
          "mobile" => "16622222224"
          "email" => "16622222224@qq.com"
          "address" => "上海-長寧"
          "desc" => "宅男,小說,遊戲,睡覺"
        ]
        "sort" => array:1 [
          0 => 20
        ]
      ]
    ]
  ]
]
  2. 多個欄位排序,先根據年齡進行排序,如果年齡相等,再根據 _score 分數排序
        // Sort keys in priority order: age descending first, then _score descending
        // to break ties between documents of the same age.
        $sortKeys = [
            ['age' => ['order' => 'desc']],
            ['_score' => ['order' => "desc"]],
        ];
        $params = [
            'index' => 'users',
            'body' => [
                'query' => [
                    'match' => ['desc' => '運動,小說'],
                ],
                'sort' => $sortKeys,
            ],
        ];
        $res = $client->search($params);
        dd($res);

--------------------------------結果-------------------------------------
array:4 [
  "took" => 1
  "timed_out" => false
  "_shards" => array:4 [
    "total" => 3
    "successful" => 3
    "skipped" => 0
    "failed" => 0
  ]
  "hits" => array:3 [
    "total" => array:2 [
      "value" => 4
      "relation" => "eq"
    ]
    "max_score" => null
    "hits" => array:4 [
      0 => array:6 [
        "_index" => "users"
        "_type" => "_doc"
        "_id" => "4"
        "_score" => 1.100845
        "_source" => array:6 [
          "name" => "李華"
          "age" => "23"
          "mobile" => "16622222225"
          "email" => "16622222225@qq.com"
          "address" => "上海-寶山"
          "desc" => "運動,小說,睡覺"
        ]
        "sort" => array:2 [
          0 => 23
          1 => 1.100845
        ]
      ]
      1 => array:6 [
        "_index" => "users"
        "_type" => "_doc"
        "_id" => "2"
        "_score" => 0.42081726 //和韓梅梅的年齡相等,依據 分數進行排序
        "_source" => array:6 [
          "name" => "王五"
          "age" => "22"
          "mobile" => "16622222223"
          "email" => "16622222223@qq.com"
          "address" => "上海-浦東"
          "desc" => "運動,日漫,電影,技術控"
        ]
        "sort" => array:2 [
          0 => 22
          1 => 0.42081726
        ]
      ]
      2 => array:6 [
        "_index" => "users"
        "_type" => "_doc"
        "_id" => "1"
        "_score" => 0.2876821
        "_source" => array:6 [
          "name" => "韓梅梅"
          "age" => "22"
          "mobile" => "16622222278"
          "email" => "16622222278@qq.com"
          "address" => "上海-閔行"
          "desc" => "運動,美食,遊戲,電影"
        ]
        "sort" => array:2 [
          0 => 22
          1 => 0.2876821
        ]
      ]
      3 => array:6 [
        "_index" => "users"
        "_type" => "_doc"
        "_id" => "3"
        "_score" => 0.45665967
        "_source" => array:6 [
          "name" => "趙六"
          "age" => "20"
          "mobile" => "16622222224"
          "email" => "16622222224@qq.com"
          "address" => "上海-長寧"
          "desc" => "宅男,小說,遊戲,睡覺"
        ]
        "sort" => array:2 [
          0 => 20
          1 => 0.45665967
        ]
      ]
    ]
  ]
]
分頁查詢

from:當前頁的起始索引,預設從 0 開始。 from = (pageNum - 1) * size
size:每頁顯示多少條

        // Paging window: start at offset 0 and return a single document.
        $page = [
            'from' => 0,
            'size' => 1,
        ];
        $params = [
            'index' => 'users',
            'body' => $page,
        ];
        $res = $client->search($params);
        dd($res);

---------------------------結果---------------------------------------
array:4 [
  "took" => 7
  "timed_out" => false
  "_shards" => array:4 [
    "total" => 3
    "successful" => 3
    "skipped" => 0
    "failed" => 0
  ]
  "hits" => array:3 [
    "total" => array:2 [
      "value" => 4
      "relation" => "eq"
    ]
    "max_score" => 1.0
    "hits" => array:1 [
      0 => array:5 [
        "_index" => "users"
        "_type" => "_doc"
        "_id" => "2"
        "_score" => 1.0
        "_source" => array:6 [
          "name" => "王五"
          "age" => "22"
          "mobile" => "16622222223"
          "email" => "16622222223@qq.com"
          "address" => "上海-浦東"
          "desc" => "運動,日漫,電影,技術控"
        ]
      ]
    ]
  ]
]

PHP 聚合操作

ElasticSearch除了致力於搜尋之外,也提供了聚合實時分析資料的功能,它的實時性高,所有的計算結果都是即時返回。
聚合的兩個主要的概念,分別是 桶 和 指標

桶(Buckets): 簡單來說就是滿足特定條件的文件的集合。類似SQL中的GROUP BY語法

當聚合開始被執行,每個文件會決定符合哪個桶的條件,如果匹配到,文件將放入相應的桶並接著進行聚合操作

桶可以被巢狀在其他桶裡面,像是北京能放在中國桶裡,而中國桶能放在亞洲桶裡

指標(Metrics) : 對桶內的文件進行統計計算(如計算最大值、最小值、平均值等等)

桶能讓我們劃分文件到有意義的集合, 但是最終我們需要的是對這些桶內的文件進行一些指標的計算
指標通常是簡單的數學運算(像是min、max、avg、sum),而這些是透過當前桶中的文件的值來計算的,利用指標能讓你計算像平均薪資、最高出售價格、95%的查詢延遲這樣的資料

PHP 中聚合的格式大致為:

        // Aggregation definition: find the largest value of the age field.
        $ageMax = [
            // Aggregation type keyword.
            "max" => [
                'field' => 'age', // field the aggregation runs on
            ],
        ];
        $params = [
            'index' => 'employees',
            'body' => [
                // Aggregations go under 'aggs' ('aggregations' is accepted as well).
                'aggs' => [
                    // Key under which the result is returned; the name is free to choose.
                    'age_max' => $ageMax,
                ],
            ],
        ];

建立案例資料

  1. 建立 employees 索引
         // Field mappings for the employee documents.
         $properties = [
             'name' => ['type' => 'keyword'],
             'age' => ['type' => 'integer'],
             'gender' => ['type' => 'keyword'],
             'job' => [
                 'type' => 'text',
                 'fields' => [
                     // keyword sub-field so job can be used for keyword search and aggregations
                     'keyword' => [
                         'type' => 'keyword',
                         'ignore_above' => 50,
                     ],
                 ],
             ],
             'salary' => ['type' => 'integer'],
         ];
         $indexCreateParams = [
             'index' => 'employees', // index that acts as the employee table
             'body' => [
                 'mappings' => ['properties' => $properties],
             ],
         ];
         $res = $client->indices()->create($indexCreateParams);
         dd($res);
  2. 新增文件資料
         // Sample employee documents to index in one bulk request.
         $data = [
             ['name' => '張三', 'age' => 20, 'gender' => '男', 'job' => 'PHP', 'salary' => '1000'],
             ['name' => '李四', 'age' => 25, 'gender' => '男', 'job' => 'PHP', 'salary' => '2500'],
             ['name' => '王五', 'age' => 26, 'gender' => '男', 'job' => 'PHP', 'salary' => '2600'],
             ['name' => '趙六', 'age' => 29, 'gender' => '男', 'job' => 'PHP', 'salary' => '4500'],
             ['name' => '韓梅梅', 'age' => 24, 'gender' => '女', 'job' => 'UI', 'salary' => '3500'],
             ['name' => '李華', 'age' => 27, 'gender' => '男', 'job' => '產品', 'salary' => '3600'],
             ['name' => '李銳', 'age' => 25, 'gender' => '男', 'job' => '產品', 'salary' => '2800'],
             ['name' => '趙雲', 'age' => 28, 'gender' => '男', 'job' => '產品', 'salary' => '5000'],
             ['name' => '李雷', 'age' => 27, 'gender' => '男', 'job' => '測試', 'salary' => '3600'],
             ['name' => '李想', 'age' => 23, 'gender' => '男', 'job' => '測試', 'salary' => '2500'],
             ['name' => '趙雷', 'age' => 30, 'gender' => '男', 'job' => 'PHP', 'salary' => '5500'],
             ['name' => '張宇', 'age' => 27, 'gender' => '男', 'job' => 'JAVA', 'salary' => '4600'],
             ['name' => '楊建', 'age' => 24, 'gender' => '男', 'job' => 'JAVA', 'salary' => '3500'],
         ];
         // Start from a fresh request. A $params left over from an earlier call would
         // otherwise keep its old 'index'/'body' entries and corrupt the bulk payload.
         $params = ['body' => []];
         foreach ($data as $k => $document) {
             // Action line: index into "employees" with ids 1, 2, 3, ... (array offset + 1).
             $params['body'][] = [
                 'index' => [
                     '_index' => 'employees',
                     '_id' => $k+1
                 ]
             ];
             // Source line: the document that belongs to the action above.
             $params['body'][] = $document;
         }
         $res = $client->bulk($params);
         dd($res);

avg 平均值

計算員工的平均薪資

        // Compute the average salary across all employees.
        $aggs = [
            'avg_salary' => [
                'avg' => ['field' => 'salary'],
            ],
        ];
        $params = [
            'index' => 'employees',
            'body' => [
                'aggs' => $aggs,
                'size' => 0, // request zero hits alongside the aggregation
            ],
        ];
        $res = $client->search($params);
        dd($res);

----------------------------------返回結果-------------------------------
array:5 [
  "took" => 14
  "timed_out" => false
  "_shards" => array:4 [
    "total" => 1
    "successful" => 1
    "skipped" => 0
    "failed" => 0
  ]
  "hits" => array:3 [
    "total" => array:2 [
      "value" => 13
      "relation" => "eq"
    ]
    "max_score" => null
    "hits" => []
  ]
  "aggregations" => array:1 [
    "avg_salary" => array:1 [
      "value" => 3476.9230769231
    ]
  ]
]

min,max 最大,最小

        // Find the highest salary; this snippet uses the max aggregation.
        $aggs = [
            'max_salary' => [
                'max' => ['field' => 'salary'],
            ],
        ];
        $params = [
            'index' => 'employees',
            'body' => [
                'aggs' => $aggs,
                'size' => 0, // request zero hits alongside the aggregation
            ],
        ];
        $res = $client->search($params);
        dd($res);

----------------------------------結果--------------------------------
array:5 [
  "took" => 3
  "timed_out" => false
  "_shards" => array:4 [
    "total" => 1
    "successful" => 1
    "skipped" => 0
    "failed" => 0
  ]
  "hits" => array:3 [
    "total" => array:2 [
      "value" => 13
      "relation" => "eq"
    ]
    "max_score" => null
    "hits" => []
  ]
  "aggregations" => array:1 [
    "max_salary" => array:1 [
      "value" => 5500.0
    ]
  ]
]

sum 求和

        // Total of the `salary` field over all documents in the `employees` index.
        $params = [
            'index' => 'employees',
            'body' => [
                'aggs' => [
                    'sum_salary' => [
                        'sum' => [
                            'field' => 'salary',
                        ],
                    ],
                ],
                'size' => 0, // no individual hits; only the aggregation result is wanted
            ],
        ];
        // Execute the search and dump the response, matching the other examples.
        $res = $client->search($params);
        dd($res);

----------------------------------結果---------------------------------
array:5 [
  "took" => 2
  "timed_out" => false
  "_shards" => array:4 [
    "total" => 1
    "successful" => 1
    "skipped" => 0
    "failed" => 0
  ]
  "hits" => array:3 [
    "total" => array:2 [
      "value" => 13
      "relation" => "eq"
    ]
    "max_score" => null
    "hits" => []
  ]
  "aggregations" => array:1 [
    "sum_salary" => array:1 [
      "value" => 45200.0
    ]
  ]
]

cardinality 去重

cardinality 計算不重複的欄位有多少(相當於mysql中的distinct)

        // Count the number of distinct job values: `cardinality` de-duplicates
        // `job.keyword` first and then reports how many different values remain.
        $aggregations = [
            'cardinality_info' => ['cardinality' => ['field' => 'job.keyword']],
        ];
        $params = [
            'index' => 'employees',
            'body' => [
                'aggs' => $aggregations,
                'size' => 0, // no individual hits; only the aggregation result is wanted
            ],
        ];
        $res = $client->search($params);
        dd($res);

------------------------------------結果---------------------------------
array:5 [
  "took" => 1009
  "timed_out" => false
  "_shards" => array:4 [
    "total" => 1
    "successful" => 1
    "skipped" => 0
    "failed" => 0
  ]
  "hits" => array:3 [
    "total" => array:2 [
      "value" => 13
      "relation" => "eq"
    ]
    "max_score" => null
    "hits" => []
  ]
  "aggregations" => array:1 [
    "cardinality_info" => array:1 [
      "value" => 5
    ]
  ]
]

stats 計算各種聚合

透過 stats,可以同時返回 count,max,min,sum,avg 的結果

        // One `stats` aggregation on `salary` returns count, min, max, avg and sum together.
        $aggregations = [
            'salary_stats' => ['stats' => ['field' => 'salary']],
        ];
        $params = [
            'index' => 'employees',
            'body' => [
                'aggs' => $aggregations,
                'size' => 0, // no individual hits; only the aggregation result is wanted
            ],
        ];
        $res = $client->search($params);
        dd($res);

-------------------------------------結果-----------------------------------
array:5 [
  "took" => 1
  "timed_out" => false
  "_shards" => array:4 [
    "total" => 1
    "successful" => 1
    "skipped" => 0
    "failed" => 0
  ]
  "hits" => array:3 [
    "total" => array:2 [
      "value" => 13
      "relation" => "eq"
    ]
    "max_score" => null
    "hits" => []
  ]
  "aggregations" => array:1 [
    "salary_stats" => array:5 [
      "count" => 13
      "min" => 1000.0
      "max" => 5500.0
      "avg" => 3476.9230769231
      "sum" => 45200.0
    ]
  ]
]

percentiles 百分比統計

對指定欄位的值按從小到大累計每個值對應的文件數的佔比,返回 指定佔比比例對應的值

        // Salary percentiles using the default percent list (no `percents` given).
        // The aggregation is named `percentiles`, the same as its type.
        $aggregations = [
            'percentiles' => ['percentiles' => ['field' => 'salary']],
        ];
        $params = [
            'index' => 'employees',
            'body' => [
                'aggs' => $aggregations,
                'size' => 0, // no individual hits; only the aggregation result is wanted
            ],
        ];
        $res = $client->search($params);
        dd($res);

------------------------------結果--------------------------------------
array:5 [
  "took" => 2
  "timed_out" => false
  "_shards" => array:4 [
    "total" => 1
    "successful" => 1
    "skipped" => 0
    "failed" => 0
  ]
  "hits" => array:3 [
    "total" => array:2 [
      "value" => 13
      "relation" => "eq"
    ]
    "max_score" => null
    "hits" => []
  ]
  "aggregations" => array:1 [
    "percentiles" => array:1 [
      "values" => array:7 [
        "1.0" => 1000.0
        "5.0" => 1225.0
        "25.0" => 2575.0
        "50.0" => 3500.0
        "75.0" => 4525.0
        "95.0" => 5425.0
        "99.0" => 5500.0
      ]
    ]
  ]
]

上面的查詢可以理解為 1% 的人薪資在1000以下,5% 的人薪資在1225 以下……

薪資範圍 佔比
0 ~ 1000 1%
0 ~ 1225.0 5%
0 ~ 2575.0 25%
0 ~ 3500.0 50%
0 ~ 4525.0 75%
0 ~ 5425.0 95%
0 ~ 5500.0 99%

預設返回的是 1,5,25,50,75,95,99 對應的文件的值,可以使用 percents 指定某一處的分值

        // Salary percentiles for an explicit list of percents instead of the defaults.
        $percentileSpec = [
            'field' => 'salary',
            'percents' => [46, 70, 80], // salary values at the 46th, 70th and 80th percentile
        ];
        $params = [
            'index' => 'employees',
            'body' => [
                'aggs' => [
                    'percentiles' => ['percentiles' => $percentileSpec],
                ],
                'size' => 0, // no individual hits; only the aggregation result is wanted
            ],
        ];
        $res = $client->search($params);
        dd($res);

---------------------------結果------------------------------------
array:5 [
  "took" => 10
  "timed_out" => false
  "_shards" => array:4 [
    "total" => 1
    "successful" => 1
    "skipped" => 0
    "failed" => 0
  ]
  "hits" => array:3 [
    "total" => array:2 [
      "value" => 13
      "relation" => "eq"
    ]
    "max_score" => null
    "hits" => []
  ]
  "aggregations" => array:1 [
    "percentiles" => array:1 [
      "values" => array:3 [
        "46.0" => 3500.0
        "70.0" => 4140.0
        "80.0" => 4590.0
      ]
    ]
  ]
]

percentile_ranks

使用 percentile_ranks 可以反過來,計算某一值在總數中的佔比

統計年齡小於22和年齡小於25的文件的佔比

        // Inverse of `percentiles`: for each given age, report the percentage of
        // documents whose `age` falls below that value.
        $rankSpec = [
            'field' => 'age',
            'values' => [22, 25], // share of employees younger than 22 and younger than 25
        ];
        $params = [
            'index' => 'employees',
            'body' => [
                'aggs' => [
                    'percentile_ranks_example' => ['percentile_ranks' => $rankSpec],
                ],
                'size' => 0, // no individual hits; only the aggregation result is wanted
            ],
        ];
        $res = $client->search($params);
        dd($res);

-------------------------------返回結果--------------------------------
array:5 [
  "took" => 9
  "timed_out" => false
  "_shards" => array:4 [
    "total" => 1
    "successful" => 1
    "skipped" => 0
    "failed" => 0
  ]
  "hits" => array:3 [
    "total" => array:2 [
      "value" => 13
      "relation" => "eq"
    ]
    "max_score" => null
    "hits" => []
  ]
  "aggregations" => array:1 [
    "percentile_ranks_example" => array:1 [
      "values" => array:2 [
        "22.0" => 9.6153846153846
        "25.0" => 38.461538461538
      ]
    ]
  ]
]

top_hits 取分桶後的前n條

獲取到每組前n條資料,相當於sql 中Top(group by 後取出前n條)。

        // Group documents by job, sort each group by age in DESCENDING order,
        // then use top_hits to keep only the first document of every group
        // (i.e. the oldest employee per job).
        $params = [
            'index' => 'employees',
            'body' => [
                'aggs' => [
                    'job_info' => [ // aggregation name, chosen freely
                        'terms' => [ // terms buckets the documents by job (like GROUP BY job)
                            'field' => 'job.keyword',
                        ],
                        // sub-aggregation applied to each job bucket
                        'aggs' => [
                            'top_job' => [
                                'top_hits' => [
                                    'sort' => [ // order the bucket's documents by age, descending
                                        [
                                            'age' => [
                                                'order' => 'desc'
                                            ]
                                        ]
                                    ],
                                    "size" => 1 // keep a single document per bucket
                                ]
                            ]
                        ]
                    ],

                ],
                'size' => 0, // top-level hits are not needed; only the aggregation result
            ],
        ];
        $res = $client->search($params);
        dd($res);

---------------------------------結果-------------------------------------
array:5 [
  "took" => 15
  "timed_out" => false
  "_shards" => array:4 [
    "total" => 1
    "successful" => 1
    "skipped" => 0
    "failed" => 0
  ]
  "hits" => array:3 [
    "total" => array:2 [
      "value" => 13
      "relation" => "eq"
    ]
    "max_score" => null
    "hits" => []
  ]
  "aggregations" => array:1 [
    "job_info" => array:3 [
      "doc_count_error_upper_bound" => 0
      "sum_other_doc_count" => 0
      "buckets" => array:5 [
        0 => array:3 [
          "key" => "PHP"
          "doc_count" => 5
          "top_job" => array:1 [
            "hits" => array:3 [
              "total" => array:2 [
                "value" => 5
                "relation" => "eq"
              ]
              "max_score" => null
              "hits" => array:1 [
                0 => array:6 [
                  "_index" => "employees"
                  "_type" => "_doc"
                  "_id" => "11"
                  "_score" => null
                  "_source" => array:5 [
                    "name" => "趙雷"
                    "age" => 30
                    "gender" => "男"
                    "job" => "PHP"
                    "salary" => "5500"
                  ]
                  "sort" => array:1 [
                    0 => 30
                  ]
                ]
              ]
            ]
          ]
        ]
        1 => array:3 [
          "key" => "產品"
          "doc_count" => 3
          "top_job" => array:1 [
            "hits" => array:3 [
              "total" => array:2 [
                "value" => 3
                "relation" => "eq"
              ]
              "max_score" => null
              "hits" => array:1 [
                0 => array:6 [
                  "_index" => "employees"
                  "_type" => "_doc"
                  "_id" => "8"
                  "_score" => null
                  "_source" => array:5 [
                    "name" => "趙雲"
                    "age" => 28
                    "gender" => "男"
                    "job" => "產品"
                    "salary" => "5000"
                  ]
                  "sort" => array:1 [
                    0 => 28
                  ]
                ]
              ]
            ]
          ]
        ]
        2 => array:3 [
          "key" => "JAVA"
          "doc_count" => 2
          "top_job" => array:1 [
            "hits" => array:3 [
              "total" => array:2 [
                "value" => 2
                "relation" => "eq"
              ]
              "max_score" => null
              "hits" => array:1 [
                0 => array:6 [
                  "_index" => "employees"
                  "_type" => "_doc"
                  "_id" => "12"
                  "_score" => null
                  "_source" => array:5 [
                    "name" => "張宇"
                    "age" => 27
                    "gender" => "男"
                    "job" => "JAVA"
                    "salary" => "4600"
                  ]
                  "sort" => array:1 [
                    0 => 27
                  ]
                ]
              ]
            ]
          ]
        ]
        3 => array:3 [
          "key" => "測試"
          "doc_count" => 2
          "top_job" => array:1 [
            "hits" => array:3 [
              "total" => array:2 [
                "value" => 2
                "relation" => "eq"
              ]
              "max_score" => null
              "hits" => array:1 [
                0 => array:6 [
                  "_index" => "employees"
                  "_type" => "_doc"
                  "_id" => "9"
                  "_score" => null
                  "_source" => array:5 [
                    "name" => "李雷"
                    "age" => 27
                    "gender" => "男"
                    "job" => "測試"
                    "salary" => "3600"
                  ]
                  "sort" => array:1 [
                    0 => 27
                  ]
                ]
              ]
            ]
          ]
        ]
        4 => array:3 [
          "key" => "UI"
          "doc_count" => 1
          "top_job" => array:1 [
            "hits" => array:3 [
              "total" => array:2 [
                "value" => 1
                "relation" => "eq"
              ]
              "max_score" => null
              "hits" => array:1 [
                0 => array:6 [
                  "_index" => "employees"
                  "_type" => "_doc"
                  "_id" => "5"
                  "_score" => null
                  "_source" => array:5 [
                    "name" => "韓梅梅"
                    "age" => 24
                    "gender" => "女"
                    "job" => "UI"
                    "salary" => "3500"
                  ]
                  "sort" => array:1 [
                    0 => 24
                  ]
                ]
              ]
            ]
          ]
        ]
      ]
    ]
  ]
]

terms

terms 相當於 group by field

        // Bucket documents by `age` with a terms aggregation (like GROUP BY age).
        // The aggregation name `gorup_by_age` (sic) is kept as-is: it is the key
        // under which the response reports the buckets.
        $aggregations = [
            'gorup_by_age' => ['terms' => ['field' => 'age']],
        ];
        $params = [
            'index' => 'employees',
            'body' => [
                'aggs' => $aggregations,
                'size' => 0, // no individual hits; only the aggregation result is wanted
            ],
        ];
        $res = $client->search($params);
        dd($res);

---------------------------------結果----------------------------------------
array:5 [
  "took" => 23
  "timed_out" => false
  "_shards" => array:4 [
    "total" => 1
    "successful" => 1
    "skipped" => 0
    "failed" => 0
  ]
  "hits" => array:3 [
    "total" => array:2 [
      "value" => 13
      "relation" => "eq"
    ]
    "max_score" => null
    "hits" => []
  ]
  "aggregations" => array:1 [
    "gorup_by_age" => array:3 [
      "doc_count_error_upper_bound" => 0
      "sum_other_doc_count" => 0
      "buckets" => array:9 [
        0 => array:2 [
          "key" => 27
          "doc_count" => 3
        ]
        1 => array:2 [
          "key" => 24
          "doc_count" => 2
        ]
        2 => array:2 [
          "key" => 25
          "doc_count" => 2
        ]
        3 => array:2 [
          "key" => 20
          "doc_count" => 1
        ]
        4 => array:2 [
          "key" => 23
          "doc_count" => 1
        ]
        5 => array:2 [
          "key" => 26
          "doc_count" => 1
        ]
        6 => array:2 [
          "key" => 28
          "doc_count" => 1
        ]
        7 => array:2 [
          "key" => 29
          "doc_count" => 1
        ]
        8 => array:2 [
          "key" => 30
          "doc_count" => 1
        ]
      ]
    ]
  ]
]
本作品採用《CC 協議》,轉載必須註明作者和本文連結

相關文章