使用 https://github.com/taowen/es-monitor 可以用 SQL 進行 elasticsearch 的查詢。最簡單的聚合是把整個index當作一個巨大的bucket(桶),然後去數這個桶裡的數字。複雜一些的聚合我們先要能夠把文件分到不同的桶裡,然後去分別數每個桶的數字。統計聚合需要 分桶 (GROUP BY XXX INTO BUCKET),也需要 指標(METRIC)
GROUP BY exchange => COUNT(*)
SQL
$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select exchange, count(*) from symbol group by exchange
EOF
{"count(*)": 3240, "exchange": "nyse"}
{"count(*)": 3089, "exchange": "nasdaq"}
{"count(*)": 385, "exchange": "nyse mkt"}
Elasticsearch
{
"aggs": {
"exchange": {
"terms": {
"field": "exchange",
"size": 0
},
"aggs": {}
}
},
"size": 0
}
{
"hits": {
"hits": [],
"total": 6714,
"max_score": 0.0
},
"_shards": {
"successful": 1,
"failed": 0,
"total": 1
},
"took": 23,
"aggregations": {
"exchange": {
"buckets": [
{
"key": "nyse",
"doc_count": 3240
},
{
"key": "nasdaq",
"doc_count": 3089
},
{
"key": "nyse mkt",
"doc_count": 385
}
],
"sum_other_doc_count": 0,
"doc_count_error_upper_bound": 0
}
},
"timed_out": false
}
Profile
[
{
"query": [
{
"query_type": "MatchAllDocsQuery",
"lucene": "*:*",
"time": "0.2003150000ms",
"breakdown": {
"score": 0,
"create_weight": 8029,
"next_doc": 164079,
"match": 0,
"build_scorer": 28207,
"advance": 0
}
}
],
"rewrite_time": 2657,
"collector": [
{
"name": "MultiCollector",
"reason": "search_multi",
"time": "1.245165000ms",
"children": [
{
"name": "TotalHitCountCollector",
"reason": "search_count",
"time": "0.2540820000ms"
},
{
"name": "GlobalOrdinalsStringTermsAggregator: [exchange]",
"reason": "aggregation",
"time": "0.3493420000ms"
}
]
}
]
}
]
GlobalOrdinalsStringTermsAggregator 這個 GROUP BY 的方式非常快
GROUP BY ipo_year => MAX(market_cap)
MAX(market_cap)
和COUNT(*)
的區別在於,COUNT(*)
不需要新增額外的metric聚合,每個GROUP BY自帶了doc_count的計算結果。
SQL
$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select exchange, max(market_cap) from symbol group by exchange
EOF
{"max(market_cap)": 87022480321.0, "exchange": "nyse"}
{"max(market_cap)": 522690000000.0, "exchange": "nasdaq"}
{"max(market_cap)": 9586866593.0, "exchange": "nyse mkt"}
Elasticsearch
{
"aggs": {
"exchange": {
"terms": {
"field": "exchange",
"size": 0
},
"aggs": {
"max(market_cap)": {
"max": {
"field": "market_cap"
}
}
}
}
},
"size": 0
}
{
"hits": {
"hits": [],
"total": 6714,
"max_score": 0.0
},
"_shards": {
"successful": 1,
"failed": 0,
"total": 1
},
"took": 6,
"aggregations": {
"exchange": {
"buckets": [
{
"max(market_cap)": {
"value": 87022480321.0
},
"key": "nyse",
"doc_count": 3240
},
{
"max(market_cap)": {
"value": 522690000000.0
},
"key": "nasdaq",
"doc_count": 3089
},
{
"max(market_cap)": {
"value": 9586866593.0
},
"key": "nyse mkt",
"doc_count": 385
}
],
"sum_other_doc_count": 0,
"doc_count_error_upper_bound": 0
}
},
"timed_out": false
}
Profile
[
{
"query": [
{
"query_type": "MatchAllDocsQuery",
"lucene": "*:*",
"time": "0.2007250000ms",
"breakdown": {
"score": 0,
"create_weight": 8264,
"next_doc": 164363,
"match": 0,
"build_scorer": 28098,
"advance": 0
}
}
],
"rewrite_time": 2678,
"collector": [
{
"name": "MultiCollector",
"reason": "search_multi",
"time": "1.741733000ms",
"children": [
{
"name": "TotalHitCountCollector",
"reason": "search_count",
"time": "0.2256530000ms"
},
{
"name": "GlobalOrdinalsStringTermsAggregator: [exchange]",
"reason": "aggregation",
"time": "0.8099970000ms"
}
]
}
]
}
]
下鑽 GROUP BY exchange, sector
SQL裡分桶的方式可以是多級的,先按exchange分,接著再按sector進一步細分
$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select exchange, sector, max(market_cap) from symbol group by exchange, sector
EOF
{"sector": "n/a", "max(market_cap)": 1409695805.0, "exchange": "nyse"}
{"sector": "Consumer Services", "max(market_cap)": 46884855259.0, "exchange": "nyse"}
{"sector": "Finance", "max(market_cap)": 35827719192.0, "exchange": "nyse"}
{"sector": "Energy", "max(market_cap)": 86091463700.0, "exchange": "nyse"}
{"sector": "Public Utilities", "max(market_cap)": 24117360000.0, "exchange": "nyse"}
{"sector": "Basic Industries", "max(market_cap)": 27347618968.0, "exchange": "nyse"}
{"sector": "Capital Goods", "max(market_cap)": 82469796110.0, "exchange": "nyse"}
{"sector": "Technology", "max(market_cap)": 47882944000.0, "exchange": "nyse"}
{"sector": "Health Care", "max(market_cap)": 82041827564.0, "exchange": "nyse"}
{"sector": "Consumer Non-Durables", "max(market_cap)": 87022480321.0, "exchange": "nyse"}
{"sector": "Consumer Durables", "max(market_cap)": 8233713549.0, "exchange": "nyse"}
{"sector": "Transportation", "max(market_cap)": 9094527055.0, "exchange": "nyse"}
{"sector": "Miscellaneous", "max(market_cap)": 54171930444.0, "exchange": "nyse"}
{"sector": "Finance", "max(market_cap)": 30620000000.0, "exchange": "nasdaq"}
{"sector": "Health Care", "max(market_cap)": 126540000000.0, "exchange": "nasdaq"}
{"sector": "Technology", "max(market_cap)": 522690000000.0, "exchange": "nasdaq"}
{"sector": "Consumer Services", "max(market_cap)": 230940000000.0, "exchange": "nasdaq"}
{"sector": "n/a", "max(market_cap)": 34620000000.0, "exchange": "nasdaq"}
{"sector": "Capital Goods", "max(market_cap)": 20310000000.0, "exchange": "nasdaq"}
{"sector": "Consumer Non-Durables", "max(market_cap)": 87500000000.0, "exchange": "nasdaq"}
{"sector": "Miscellaneous", "max(market_cap)": 51420000000.0, "exchange": "nasdaq"}
{"sector": "Consumer Durables", "max(market_cap)": 7690000000.0, "exchange": "nasdaq"}
{"sector": "Basic Industries", "max(market_cap)": 9170000000.0, "exchange": "nasdaq"}
{"sector": "Energy", "max(market_cap)": 5760000000.0, "exchange": "nasdaq"}
{"sector": "Public Utilities", "max(market_cap)": 77810000000.0, "exchange": "nasdaq"}
{"sector": "Transportation", "max(market_cap)": 23530000000.0, "exchange": "nasdaq"}
{"sector": "n/a", "max(market_cap)": 971774087.0, "exchange": "nyse mkt"}
{"sector": "Basic Industries", "max(market_cap)": 424184478.0, "exchange": "nyse mkt"}
{"sector": "Health Care", "max(market_cap)": 93765452.0, "exchange": "nyse mkt"}
{"sector": "Energy", "max(market_cap)": 5199118597.0, "exchange": "nyse mkt"}
{"sector": "Consumer Services", "max(market_cap)": 99940496.0, "exchange": "nyse mkt"}
{"sector": "Capital Goods", "max(market_cap)": 49720054.0, "exchange": "nyse mkt"}
{"sector": "Technology", "max(market_cap)": 44979980.0, "exchange": "nyse mkt"}
{"sector": "Consumer Non-Durables", "max(market_cap)": 9586866593.0, "exchange": "nyse mkt"}
{"sector": "Finance", "max(market_cap)": 259074010.0, "exchange": "nyse mkt"}
{"sector": "Public Utilities", "max(market_cap)": 968077000.0, "exchange": "nyse mkt"}
{"sector": "Consumer Durables", "max(market_cap)": 50452938.0, "exchange": "nyse mkt"}
{"sector": "Miscellaneous", "max(market_cap)": 66549988.0, "exchange": "nyse mkt"}
{"sector": "Transportation", "max(market_cap)": 47179899.0, "exchange": "nyse mkt"}
Elasticsearch
{
"aggs": {
"exchange": {
"terms": {
"field": "exchange",
"size": 0
},
"aggs": {
"sector": {
"terms": {
"field": "sector",
"size": 0
},
"aggs": {
"max(market_cap)": {
"max": {
"field": "market_cap"
}
}
}
}
}
}
},
"size": 0
}
{
"hits": {
"hits": [],
"total": 6714,
"max_score": 0.0
},
"_shards": {
"successful": 1,
"failed": 0,
"total": 1
},
"took": 11,
"aggregations": {
"exchange": {
"buckets": [
{
"sector": {
"buckets": [
{
"max(market_cap)": {
"value": 1409695805.0
},
"key": "n/a",
"doc_count": 963
},
{
"max(market_cap)": {
"value": 46884855259.0
},
"key": "Consumer Services",
"doc_count": 468
},
{
"max(market_cap)": {
"value": 35827719192.0
},
"key": "Finance",
"doc_count": 384
},
{
"max(market_cap)": {
"value": 86091463700.0
},
"key": "Energy",
"doc_count": 230
},
{
"max(market_cap)": {
"value": 24117360000.0
},
"key": "Public Utilities",
"doc_count": 221
},
{
"max(market_cap)": {
"value": 27347618968.0
},
"key": "Basic Industries",
"doc_count": 200
},
{
"max(market_cap)": {
"value": 82469796110.0
},
"key": "Capital Goods",
"doc_count": 188
},
{
"max(market_cap)": {
"value": 47882944000.0
},
"key": "Technology",
"doc_count": 186
},
{
"max(market_cap)": {
"value": 82041827564.0
},
"key": "Health Care",
"doc_count": 113
},
{
"max(market_cap)": {
"value": 87022480321.0
},
"key": "Consumer Non-Durables",
"doc_count": 111
},
{
"max(market_cap)": {
"value": 8233713549.0
},
"key": "Consumer Durables",
"doc_count": 66
},
{
"max(market_cap)": {
"value": 9094527055.0
},
"key": "Transportation",
"doc_count": 60
},
{
"max(market_cap)": {
"value": 54171930444.0
},
"key": "Miscellaneous",
"doc_count": 50
}
],
"sum_other_doc_count": 0,
"doc_count_error_upper_bound": 0
},
"key": "nyse",
"doc_count": 3240
},
{
"sector": {
"buckets": [
{
"max(market_cap)": {
"value": 30620000000.0
},
"key": "Finance",
"doc_count": 637
},
{
"max(market_cap)": {
"value": 126540000000.0
},
"key": "Health Care",
"doc_count": 621
},
{
"max(market_cap)": {
"value": 522690000000.0
},
"key": "Technology",
"doc_count": 449
},
{
"max(market_cap)": {
"value": 230940000000.0
},
"key": "Consumer Services",
"doc_count": 354
},
{
"max(market_cap)": {
"value": 34620000000.0
},
"key": "n/a",
"doc_count": 287
},
{
"max(market_cap)": {
"value": 20310000000.0
},
"key": "Capital Goods",
"doc_count": 179
},
{
"max(market_cap)": {
"value": 87500000000.0
},
"key": "Consumer Non-Durables",
"doc_count": 111
},
{
"max(market_cap)": {
"value": 51420000000.0
},
"key": "Miscellaneous",
"doc_count": 97
},
{
"max(market_cap)": {
"value": 7690000000.0
},
"key": "Consumer Durables",
"doc_count": 82
},
{
"max(market_cap)": {
"value": 9170000000.0
},
"key": "Basic Industries",
"doc_count": 78
},
{
"max(market_cap)": {
"value": 5760000000.0
},
"key": "Energy",
"doc_count": 70
},
{
"max(market_cap)": {
"value": 77810000000.0
},
"key": "Public Utilities",
"doc_count": 68
},
{
"max(market_cap)": {
"value": 23530000000.0
},
"key": "Transportation",
"doc_count": 56
}
],
"sum_other_doc_count": 0,
"doc_count_error_upper_bound": 0
},
"key": "nasdaq",
"doc_count": 3089
},
{
"sector": {
"buckets": [
{
"max(market_cap)": {
"value": 971774087.0
},
"key": "n/a",
"doc_count": 123
},
{
"max(market_cap)": {
"value": 424184478.0
},
"key": "Basic Industries",
"doc_count": 52
},
{
"max(market_cap)": {
"value": 93765452.0
},
"key": "Health Care",
"doc_count": 52
},
{
"max(market_cap)": {
"value": 5199118597.0
},
"key": "Energy",
"doc_count": 32
},
{
"max(market_cap)": {
"value": 99940496.0
},
"key": "Consumer Services",
"doc_count": 28
},
{
"max(market_cap)": {
"value": 49720054.0
},
"key": "Capital Goods",
"doc_count": 25
},
{
"max(market_cap)": {
"value": 44979980.0
},
"key": "Technology",
"doc_count": 20
},
{
"max(market_cap)": {
"value": 9586866593.0
},
"key": "Consumer Non-Durables",
"doc_count": 15
},
{
"max(market_cap)": {
"value": 259074010.0
},
"key": "Finance",
"doc_count": 13
},
{
"max(market_cap)": {
"value": 968077000.0
},
"key": "Public Utilities",
"doc_count": 12
},
{
"max(market_cap)": {
"value": 50452938.0
},
"key": "Consumer Durables",
"doc_count": 5
},
{
"max(market_cap)": {
"value": 66549988.0
},
"key": "Miscellaneous",
"doc_count": 5
},
{
"max(market_cap)": {
"value": 47179899.0
},
"key": "Transportation",
"doc_count": 3
}
],
"sum_other_doc_count": 0,
"doc_count_error_upper_bound": 0
},
"key": "nyse mkt",
"doc_count": 385
}
],
"sum_other_doc_count": 0,
"doc_count_error_upper_bound": 0
}
},
"timed_out": false
}
Profile
[
{
"query": [
{
"query_type": "MatchAllDocsQuery",
"lucene": "*:*",
"time": "0.2216380000ms",
"breakdown": {
"score": 0,
"create_weight": 11316,
"next_doc": 180546,
"match": 0,
"build_scorer": 29776,
"advance": 0
}
}
],
"rewrite_time": 2456,
"collector": [
{
"name": "MultiCollector",
"reason": "search_multi",
"time": "3.587216000ms",
"children": [
{
"name": "TotalHitCountCollector",
"reason": "search_count",
"time": "0.2345190000ms"
},
{
"name": "GlobalOrdinalsStringTermsAggregator: [exchange]",
"reason": "aggregation",
"time": "2.605895000ms"
}
]
}
]
}
]