【06】把 Elasticsearch 當資料庫使:CASE WHEN 聚合

TaoWen發表於2016-02-19

使用 https://github.com/taowen/es-monitor 可以用 SQL 進行 elasticsearch 的查詢。前面histogram的聚合要求範圍是固定的interval,而有的時候我們希望自己定義的range來做group by。這個時候就需要用CASE WHEN語句來把欄位人為地分為幾類。也就是相當於把這個欄位先做了一個轉換,然後再用轉換過的欄位進行聚合。

CASE WHEN ipo_year >= 2000

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200
SELECT ipo_year_range, COUNT(*) FROM symbol GROUP BY 
    CASE WHEN ipo_year >= 2000 THEN `post_2000` 
    WHEN ipo_year < 2000 THEN `pre_2000` END AS ipo_year_range
EOF

有多少WHEN THEN就有多少個bucket

{"ipo_year_range": "pre_2000", "COUNT(*)": 758}
{"ipo_year_range": "post_2000", "COUNT(*)": 2140}

Elasticsearch

{
  "aggs": {
    "ipo_year_range": {
      "range": {
        "ranges": [
          {
            "from": 2000.0, 
            "key": "post_2000"
          }, 
          {
            "to": 2000.0, 
            "key": "pre_2000"
          }
        ], 
        "field": "ipo_year"
      }, 
      "aggs": {}
    }
  }, 
  "size": 0
}

這裡特別要注意from是包含這個值的,也就是>=,而to是不包含的,也就是<。

{
  "hits": {
    "hits": [], 
    "total": 6714, 
    "max_score": 0.0
  }, 
  "_shards": {
    "successful": 1, 
    "failed": 0, 
    "total": 1
  }, 
  "took": 32, 
  "aggregations": {
    "ipo_year_range": {
      "buckets": [
        {
          "to": 2000.0, 
          "to_as_string": "2000.0", 
          "key": "pre_2000", 
          "doc_count": 758
        }, 
        {
          "from_as_string": "2000.0", 
          "from": 2000.0, 
          "key": "post_2000", 
          "doc_count": 2140
        }
      ]
    }
  }, 
  "timed_out": false
}

Profile

[
  {
    "query": [
      {
        "query_type": "MatchAllDocsQuery",
        "lucene": "*:*",
        "time": "1.318725000ms",
        "breakdown": {
          "score": 0,
          "create_weight": 8817,
          "next_doc": 1219528,
          "match": 0,
          "build_scorer": 90380,
          "advance": 0
        }
      }
    ],
    "rewrite_time": 2954,
    "collector": [
      {
        "name": "MultiCollector",
        "reason": "search_multi",
        "time": "4.342172000ms",
        "children": [
          {
            "name": "TotalHitCountCollector",
            "reason": "search_count",
            "time": "0.7725600000ms"
          },
          {
            "name": "RangeAggregator: [ipo_year_range]",
            "reason": "aggregation",
            "time": "2.091621000ms"
          }
        ]
      }
    ]
  }
]

CASE WHEN ipo_year > 2000

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200
SELECT ipo_year_range, COUNT(*) FROM symbol GROUP BY
  CASE WHEN ipo_year > 2000 THEN `post_2000` 
  WHEN ipo_year < 2000 THEN `pre_2000`
  ELSE `2000` END AS ipo_year_range
EOF
{"ipo_year_range": "pre_2000", "COUNT(*)": 758}
{"ipo_year_range": "2000", "COUNT(*)": 3874}
{"ipo_year_range": "post_2000", "COUNT(*)": 2082}

Elasticsearch

{
  "aggs": {
    "ipo_year_range": {
      "filters": {
        "filters": {
          "pre_2000": {
            "range": {
              "ipo_year": {
                "lt": 2000
              }
            }
          }, 
          "post_2000": {
            "range": {
              "ipo_year": {
                "gt": 2000
              }
            }
          }
        }, 
        "other_bucket_key": "2000"
      }, 
      "aggs": {}
    }
  }, 
  "size": 0
}

因為 range aggregation 只能表達 >=,所以如果條件是 > 則只能用 filters aggregation來表達任意複雜的範圍。

{
  "hits": {
    "hits": [], 
    "total": 6714, 
    "max_score": 0.0
  }, 
  "_shards": {
    "successful": 1, 
    "failed": 0, 
    "total": 1
  }, 
  "took": 29, 
  "aggregations": {
    "ipo_year_range": {
      "buckets": {
        "pre_2000": {
          "doc_count": 758
        }, 
        "2000": {
          "doc_count": 3874
        }, 
        "post_2000": {
          "doc_count": 2082
        }
      }
    }
  }, 
  "timed_out": false
}

Profile

[
  {
    "query": [
      {
        "query_type": "MultiTermQueryConstantScoreWrapper",
        "lucene": "ipo_year:[* TO 2000}",
        "time": "0.9414620000ms",
        "breakdown": {
          "score": 0,
          "create_weight": 7987,
          "next_doc": 0,
          "match": 0,
          "build_scorer": 741140,
          "advance": 192335
        }
      },
      {
        "query_type": "MultiTermQueryConstantScoreWrapper",
        "lucene": "ipo_year:{2000 TO *]",
        "time": "5.790531000ms",
        "breakdown": {
          "score": 0,
          "create_weight": 2424,
          "next_doc": 0,
          "match": 0,
          "build_scorer": 1505935,
          "advance": 4282172
        }
      },
      {
        "query_type": "MatchAllDocsQuery",
        "lucene": "*:*",
        "time": "0.2945730000ms",
        "breakdown": {
          "score": 0,
          "create_weight": 3720,
          "next_doc": 277440,
          "match": 0,
          "build_scorer": 13413,
          "advance": 0
        }
      },
      {
        "query_type": "BooleanQuery",
        "lucene": "ipo_year:`Q ipo_year:`R ipo_year:`S ipo_year:`T ipo_year:`U ipo_year:`V ipo_year:`W ipo_year:`X ipo_year:`Y ipo_year:`Z ipo_year:`[ ipo_year:` ipo_year:`] ipo_year:`^ ipo_year:`_ ipo_year:``",
        "time": "3.399705000ms",
        "breakdown": {
          "score": 0,
          "create_weight": 290588,
          "next_doc": 0,
          "match": 0,
          "build_scorer": 706965,
          "advance": 1191896
        },
        "children": [
          {
            "query_type": "TermQuery",
            "lucene": "ipo_year:`Q",
            "time": "0.08700500000ms",
            "breakdown": {
              "score": 0,
              "create_weight": 10244,
              "next_doc": 0,
              "match": 0,
              "build_scorer": 35010,
              "advance": 41751
            }
          },
          {
            "query_type": "TermQuery",
            "lucene": "ipo_year:`R",
            "time": "0.06047300000ms",
            "breakdown": {
              "score": 0,
              "create_weight": 4474,
              "next_doc": 0,
              "match": 0,
              "build_scorer": 30064,
              "advance": 25935
            }
          },
          {
            "query_type": "TermQuery",
            "lucene": "ipo_year:`S",
            "time": "0.04610500000ms",
            "breakdown": {
              "score": 0,
              "create_weight": 5833,
              "next_doc": 0,
              "match": 0,
              "build_scorer": 28901,
              "advance": 11371
            }
          },
          {
            "query_type": "TermQuery",
            "lucene": "ipo_year:`T",
            "time": "0.05955900000ms",
            "breakdown": {
              "score": 0,
              "create_weight": 4388,
              "next_doc": 0,
              "match": 0,
              "build_scorer": 35718,
              "advance": 19453
            }
          },
          {
            "query_type": "TermQuery",
            "lucene": "ipo_year:`U",
            "time": "0.05100400000ms",
            "breakdown": {
              "score": 0,
              "create_weight": 4305,
              "next_doc": 0,
              "match": 0,
              "build_scorer": 30884,
              "advance": 15815
            }
          },
          {
            "query_type": "TermQuery",
            "lucene": "ipo_year:`V",
            "time": "0.05104100000ms",
            "breakdown": {
              "score": 0,
              "create_weight": 4312,
              "next_doc": 0,
              "match": 0,
              "build_scorer": 29454,
              "advance": 17275
            }
          },
          {
            "query_type": "TermQuery",
            "lucene": "ipo_year:`W",
            "time": "0.05296000000ms",
            "breakdown": {
              "score": 0,
              "create_weight": 5352,
              "next_doc": 0,
              "match": 0,
              "build_scorer": 28870,
              "advance": 18738
            }
          },
          {
            "query_type": "TermQuery",
            "lucene": "ipo_year:`X",
            "time": "0.03956500000ms",
            "breakdown": {
              "score": 0,
              "create_weight": 4370,
              "next_doc": 0,
              "match": 0,
              "build_scorer": 28989,
              "advance": 6206
            }
          },
          {
            "query_type": "TermQuery",
            "lucene": "ipo_year:`Y",
            "time": "0.04285000000ms",
            "breakdown": {
              "score": 0,
              "create_weight": 4276,
              "next_doc": 0,
              "match": 0,
              "build_scorer": 28735,
              "advance": 9839
            }
          },
          {
            "query_type": "TermQuery",
            "lucene": "ipo_year:`Z",
            "time": "0.1016550000ms",
            "breakdown": {
              "score": 0,
              "create_weight": 4278,
              "next_doc": 0,
              "match": 0,
              "build_scorer": 78106,
              "advance": 19271
            }
          },
          {
            "query_type": "TermQuery",
            "lucene": "ipo_year:`[",
            "time": "0.08262300000ms",
            "breakdown": {
              "score": 0,
              "create_weight": 4309,
              "next_doc": 0,
              "match": 0,
              "build_scorer": 35134,
              "advance": 43180
            }
          },
          {
            "query_type": "TermQuery",
            "lucene": "ipo_year:`",
            "time": "0.1197220000ms",
            "breakdown": {
              "score": 0,
              "create_weight": 4332,
              "next_doc": 0,
              "match": 0,
              "build_scorer": 31697,
              "advance": 83693
            }
          },
          {
            "query_type": "TermQuery",
            "lucene": "ipo_year:`]",
            "time": "0.1254620000ms",
            "breakdown": {
              "score": 0,
              "create_weight": 4215,
              "next_doc": 0,
              "match": 0,
              "build_scorer": 63276,
              "advance": 57971
            }
          },
          {
            "query_type": "TermQuery",
            "lucene": "ipo_year:`^",
            "time": "0.1436460000ms",
            "breakdown": {
              "score": 0,
              "create_weight": 4270,
              "next_doc": 0,
              "match": 0,
              "build_scorer": 36163,
              "advance": 103213
            }
          },
          {
            "query_type": "TermQuery",
            "lucene": "ipo_year:`_",
            "time": "0.1091380000ms",
            "breakdown": {
              "score": 0,
              "create_weight": 4260,
              "next_doc": 0,
              "match": 0,
              "build_scorer": 33857,
              "advance": 71021
            }
          },
          {
            "query_type": "TermQuery",
            "lucene": "ipo_year:``",
            "time": "0.03744800000ms",
            "breakdown": {
              "score": 0,
              "create_weight": 4231,
              "next_doc": 0,
              "match": 0,
              "build_scorer": 29966,
              "advance": 3251
            }
          }
        ]
      }
    ],
    "rewrite_time": 26423,
    "collector": [
      {
        "name": "MultiCollector",
        "reason": "search_multi",
        "time": "26.31135900ms",
        "children": [
          {
            "name": "TotalHitCountCollector",
            "reason": "search_count",
            "time": "0.3411380000ms"
          },
          {
            "name": "FiltersAggregator: [ipo_year_range]",
            "reason": "aggregation",
            "time": "24.97556700ms"
          }
        ]
      }
    ]
  }
]
  }
]

相關文章