1.ES聚合分析是什么
聚合分析是数据库中重要的功能特性,完成对一个查询的数据集中数据的聚合计算,如:找出某字段(或计算表达式的结果)的最大值、最小值,计算和、平均值等。ES作为搜索引擎兼数据库,同样提供了强大的聚合分析能力。
对一个数据集求最大、最小、和、平均值等指标的聚合,在ES中称为指标聚合。
而关系型数据库中除了有聚合函数外,还可以对查询出的数据进行分组(group by),再在组上进行指标聚合。在ES中称为桶聚合。
2.max min sum avg
求出火箭队球员的平均年龄
POST /nba/_search { "query": { "match": { "teamNameEn": "Rockets" } }, "aggs": { "avgAge": { "avg": { "field": "age" } } } }
效果:
3.value_count 统计非空字段的文档数
求出火箭队中球员打球时间不为空的数量
POST /nba/_search { "query": { "match": { "teamNameEn": "Rockets" } }, "aggs": { "countPlayreYear": { "value_count": { "field": "playYear" } } }, "size": 1 }
加上size,效果:
{ "took" : 7, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 21, "relation" : "eq" }, "max_score" : 3.2723064, "hits" : [ { "_index" : "nba", "_type" : "_doc", "_id" : "86", "_score" : 3.2723064, "_source" : { "countryEn" : "Switzerland", "teamName" : "火箭", "birthDay" : 769233600000, "country" : "瑞士", "teamCityEn" : "Houston", "code" : "clint_capela", "displayAffiliation" : "Switzerland/Switzerland", "displayName" : "克林特 卡佩拉", "schoolType" : "", "teamConference" : "西部", "teamConferenceEn" : "Western", "weight" : "108.9 公斤", "teamCity" : "休斯顿", "playYear" : 5, "jerseyNo" : "15", "teamNameEn" : "Rockets", "draft" : 2014, "displayNameEn" : "Clint Capela", "heightValue" : 2.08, "birthDayStr" : "1994-05-18", "position" : "中锋", "age" : 25, "playerId" : "203991" } } ] }, "aggregations" : { "countPlayreYear" : { "value" : 21 } } }
4.Cardinality 值去重计数
查出火箭队中年龄不同的数量
POST /nba/_search { "query":{ "term":{ "teamNameEn":{ "value":"Rockets" } } }, "aggs":{ "counAget":{ "cardinality":{ "field":"age" } } }, "size":0 }
5.stats 统计 count、max、min、avg、sum 5个值
POST /nba/_search { "query": { "match": { "teamNameEn": "Rockets" } }, "aggs": { "statsField": { "stats": { "field": "playYear" } } }, "size": 1 }
效果:
{ "took" : 1, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 21, "relation" : "eq" }, "max_score" : 3.2723064, "hits" : [ { "_index" : "nba", "_type" : "_doc", "_id" : "86", "_score" : 3.2723064, "_source" : { "countryEn" : "Switzerland", "teamName" : "火箭", "birthDay" : 769233600000, "country" : "瑞士", "teamCityEn" : "Houston", "code" : "clint_capela", "displayAffiliation" : "Switzerland/Switzerland", "displayName" : "克林特 卡佩拉", "schoolType" : "", "teamConference" : "西部", "teamConferenceEn" : "Western", "weight" : "108.9 公斤", "teamCity" : "休斯顿", "playYear" : 5, "jerseyNo" : "15", "teamNameEn" : "Rockets", "draft" : 2014, "displayNameEn" : "Clint Capela", "heightValue" : 2.08, "birthDayStr" : "1994-05-18", "position" : "中锋", "age" : 25, "playerId" : "203991" } } ] }, "aggregations" : { "statsField" : { "count" : 21, "min" : 0.0, "max" : 17.0, "avg" : 5.333333333333333, "sum" : 112.0 } } }
6.Extended stats 比stats多4个统计结果: 平方和、方差、标准差、平均值加/减两个标准差的区间
POST /nba/_search { "query": { "match": { "teamNameEn": "Rockets" } }, "aggs": { "statsField": { "extended_stats": { "field": "playYear" } } }, "size": 1 }
效果:
{ "took" : 8, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 21, "relation" : "eq" }, "max_score" : 3.2723064, "hits" : [ { "_index" : "nba", "_type" : "_doc", "_id" : "86", "_score" : 3.2723064, "_source" : { "countryEn" : "Switzerland", "teamName" : "火箭", "birthDay" : 769233600000, "country" : "瑞士", "teamCityEn" : "Houston", "code" : "clint_capela", "displayAffiliation" : "Switzerland/Switzerland", "displayName" : "克林特 卡佩拉", "schoolType" : "", "teamConference" : "西部", "teamConferenceEn" : "Western", "weight" : "108.9 公斤", "teamCity" : "休斯顿", "playYear" : 5, "jerseyNo" : "15", "teamNameEn" : "Rockets", "draft" : 2014, "displayNameEn" : "Clint Capela", "heightValue" : 2.08, "birthDayStr" : "1994-05-18", "position" : "中锋", "age" : 25, "playerId" : "203991" } } ] }, "aggregations" : { "statsField" : { "count" : 21, "min" : 0.0, "max" : 17.0, "avg" : 5.333333333333333, "sum" : 112.0, "sum_of_squares" : 1140.0, "variance" : 25.84126984126984, "std_deviation" : 5.083430912412387, "std_deviation_bounds" : { "upper" : 15.500195158158107, "lower" : -4.833528491491442 } } } }
7.Percentiles 占比百分位对应的值统计,默认返回[ 1, 5, 25, 50, 75, 95, 99 ]分位上的值
查出火箭的球员的年龄占比
POST /nba/_search
{
"query": {
"term": {
"teamNameEn": {
"value": "Rockets"
}
}
},
"aggs": {
"percentAge": {
"percentiles": {
"field": "age",
"percents": [
20,
50,
75
]
}
}
},
"size": 0
}
效果:
{ "took" : 37, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 21, "relation" : "eq" }, "max_score" : null, "hits" : [ ] }, "aggregations" : { "percentAge" : { "values" : { "20.0" : 21.7, "50.0" : 25.0, "75.0" : 30.25 } } } }