# ElasticSearch Bucket Aggregation
# 실습 환경
- 💡 Elasticsearch 7.9.0
- 💡 Windows 10
- 💡 Git Bash
# Aggregation์ด๋?
๊ฐ๋จํ ์ค๋ช ํ๋ฉด ElasticSearch์ Document์ ์กฐํฉ์ ํตํด ๊ฐ์ ๋์ถํ ๋ ์ฐ์ด๋ ๋ฐฉ๋ฒ์ด๋ค.
๊ทธ ์ค Bucket Aggregation์ group by
๋ก ์ดํดํ๋ฉด ๋๋ค.
๊ทธ๋ฃน์ผ๋ก ํน์ ์ง์ ๋ ์ฌ์ฉํ๋ฉด ์ ์ฉํ๋ค.
์ด์ ์ค์ต์์ ๋ง๋ค์๋ basketball
index๋ฅผ ์ญ์ ํ๊ณ ๋ค์ ์์ฑํด์
Mapping ๋ถํฐ ์์ผ๋ณด์.
# index 삭제하기
curl -XDELETE http://localhost:9200/basketball
# index 생성하기
curl -XPUT localhost:9200/basketball
# Type Mapping
์ด์ ์๋ก์ด index์ ๋ฐ์ดํฐ๋ฅผ Mapping
์์ผ๋ณด์.
basketball_mapping.json
ํ์ผ ๋ด์ฉ
{
"record" : {
"properties" : {
"team" : {
"type" : "text",
"fielddata" : true
},
"name" : {
"type" : "text",
"fielddata" : true
},
"points" : {
"type" : "long"
},
"rebounds" : {
"type" : "long"
},
"assists" : {
"type" : "long"
},
"blocks" : {
"type" : "long"
},
"submit_date" : {
"type" : "date",
"format" : "yyyy-MM-dd"
}
}
}
}
recode๋ผ๋ type
์์ ๋ค์ํ properties
๊ฐ ์๋๋ฐ
fielddata : true
๊ฐ์ aggregationํ ๋ ์กฐํํ ์ ์๋๋ก ์ค์ ํด๋์๋ค.
Mapping์ ES์ ์ ์ฉํด๋ณด์.
$ curl -XPUT 'http://localhost:9200/basketball/record/_mapping?include_type_name=true&pretty' -d @basketball_mapping.json -H 'Content-Type: application/json'
# Documents์ Sample data Bulk ํ๊ธฐ
Sample data๋ ์๋์ ๊ฐ๋ค.
twoteam_basketball.json
ํ์ผ ๋ด์ฉ
{ "index" : { "_index" : "basketball", "_type" : "record", "_id" : "1" } }
{"team" : "Chicago","name" : "Michael Jordan", "points" : 30,"rebounds" : 3,"assists" : 4, "blocks" : 3, "submit_date" : "1996-10-11"}
{ "index" : { "_index" : "basketball", "_type" : "record", "_id" : "2" } }
{"team" : "Chicago","name" : "Michael Jordan","points" : 20,"rebounds" : 5,"assists" : 8, "blocks" : 4, "submit_date" : "1996-10-13"}
{ "index" : { "_index" : "basketball", "_type" : "record", "_id" : "3" } }
{"team" : "LA","name" : "Kobe Bryant","points" : 30,"rebounds" : 2,"assists" : 8, "blocks" : 5, "submit_date" : "2014-10-13"}
{ "index" : { "_index" : "basketball", "_type" : "record", "_id" : "4" } }
{"team" : "LA","name" : "Kobe Bryant","points" : 40,"rebounds" : 4,"assists" : 8, "blocks" : 6, "submit_date" : "2014-11-13"}
위에서 확인한 sample 데이터를 Bulk 하기 위해 다음 명령을 입력하자.
# Bulk 하기
$ curl -XPOST http://localhost:9200/_bulk?pretty --data-binary @twoteam_basketball.json -H 'Content-Type: application/json'
JSON ํ์ผ์ ํญ์ ๋ง์ง๋ง์ newline์ ์ฝ์ ํด์ฃผ์.
# Term Aggregation ์ค์ต
# Group by Team!
Team์ผ๋ก ๊ทธ๋ฃน์ ๋๋ ๋ณด์.
terms_aggs.json
ํ์ผ ๋ด์ฉ
{
"size" : 0,
"aggs" : {
"players" : {
"terms" : {
"field" : "team"
}
}
}
}
size:0
- ๋ค๋ฅธ ์ฌ๋ฌ ์ ๋ณด๋ฅผ ํ์ํ์ง ์๊ณ ๊ฒฐ๊ณผ๋ง ๋์ถ
players
- Aggregation name
terms
- term Aggregation์ ์ฌ์ฉํ๋ค๊ณ ์ ์
์๋ ๋ช ๋ น์ด๋ก Term์ ํ์ธํด๋ณด์
$ curl -XGET 'http://localhost:9200/_search?pretty' --data-binary @terms_aggs.json -H 'Content-Type: application/json'
현재 팀 상황을 표로 나타내면 다음과 같다.
| | Doc1 | Doc2 | Doc3 | Doc4 |
---|---|---|---|---|
Team | Chicago | Chicago | LA | LA |
`Term Aggregation` 결과 예상했던대로 각 팀이 2개씩 count 된 결과를 확인할 수 있다.
결과
{
# ... 생략
"aggregations" : {
"players" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "chicago",
"doc_count" : 2
},
{
"key" : "la",
"doc_count" : 2
}
]
}
}
}
# 복잡한 통계 분석 예시
통계나 분석으로 보기에 조금 무리가 있어 고려할 사항을 늘려서 실습해보자.
실제 농구경기 처럼 값을 적으면 아래와 같이 자료를 만들 수 있다.
| | Doc1 | Doc2 | Doc3 | Doc4 |
---|---|---|---|---|
Team | Chicago | Chicago | LA | LA |
Name | Michael | Michael | Kobe | Kobe |
Points | 30 | 20 | 30 | 40 |
Rebounds | 3 | 5 | 2 | 4 |
Assists | 4 | 8 | 8 | 8 |
blocks | 3 | 4 | 5 | 6 |
팀을 분류하고, 각 팀 별로 성적을 보는 통계를 만들어보자.
`stats_by_team.json` 파일 내용
{
"size" : 0,
"aggs" : {
"team_stats" : {
"terms" : {
"field" : "team"
},
"aggs" : {
"stats_score" : {
"stats" : {
"field" : "points"
}
}
}
}
}
}
- ํ๋ณ๋ก document๋ฅผ ๋ฌถ์ด์ฃผ๊ณ
- ๊ฐ ํ๋ณ๋ก ์ ์๋ณ stats ํต๊ณ๋ฅผ ๋ฐํ
$ curl -XGET http://localhost:9200/_search?pretty --data-binary @stats_by_team.json -H 'Content-Type: application/json'
결과
# ... 생략
"aggregations" : {
"team_stats" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "chicago",
"doc_count" : 2,
"stats_score" : {
"count" : 2,
"min" : 20.0,
"max" : 30.0,
"avg" : 25.0,
"sum" : 50.0
}
},
{
"key" : "la",
"doc_count" : 2,
"stats_score" : {
"count" : 2,
"min" : 30.0,
"max" : 40.0,
"avg" : 35.0,
"sum" : 70.0
}
}
]
}
#... 생략
Chicagoํ๊ณผ LAํ ๊ฐ๊ฐ ์ ์ ํต๊ณ๊ฐ ๋ํ๋๋ ๊ฒ์ ํ์ธํ ์ ์๋ค.
๋ณธ ํฌ์คํ ์
Inflearn
์ ELK ์คํ (ElasticSearch, Logstash, Kibana) ์ผ๋ก ๋ฐ์ดํฐ ๋ถ์ (opens new window) ๊ฐ์๋ฅผ ์ฐธ๊ณ ํ์ฌ ์์ฑ๋์์ต๋๋ค.