前言
ElasticSearch,开源的搜索和数据分析引擎,也是名列前茅的 nosql。
很多时候会想拿它和关系型数据库做对比,严格上它们是没有太多可比性的。
不过把有的东西放在一起比较,会帮助我们快速去理解一些 ElasticSearch 的内容。
老黄这边抽空梳理了一些常用的 sql 语句 “对应” ElasticSearch 的操作,主要时针对 CURD 的。
示例用的是 mysql 和 ElasticSearch 7.12.0 。
表操作
为了简单起见,弄一个简单的订单表 order-2021
来演示。
删除表
drop table `order-2021`
DELETE http://localhost:9200/order-2021
创建表
create table `order-2021` (
`id` bigint(20) NOT NULL AUTO_INCREMENT,
`order_id` varchar(32) NOT NULL,
`cus_name` varchar(20) NOT NULL,
`item_name` varchar(64) NOT NULL,
`number` int NOT NULL,
`create_time` bigint(20) NOT NULL,
`update_time` bigint(20) NOT NULL,
PRIMARY KEY (`id`),
KEY `idx_order_id` (`order_id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci
PUT http://localhost:9200/order-2021
Content-Type: application/json
{
"settings":{
"number_of_shards": 1
},
"mappings":{
"properties":{
"order_id" :{
"type":"keyword"
},
"cus_name" :{
"type":"keyword"
},
"item_name" :{
"type":"text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"number":{
"type":"integer"
},
"create_time" :{
"type":"long"
},
"update_time" :{
"type":"long"
}
}
}
}
NOTE:
- mysql 的索引需要一个一个的加,ES 这边不需要。
- mysql 有自增 Id, ES 这边也会有自动生成 Id 的操作,同样的,两边也可以自定义 Id。
- keyword 和 text 按需调整。
改表名
rename table `order-2021` to `order`;
POST http://localhost:9200/_aliases
Content-Type: application/json
{
"actions" : [
{ "add" : { "index" : "order-2021", "alias" : "order" } }
]
}
NOTE: ES 的是起一个别名,然后两个名字都可以用。
插入数据
单条插入
insert into `order-2021`
(order_id, cus_name, item_name, number, create_time, update_time)
values
('11', 'catcherwong', 'phone', 1, 1619877257000, 0)
POST http://localhost:9200/order-2021/_doc/
Content-Type: application/json
{
"order_id" : "11",
"cus_name":"catcherwong",
"item_name":"phone",
"number": 1,
"create_time": 1619877257000,
"update_time": 0
}
批量插入
insert into `order-2021` (order_id, cus_name, item_name, number, create_time, update_time)
values
('12', 'catcherwong', 'phone', 1, 1619877257000, 0),
('13', 'catcherwong', 'item-1', 2, 1619977257000, 0),
('14', 'catcherwong', 'item-2', 3, 1614877257000, 0);
POST http://localhost:9200/_bulk
Content-Type: application/x-ndjson
{ "index" : { "_index" : "order-2021" } }
{ "order_id" : "12", "cus_name":"catcherwong", "item_name":"phone", "create_time": 1619877257000, "update_time": 0 }
{ "index" : { "_index" : "order-2021" } }
{ "order_id" : "13", "cus_name":"catcherwong", "item_name":"item-1", "create_time": 1619977257000, "update_time": 0 }
{ "index" : { "_index" : "order-2021" } }
{ "order_id" : "14", "cus_name":"catcherwong", "item_name":"item-2", "create_time": 1614877257000, "update_time": 0 }
NOTE:
- ES 的批量操作需要注意,每一行最后要有换行符
结尾
- Content-Type 指定为 application/x-ndjson
更新数据
根据ID更新
update `order-2021`
set update_time = '1619877307000', cus_name = 'catcherwong-1'
where id = '6wvox3kB4OeD0spWtstW'
POST http://localhost:9200/order-2021/_update/6wvox3kB4OeD0spWtstW
Content-Type: application/json
{
"doc":{
"update_time" : 1619877307000,
"cus_name": "catcherwong-1"
}
}
根据查询条件更新
update `order-2021`
set update_time = '1619877307000', cus_name = 'catcherwong-1'
where order_id = '11'
POST http://localhost:9200/order-2021/_update_by_query
Content-Type: application/json
{
"script":{
"source":"ctx._source['cus_name']=params.cus_name;ctx._source['update_time']=params.update_time;",
"params":{
"cus_name":"catcherwong-1",
"update_time": 1619877307000
}
},
"query":{
"term":{
"order_id":"11"
}
}
}
NOTE: ES 的条件更新分为两部分,一个是 query ,也就是 where 部分, 一个是 script ,也就是 set 部分。
删除数据
根据Id删除
delete from `order-2021`
where id = 'c8cb33kBoze4GtqD9rTs'
DELETE http://localhost:9200/order-2021/_doc/c8cb33kBoze4GtqD9rTs
根据查询条件删除
delete from `order-2021`
where order_id = '11'
POST http://localhost:9200/order-2021/_delete_by_query
Content-Type: application/json
{
"query": {
"term": {
"order_id": "11"
}
}
}
查询数据
查询全部
select * from `order-2021`
GET http://localhost:9200/order-2021/_search
Content-Type: application/json
{
"query": {
"match_all": {}
}
}
条件查询
select * from `order-2021`
where cus_name in ("catcher-61333", "catcher-89631")
and create_time >= 0
and create_time <= 1622555657322
GET http://localhost:9200/order/_search
Content-Type: application/json
{
"query":{
"bool":{
"filter":[
{ "terms":{ "cus_name" : [ "catcher-61333", "catcher-89631" ] }},
{ "range":{ "create_time" : { "gte": 0, "lte": 1622555657322 } } }
]
}
}
}
NOTE:
- ES 的条件查询是有非常多的, 这里只列举了部分。
- ES 的查询,在默认情况下会有打分的操作,是会损耗性能的,而常规的 sql 查询时不需要这些,所以用 bool + filter 来忽略。
查询指定字段
select cus_name, order_id
from `order-2021`
where cus_name in ("catcher-61333", "catcher-89631")
and create_time >= 0
and create_time <= 1622555657322
GET http://localhost:9200/order-2021/_search
Content-Type: application/json
{
"_source":[ "cus_name", "order_id"],
"query":{
"bool":{
"filter":[
{ "terms":{ "cus_name" : [ "catcher-61333", "catcher-89631" ] }},
{ "range":{ "create_time" : { "gte": 0, "lte": 1622555657322 } } }
]
}
}
}
NOTE: 查询的时候,如果只要几个字段,那么可以通过
_source
来指定。
查询数量
select count(*)
from `order-2021`
where cus_name in ("catcher-61333", "catcher-89631")
and create_time >= 0
and create_time <= 1622555657322
GET http://localhost:9200/order-2021/_count
Content-Type: application/json
{
"query":{
"bool":{
"filter":[
{ "terms":{ "cus_name" : [ "catcher-61333", "catcher-89631" ] }},
{ "range":{ "create_time" : { "gte": 0, "lte": 1622555657322 } } }
]
}
}
}
NOTE: 根据指定条件获取数量,建议用
_count
来查询,_search
查询的结果里面的条数不一定准确。
浅分页
select cus_name, order_id
from `order-2021`
where cus_name in ("catcher-61333", "catcher-89631")
and create_time >= 0
and create_time <= 1622555657322
order by create_time desc
limit 0,10
GET http://localhost:9200/order-2021/_search
Content-Type: application/json
{
"_source":[ "cus_name", "order_id"],
"query":{
"bool":{
"filter":[
{ "terms":{ "cus_name" : [ "catcher-61333", "catcher-89631" ] }},
{ "range":{ "create_time" : { "gte": 0, "lte": 1622555657322 } } }
]
}
},
"size":10,
"from":0,
"sort":[
{ "create_time":{ "order":"desc"} }
]
}
NOTE: 浅分页,页码不宜过深,适合滚动加载的场景,深度分页可以考虑 SearchAfter
Group By
select number, count(*) as number_count
from `order-2021`
where create_time >= 0
and create_time <= 1622555657322
group by number
order by number asc
GET http://localhost:9200/order-2021/_search
Content-Type: application/json
{
"size":0,
"aggs": {
"number_count": {
"terms": {
"field": "number",
"order" : { "_key" : "asc" }
}
}
},
"query":{
"bool":{
"filter":[
{ "range":{ "create_time" : { "gte": 0, "lte": 1622555657322 } } }
]
}
}
}
NOTE: group by 属于聚合操作的一种,要用 aggs ,聚合,不用原数据,所以 size 设为 0。
Avg/Min/Max/Sum
select avg(number) as number_avg,
min(number) as number_min,
max(number) as number_max,
sum(number) as number_sum
from order
where create_time >= 0
and create_time <= 1622555657322
GET http://localhost:9200/order/_search
Content-Type: application/json
{
"size":0,
"query":{
"bool":{
"filter":[
{ "range":{ "create_time" : { "gte": 0, "lte": 1622555657322 } } }
]
}
},
"aggs": {
"number_avg": {
"avg": {
"field": "number"
}
},
"number_min": {
"min": {
"field": "number"
}
},
"number_max": {
"max": {
"field": "number"
}
},
"number_sum": {
"sum": {
"field": "number"
}
}
}
}
写在最后
Elasticsearch 这一块常用的 CURD 操作,和常用的关系型数据库对照一遍,其实很容易上手。
要用到一些 Elasticsearch 特有的功能时,去查询官方文档也基本可以搞定。