ElasticSearch
ES 之 增删改查
GET _search
{
"query": {
"match_all": {}
}
}
# 添加doc文本数据
PUT ttt/doc/1
{
"name":"ttt"
}
# 获取数据 格式:数据库/类型/索引
GET ttt/doc/2
# Index document 2 into the dengshuai index (type doc); it is deleted by id further down.
# NOTE(review): the request path was garbled in the notes; reconstructed from the later
# DELETE dengshuai/doc/2 request — confirm.
PUT dengshuai/doc/2
{
  "name": "zz",
  "age": 19
}
# Fetch all documents of type doc in the dengshuai index
# NOTE(review): the request path was garbled in the notes; reconstructed as a _search — confirm.
GET dengshuai/doc/_search
# Delete a single document by id
DELETE dengshuai/doc/2
# NOTE(review): this line was garbled in the notes and is not a valid request;
# reconstructed as an index lookup — confirm the intended request.
GET dengshuai
#---------增删改查CRUD---------------#
### 新增 格式: 数据库/类型/索引
PUT s23/doc/1
{
"name":"帅",
"age":24,
"sex":"man",
"desc":"笨",
"tag":["睡觉"],
"both":"1997-11-16"
}
PUT s23/doc/2
{
"name":"亚伟",
"age":26,
"sex":"woman",
"desc":"吃",
"tag":["打游戏"],
"both":"1995-11-16"
}
PUT s23/doc/3
{
"name":"崽崽",
"age":9,
"sex":"woman",
"desc":"白",
"tag":["代码"],
"both":"1992-11-16"
}
PUT s23/doc/4
{
"name":"崽1",
"age":25,
"sex":"woman",
"desc":"白",
"tag":["代码"],
"both":"1993-11-16"
}
PUT s23/doc/5
{
"name":"崽2",
"age":27,
"sex":"woman",
"desc":"白",
"tag":["代码"],
"both":"1993-11-16"
}
#### 查
# 查所有 格式: 数据库/类型/_search/
GET s23/doc/_search/
# 查一个 格式: 数据库/类型/索引/
GET s23/doc/1
### 修改 格式: 数据库/类型/索引id/_update
POST s23/doc/1/_update
{
"doc":{
"tag":["王者","手机"]
}
}
### 删除
# 删一个 格式: 数据库/类型/索引
DELETE s23/doc/3
# 删所有 ,删库
DELETE s23
#### Query-string (URI) search
# All conditions belong inside the single q parameter, joined with AND.
# A bare "&sex:man" is parsed as a separate URL parameter, not as part of the query.
# Outside Kibana Console the spaces must be URL-encoded (%20).
GET s23/doc/_search?q=age:24 AND sex:man
ES 之 DSL 查询
# Query DSL 灵活查询
### DSL 查询, Query DSL 灵活查询
### DSL 之 match 系列查询 ,match 返回所有匹配的分词.
GET s23/doc/_search
{
"query": {
"match": {
"age": "24"
}
}
}
### DSL 之 matchall,查询全部
GET s23/doc/_search
{
"query": {
"match_all": {
}
}
}
### DSL 之 sort 排序查询 , 注意: text 类型的字符串字段默认无法用于排序(需要使用 keyword 类型或 keyword 子字段)
GET s23/doc/_search
{
"query": {
"match_all": {
}
},
"sort": [
{
"age": {
"order": "desc"
}
}
]
}
### from/size 可以做简单分页
# from 从第几条开始获取, 超出返回空列表
# size 获取多少条
GET s23/doc/_search
{
"query": {
"match_all": {}
},
"from":0,
"size": 2 ,
"sort": [
{
"age": {
"order": "desc"
}
}
]
}
### source , 过滤字段
GET s23/doc/_search
{
"query": {
"match_all": {}
},
"sort": [
{
"age": {
"order": "desc"
}
}
],
"_source": ["name","age"]
}
### HEAD 操作 , 查看状态
HEAD s23 ===》 200
ES 访问地址:
http://127.0.0.1:9200/
Kibana 访问地址
http://127.0.0.1:5601/app/kibana
ES 之 进阶操作
# 1. 短语查询 match_phrase
# 2. 分词 match
# 3. 前缀查询 match_phrase_prefix
# 4. 多字段查询 multi_match
# 5. 高亮查询 highlight
# 6. 聚合函数 sum avg
# 7. mapping 映射
# 8. settings 配置
# -------- match_phrase 短语查询
GET s23/doc/_search
GET s23/doc/_search
{
"query": {"match": {
"desc" : "白"
}}
}
PUT a1/doc/3
{
"title":"美国和中国是好邻居"
}
### match 会将词语分词,只要符合条件就返回
GET a1/doc/_search
{
"query": {
"match": {
"title": "中国的首都"
}
}
}
### match_phrase 短语查询,
# slop 控制短语中各词条之间允许的位置间隔(可移动的位置数),默认是 0
GET a1/doc/_search
{
"query": {
"match_phrase": {
"title": {
"query": "中国的首都",
"slop": 1
}
}
}
}
### match_phrase_prefix 词组最左前缀查询,根据短语最后一个词查询
PUT a2/doc/2
{
"title":"you love you"
}
GET a2/doc/_search
{
"query": {
"match_phrase_prefix": {
"title": "lo"
}
}
}
### multi_match , 多字段查询
# query: 放置查询的内容,
# fields: 字段
GET a2/doc/_search
{
"query": {
"multi_match": {
"query": "i",
"fields": ["title"]
}
}
}
### ------高亮查询--------
PUT t3/doc/2
{
"title":"我的同学是李四"
}
GET t3/doc/_search
{
"query": {
"match_all": {}
}
}
# 1. 高亮 highlight , 默认是em标签
GET t3/doc/_search
{
"query": {
"match": {
"title": "同学"
}
},
"highlight": {
"fields": {"title": {}}
}
}
# 2. Custom highlight tags via pre_tags / post_tags
# The matched text is wrapped between pre_tags and post_tags, so they must form an
# opening/closing pair. <br> is a void element and cannot wrap text, and the CSS
# declaration is written "font-size:20px" (no "=").
GET t3/doc/_search
{
  "query": {
    "match": {
      "title": "同学"
    }
  },
  "highlight": {
    "pre_tags": "<b style='font-size:20px'>",
    "post_tags": "</b>",
    "fields": {"title": {}}
  }
}
# --------聚合函数----------
# aggs
# name是自定义标题
# 类型 , 字段
# avg 平均
GET s23/doc/_search
{
"query": {
"match_all": {
}
},
"aggs": {
"my_avg_age": {
"avg": {
"field": "age"
}
}
}
}
# sum 求和
GET s23/doc/_search
{
"query": {
"match_all": {
}
},
"aggs": {
"my_sum_age": {
"sum": {
"field": "age"
}
}
}
}
# range 范围
# from 闭合
# to 不闭合
GET s23/doc/_search
{
"query": {
"match_all": {}
},
"aggs": {
"my_range": {
"range": {
"field": "age",
"ranges": [
{
"from": 30,
"to": 35
},
{
"from": 0,
"to":10
},
{
"from": 10,
"to":30
}
]
}
}
}
}
# ------mapping映射(如何存储和索引的过程) :自定义表结构-----
GET s23
# 映射爆炸: 字段索引太多,导致内存溢出
# 1. 自定义 表结构
PUT t1
{
"mappings": {
"doc":{
"properties":{
"name":{
"type":"text"
},
"age":{
"type":"long"
}
}
}
}
}
GET t1
# dynamic:true 动态映射,没有的字段,自动补充类型
DELETE t2
PUT t2
{
"mappings": {
"doc":{
"dynamic":true,
"properties":{
"t1":{
"type":"text"
},
"t2":{
"type":"text"
}
}
}
}
}
PUT t2/doc/1
{
"t1":"大撒旦撒旦所",
"t2":"都是"
}
PUT t2/doc/2
{
"t1":"大撒旦撒旦所",
"t2":"都是",
"t3":"alex"
}
GET t2/doc/_search
{
"query": {
"match": {
"t3": "alex"
}
}
}
# dynamic:false 静态映射 : 忽略未定义的字段,但是还是会存储数据
PUT t4
{
"mappings": {
"doc":{
"dynamic":false,
"properties":{
"t1":{
"type":"text"
},
"t2":{
"type":"text"
}
}
}
}
}
PUT t4/doc/1
{
"t1":"大撒旦撒旦所",
"t2":"都是"
}
PUT t4/doc/2
{
"t1":"大撒旦撒旦所",
"t2":"都是",
"t3":"alex"
}
PUT t4/doc/3
{
"t3":"ale"
}
GET t4/doc/3
GET t4/doc/_search
{
"query": {
"match": {
"t3": "alex"
}
}
}
# dynamic : strict 严格的映射类型
DELETE t5
PUT t5
{
"mappings": {
"doc":{
"dynamic":"strict",
"properties":{
"t1":{
"type":"text"
},
"t2":{
"type":"text"
}
}
}
}
}
PUT t5/doc/1
{
"t1":"大撒旦撒旦所",
"t2":"都是"
}
PUT t5/doc/2
{
"t1":"大撒旦撒旦所",
"t2":"都是",
"t3":"alex"
}
PUT t5/doc/3
{
"t3":"ale"
}
GET t5/doc/1
GET t5/doc/_search
{
"query": {
"match": {
"t3": "alex"
}
}
}
#
PUT s23/doc/6
{
"name":"alex",
"age":33,
"desc":"dsb",
"tag":["鸡汤"]
}
PUT s23/doc/7
{
"name":"wusir",
"age":33,
"desc":"wem",
"tag":["出风机"]
}
# List everything in s23 to verify the two documents indexed just above
# (the index name is s23; "s231" does not appear anywhere else in these notes).
GET s23/doc/_search
{
  "query": {
    "match_all": {}
  }
}
#### ----------Bool 查询--------
# 1. must and
# 2. should or
# 3. must_not ! 非
# 4. filter 过滤筛选
# 1. 年龄是33,姓名是wusir
GET s23/doc/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"age": "33"
}
},{
"match": {
"name": "wusir"
}
}
]
}
}
}
# 2. 年龄是33 ,或者姓名是wusir
GET s23/doc/_search
{
"query": {
"bool": {
"should": [
{
"match": {
"age": "33"
}
},{
"match": {
"name": "wusir"
}
}
]
}
}
}
# 3. 年龄既不是33,姓名也不是wusir
GET s23/doc/_search
{
"query": {
"bool": {
"must_not": [
{
"match": {
"age": "33"
}
},{
"match": {
"name": "wusir"
}
}
]
}
}
}
# 4. Find documents whose age is greater than 30
# Shape of the query: bool -> filter -> range -> field
# gt: greater than      gte: greater than or equal
# lt: less than         lte: less than or equal
GET s23/doc/_search
{
  "query": {
    "bool": {
      "filter": {
        "range": {
          "age": {
            "gt": 30
          }
        }
      }
    }
  }
}
# 5. should 和 filter 不推荐一起使用,查询内容会出现匹配失败的问题
## ----- index ----
# index 为 true 时字段会被索引,可以被搜索; 为 false 时不建立索引,无法按该字段搜索(数据仍然保存在 _source 中)
PUT a7
{
"mappings":{
"doc":{
"properties":{
"t1":{
"type":"text",
"index":true
},
"t2":{
"type":"text",
"index":false
}
}
}
}
}
# put index .eg.1
PUT a7/doc/1
{
"t1":"a",
"t2":"b"
}
# get index
# t2 字段查询不到
GET a7/doc/_search
{
"query": {
"match": {
"t2": "b"
}
}
}
# ------copy_to------
# 将字段 t1 和 t2 的值 拷贝到 f1 中, 这样可以通过 f1 同时搜索两个字段
DELETE a8
GET a8
PUT a8
{
"mappings": {
"doc":{
"properties":{
"t1":{
"type":"text",
"copy_to":"f1"
},
"t2":{
"type":"text",
"copy_to":"f1"
},
"f1":{
"type":"text"
}
}
}
}
}
PUT a8/doc/1
{
"t1":"里斯",
"t2":"张三"
}
GET a8/doc/_search
{
"query": {"match": {
"t2": "张三"
}}
}
GET a8/doc/_search
{
"query": {
"match": {
"f1": "张三"
}
}
}
#-----嵌套设计表结构mappings------
PUT a9
{
"mappings": {
"doc":{
"properties":{
"name":{
"type":"text"
},
"age":{
"type":"long"
},
"info":{
"properties":{
"addr":{
"type":"text"
},
"tel":{
"type":"long"
}
}
}
}
}
}
}
# Index a document into a9 so the nested "info" mapping defined above is actually used
# (writing to a10 would create a new, dynamically mapped index instead).
# "tel" is mapped as long, so the value must parse as a number: the trailing space is removed.
PUT a9/doc/1
{
  "name": "ttt",
  "age": "23",
  "info": {
    "addr": "Dsahjkd",
    "tel": "12321312"
  }
}
# Inspect the a9 index (mappings and settings)
GET a9
## ----settings-----
# number_of_shards : 主分片
# number_of_replicas :复制分片
PUT a11
{
"settings": {
"number_of_shards":3,
"number_of_replicas":1
}
}
GET a11
PUT m5
{
"mappings": {
"doc": {
"dynamic":false,
"properties": {
"first_name":{
"type": "text",
"copy_to": "full_name"
},
"last_name": {
"type": "text",
"copy_to": "full_name"
},
"full_name": {
"type": "text"
}
}
}
}
}
PUT m5/doc/1
{
"first_name":"tom",
"last_name":"ben"
}
PUT m5/doc/2
{
"first_name":"john",
"last_name":"smith"
}
GET m5/doc/_search
{
"query": {
"match": {
"first_name": "tom"
}
}
}
GET m5/doc/_search
{
"query": {
"match": {
"full_name": "john"
}
}
}
#--------ignore_above----------
# 长度超过 ignore_above 设置值的字符串不会被索引(无法被搜索), 但原始值仍然保留在 _source 中
PUT w1
{
"mappings": {
"doc":{
"properties":{
"t1":{
"type":"keyword",
"ignore_above":5
},
"t2":{
"type":"keyword",
"ignore_above":10
}
}
}
}
}
PUT w1/doc/1
{
"t1":"deng",
"t2":"dengshuainb"
}
GET w1/doc/_search
{
"query": {
"match_all": {}
}
}
# 1. A value longer than the field's ignore_above limit is not indexed for that field.
# t2 holds "dengshuainb" (11 characters) and its limit is 10, so searching t2 for it
# is expected to return no hits even though the document itself is stored.
GET w1/doc/_search
{
  "query": {
    "match": {
      "t2": "dengshuainb"
    }
  }
}
ES 之 分词
#### ik 分词器
# ik_max_word 最细粒度
# ik_smart 最粗粒度
POST _analyze
{
"analyzer": "ik_max_word",
"text": "To be or not to be, ---莎士比亚"
}
POST _analyze
{
"analyzer": "ik_smart",
"text": "上海自来水来自海上"
}
POST _analyze
{
"analyzer": "ik_max_word",
"text": "上海自来水来自海上"
}
Python操作 ES
# 模块安装
pip3 install elasticsearch
# 实例化 es 对象
from elasticsearch import Elasticsearch
es = Elasticsearch()
"""
### es 集群
Elasticsearch(
["192.168.0.1","192.168.0.2","192.168.0.3"], # 集群列表
sniff_on_start=True, # 连接前测试
sniff_on_connection_fail=True, # 节点无响应时,刷新节点
sniffer_timeout=60, # 每隔 60 秒重新嗅探(刷新)一次节点列表
ignore=400 # 设置忽略状态码
)
"""
基础操作
### 1. search 查询
# filter_path 参数 结果过滤
# hits.hits 是 第二层结果
# 背后技术 jsonpath
# Query body: match documents whose "age" field is 19.
body = {
"query":{
"match":{
"age":19
}
}
}
# Run the search against the 'dengshuai' index and print the full response.
print(
es.search(index='dengshuai',body=body)
)
# Same search, with filter_path limiting the printed response to hits.hits and hits.total.
print(
es.search(index='dengshuai',body=body,filter_path=['hits.hits','hits.total'])
)
# Same search, with a wildcard filter_path selecting everything under "hits".
print(
es.search(index='dengshuai',body=body,filter_path=['hits.*'])
)
ES 对象操作
# get 查不到会报错 , (不常用)
print(es.get(index='dengshuai',doc_type='doc',id=2))
# get 报错
# print(es.get(index='dengshuai',doc_type='doc',id=3))
# index 创建索引, 索引存在就更新,不存在就创建
print(es.index(index='b2',doc_type='doc',id=1,body={
"name":"zhangsan",
"desc":"hh"
}))
print(es.index(index='b2',doc_type='doc',id=2,body={
"name":"lisi",
"desc":"jj"
}))
# get_source 直接返回成字典数据
print(es.get_source(index='b2',doc_type='doc',id=1))
# count() 统计数量
print(es.count(index='b2',doc_type='doc'))
# create() 创建索引 , 只创建 ,不更新
print(es.create(index='b3',doc_type='doc',id=1,body={
'name':"dsads",
"desc":"dddd"
}))
# delete() 删除
print(es.delete(index='b3',doc_type='doc',id=1))
# delete_by_query() 删除符合条件的
# 备注: body 必须符合 es 语法
body = {
"query":{
"match":{
"name": "zhangsan",
}
}
}
print(es.delete_by_query(index='b2', body=body))
# exists() 是否存在
print(es.exists(index='b2',doc_type='doc',id=2))
# info 返回集群的信息
print(es.info())
# ping() 检查是否能 ping 通集群
print(es.ping())
ES Indices 索引操作
######### Indices 索引操作
# indices.get_mapping() 返回mapping 映射信息
print(es.indices.get_mapping(index="b2"))
# indices.get_settings() 返回 settings 配置信息
print(es.indices.get_settings(index='b2'))
# get 获取 mapping 和setting 信息
print(es.indices.get(index="b2"))
# exists 是否存在 索引库
print(es.indices.exists(index='b4'))
# create 创建索引库, 创建mapping信息,setting信息. ******* 必须会
body={}
print(es.indices.create(index='b4',body=body))
print(es.indices.create(index='b5',body={
"mappings":{
"doc":{
"properties":{
"name":{
"type":"text",
}
}
}
}
}))
# delete() 删除
print(es.indices.delete(index='b5'))
# close(): close an index (e.g. for night-time maintenance)
# time.sleep is used below; no import of time is visible in these notes, so import it here.
import time

# Create a document in b6 and read it back while the index is still open.
print(es.create(index='b6', doc_type='doc', id=1, body={
    "name": "123",
}))
print(es.get(index='b6', doc_type="doc", id=1))
print(es.indices.close(index='b6'))
# open(): reopen the index
print(es.indices.open("b6"))
# Brief pause before reading from the reopened index.
time.sleep(1)
print(es.get(index='b6', doc_type="doc", id=1))
# analyze() 分析
print(es.indices.analyze(body={
"analyzer":"ik_smart",
"text":"上海自来水来自海上"
}))
# # cat.health 查看 集群是否是健康的
print(es.cat.health(format="json"))
print(es.cat.health())