• ES使用C# NEST创建索引支持nGram+MatchPhrase查询


    ES写法: 

    PUT index_test
    {
      "settings": {
        "index.max_ngram_diff":5,
        "analysis": {
          "analyzer": {
            "ngram_analyzer_short": {
              "filter": "lowercase",
              "tokenizer": "ngram_tokenizer_short"
            },
            "ngram_analyzer_long": {
              "filter": "lowercase",
              "tokenizer": "ngram_tokenizer_long"
            }
          },
          "tokenizer": {
            "ngram_tokenizer_short": {
              "type": "nGram",
              "min_gram": "1",
              "max_gram": "4"
            },
            "ngram_tokenizer_long": {
              "type": "nGram",
              "min_gram": "5",
              "max_gram": "5"
            }
          }
        }
      },
      "mappings": {
        "properties": {
            "vcContent": {
              "type": "keyword",
              "fields": {
                "long_text": {
                  "type": "text",
                  "analyzer": "ngram_analyzer_long"
                },
                "short_text": {
                  "type": "text",
                  "analyzer": "ngram_analyzer_short"
                }
              }
            }
          }
      }
    }

    C#写法:

    1、创建索引

    /// <summary>
    /// Creates the index <paramref name="indexName"/> if it does not exist yet, with two
    /// nGram analyzers registered and the keyword property <c>name</c> mapped with the
    /// <c>long_text</c> / <c>short_text</c> text sub-fields that use them.
    /// </summary>
    /// <typeparam name="T">Document type whose properties are auto-mapped into the index.</typeparam>
    /// <param name="elasticClient">Client used to talk to the cluster.</param>
    /// <param name="indexName">Name of the index to create; must not be empty or whitespace.</param>
    /// <param name="numberOfShards">Value written to <c>index.number_of_shards</c>.</param>
    /// <param name="numberOfReplicas">Value written to <c>index.number_of_replicas</c>.</param>
    /// <param name="refreshInterval">Refresh interval in seconds, written to <c>index.refresh_interval</c>.</param>
    /// <exception cref="ArgumentException"><paramref name="indexName"/> is null, empty or whitespace.</exception>
    /// <exception cref="Exception">The create-index request returned an invalid response.</exception>
    public static async Task CreateIndexAsync<T>(this ElasticClient elasticClient, string indexName = "", int numberOfShards = 5, int numberOfReplicas = 1, int refreshInterval = 5) where T : class
    {
        if (string.IsNullOrWhiteSpace(indexName)) throw new ArgumentException("索引名称不可为空");

        // Nothing to do when the index is already there.
        if ((await elasticClient.Indices.ExistsAsync(indexName)).Exists)
        {
            return;
        }

        var rawSettings = new Dictionary<string, object>
        {
            ["index.number_of_shards"] = numberOfShards,
            ["index.number_of_replicas"] = numberOfReplicas,
            ["index.refresh_interval"] = refreshInterval + "s",
            // NOTE(review): a very large result window; confirm the cluster can afford deep paging.
            ["index.max_result_window"] = 2000000000,
            // Must be at least (max_gram - min_gram) of every nGram tokenizer registered below.
            ["index.max_ngram_diff"] = 5,
        };

        var indexSettings = new IndexSettings(rawSettings)
        {
            Analysis = new Analysis
            {
                Analyzers = new Analyzers(),
                Tokenizers = new Tokenizers(),
            },
        };

        // Registers one lower-casing custom analyzer together with its nGram tokenizer.
        void AddNGramAnalyzer(string analyzerName, string tokenizerName, int minGram, int maxGram)
        {
            var analyzer = new CustomAnalyzer
            {
                Tokenizer = tokenizerName,
                Filter = new List<string>() { "lowercase" },
            };
            indexSettings.Analysis.Analyzers.Add(analyzerName, analyzer);
            indexSettings.Analysis.Tokenizers.Add(tokenizerName, new Nest.NGramTokenizer { MaxGram = maxGram, MinGram = minGram });
        }

        // Short content: grams of 1 to 4 characters (search terms shorter than 5 characters).
        AddNGramAnalyzer("ngram_analyzer_short", "ngram_tokenizer_short", 1, 4);
        // Long content: fixed 5-character grams (search terms of 5 characters or more).
        AddNGramAnalyzer("ngram_analyzer_long", "ngram_tokenizer_long", 5, 5);

        var indexState = new IndexState { Settings = indexSettings };
        var response = await elasticClient.Indices.CreateAsync(indexName, p => p.InitializeUsing(indexState)
            .Map<T>(x => x.AutoMap<T>()
                // Configure the "name" property for nGram search through two text sub-fields.
                .Properties<T>(pp => pp.Keyword(k => k.Name("name").Fields(f =>
                    f.Text(t1 => t1.Name("long_text").Analyzer("ngram_analyzer_long"))
                     .Text(t2 => t2.Name("short_text").Analyzer("ngram_analyzer_short"))
                )))));

        if (!response.IsValid)
        {
            // OriginalException may be absent, so fall back to the server-side reason
            // and finally to the client's debug information instead of dereferencing null.
            var reason = response.OriginalException?.Message
                ?? response.ServerError?.Error?.Reason
                ?? response.DebugInformation;
            throw new Exception($"创建索引失败:{reason}", response.OriginalException);
        }
    }

    2、搜索条件按字符长度指定搜索方式

    // Query data: build the "must" clauses for the optional name search.
                var mustFilters = new List<Func<QueryContainerDescriptor<TempList>, QueryContainer>>();
                if (!string.IsNullOrEmpty(name))
                {
                    if (name.Length >= 5)
                    {
                        // Long input: phrase match against the sub-field analyzed into 5-character grams.
                        mustFilters.Add(q => q.MatchPhrase(m => m.Field("name.long_text").Query(name)));
                    }
                    else
                    {
                        // Short input: exact term lookup against the 1-4 character grams.
                        // Term queries are not analyzed, while the indexed grams went through the
                        // "lowercase" filter, so the value is lower-cased here to keep the search
                        // case-insensitive.
                        var shortTerm = name.ToLowerInvariant();
                        mustFilters.Add(q => q.Term(tq => tq.Field("name.short_text").Value(shortTerm)));
                    }
                }
                // Result list
                var idxName = "index_test";
                var result = await _esClientService.Client.SearchAsync<TempList>(q =>
                 q.Index(idxName)
                .Query(rq => rq.Bool(b => b.Must(mustFilters)))
                .From(0).Size(10000));
                // Copy into a new list instead of downcasting the read-only collection,
                // which would depend on the client's internal collection type.
                return new List<TempList>(result.Documents);

    3、实体映射

    /// <summary>
    /// Document type used as the generic argument of the search request in this article.
    /// Member names are kept lower-case exactly as originally declared.
    /// </summary>
    public class TempList
    {
        public int id { get; set; }

        /// <summary>Text searched through the <c>name.long_text</c> / <c>name.short_text</c> sub-fields.</summary>
        public string name { get; set; }

        public string description { get; set; }

        public int type { get; set; }
    }

    参考:https://www.elastic.co/guide/en/elasticsearch/client/net-api/7.x/multi-fields.html

    NEST Nuget库版本:7.x.x

  • 相关阅读:
    JQuery实现1024小游戏
    Windows Server2008 R2安装wampserver缺少api-ms-win-crt-runtime-l1-1-0.dll解决方案
    ASP.NET MVC 邮件发送的功能(微软邮箱发送)。
    浅谈撞库防御策略
    极验高并发验证服务背后的技术实现
    2015年国内数据安全事件盘点
    转载——验证码的昨天、今天和明天
    转载——最近百度云盘不提供搜索,闲来无事,玩玩python爬虫,爬一下百度云盘的资源
    SQL 查询语句
    SQL Server 目录
  • 原文地址:https://www.cnblogs.com/yangyuping/p/16142161.html
Copyright © 2020-2023  润新知