• 高效比对,返回最短编辑距离算法匹配度最高的数据


            #region 高效比对返回匹配度最高的数据
            /// <summary>
            /// For every entry in <paramref name="sourceList"/>, finds the entry in
            /// <paramref name="targetList"/> whose <c>key</c> is closest to the source
            /// <c>key</c> by Levenshtein edit distance.
            /// </summary>
            /// <param name="sourceList">Entries whose <c>key</c> is to be matched.</param>
            /// <param name="targetList">
            /// Candidate entries; the <c>value</c> of the winning candidate is copied into the result.
            /// </param>
            /// <returns>
            /// One entry per distinct source key, holding the source key, the closest
            /// candidate's <c>value</c>, and the edit distance in <c>match</c> (lower means
            /// closer). Entries are ordered closest first. The list is empty when either
            /// input list is empty.
            /// </returns>
            /// <exception cref="ArgumentNullException">
            /// <paramref name="sourceList"/> or <paramref name="targetList"/> is null.
            /// </exception>
            public static List<MapToData> GetAutoMapData(List<MapToData> sourceList, List<MapToData> targetList)
            {
                if (sourceList == null)
                {
                    throw new ArgumentNullException("sourceList");
                }
                if (targetList == null)
                {
                    throw new ArgumentNullException("targetList");
                }

                // One slot per source index. Each parallel iteration writes only its own
                // slot, so no lock or concurrent collection is needed (List<T>.Add is not
                // safe to call from several threads at once).
                MapToData[] bestPerSource = new MapToData[sourceList.Count];

                Parallel.For(0, sourceList.Count, i =>
                {
                    var sourceValue = sourceList[i].key;
                    MapToData best = null;

                    foreach (var candidate in targetList)
                    {
                        int distance = LevenshteinDistance(sourceValue, candidate.key);

                        // A smaller edit distance is a better match. The strict '<' keeps
                        // the first candidate (in targetList order) when distances tie.
                        if (best == null || distance < best.match)
                        {
                            best = new MapToData { key = sourceValue, value = candidate.value, match = distance };
                        }
                    }

                    bestPerSource[i] = best;
                });

                return bestPerSource
                    .Where(entry => entry != null)      // slots stay null when targetList is empty
                    .GroupBy(entry => entry.key)        // duplicate source keys collapse to one result
                    .Select(group => group.First())
                    .OrderBy(entry => entry.match)      // stable sort: ties keep source order
                    .ToList();
            }
            #endregion
    
            #region LD最短编辑距离算法
    
            /// <summary>
            /// Computes the Levenshtein (minimum edit) distance between two strings: the
            /// smallest number of single-character insertions, deletions and substitutions
            /// needed to turn <paramref name="source"/> into <paramref name="target"/>.
            /// Characters are compared by exact UTF-16 code unit (case-sensitive).
            /// </summary>
            /// <param name="source">Source string; null is treated as an empty string.</param>
            /// <param name="target">Target string; null is treated as an empty string.</param>
            /// <returns>The edit distance; 0 when the strings are identical.</returns>
            public static int LevenshteinDistance(string source, string target)
            {
                // Treat null like "" instead of failing with a NullReferenceException.
                source = source ?? string.Empty;
                target = target ?? string.Empty;

                int sourceLength = source.Length;
                int targetLength = target.Length;
                if (sourceLength == 0)
                {
                    return targetLength;
                }
                if (targetLength == 0)
                {
                    return sourceLength;
                }

                // Only the previous and the current row of the DP table are ever read,
                // so two rows are kept instead of the whole matrix.
                int[] previousRow = new int[sourceLength + 1];
                int[] currentRow = new int[sourceLength + 1];

                // Row 0: turning a source prefix of length col into "" costs col deletions.
                for (int col = 0; col <= sourceLength; col++)
                {
                    previousRow[col] = col;
                }

                for (int row = 1; row <= targetLength; row++)
                {
                    // Column 0: building a target prefix of length row from "" costs row insertions.
                    currentRow[0] = row;
                    char targetChar = target[row - 1];

                    for (int col = 1; col <= sourceLength; col++)
                    {
                        int substitutionCost = source[col - 1] == targetChar ? 0 : 1;
                        int viaDiagonal = previousRow[col - 1] + substitutionCost;
                        int viaLeft = currentRow[col - 1] + 1;
                        int viaAbove = previousRow[col] + 1;
                        currentRow[col] = Math.Min(Math.Min(viaDiagonal, viaLeft), viaAbove);
                    }

                    // The row just filled becomes "previous" for the next iteration.
                    int[] swap = previousRow;
                    previousRow = currentRow;
                    currentRow = swap;
                }

                // After the final swap the last computed row is in previousRow.
                return previousRow[sourceLength];
            }
            #endregion

        /// <summary>
        /// Simple data holder for one fuzzy-matching row: the string being matched,
        /// an associated payload, and a numeric match score.
        /// </summary>
        public class MapToData
        {
            /// <summary>
            /// The string to match. Defaults to the empty string.
            /// </summary>
            public string key = string.Empty;

            /// <summary>
            /// The payload associated with the match. Defaults to a fresh <see cref="object"/>.
            /// </summary>
            public object value = new object();

            /// <summary>
            /// The match score. Defaults to 0.
            /// </summary>
            public int match;
        }
  • 相关阅读:
    常用的PHP图形处理函数
    PHP常用文件操作函数
    PHP常用正则表达式函数浅析
    PHP类常量的常见访问方法
    使用PDO操作MySQL
    js数组的遍历方法,维持索引?splice与forEach && 孤儿对象形成,造成内存泄漏,置空等待垃圾回收
    [DOM] Input elements should have autocomplete attributes (suggested: "new-password"): (More info: https://goo.gl/9p2vKq)
    $(...).get(...).addClass is not a function
    使用淘宝镜像的命令
    对象、数组与JSON字符串之间的转换
  • 原文地址:https://www.cnblogs.com/smartsmile/p/6234064.html
Copyright © 2020-2023  润新知