• Levenshtein Distance + LCS 算法计算两个字符串的相似度


    /// <summary>
    /// Computes the Levenshtein (edit) distance between two strings: the minimum
    /// number of single-character insertions, deletions and substitutions needed
    /// to turn <paramref name="source"/> into <paramref name="target"/>.
    /// Characters are compared as UTF-16 code units (<see cref="char"/> values).
    /// </summary>
    /// <param name="source">The first string. Must not be null.</param>
    /// <param name="target">The second string. Must not be null.</param>
    /// <returns>
    /// The edit distance; 0 when the strings are equal. If one string is empty the
    /// result is the length of the other.
    /// </returns>
    public static int LevenshteinDistance(string source, string target)
    {
        int sourceLength = source.Length;
        int targetLength = target.Length;

        // Turning an empty string into the other one takes one insertion per character.
        if (sourceLength == 0)
        {
            return targetLength;
        }
        if (targetLength == 0)
        {
            return sourceLength;
        }

        // distances[r, c] = edit distance between the first r characters of target
        // and the first c characters of source.
        int[,] distances = new int[targetLength + 1, sourceLength + 1];

        // First row / first column: distance from the empty prefix.
        for (int column = 0; column <= sourceLength; column++)
        {
            distances[0, column] = column;
        }
        for (int row = 1; row <= targetLength; row++)
        {
            distances[row, 0] = row;
        }

        for (int row = 1; row <= targetLength; row++)
        {
            char targetChar = target[row - 1];
            for (int column = 1; column <= sourceLength; column++)
            {
                int substitutionCost = source[column - 1] == targetChar ? 0 : 1;

                int viaDiagonal = distances[row - 1, column - 1] + substitutionCost;
                int viaLeft = distances[row, column - 1] + 1;
                int viaAbove = distances[row - 1, column] + 1;

                distances[row, column] = Math.Min(Math.Min(viaDiagonal, viaLeft), viaAbove);
            }
        }

        return distances[targetLength, sourceLength];
    }
    
    
    /// <summary>
    /// Returns the length of the longest run of consecutive characters that occurs
    /// in both strings.
    /// </summary>
    /// <remarks>
    /// Despite the method name, the recurrence resets to zero on every mismatch, so
    /// the value computed is the longest common <em>substring</em> (contiguous), not
    /// the longest common subsequence. For example "abcXdef" and "abcdef" yield 3,
    /// not 6. Characters are compared as UTF-16 code units.
    /// </remarks>
    /// <param name="source">The first string. Must not be null.</param>
    /// <param name="target">The second string. Must not be null.</param>
    /// <returns>
    /// The length of the longest shared contiguous run; 0 if either string is empty
    /// or the strings share no character.
    /// </returns>
    public static int LongestCommonSubsequence(string source, string target)
    {
        if (source.Length == 0 || target.Length == 0)
        {
            return 0;
        }

        // Only the previous row of the DP table is ever read, so two rolling rows
        // replace the full matrix. Entry [j + 1] holds the length of the common run
        // ending at source[i] / target[j]; entry [0] is a permanent zero border.
        int[] previousRow = new int[target.Length + 1];
        int[] currentRow = new int[target.Length + 1];
        int longest = 0;

        for (int i = 0; i < source.Length; i++)
        {
            for (int j = 0; j < target.Length; j++)
            {
                if (source[i] == target[j])
                {
                    int runLength = previousRow[j] + 1;
                    currentRow[j + 1] = runLength;
                    if (runLength > longest)
                    {
                        longest = runLength;
                    }
                }
                else
                {
                    currentRow[j + 1] = 0;
                }
            }

            // Swap rows; every cell except the zero border is overwritten next pass.
            int[] swap = previousRow;
            previousRow = currentRow;
            currentRow = swap;
        }

        return longest;
    }
    
    /// <summary>
    /// Scores how similar two strings are by combining the results of
    /// <c>LevenshteinDistance</c> and <c>LongestCommonSubsequence</c> as
    /// <c>lcs / (ld + lcs)</c>. A larger value means more similar.
    /// </summary>
    /// <param name="source">The first string. Must not be null.</param>
    /// <param name="target">The second string. Must not be null.</param>
    /// <returns>
    /// A value between 0 and 1. Two empty strings score 1: in that case both
    /// measures are 0 and the plain ratio would be 0/0 (NaN).
    /// </returns>
    public static float StringSimilarity(string source, string target)
    {
        int ld = LevenshteinDistance(source, target);
        int lcs = LongestCommonSubsequence(source, target);

        int denominator = ld + lcs;
        if (denominator == 0)
        {
            // Zero distance and zero common length: avoid the 0/0 division.
            return 1f;
        }

        return (float)lcs / denominator;
    }
    /// <summary>
    /// Measures what fraction of the characters of <paramref name="str"/> also
    /// occur somewhere in <paramref name="sourceString"/>. The original author
    /// notes that this measure is suited to Chinese text.
    /// </summary>
    /// <remarks>
    /// Every character of <paramref name="str"/> is counted, repeats included;
    /// position and order are ignored. The measure is not symmetric: swapping the
    /// arguments can change the result. Characters are compared as UTF-16 code units.
    /// </remarks>
    /// <param name="sourceString">The string whose characters form the lookup set. Must not be null.</param>
    /// <param name="str">The string whose characters are tested. Must not be null.</param>
    /// <returns>
    /// hits / length of <paramref name="str"/>, a value between 0 and 1. When
    /// <paramref name="str"/> is empty the ratio is undefined (0/0); this method then
    /// returns 1 if <paramref name="sourceString"/> is empty too, otherwise 0.
    /// </returns>
    public static double SimilarityWith(string sourceString, string str)
    {
        int sourceLength = sourceString.Length;
        int total = str.Length;

        if (total == 0)
        {
            // Nothing to test: avoid 0/0 (NaN). Two empty strings count as identical.
            return sourceLength == 0 ? 1.0 : 0.0;
        }

        // A set gives constant-time membership checks instead of rescanning the source
        // for every character. Fully qualified so no extra using directive is required.
        var sourceChars = new System.Collections.Generic.HashSet<char>(sourceString);

        int hits = 0;
        foreach (char ch in str)
        {
            if (sourceChars.Contains(ch))
            {
                hits++;
            }
        }

        return (double)hits / total;
    }


  • 相关阅读:
    Qt下如何修改文件的时间(全平台修改)
    Qt在windows 平台操作保存execel的表格(通过QAxObject来操作)
    VirtualTreeView控件
    VS2013设置release版本可调试
    工程脚本插件方案
    decode函数
    一个消息调度框架构建
    数据访问模式之Repository模式
    Angular.js Services
    OpenCascade简介
  • 原文地址:https://www.cnblogs.com/smartsmile/p/6234065.html
Copyright © 2020-2023  润新知