• 简单的字符串相似度计算


    先计算两个字符串的 Levenshtein 距离(编辑距离),再用较长字符串的长度求比率:相似度 = (较长长度 - 距离) / 较长长度

    代码
    /// <summary>
    /// String extension methods built on the Levenshtein (edit) distance.
    /// </summary>
    internal static class StringExt
    {
        /// <summary>
        /// Computes the Levenshtein distance between two strings: the minimum number of
        /// single-character insertions, deletions and substitutions needed to turn
        /// <paramref name="source"/> into <paramref name="target"/>.
        /// </summary>
        /// <remarks>
        /// Characters are compared one <see cref="char"/> at a time with <c>==</c>,
        /// so the comparison is case-sensitive.
        /// </remarks>
        /// <param name="source">The source string.</param>
        /// <param name="target">The target string.</param>
        /// <returns>The edit distance; 0 when the strings are equal.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="source"/> or <paramref name="target"/> is <c>null</c>.
        /// </exception>
        public static int CalcDistance(this string source, string target)
        {
            // Extension methods can be invoked on a null receiver, so validate both arguments.
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }
            if (target == null)
            {
                throw new ArgumentNullException("target");
            }

            int n = source.Length;
            int m = target.Length;

            // Against an empty string the distance is simply the other string's length.
            if (m == 0)
            {
                return n;
            }
            if (n == 0)
            {
                return m;
            }

            // Each cell depends only on the row above and the cell to its left,
            // so two rolling rows are enough instead of a full (n + 1) x (m + 1) matrix.
            int[] previous = new int[m + 1];
            int[] current = new int[m + 1];

            // Row 0: turning the empty prefix of source into the first j chars of target takes j insertions.
            for (int j = 0; j <= m; j++)
            {
                previous[j] = j;
            }

            for (int i = 1; i <= n; i++)
            {
                char si = source[i - 1];

                // Column 0: turning the first i chars of source into the empty prefix takes i deletions.
                current[0] = i;

                for (int j = 1; j <= m; j++)
                {
                    int cost = si == target[j - 1] ? 0 : 1;

                    int deletion = previous[j] + 1;
                    int insertion = current[j - 1] + 1;
                    int substitution = previous[j - 1] + cost;

                    current[j] = Math.Min(deletion, Math.Min(insertion, substitution));
                }

                // The row just filled becomes the "previous" row of the next iteration.
                int[] swap = previous;
                previous = current;
                current = swap;
            }

            // After the final swap the last computed row lives in "previous".
            return previous[m];
        }

        /// <summary>
        /// Computes the similarity of two strings as
        /// <c>(maxLength - distance) / maxLength</c>, where <c>distance</c> is the
        /// Levenshtein distance and <c>maxLength</c> is the length of the longer string.
        /// </summary>
        /// <param name="source">The source string.</param>
        /// <param name="target">The target string.</param>
        /// <returns>
        /// A value from 0 to 1: 1 when the strings are equal (including two empty
        /// strings), 0 when exactly one of them is empty.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="source"/> or <paramref name="target"/> is <c>null</c>.
        /// </exception>
        public static double CalcSimilarity(this string source, string target)
        {
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }
            if (target == null)
            {
                throw new ArgumentNullException("target");
            }

            int max = Math.Max(source.Length, target.Length);

            // Two empty strings are identical; returning early also avoids dividing by zero.
            if (max == 0)
            {
                return 1.0;
            }

            // If exactly one string is empty the distance equals max, so the ratio below is 0.
            int distance = source.CalcDistance(target);
            return (double)(max - distance) / max;
        }
    }

  • 相关阅读:
    解决谷歌高版本没有设置字符编码的选项的问题
    System.nanoTime与System.currentTimeMillis的区别
    jQuery学习笔记(一)
    linux问题
    阿里资源学习
    PHP-Gealman
    php-fpm
    GIT使用
    设置导出的excel数据
    CI框架, 参数验证
  • 原文地址:https://www.cnblogs.com/heros/p/1910562.html
Copyright © 2020-2023  润新知