• 余弦相似度 算法


    import java.util.ArrayList;
    import java.util.List;

    /**
     * Cosine similarity of two numeric vectors supplied as whitespace-separated strings.
     *
     * <p>Two independent entry points are offered:
     * <ul>
     *   <li>an instance API ({@link #ComputerDecision(String, String)} + {@link #sim()}) that
     *       parses the components as integers into {@code vector1}/{@code vector2};</li>
     *   <li>a static API ({@link #ComputerDecisiond1(String, String)} + {@link #simd()}) that
     *       parses the components as doubles into the shared {@code vector3}/{@code vector4}.</li>
     * </ul>
     *
     * <p>The dot product walks the first vector's indices. If the second vector is shorter an
     * {@link IndexOutOfBoundsException} is thrown; if it is longer, its extra components are
     * ignored by the dot product but still contribute to its norm. When either vector has a
     * zero norm the division is 0/0 and the result is {@code NaN}.
     *
     * <p>The static vectors are plain shared mutable state with no synchronization.
     */
    public class ComputerDecision {
        // Integer vectors filled by the constructor and read by sim().
        List<Integer> vector1 = new ArrayList<Integer>();
        List<Integer> vector2 = new ArrayList<Integer>();
        // Double vectors filled by ComputerDecisiond1/ComputerDecisiond and read by simd().
        static List<Double> vector3 = new ArrayList<Double>();
        static List<Double> vector4 = new ArrayList<Double>();

        /**
         * Parses both strings as integer vectors.
         *
         * @param string1 whitespace-separated integers forming the first vector
         * @param string2 whitespace-separated integers forming the second vector
         * @throws NumberFormatException if a token is not a valid integer (this includes an
         *     empty or blank input string, which yields a single empty token)
         */
        public ComputerDecision(String string1, String string2) {
            for (String token : tokenize(string1)) {
                vector1.add(Integer.parseInt(token));
            }
            for (String token : tokenize(string2)) {
                vector2.add(Integer.parseInt(token));
            }
        }

        /**
         * Parses both strings as double vectors and stores them in the shared static vectors,
         * replacing whatever an earlier call stored there.
         *
         * @param string1 whitespace-separated decimal numbers forming the first vector
         * @param string2 whitespace-separated decimal numbers forming the second vector
         * @throws NumberFormatException if a token is not a valid number; the previously
         *     stored vectors are left untouched in that case
         */
        public static void ComputerDecisiond1(String string1, String string2) {
            loadDoubleVectors(string1, string2);
        }

        /**
         * Cosine similarity of the integer vectors parsed by the constructor.
         *
         * @return dot(vector1, vector2) / sqrt(|vector1|^2 * |vector2|^2)
         */
        public double sim() {
            return pointMulti(vector1, vector2) / sqrtMulti(vector1, vector2);
        }

        /**
         * Cosine similarity of the shared double vectors.
         *
         * @return dot(vector3, vector4) / sqrt(|vector3|^2 * |vector4|^2)
         */
        public static double simd() {
            return pointMultid(vector3, vector4) / sqrtMultid(vector3, vector4);
        }

        /** Product of the two Euclidean norms, computed as sqrt(sumSquares1 * sumSquares2). */
        private double sqrtMulti(List<Integer> vector1, List<Integer> vector2) {
            return Math.sqrt(squares(vector1) * squares(vector2));
        }

        /** Product of the two Euclidean norms, computed as sqrt(sumSquares1 * sumSquares2). */
        private static double sqrtMultid(List<Double> vector1, List<Double> vector2) {
            return Math.sqrt(squaresd(vector1) * squaresd(vector2));
        }

        /** Sum of squares of the components. */
        private double squares(List<Integer> vector) {
            double result = 0;
            for (Integer component : vector) {
                // Widen before multiplying: int * int silently overflows above ~46340.
                double value = component;
                result += value * value;
            }
            return result;
        }

        /** Sum of squares of the components. */
        private static double squaresd(List<Double> vector) {
            double result = 0;
            for (Double component : vector) {
                result += component * component;
            }
            return result;
        }

        /** Dot product over the indices of {@code vector1}. */
        private double pointMulti(List<Integer> vector1, List<Integer> vector2) {
            double result = 0;
            for (int i = 0; i < vector1.size(); i++) {
                // Widen before multiplying so large components cannot overflow int.
                result += (double) vector1.get(i) * vector2.get(i);
            }
            return result;
        }

        /** Dot product over the indices of {@code vector1}. */
        private static double pointMultid(List<Double> vector1, List<Double> vector2) {
            double result = 0;
            for (int i = 0; i < vector1.size(); i++) {
                result += vector1.get(i) * vector2.get(i);
            }
            return result;
        }

        /**
         * Instance-level equivalent of {@link #ComputerDecisiond1(String, String)}: parses both
         * strings as double vectors into the shared static vectors, replacing their contents.
         *
         * @param string1 whitespace-separated decimal numbers forming the first vector
         * @param string2 whitespace-separated decimal numbers forming the second vector
         * @throws NumberFormatException if a token is not a valid number
         */
        public void ComputerDecisiond(String string1, String string2) {
            loadDoubleVectors(string1, string2);
        }

        /** Splits the input on runs of any whitespace after trimming both ends. */
        private static String[] tokenize(String text) {
            // "\\s+" is the regex whitespace class; a single-backslash "\s" is not.
            return text.trim().split("\\s+");
        }

        /** Parses both inputs completely, then swaps them into the shared static vectors. */
        private static void loadDoubleVectors(String string1, String string2) {
            List<Double> first = new ArrayList<Double>();
            for (String token : tokenize(string1)) {
                first.add(Double.valueOf(token));
            }
            List<Double> second = new ArrayList<Double>();
            for (String token : tokenize(string2)) {
                second.add(Double.valueOf(token));
            }
            // Replace rather than append so repeated calls do not mix old and new components.
            vector3.clear();
            vector3.addAll(first);
            vector4.clear();
            vector4.addAll(second);
        }

        /** Demo: prints the cosine similarity of two sample double vectors. */
        public static void main(String[] args) {
            String string3 = "2.5 5.5 9.1 12.1 ";
            String string4 = "1.5 5.4 9.0 12.2 ";

            ComputerDecisiond1(string3, string4);
            System.out.println(simd());
        }
    }

    ==========================================================

    字符串（按空格分词、基于词频的余弦相似度）

    import java.util.HashMap;
    import java.util.Iterator;
    import java.util.Map;

    /**
     * Unweighted cosine similarity between two strings, using whitespace-separated terms and
     * their raw occurrence counts as the vector components.
     */
    public class Cossimilar
    {
        /**
         * Computes the cosine similarity of the term-frequency vectors of two strings.
         *
         * <p>Terms are separated by runs of whitespace and compared case-sensitively. Leading,
         * trailing and repeated whitespace never produces an empty term.
         *
         * @param str1 first text
         * @param str2 second text
         * @return the cosine of the angle between the two term-count vectors, or {@code 0.0}
         *     when either text contains no terms (the cosine is undefined there)
         * @throws NullPointerException if either argument is {@code null}
         */
        public static double getSimilarDegree(String str1, String str2)
        {
            // Vector space model: term -> {occurrences in str1, occurrences in str2}.
            Map<String, int[]> vectorSpace = new HashMap<String, int[]>();
            countTerms(str1, 0, vectorSpace);
            countTerms(str2, 1, vectorSpace);

            double vector1SquareSum = 0.0;
            double vector2SquareSum = 0.0;
            double vectorProduct = 0.0;
            for (int[] counts : vectorSpace.values())
            {
                // Widen first so the products are computed in double, not int.
                double first = counts[0];
                double second = counts[1];

                vector1SquareSum += first * first;
                vector2SquareSum += second * second;
                vectorProduct += first * second;
            }

            // A text without terms has a zero-length vector; avoid returning 0/0 = NaN.
            if (vector1SquareSum == 0.0 || vector2SquareSum == 0.0)
            {
                return 0.0;
            }

            return vectorProduct / (Math.sqrt(vector1SquareSum) * Math.sqrt(vector2SquareSum));
        }

        /**
         * Adds the term counts of {@code text} to column {@code slot} of the vector space.
         */
        private static void countTerms(String text, int slot, Map<String, int[]> vectorSpace)
        {
            for (String term : text.trim().split("\\s+"))
            {
                // Splitting an empty (or all-whitespace) text still yields one empty token.
                if (term.isEmpty())
                {
                    continue;
                }
                int[] counts = vectorSpace.get(term);
                if (counts == null)
                {
                    counts = new int[2];
                    vectorSpace.put(term, counts);
                }
                ++counts[slot];
            }
        }

        /**
         * Demo driver: prints the similarity of several sample sentence pairs.
         */
        public static void mainxx(String args[])
        {
            String str1 = "gold silver truck";
            String str2 = "Shipment of gold damaged in a fire";
            String str3 = "Delivery of silver arrived in a silver truck";
            String str4 = "Shipment of gold arrived in a truck";
            String str5 = "gold gold gold gold gold gold";
            String str6 = "gold gold gold gold gold dd";

            System.out.println(Cossimilar.getSimilarDegree(str1, str2));
            System.out.println(Cossimilar.getSimilarDegree(str1, str3));
            System.out.println(Cossimilar.getSimilarDegree(str1, str4));
            System.out.println(Cossimilar.getSimilarDegree(str1, str5));
            System.out.println(Cossimilar.getSimilarDegree(str5, str6));
            System.out.println(Cossimilar.getSimilarDegree(str6, str6));
        }
    }

  • 相关阅读:
    Linux下vi和vim模式相互切换
    Linux文件系统目录
    华硕笔记本(UEFI)支持U盘启动
    什么是RSS feed?订阅RSS有什么好处?
    JSTL的时间格式化标签的使用
    token session区别 以及JWT无状态token
    c++ 重载成员函数的地址如何获取
    c++ __VA_ARGS__
    协程任务调度器
    yield return 1
  • 原文地址:https://www.cnblogs.com/donaldlee2008/p/5232285.html
Copyright © 2020-2023  润新知