• 信息检索————NDCG计算


    先贴代码,原理有时间补上。

     1 import numpy as np
     2 import copy
     3 from math import log
     4 
     # input data
     # Relevance scores listed from highest to lowest. The two-argument-list
     # ndcg(ideal, rlist, k) receives this array, and ndcg(rslist, rlist, k)
     # reads it as a module-level global when computing the ideal DCG.
     ideal = np.array([5,5,4,4,3,3,2,2,1,1])
     # Same values as `ideal`, stored as floats; in this listing it is only
     # referenced by the commented-out calls at the end of the script.
     relevant_score_list = np.array([5.0,5.0,4.0,4.0,3.0,3.0,2.0,2.0,1.0,1.0])
     # Retrieved ids in ranked order. The functions index the score arrays with
     # `id - 1`, i.e. ids are 1-based; ids 11, 12 and 13 have no score entry.
     recall_list = np.array([8,11,2,12,3,1,5,4,13,7])
     9 
    def dcg(rscore, m):
        """Return the discounted gain contributed by a single result.

        The gain ``2 ** rscore - 1`` is divided by ``log2(m + 2)``. Callers in
        this file pass the zero-based rank of the result as ``m``.
        """
        gain = 2.0 ** rscore - 1.0
        discount = np.log2(m + 2.0)
        return gain / discount
    12 
    def dcg_k(rslist, rlist, k):
        """Return the cumulative DCG values for the first ``k`` retrieved results.

        Args:
            rslist: relevance scores; entry ``j - 1`` is the score of id ``j``.
            rlist: retrieved ids (1-based) in ranked order.
            k: number of top positions to accumulate.

        Returns:
            A list whose entry ``i`` is the DCG of the first ``i`` results
            (entry 0 is 0.0). It holds at least 11 entries and grows to
            ``k + 1`` entries when ``k`` is larger than 10.
        """
        # Size the buffer from k: a fixed 11 slots raises IndexError once k > 10.
        # The minimum of 11 slots is kept so existing callers see the same shape.
        dcgscore_list = [0.0] * (max(k, 10) + 1)
        for i in range(k):
            relevant_score = 0
            doc_id = rlist[i]
            # Ids are 1-based, so doc_id == len(rslist) is a valid id and must
            # be accepted. Ids below 1 are rejected so that a negative index
            # cannot wrap around to the end of rslist.
            if 1 <= doc_id <= len(rslist):
                relevant_score = rslist[doc_id - 1]
            dcgscore_list[i + 1] = dcgscore_list[i] + dcg(relevant_score, i)
        return dcgscore_list
    22 
    def idcg_k(ideal, k):
        """Return the cumulative ideal DCG values for the first ``k`` positions.

        IDCG is the DCG of the best possible ordering, i.e. the ideal ranking
        supplied for the case.

        Args:
            ideal: relevance scores in ideal ranking order.
            k: number of top positions to accumulate.

        Returns:
            A list whose entry ``i`` is the ideal DCG of the first ``i``
            positions (entry 0 is 0.0). It holds at least 11 entries and grows
            to ``k + 1`` entries when ``k`` is larger than 10.
        """
        # Size the buffer from k: a fixed 11 slots raises IndexError once k > 10.
        # The minimum of 11 slots is kept so existing callers see the same shape.
        idcgscore_list = [0.0] * (max(k, 10) + 1)
        n_ideal = len(ideal)
        for i in range(k):
            # Positions past the end of `ideal` add no gain instead of raising.
            gain = dcg(ideal[i], i) if i < n_ideal else 0.0
            idcgscore_list[i + 1] = idcgscore_list[i] + gain
        return idcgscore_list
    29 
    def ndcg(rslist, rlist, k):
        """Print and return NDCG@1 .. NDCG@k, each rounded to three decimals.

        NOTE: this file defines a second function named ``ndcg`` further down;
        that later definition rebinds the name, so this version is only
        reachable if one of the two is renamed.

        Args:
            rslist: relevance scores; entry ``j - 1`` is the score of id ``j``.
            rlist: retrieved ids (1-based) in ranked order.
            k: largest cutoff to evaluate.

        Returns:
            A list of ``k`` values; entry ``i`` is NDCG@(i + 1).
        """
        dcgscore_list = dcg_k(rslist, rlist, k)
        # Normalisation factor: the DCG of the perfect ordering, i.e. results
        # sorted by descending relevance. It is taken from the module-level
        # `ideal` array, not from `rslist`.
        idcgscore_list = idcg_k(ideal, k)

        ndcg_list = []
        for cutoff in range(1, k + 1):
            best = idcgscore_list[cutoff]
            # An ideal DCG of zero means nothing relevant exists up to this
            # cutoff; report 0.0 instead of dividing by zero.
            score = dcgscore_list[cutoff] / best if best else 0.0
            ndcg_list.append(round(score, 3))
        print(ndcg_list)
        # Hand the values back as well so callers can use them, not just read them.
        return ndcg_list
    39     
    40 
    def ndcg(ideal, rlist, k):
        """Return NDCG@k for a ranked list of retrieved ids.

        Args:
            ideal: relevance scores, assumed to be sorted in descending order
                (position ``i`` is used as the best achievable gain at rank
                ``i``); entry ``j - 1`` is also taken as the score of id ``j``.
            rlist: retrieved ids (1-based) in ranked order.
            k: cutoff rank.

        Returns:
            DCG divided by the ideal DCG over the first ``min(k, len(ideal))``
            positions, as a float. Returns 0.0 when the ideal DCG is zero.
        """
        n_docs = len(ideal)
        n_retrieved = len(rlist)
        dcg_sum, idcg_sum = 0.0, 0.0
        for i in range(min(k, n_docs)):
            gain = 0
            # Missing results (k larger than rlist) and ids outside 1..n_docs
            # count as irrelevant; rejecting ids below 1 also stops negative
            # indices from wrapping around to the end of `ideal`.
            if i < n_retrieved and 1 <= rlist[i] <= n_docs:
                gain = ideal[rlist[i] - 1]
            # Natural log is fine here: the base cancels in the final ratio.
            discount = log(2 + i)
            dcg_sum += (2 ** gain - 1) / discount
            idcg_sum += (2 ** ideal[i] - 1) / discount
        # No relevant item at all (or k == 0): define NDCG as 0 rather than 0/0.
        if idcg_sum == 0:
            return 0.0
        return float(dcg_sum / idcg_sum)
    53     
    # The cumulative implementation would be invoked as
    # ndcg(relevant_score_list, recall_list, 10), but its name is rebound by the
    # later definition of ndcg, so only the scalar implementation is callable here.
    # Print the score: the bare call discarded the return value and showed nothing.
    print(ndcg(ideal, recall_list, 10))

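    代码中用两种方式实现了 NDCG 的计算。需要注意:两个函数同名(都叫 ndcg),后定义的会覆盖先定义的,因此直接运行时实际调用的是第二种实现;如果想使用第一种实现,需要先把其中一个函数改名,再按对应的参数(第一种传入相关性分数列表,第二种传入理想排序)调用。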

  • 相关阅读:
    python_Memcached
    python_day10_IO多路复用
    java_list,set,map集合
    python3.0_day9_scoket基础之篇
    redis.conf配置文件详解
    Java_数组
    面向接口编程初识(转)
    SSH三种框架及表示层、业务层和持久层的理解(转)
    解决win10磁盘占用过大的问题(亲测有效)
    ORA-12541:TNS:无监听程序
  • 原文地址:https://www.cnblogs.com/sgatbl/p/12768187.html
Copyright © 2020-2023  润新知