• LOF异常检测算法实现


    # LOF (Local Outlier Factor) anomaly detection: used to detect and report outlier points.
    from scipy.spatial.distance import cdist
    import numpy as np
    class LOF:
        """Local Outlier Factor (LOF) outlier detector over a fixed sample set.

        For every sample the detector compares its local reachability density
        with the densities of its ``k`` nearest neighbours.  Scores close to 1
        mean the sample is about as dense as its neighbourhood; scores well
        above 1 mean it sits in a sparser region than its neighbours.

        The full pairwise distance matrix is materialised, so memory use grows
        with the square of the number of samples.
        """

        def __init__(self, data, k, epsilon=1.0):
            """Store the samples and the detector settings.

            Args:
                data: 2-D array with one sample per row (it is handed to
                    ``scipy.spatial.distance.cdist`` unchanged).
                k: number of nearest neighbours that define a neighbourhood.
                epsilon: samples whose score is strictly greater than this
                    value are reported as outliers by :meth:`run`.
            """
            self.data = data
            self.k = k
            self.epsilon = epsilon
            self.N = self.data.shape[0]

        def get_dist(self):
            """Return the pairwise Euclidean distance matrix of ``self.data``."""
            return cdist(self.data, self.data)

        def _kdist(self, arr, self_ind=None):
            """Find the k nearest neighbours and the k-distance for one row.

            Args:
                arr: 1-D array of distances from one sample to every sample.
                self_ind: position of the sample itself inside ``arr``.  When
                    given, that position is removed explicitly, so duplicated
                    samples (distance 0) can never push the sample into its
                    own neighbourhood.  When omitted, the first entry of the
                    sorted order is assumed to be the sample itself (the
                    historical behaviour of this helper).

            Returns:
                ``(neighbor_ind, kdist)``: the neighbour indices ordered by
                increasing distance, and the distance to the farthest of them.
            """
            if self_ind is None:
                neighbor_ind = np.argsort(arr)[1:self.k + 1]
            else:
                # A stable sort keeps tie-breaking deterministic (lowest index
                # first) regardless of the row length.
                order = np.argsort(arr, kind="stable")
                neighbor_ind = order[order != self_ind][:self.k]
            return neighbor_ind, arr[neighbor_ind[-1]]

        def get_rdist(self):
            """Compute the neighbourhoods and the reachability-distance matrix.

            Returns:
                ``(nei_inds, rdist)`` where ``nei_inds`` is a tuple holding one
                index array per sample and ``rdist[i, j]`` equals
                ``max(kdist[i], dist[i, j])``, i.e. distances inside sample
                ``i``'s k-distance are lifted up to that k-distance.

            Raises:
                ValueError: if ``k`` is smaller than 1 or fewer than two
                    samples are available, because no neighbourhood exists.
            """
            if self.k < 1:
                raise ValueError("k must be at least 1, got {!r}".format(self.k))
            if self.N < 2:
                raise ValueError(
                    "LOF needs at least 2 samples, got {}".format(self.N)
                )

            dist = self.get_dist()
            nei_inds = []
            kdist = np.empty(self.N)
            # A plain loop is used on purpose: _kdist returns a ragged
            # (index array, scalar) pair, which np.apply_along_axis cannot pack
            # into a regular array on current NumPy releases.
            for i, row in enumerate(dist):
                inds, kdist[i] = self._kdist(row, i)
                nei_inds.append(inds)

            # Reuse the distance buffer to avoid a second N x N allocation.
            np.maximum(dist, kdist[:, np.newaxis], out=dist)
            return tuple(nei_inds), dist

        def get_lrd(self, nei_inds, rdist):
            """Compute the local reachability density of every sample.

            Args:
                nei_inds: per-sample neighbour indices from :meth:`get_rdist`.
                rdist: reachability-distance matrix from :meth:`get_rdist`.

            Returns:
                1-D array; entry ``i`` is the neighbour count of sample ``i``
                divided by the summed reachability distance from ``i`` to its
                neighbours.  If that sum is 0 (more than ``k`` coincident
                samples) the density is infinite.
            """
            lrd = np.zeros(self.N)
            for i, inds in enumerate(nei_inds):
                # rdist[j, i] is floored by neighbour j's k-distance, which is
                # the reachability distance of i with respect to j.
                lrd[i] = len(inds) / rdist[inds, i].sum()
            return lrd

        def run(self):
            """Score every sample and pick the outliers.

            Returns:
                ``(score, out_ind)``: the LOF score of each sample and the
                indices of the samples whose score exceeds ``self.epsilon``.

            Raises:
                ValueError: propagated from :meth:`get_rdist` for unusable
                    ``k`` or sample counts.
            """
            nei_inds, rdist = self.get_rdist()
            lrd = self.get_lrd(nei_inds, rdist)
            score = np.zeros(self.N)
            for i, inds in enumerate(nei_inds):
                # Mean neighbour density relative to the sample's own density.
                score[i] = lrd[inds].mean() / lrd[i]

            return score, np.where(score > self.epsilon)[0]

    if __name__ == '__main__':
        # Demo: score a dense synthetic cluster plus a small shifted cluster,
        # then plot the samples with the flagged outliers highlighted in red.
        np.random.seed(42)
        import pandas as pd

        # NOTE(review): the spreadsheet columns are only printed; the detector
        # below runs on the synthetic sample, which replaces them.
        frame = pd.read_excel("finaldata.xlsx").loc[:, ["p1", "p2", "p3"]]
        print(frame)

        # 10000 samples around (2, 2) and 20 samples around (5, 5).
        dense_cluster = np.random.normal(2, 1, size=(10000, 2))
        print(dense_cluster)
        shifted_cluster = np.random.normal(5, 1, size=(20, 2))
        print(shifted_cluster)
        stacked = np.vstack((dense_cluster, shifted_cluster))
        print(stacked)

        data = np.array(stacked)
        detector = LOF(data, 5, epsilon=3)
        score, out_ind = detector.run()
        outliers = data[out_ind]

        import matplotlib.pyplot as plt

        # All samples in blue first, flagged samples drawn over them in red.
        for points, colour in ((data, 'b'), (outliers, 'r')):
            plt.scatter(points[:, 0], points[:, 1], color=colour)
        plt.show()

        for item in (data, outliers, out_ind):
            print(item)

  • 相关阅读:
    15_门面模式
    14_责任链模式
    13_观察者模式
    12_状态模式
    11_策略模式
    10_命令模式
    09_适配器模式
    08_装饰者模式
    07_代理模式
    linux邮件服务器postfix配置实例
  • 原文地址:https://www.cnblogs.com/Yanjy-OnlyOne/p/13367505.html
Copyright © 2020-2023  润新知