• python k-means 一堆乱七八糟的程序


    python k-means

    F:\PythonProject\K-Means

    import pandas as pd
    import numpy as np
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import train_test_split
    import matplotlib.pyplot as plt
    
    
    from sklearn.cluster import KMeans
    
    # Cut-off for the cumulative explained-variance ratio: main() reports the
    # index of the first principal component whose cumulative ratio exceeds it.
    threshold_value = 0.85
    
    
    
    
    def main():
        """Run a hand-rolled PCA and a 3-cluster K-Means on the first rows of ``d_1.txt``.

        Steps, in order:

        1. Read ``d_1.txt`` and ``f_1.txt`` (no header row) from the working
           directory and keep the first 20 rows of ``d_1.txt``.
        2. Standardize the training rows, eigen-decompose their covariance
           matrix, print the cumulative explained-variance ratios and the index
           of the first component that exceeds ``threshold_value``.
        3. Project the training rows onto the two leading eigenvectors and
           show a scatter plot coloured by the values of column 1.
        4. Fit ``KMeans(n_clusters=3)`` on the raw (unscaled) feature rows,
           print the centres and labels, and for every row print/collect the
           names from ``f_1.txt`` whose first two columns equal the row's
           first two columns.
        5. Show a scatter plot of the first two raw features per cluster.

        The function takes no arguments and returns ``None``; its results are
        the printed output and the two matplotlib windows.
        """
        # Load both tables from the working directory.
        df_wine = pd.read_csv('d_1.txt', header=None)
        df_wine2 = pd.read_csv('f_1.txt', header=None)

        # d_1.txt as used below: columns 0-1 are the lookup key, column 1 is
        # also the class label, columns 2+ are the features.
        list_len = 20  # only the first 20 rows are analysed
        x = df_wine.iloc[:, 2:].values[:list_len]
        y = df_wine.iloc[:, 1].values[:list_len]
        z_frame = df_wine.iloc[:, 0:2].values[:list_len]

        # f_1.txt as used below: columns 0-1 are the lookup key, column 2 the name.
        z_frame_f = df_wine2.iloc[:, 0:2].values
        label_name_f = df_wine2.iloc[:, 2].values

        print("{0}    {1}".format(len(x), len(y)))

        # Hold out the LAST five rows. The original took the first five rows as
        # the test set, which are also part of the training slice.
        split = len(x) - 5
        x_train, x_test = x[:split], x[split:]
        y_train = y[:split]

        print(len(x_train))
        print(x_train)
        print("----------------------------------------")

        # Standardize to zero mean / unit variance. The scaler is fitted on the
        # training rows only and merely applied to the held-out rows.
        sc = StandardScaler()
        x_train_std = sc.fit_transform(x_train)
        # Not consumed further below; kept so the held-out rows are available
        # on the same scale as the training rows.
        x_test_std = sc.transform(x_test)
        print(len(x_train_std))

        # Covariance matrix of the standardized features and its eigen-pairs.
        # eigh() is the routine for symmetric matrices: it always returns real
        # values, whereas eig() may return complex ones that cannot be sorted.
        cov_matrix = np.cov(x_train_std.T)
        eigen_val, eigen_vec = np.linalg.eigh(cov_matrix)
        # print("values\n", eigen_val, "\nvector\n", eigen_vec)

        print(len(eigen_val))
        print(len(eigen_vec))

        # Explained-variance ratio per component, largest first, then accumulated.
        # eigh() returns eigenvalues in ascending order, hence the reversal.
        var_exp = eigen_val[::-1] / eigen_val.sum()
        cum_var_exp = np.cumsum(var_exp)
        print(cum_var_exp)

        # Index of the first component whose cumulative ratio exceeds the
        # threshold; -1 when none does.
        index_x0 = next(
            (idx for idx, ratio in enumerate(cum_var_exp) if ratio > threshold_value),
            -1,
        )
        print("PCA:", index_x0)

        # (|eigenvalue|, eigenvector) pairs, largest eigenvalue first.
        eigen_pairs = sorted(
            zip(np.abs(eigen_val), eigen_vec.T),
            key=lambda pair: pair[0],
            reverse=True,
        )

        # The pairs are ragged (scalar + vector); recent NumPy refuses to build
        # an array from them unless dtype=object is requested explicitly.
        eigen_pairs2 = np.array(eigen_pairs, dtype=object)
        print(type(eigen_pairs))
        print(type(eigen_pairs2))
        print(len(eigen_pairs))
        print("====================================")

        # NOTE(review): K-Means below runs on all selected rows, unscaled,
        # while y_train only covers the training part - confirm this is intended.
        X = np.array(x)
        print("---------m----------------")

        # Projection matrix W built from the two leading eigenvectors.
        w = np.column_stack((eigen_pairs[0][1], eigen_pairs[1][1]))
        x_train_pca = x_train_std.dot(w)
        print("-------------------------")

        # Scatter plot of the projected training rows, one style per class
        # (zip() limits this to the first three distinct classes).
        color = ['r', 'g', 'b']
        marker = ['s', 'x', 'o']
        for cls, c, m in zip(np.unique(y_train), color, marker):
            in_class = y_train == cls
            plt.scatter(x_train_pca[in_class, 0], x_train_pca[in_class, 1],
                        c=c, label=cls, marker=m)

        plt.title('Result')
        plt.xlabel('PC1')
        plt.ylabel('PC2')
        plt.legend(loc='lower left')
        plt.show()

        estimator = KMeans(n_clusters=3)  # build the clusterer
        estimator.fit(X)  # cluster
        label_pred = estimator.labels_  # cluster label per row
        center_p = estimator.cluster_centers_  # cluster centres
        print("============聚类中心================")
        print(center_p)
        print("============================")
        print(label_pred)

        # Index f_1.txt once by its two key columns so each clustered row is
        # resolved with a single lookup instead of a full scan. The first key
        # value is stored alongside the name because it is what gets printed.
        matches_by_key = {}
        for key_row, name in zip(z_frame_f, label_name_f):
            matches_by_key.setdefault((key_row[0], key_row[1]), []).append((key_row[0], name))

        # Collect the matching names per cluster, printing every match with
        # the 1-based cluster number in front.
        cluster_names = [[], [], []]
        for cluster_id, key_row in zip(label_pred, z_frame):
            for key_first, name in matches_by_key.get((key_row[0], key_row[1]), ()):
                print("{0}   {1}  {2}  {3}".format(cluster_id + 1, key_first, key_row[1], name))
                cluster_names[cluster_id].append(name)
        y0, y1, y2 = cluster_names

        print("=========================================")

        print("\n====1===")
        print(y0)
        print("====2===")
        print(y1)
        print("====3===")
        print(y2)

        # Boolean masks keep each selection two-dimensional even when a
        # cluster is empty, so the column indexing below cannot fail.
        x0 = X[label_pred == 0]
        x1 = X[label_pred == 1]
        x2 = X[label_pred == 2]

        print("\n\n\n\n\n============================")
        print(label_pred)
        print(y_train)
        print("============================")

        # Plot the K-Means result using the first two raw feature columns.
        plt.scatter(x0[:, 0], x0[:, 1], c="red", marker='o', label='label0')
        plt.scatter(x1[:, 0], x1[:, 1], c="green", marker='*', label='label1')
        plt.scatter(x2[:, 0], x2[:, 1], c="blue", marker='+', label='label2')
        plt.legend(loc=2)
        plt.show()
        
    
    
    # Run the analysis only when this file is executed as a script.
    if __name__ == '__main__':
        main()

    ########################33

    QQ 3087438119
  • 相关阅读:
    SQL TO XML
    C#中ArrayList 与 string、string[]数组 的转换
    C# 文件下载四方法
    Ilist<>与List<>的区别
    lamda表达式
    遍历实例对象的所有属性
    Readonly 与Const
    使用eclipse上传项目到开源中国代码托管Git@osc教程
    junit4中Assert断言的使用以及Mockito框架mock模拟对象的简单使用
    【每天一题ACM】 斐波那契数列(Fibonacci sequence)的实现
  • 原文地址:https://www.cnblogs.com/herd/p/14785086.html
Copyright © 2020-2023  润新知