• 机器学习-kmeans的使用


    import numpy as np
    import pandas as pd
    import matplotlib
    from matplotlib import pyplot as plt
    %matplotlib inline
    # SimHei is a CJK font; presumably selected so Chinese text in figures
    # renders correctly (requires the font to be installed).
    matplotlib.rcParams['font.sans-serif'] = ['SimHei']

    # The CSV is read without a header row. Every column except the last
    # goes to dataset_X and the last column to dataset_y, both converted
    # from pandas objects via ``.values``.
    data = pd.read_csv('./010-data_multivar.csv', header=None)
    dataset_X = data.iloc[:, :-1].values
    dataset_y = data.iloc[:, -1].values

    无标签数据集可视化:将第一列 feature 作为 X 轴,第二列 feature 作为 Y 轴

    def visual_2D_dataset_dist(dataset):
        """Draw a two-column dataset as a scatter plot on a new figure.

        Column 0 is plotted on the x axis and column 1 on the y axis. Both
        axis ranges are padded by 1 beyond the min/max of the data.

        ``dataset`` must expose ``shape`` and support ``dataset[:, i]``
        indexing; an AssertionError is raised unless ``shape[1] == 2``.
        The function does not call ``plt.show()`` itself.
        """
        assert dataset.shape[1] == 2, 'only support dataset with 2 features'
        plt.figure()
        feature_0 = dataset[:, 0]
        feature_1 = dataset[:, 1]
        plt.scatter(feature_0, feature_1, marker='v', c='g', label='dataset')

        # Axis limits: data range widened by 1 on each side.
        x_limits = (np.min(feature_0) - 1, np.max(feature_0) + 1)
        y_limits = (np.min(feature_1) - 1, np.max(feature_1) + 1)
        plt.title('dataset distribution')
        plt.xlim(*x_limits)
        plt.ylim(*y_limits)
        plt.xlabel('feature_0')
        plt.ylabel('feature_1')
        plt.legend()
        
    # Scatter-plot the feature columns that were loaded from the CSV.
    visual_2D_dataset_dist(dataset_X)

    构造 kmeans

    from sklearn.cluster import KMeans
    # Build a 4-cluster model with k-means++ seeding and n_init=5, then fit
    # it on the feature matrix. fit() returns the estimator itself, so the
    # fitted model is bound to ``kmeans``.
    kmeans = KMeans(n_clusters=4, init='k-means++', n_init=5).fit(dataset_X)
    
    

    将dataset_X聚类效果可视化

    def visual_kmeans_effect(k_means, dataset):
        """Plot predicted cluster regions, the samples and the cluster centers.

        A grid with step 0.01 is laid over the data range (padded by 1 on
        every side), ``k_means.predict`` is evaluated on every grid point and
        the resulting labels are drawn as a background image. The samples of
        ``dataset`` and ``k_means.cluster_centers_`` are drawn on top, and the
        figure is shown with ``plt.show()``.

        ``k_means`` must provide ``predict`` and ``cluster_centers_``.
        ``dataset`` must expose ``shape`` and support ``dataset[:, i]``
        indexing; an AssertionError is raised unless ``shape[1] == 2``.
        """
        assert dataset.shape[1] == 2, 'only support dataset with 2 features'
        feature_0 = dataset[:, 0]
        feature_1 = dataset[:, 1]
        x_lo, x_hi = np.min(feature_0) - 1, np.max(feature_0) + 1
        y_lo, y_hi = np.min(feature_1) - 1, np.max(feature_1) + 1

        # Coordinate matrices of the grid points covering the padded range.
        grid_x, grid_y = np.meshgrid(np.arange(x_lo, x_hi, 0.01),
                                     np.arange(y_lo, y_hi, 0.01))

        # Predict a label for every grid point, then restore the grid shape
        # so the labels can be rendered as an image.
        grid_points = np.c_[grid_x.ravel(), grid_y.ravel()]
        region_labels = k_means.predict(grid_points).reshape(grid_x.shape)

        plt.figure()
        plt.imshow(
            region_labels,
            interpolation='nearest',
            extent=(grid_x.min(), grid_x.max(), grid_y.min(), grid_y.max()),
            cmap=plt.cm.Paired,
            aspect='auto',
            origin='lower',
        )

        # Samples: hollow triangles with a black outline.
        plt.scatter(feature_0, feature_1, marker='v',
                    facecolors='none', edgecolors='k', s=30)

        # Cluster centers, drawn above the other artists (zorder=5).
        centers = k_means.cluster_centers_
        plt.scatter(centers[:, 0], centers[:, 1], marker='o',
                    s=100, linewidths=2, color='k', zorder=5, facecolors='b')

        plt.title('K-Means effect graph')
        plt.xlim(x_lo, x_hi)
        plt.ylim(y_lo, y_hi)
        plt.xlabel('feature_0')
        plt.ylabel('feature_1')
        plt.show()
        
    # Draw the cluster regions, samples and centers for the model and data
    # used above.
    visual_kmeans_effect(kmeans,dataset_X)
     

    # Iris clustering

    from sklearn.cluster import KMeans
    from sklearn.datasets import load_iris

    # Only the feature matrix (``.data``) of the iris dataset is clustered.
    datairis = load_iris()
    dataset = datairis.data

    # Same configuration as before: 4 clusters, k-means++ seeding, n_init=5.
    # Note that this rebinds the module-level name ``kmeans``.
    kmeans = KMeans(n_clusters=4, init='k-means++', n_init=5).fit(dataset)

    # Cluster index assigned to each sample by the fitted model.
    print(kmeans.labels_)
    # Dump the complete object returned by load_iris().
    print(datairis)
     
  • 相关阅读:
    qt example
    http://download.qt-project.org/archive/qt/4.5/qt-all-opensource-src-4.5.2.tar.bz2
    http://qt-project.org/wiki/Category:Developing_with_Qt::QtWebKit#ff7c0fcd6a31e735a61c001f75426961
    aa
    Qt webKit可以做什么(四)--实现本地QObject和JavaScript交互
    Qt Creator needs a compiler set up to build. Configure a compiler in the kit options
    Qt WebKit and HTML5 geolocation | Qt Project forums | Qt Project
    让 QtWebkit 支持跨域CROS
    一万年太久,只争朝夕_散文网
    请问,毛主席他老人家所说的,“一万年太久,只争朝夕。”是什么意思?
  • 原文地址:https://www.cnblogs.com/junjun511/p/10730987.html
Copyright © 2020-2023  润新知