模块名称:pca.py
PCA原理与紧致技巧原理待补。。。
#-*-coding:UTF-8-*- ''' Created on 2015年3月2日 @author: Ayumi Phoenix ch01 p-14 图像的主成分分析 ''' from PIL import Image import numpy def pca(X): """主成分分析: 输入;矩阵X 每一行为一条训练数据 返回:投影矩阵(按照维度重要性排序),方差,和均值""" X = numpy.asarray(X) n_data,dim = X.shape # axis_0, axis_1 mean_X = X.mean(axis=0) X -= mean_X if n_data < dim: # 维数大于样本数,使用紧致技巧 R_sigma = numpy.dot(X,X.T) # m x m eign_values, eign_vectors = numpy.linalg.eigh(R_sigma) # 返回H矩阵或对称阵的特征值和特征向量(递增顺序) tmp = numpy.dot(X.T,eign_vectors) # (n2,m) x (m,m) V = tmp[::-1] # 矩阵V每行向量都是正交的 S = numpy.sqrt(eign_values) for i in xrange(V.shape[1]): V[:,i] /= S else: # PCA - SVD U,S,V = numpy.linalg.svd(X) V = V[:n_data] # 仅仅返回前n_data维数据才合理 # 返回投影向量矩阵, 特征值开方, 均值 return V, S, mean_X if __name__=="__main__": from PIL import Image import numpy import pylab import imtools as imt path = r"E:dataset libPCV_datafontimagesa_thumbs" imlist = imt.get_imlist(path) im = numpy.array(Image.open(imlist[0])) m,n = im.shape[0:2] n_im = len(imlist) im_matrix = numpy.array([numpy.array(Image.open(each_im)).flatten() for each_im in imlist],'f') V,S,im_mean = pca(im_matrix) # 显示均值图像与前七个特征图 pylab.figure() pylab.gray() pylab.subplot(2,4,1) pylab.imshow(im_mean.reshape(m,n)) for i in xrange(7): pylab.subplot(2,4,i+2) pylab.imshow(V[i].reshape(m,n)) # 从新投影为新样本 k = 10 print im_matrix.shape,V.shape # 取V前k个特征向量 y = numpy.dot(im_matrix,V[0:k,:].T) # (m,n2) * ((k,n2).T) = [m,k] print y.shape # 显示还原图像 im_matrix_tidle = numpy.dot(y,V[0:k,:]) + im_mean pylab.figure() pylab.gray() for i in xrange(8): pylab.subplot(2,4,i+1) pylab.imshow(im_matrix_tidle[i].reshape(m,n)) pylab.show() # 保存均值和主成分数据 import pickle f = open('font_pca_models.pkl','wb') pickle.dump(im_mean, f) pickle.dump(V, f) f.close() # 载入均值和主成分数据 import pickle f = open('font_pca_models.pkl','rb') im_mean = f.load(f) # 载入对象顺序必须和保存顺序一样 V = f.load(f) f.close()
均值图片与前7个特征向量:
前7张图片降维后的还原图像