逻辑:
代码:
import os

import numpy as np

# NOTE(review): classify() is not defined in this file; it is expected to be
# supplied by this star import -- confirm against com.test.my_test03.
from com.test.my_test03 import *

# Directories holding the training and test sample files. Forward slashes are
# used on purpose: in a normal (non-raw) string literal a backslash path such
# as 'D:\code\python\test2\...' turns "\t" into a TAB character.
TRAIN_DIR = 'D:/code/python/test2/data/TrainData'
TEST_DIR = 'D:/code/python/test2/data/TestData'


def img2vector(fileName):
    """Flatten the contents of one sample file into a single 1 x 1024 row.

    Reads the first 32 lines of the file and converts the first 32 characters
    of each line with ``int()``; character ``j`` of line ``i`` is stored at
    column ``32 * i + j``.

    Args:
        fileName: Path of the text file to read.

    Returns:
        An array of shape (1, 1024) created with ``np.zeros`` and filled with
        the converted values.

    Raises:
        IndexError: If a processed line has fewer than 32 characters.
        ValueError: If a character cannot be converted by ``int()``.
    """
    returnVect = np.zeros((1, 1024))
    # The context manager closes the file even if a conversion below raises.
    with open(fileName) as fh:
        for i in range(32):
            line = fh.readline()
            # NOTE(review): only a line that is exactly one space is skipped;
            # presumably this was meant to skip blank lines -- confirm.
            if line != " ":
                for j in range(32):
                    returnVect[0, 32 * i + j] = int(line[j])
    return returnVect


class IdentifImgClassTest(object):
    """Classify every test sample with classify() and print the accuracy.

    NOTE(review): all statements below are class-body code, so the whole
    workflow runs once when this ``class`` statement executes (including on
    import), not when an instance is created. The intermediate values remain
    available as class attributes.
    """

    # Digit label parsed from each training file name.
    hwLabels = []
    # Every file in the training directory.
    trainingFileList = os.listdir(TRAIN_DIR)
    # Number of training samples (the original note records m=1934 for the
    # author's data set).
    m = len(trainingFileList)
    # m x 1024 zero matrix, one row per training sample.
    trainingMat = np.zeros((m, 1024))

    # Build the training set.
    for i, fileName in enumerate(trainingFileList):
        # The label is the integer before the first '_' of the file stem.
        fileNameSplit = fileName.split('.')[0]
        num = int(fileNameSplit.split('_')[0])
        hwLabels.append(num)
        trainingMat[i, :] = img2vector('%s/%s' % (TRAIN_DIR, fileName))

    # Build the test set.
    testFileList = os.listdir(TEST_DIR)
    errorCount = 0.0
    mTest = len(testFileList)
    for fileNameStr in testFileList:
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = img2vector('%s/%s' % (TEST_DIR, fileNameStr))
        # Hand the test vector, the training matrix, its labels and K=3 to
        # classify().
        classifierResult = classify(vectorUnderTest, trainingMat, hwLabels, 3)
        print("识别出的数字是: %d, 真实数字是: %d" % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0

    print(" 识别错误次数 %d" % errorCount)
    # Raises ZeroDivisionError when the test directory is empty.
    errorRate = errorCount / float(mTest)
    print(" 正确率: %f" % (1 - errorRate))


if __name__ == '__main__':
    IdentifImgClassTest()
结果: