Machine Learning: PLA


    Homework for the National Day holiday:
     
    * Understand the basic principle and workflow of the Perceptron Learning Algorithm (PLA), and use it to solve a practical classification problem.
     
    Dataset description:
    data1.csv: 100x3, i.e. 100 samples; the first two columns are the input features and the last column is the output label. This dataset is linearly separable.
     
    If the data are linearly separable, use PLA.
    If the data are not linearly separable, use the Pocket Learning Algorithm (see the note below).
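     
    For reference, PLA declares a sample (x_i, y_i) misclassified when y_i·(w·x_i + b) <= 0, and then updates w ← w + η·y_i·x_i and b ← b + η·y_i, where η is the learning rate. The pass over the data repeats until no point is misclassified, which is only guaranteed to happen when the data are linearly separable; on non-separable data the Pocket variant runs for a fixed number of iterations and keeps the best weights found so far.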
     
     
    # Implement the original (primal) form of the perceptron algorithm in Python
    # -*- coding: utf-8 -*-
    
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    
    # 1. Create the dataset
    def createdata():
     df=pd.read_csv('data1.csv',names=['x','y','labels'])
     samples=df[['x','y']]
     samples=samples.values
     labels=df['labels']
     labels=labels.tolist()
     return samples,labels
    
    # Train the perceptron model
    class Perceptron:
     def __init__(self,x,y,a=1):
      self.x=x
      self.y=y
      self.w=np.zeros((x.shape[1],1))  # initialize the weights w1, w2 to zero
      self.b=0
      self.a=a  # learning rate
      self.numsamples=self.x.shape[0]
      self.numfeatures=self.x.shape[1]
    
     def sign(self,w,b,x):
      # return the raw value of w·x + b; its sign gives the predicted class
      y=np.dot(x,w)+b
      return y.item()
    
     def update(self,label_i,data_i):
      tmp=label_i*self.a*data_i
      tmp=tmp.reshape(self.w.shape)
      # update w and b
      self.w=tmp+self.w
      self.b=self.b+label_i*self.a
    
     def train(self):
      isFind=False
      while not isFind:
       count=0
       for i in range(self.numsamples):
        tmpY=self.sign(self.w,self.b,self.x[i,:])
        if tmpY*self.y[i]<=0:  # a misclassified point
         print ("misclassified point:",self.x[i,:]," current w and b:",self.w, self.b)
         count+=1
         self.update(self.y[i],self.x[i,:])
       if count==0:
        print ('final trained w and b:',self.w,self.b)
        isFind=True
      return self.w,self.b
    
    # Plot the data and the learned decision boundary
    class Picture:
     def __init__(self,data,labels,w,b):
      self.b=b
      self.w=w
      self.data=data
      self.labels=labels
      plt.figure(1)
      plt.title('Perceptron Learning Algorithm',size=14)
      plt.xlabel('x0-axis',size=14)
      plt.ylabel('x1-axis',size=14)
    
      xData=np.linspace(4,7,100)
      yData=self.expression(xData)
      plt.plot(xData,yData,color='r',label='decision boundary')
    
      for i in range(data.shape[0]):
       if labels[i] != -1:
        plt.scatter(data[i][0],data[i][1],s=15)
       else:
        plt.scatter(data[i][0],data[i][1],s=15,marker='x')
      plt.savefig('2d.png',dpi=175)
        
     def expression(self,x):
      y=(-self.b-self.w[0]*x)/self.w[1]  # x0 is the independent variable; x1 = (-b - w0*x0)/w1 gives the decision boundary
      return y
    
     def Show(self):
      plt.show()
    
    
    if __name__ == '__main__':
     samples,labels=createdata()
     myperceptron=Perceptron(x=samples,y=labels)
     weights,bias=myperceptron.train()
     picture=Picture(samples,labels,weights,bias)
     picture.Show()
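
    As a quick sanity check (not part of the original script), the learned weights and bias can be used to recompute the training accuracy. This assumes the samples, labels, weights and bias variables exactly as produced in the __main__ block above:

    # optional check: for data1.csv every point should end up classified correctly
    preds = np.sign(np.dot(samples, weights).flatten() + bias)
    print('training accuracy: %.2f%%' % (100 * np.mean(preds == np.array(labels))))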

    2. The linearly inseparable case

    Testing shows that for this dataset, the results are good when the maximum number of iterations is set to 10000.
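
    In the script below, each misclassified sample contributes a loss of -y_i·(w·x_i + b)/||w||, i.e. its geometric distance to the current separating line; these per-sample values are collected in trainLoss and plotted at the end of the run.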

    #!/usr/bin/python3
    # -*- coding: utf-8 -*-
    
    """
    Description :
        pocket algorithm
    """
    
    import time
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    
    # load data from data2.csv
    df=pd.read_csv('data2.csv',names=['x','y','label'])
    data=df[['x','y']]
    data=data.values
    label=df['label']
    label=label.tolist()
    
    
    # Initialize w, b, alpha
    w = np.array([0.5, 1])
    b = 0
    alpha = 0.4
    trainLoss = []
    # Calculate train_loss
    f = (np.dot(data, w.T) + b) * label
    idx = np.where(f <= 0)[0].tolist()   # indices of the misclassified samples
    
    # loss of each misclassified point: its distance to the current separating line
    for i in idx:
        train_loss = -np.sum((np.dot(data[i], w.T) + b) * label[i]) / np.sqrt(w[0]**2 + w[1]**2)
        trainLoss.append(train_loss)
    # iteration
    max_iter = 10000
    iteration = 1
    start = time.time()
    while iteration <= max_iter:
        print('iteration:',iteration)
        if f[idx].size == 0:
            break
        # update w and b once with every currently misclassified sample
        for i in idx:
            w += alpha * data[i] * label[i]
            b += alpha * label[i]
        print('Iteration:%d  w:%s  b:%s' % (iteration, w, b))
        f = (np.dot(data, w.T) + b) * label
        idx = np.where(f <= 0)[0].tolist()
        for i in idx:
            train_loss = -np.sum((np.dot(data[i], w.T) + b) * label[i]) / (np.sqrt(w[0] ** 2 + w[1] ** 2))
            trainLoss.append(train_loss)
        iteration = iteration + 1
    
    # classification accuracy: fraction of the samples that are correctly classified
    accuracy = (len(label) - len(idx)) / len(label) * 100
    end = time.time()
    print('Pocket learning algorithm finished')
    print('train time is %f s.' % (end - start))
    print('-'*50)
    print('min trainLoss: %f' % np.min(trainLoss))
    print('Classification accuracy: %.2f%%' % accuracy)
    
    # draw
    
    plt.figure(1)
    plt.title('Pocket learning algorithm',size=14)
    plt.xlabel('x0-axis',size=14)
    plt.ylabel('x1-axis',size=14)
    
    xData=np.linspace(5,8,100)
    yData=(w[0] * xData + b) / (-w[1])
    plt.plot(xData,yData,color='r',label='decision boundary')
    
    for i in range(data.shape[0]):
        if label[i] != -1:
            plt.scatter(data[i][0],data[i][1],s=15)
        else:
            plt.scatter(data[i][0],data[i][1],s=15,marker='x')
    plt.savefig('PLA_PocketLearning_1.png',dpi=175)
    
    
    plt.figure()
    plt.plot(trainLoss)
    plt.ylabel('trainLoss')
    plt.xlabel('Iteration')
    plt.savefig('PLA_PocketLearning_2.png',dpi=175)
    plt.show()
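
    A note on the update loop above: it nudges w and b with every currently misclassified sample but never stores the best weights encountered, so the final w is not guaranteed to be the best one seen during training. A textbook pocket algorithm additionally keeps a "pocket" copy of the weights with the lowest training error. The following is only a minimal sketch of that variant, reusing the data and label arrays loaded above; pocket_train is a hypothetical helper, not part of the original script:

    def pocket_train(data, label, alpha=0.4, max_iter=10000):
        # pocket variant of PLA: return the (w, b) with the fewest training errors seen
        label = np.asarray(label)
        w, b = np.zeros(data.shape[1]), 0.0
        best_w, best_b, best_errors = w.copy(), b, len(label)
        for _ in range(max_iter):
            f = (np.dot(data, w) + b) * label
            idx = np.where(f <= 0)[0]          # currently misclassified samples
            if idx.size == 0:                  # linearly separable: current w is already perfect
                return w, b
            if idx.size < best_errors:         # pocket step: remember the best weights so far
                best_w, best_b, best_errors = w.copy(), b, idx.size
            i = np.random.choice(idx)          # correct one randomly chosen misclassified point
            w = w + alpha * label[i] * data[i]
            b = b + alpha * label[i]
        return best_w, best_b

    # usage: w_pocket, b_pocket = pocket_train(data, label)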

     