• SMO启发式选择


    %%
    %   svm 简单算法设计 --启发式选择
    %%
    clc
    clear
    close all
    % step=0.05;error=1.2;
    % [data, label]=generate_sample(step,error);
    category=load('category.mat');
    label=category.label;
    feature=load('feature.mat');
    data=feature.data;
    [num_data,d] = size(data); % 样本数量,维度,维度在下面好像没有用到
    %% 定义向量机参数
    alphas = ones(num_data,1)-0.999999;
    b = 0;
    error = zeros(num_data,2);
    tol = 0.001;
    C = 600000;
    iter = 0;
    max_iter = 30;
    alpha_change = 0;
    entireSet = 1;%作为一个标记看是选择全遍历还是部分遍历
    
    %第一个变量先遍历间隔边界(0<alpha<C)上的支持向量点(此时松弛变量等于0),检验其是否满足KKT条件,若全部满足再遍历整个样本
    %第一个变量选取违反KKT条件最严重的样本点所对应的变量,意思是首先更新最糟糕的点
    %选择第二个变量要使得|E1-E2|最大,即使得乘子的变化最大,要用启发式标准
    %第二个变量的选择好像是先看有没有违反KKT条件的点,若有则选择,若没有则按照|E1-E2|来选择
    
    while (iter < max_iter) && ((alpha_change > 0) || entireSet)
        alpha_change = 0;
        % -----------全遍历样本-------------------------
        if entireSet 
            for i = 1:num_data
                Ei = calEk(data,alphas,label,b,i);%计算误差
                %此处的条件既是选取第一个变量的标准,首先考虑的是间隔边界(0<alpha<C)上的支持向量点中不满足KKT条件的点所对应的变量
                %该条件困扰了我两天,实际上原来的写法过于虚伪,让人看不透摸不清,实际上写清楚了让人一看就明了。
                if (label(i)*Ei<-0.001 && alphas(i)<C)||(label(i)*Ei>0.001 && alphas(i)>0)
                %if (0<alphas(i) && alphas(i)<C && label(i)*Ei~=0)%写成这个形式要让alphas的初值大于零否则进不来循环体。
                    %选择下一个alphas
                    [j,Ej] = select(i,data,num_data,alphas,label,b,C,Ei,entireSet);
                    alpha_I_old = alphas(i);
                    alpha_J_old = alphas(j);
                    if label(i) ~= label(j)
                        L = max(0,alphas(j) - alphas(i));
                        H = min(C,C + alphas(j) - alphas(i));
                    else
                        L = max(0,alphas(j) + alphas(i) -C);
                        H = min(C,alphas(j) + alphas(i));
                    end
                    if L==H
                        continue;end
                    eta = 2*data(i,:)*data(j,:)'- data(i,:)*...
                        data(i,:)' - data(j,:)*data(j,:)';
                    if eta >= 0 
                        continue;end
                    alphas(j) = alphas(j) - label(j)*(Ei-Ej)/eta;
                    %限制范围
                    if alphas(j) > H
                        alphas(j) = H;
                    elseif alphas(j) < L
                        alphas(j) = L;
                    end
                    if abs(alphas(j) - alpha_J_old) < 1e-4
                        continue;end
                    alphas(i) = alphas(i) + label(i)*label(j)*(alpha_J_old-alphas(j));
                    b1 = b - Ei - label(i)*(alphas(i)-alpha_I_old)*data(i,:)*data(i,:)'- label(j)*(alphas(j)-alpha_J_old)*data(i,:)*data(j,:)';
                    b2 = b - Ej - label(i)*(alphas(i)-alpha_I_old)*data(i,:)*data(j,:)'- label(j)*(alphas(j)-alpha_J_old)*data(j,:)*data(j,:)';
                    if (alphas(i) > 0) && (alphas(i) < C)
                        b = b1;
                    elseif (alphas(j) > 0) && (alphas(j) < C)
                        b = b2;
                    else
                        b = (b1+b2)/2;
                    end
                    alpha_change = alpha_change + 1;
                end
            end
             iter = iter + 1;
       % --------------部分遍历(alphas=0~C)的样本--------------------------
        else
            index = find(alphas>0 & alphas < C);
            for ii = 1:length(index)
                i = index(ii);
                Ei = calEk(data,alphas,label,b,i);%计算误差
                if (label(i)*Ei<-0.001 && alphas(i)<C)||...
                        (label(i)*Ei>0.001 && alphas(i)>0)
                    %选择下一个样本
                    [j,Ej] = select(i,data,num_data,alphas,label,b,C,Ei,entireSet);
                    alpha_I_old = alphas(i);
                    alpha_J_old = alphas(j);
                    if label(i) ~= label(j)
                        L = max(0,alphas(j) - alphas(i));
                        H = min(C,C + alphas(j) - alphas(i));
                    else
                        L = max(0,alphas(j) + alphas(i) -C);
                        H = min(C,alphas(j) + alphas(i));
                    end
                    if L==H
                        continue;end
                    eta = 2*data(i,:)*data(j,:)'- data(i,:)*...
                        data(i,:)' - data(j,:)*data(j,:)';
                    if eta >= 0
                        continue;end
                    alphas(j) = alphas(j) - label(j)*(Ei-Ej)/eta;  
                    %限制范围
                    if alphas(j) > H
                        alphas(j) = H;
                    elseif alphas(j) < L
                        alphas(j) = L;
                    end
                    if abs(alphas(j) - alpha_J_old) < 1e-4
                        continue;end
                    alphas(i) = alphas(i) + label(i)*...
                        label(j)*(alpha_J_old-alphas(j));
                    b1 = b - Ei - label(i)*(alphas(i)-alpha_I_old)*...
                        data(i,:)*data(i,:)'- label(j)*...
                        (alphas(j)-alpha_J_old)*data(i,:)*data(j,:)';
                    b2 = b - Ej - label(i)*(alphas(i)-alpha_I_old)*...
                        data(i,:)*data(j,:)'- label(j)*...
                        (alphas(j)-alpha_J_old)*data(j,:)*data(j,:)';
                    if (alphas(i) > 0) && (alphas(i) < C)
                        b = b1;
                    elseif (alphas(j) > 0) && (alphas(j) < C)
                        b = b2;
                    else
                        b = (b1+b2)/2;
                    end
                    alpha_change = alpha_change + 1;
                end
            end
            iter = iter + 1;
        end
        % --------------------------------
        if entireSet %第一次全遍历了,下一次就变成部分遍历
            entireSet = 0;
        elseif alpha_change == 0 
            %如果部分遍历所有都没有找到需要交换的alpha,再改为全遍历
            entireSet = 1;
        end
        disp(['iter ================== ',num2str(iter)]);    
    end
    
    % 计算权值W
    W = (alphas.*label)'*data;
    %记录支持向量位置
    index_sup = find(alphas ~= 0);
    %计算预测结果
    predict = (alphas.*label)'*(data*data') + b;
    predict = sign(predict);
    % 显示结果
    figure;
    index1 = find(predict==-1);
    data1 = (data(index1,:))';
    plot(data1(1,:),data1(2,:),'+r');
    hold on
    index2 = find(predict==1);
    data2 = (data(index2,:))';
    plot(data2(1,:),data2(2,:),'*');
    hold on
    dataw = (data(index_sup,:))';
    plot(dataw(1,:),dataw(2,:),'og','LineWidth',2);
    % 画出分界面,以及b上下正负1的分界面
    hold on
    k = -W(1)/W(2);
    x = -1.2:0.1:1.2;
    y = k*x + b;
    plot(x,y,x,y-1,'r--',x,y+1,'r--');
    title(['松弛变量范围C = ',num2str(C)]);
    
    function Ek = calEk(data,alphas,label,b,k)
    pre_Li = (alphas.*label)'*(data*data(k,:)') + b;
    Ek = pre_Li - label(k);
    
    function [J,Ej] = select(i,data,num_data,alphas,label,b,C,Ei,choose)
    maxDeltaE = 0;maxJ = -1;
    if choose == 1 %全遍历---随机选择alphas
        j = randi(num_data ,1);
        if j == i
            temp = 1;
            while temp
                j = randi(num_data,1);
                if j ~= i
                    temp = 0;
                end
            end
        end
        J = j;
        Ej = calEk(data,alphas,label,b,J);
    else %部分遍历--启发式的选择alphas
        index = find(alphas>0 & alphas < C);
        for k = 1:length(index)
            if i == index(k)
                continue;
            end
            temp_e = calEk(data,alphas,label,b,k);
            deltaE = abs(Ei - temp_e); %选择与Ei误差最大的alphas
            if deltaE > maxDeltaE
                maxJ = k;
                maxDeltaE = deltaE;
                Ej = temp_e;
            end
        end
        J = maxJ;
    end
    

    去吧,去吧,到彼岸去吧,彼岸是光明的世界!
  • 相关阅读:
    linux防火墙iptables
    etc/fstab
    EDT改成CST
    echo
    dd
    chown
    CAT
    Linux grep
    CHECKSUM比较两表字段值差异
    通过GitHub部署项目到Nginx服务器
  • 原文地址:https://www.cnblogs.com/lengyue365/p/5044899.html
Copyright © 2020-2023  润新知