• MATLAB 信息增益函数:入侵检测(KDD Cup)


    function InforGain = gain(data)
    %GAIN Information gain of every feature column with respect to the labels.
    %   InforGain = gain(data) treats the LAST column of the numeric matrix
    %   DATA as the class label and every other column as a discrete feature
    %   (each distinct value in a column is one category).
    %
    %   Input:
    %     data - m-by-n numeric matrix, n >= 2; column n holds the labels.
    %
    %   Output:
    %     InforGain - (n-1)-by-2 matrix. Column 1 is the feature column index,
    %                 column 2 is the information gain in bits,
    %                 H(labels) - sum_v P(feature = v) * H(labels | feature = v),
    %                 rounded to 4 decimal places.
    %
    %   The number of columns and the set of class labels are derived from
    %   DATA itself, so the function is not tied to a 42-column matrix or to
    %   the label values 0/1/2. A class that never occurs contributes 0 to
    %   the entropy (0*log2(0) is taken as 0) instead of producing NaN.
        [m, n] = size(data);

        % Without at least one feature column and one label column there is
        % nothing to rank.
        if n < 2
            InforGain = zeros(0, 2);
            return;
        end

        % Column 1: feature index; column 2: gain (filled in below).
        InforGain = [(1:n-1)', zeros(n-1, 1)];
        if m == 0
            % No samples: every gain stays 0.
            return;
        end

        labels = data(:, n);
        classes = unique(labels);

        % Entropy of the label column over the whole data set.
        InforEntropy = labelEntropy(labels, classes);

        for i = 1:n-1
            col = data(:, i);
            unicol = unique(col);

            % Weighted sum of the label entropy inside each category of
            % this feature (the conditional entropy H(labels | feature)).
            condEntropy = 0;
            for j = 1:length(unicol)
                mask = (col == unicol(j));
                weight = sum(mask) / m;
                condEntropy = condEntropy + weight * labelEntropy(labels(mask), classes);
            end

            % round(x*1e4)/1e4 rounds to 4 decimals using base MATLAB only
            % (roundn belongs to the Mapping Toolbox).
            InforGain(i, 2) = round((InforEntropy - condEntropy) * 1e4) / 1e4;
        end
    end

    function H = labelEntropy(labels, classes)
    %LABELENTROPY Shannon entropy (bits) of LABELS over the given CLASSES.
    %   Classes with zero probability are skipped so that 0*log2(0) counts
    %   as 0. An empty LABELS vector yields 0.
        total = length(labels);
        H = 0;
        for k = 1:length(classes)
            p = sum(labels == classes(k)) / total;
            % p is NaN when total == 0; NaN > 0 is false, so it is skipped.
            if p > 0
                H = H - p * log2(p);
            end
        end
    end
    
    % Driver script: load the data set and compute the information gain of
    % every feature column.
    close all;   % close open figure windows
    clear;       % clear workspace variables ("clear all" would also purge
                 % functions, globals and breakpoints, which is unnecessary)
    clc;         % clear the command window

    % Read the numeric CSV file into a matrix.
    data = csvread('corrected9.csv');

    % NOTE(review): the information-gain function in this listing is declared
    % as `gain` but is invoked here as `hanshu`; presumably it is saved in a
    % file named hanshu.m - confirm before renaming either one.
    InforGain = hanshu(data);
    
       
    
  • 相关阅读:
    Windows安装Linux虚拟机(CentOS7)
    模拟随机双色球
    git忽略文件夹提交以及gitignore修改后不生效的解决办法
    PHP随手记2--获取随机n位不重复字符
    PHP随手记1--内置函数date
    cocos2d-x源码分析-----触摸事件的实现
    cocos2d-x源码分析-----主循环(android)
    2014年的计划
    cocos2d-x源码分析-----入口分析(android)
    C++中二进制数据强制转换问题
  • 原文地址:https://www.cnblogs.com/princeness/p/11664905.html
Copyright © 2020-2023  润新知