• Appscanner实验还原code3


    # Author: Baozi
    # -*- coding: utf-8 -*-
    #
    # AppScanner experiment reproduction (script 3).
    #
    # For each confidence threshold 0.0, 0.1, ..., 0.9 the experiment is
    # repeated several times:
    #   1. Shuffle the training flows and fit a first random forest on the
    #      first half.
    #   2. Predict the second half with that forest; flows it gets wrong are
    #      relabelled 'ambiguous' and a second ("reinforced") forest is fitted
    #      on the relabelled second half.
    #   3. Classify the test flows with the second forest, drop flows predicted
    #      'ambiguous' or whose top class probability is below the threshold,
    #      and score the remaining ones.
    # The mean precision / recall / F1 / accuracy (in percent) and the mean
    # percentage of non-ambiguous flows that passed the threshold are printed
    # for every threshold.
    import _pickle as pickle
    from sklearn import ensemble
    import random
    from sklearn.metrics import (accuracy_score, f1_score, precision_score,
                                 recall_score, classification_report,
                                 confusion_matrix)
    import numpy as np

    ##########
    ##########

    # TRAINING_PICKLE = 'motog-old-65-withnoise-statistical.p'      # 1a
    TRAINING_PICKLE = 'trunc-dataset1a-noisefree-statistical.p'  # 1a
    # TESTING_PICKLE  = 'motog-new-65-withnoise-statistical.p'      # 2
    TESTING_PICKLE = 'trunc-dataset2-noisefree-statistical.p'  # 2

    # Label given to training flows the first classifier misclassifies.
    AMBIGUOUS = 'ambiguous'
    # Number of shuffle/train/test repetitions averaged per threshold.
    RUNS_PER_THRESHOLD = 5


    def _mean_or_nan(values):
        """Return the mean of *values*, or NaN when no run produced a score."""
        return np.mean(values) if values else float('nan')


    print('Loading pickles...')
    # NOTE(security): unpickling can execute arbitrary code -- only load
    # pickle files that come from a trusted source.
    # Each entry is unpacked below as a (package, time, flow) triple.
    with open(TRAINING_PICKLE, 'rb') as training_file:
        trainingflowlist = pickle.load(training_file, encoding='iso-8859-1')
    with open(TESTING_PICKLE, 'rb') as testing_file:
        testingflowlist = pickle.load(testing_file, encoding='iso-8859-1')
    print('Done...')
    print('')

    print('Training with ' + TRAINING_PICKLE + ': ' + str(len(trainingflowlist)))
    print('Testing with ' + TESTING_PICKLE + ': ' + str(len(testingflowlist)))
    print('')

    for THR in range(10):
        threshold = float(THR) / 10

        # Per-run scores for this threshold (all in percent).
        p = []  # macro precision
        r = []  # macro recall
        f = []  # macro F1
        a = []  # accuracy
        c = []  # share of non-ambiguous flows that passed the threshold

        for i in range(RUNS_PER_THRESHOLD):
            print(i)
            ########## PREPARE STUFF
            classifier = ensemble.RandomForestClassifier()
            classifier2 = ensemble.RandomForestClassifier()

            ########## GET FLOWS
            trainingexamples = [(flow, package)
                                for package, _time, flow in trainingflowlist]

            ########## SHUFFLE DATA to ensure classes are "evenly" distributed
            random.shuffle(trainingexamples)
            half = len(trainingexamples) // 2
            first_half = trainingexamples[:half]
            second_half = trainingexamples[half:]

            ########## TRAINING PART 1
            X1_train = [flow for flow, _package in first_half]
            y1_train = [package for _flow, package in first_half]
            classifier.fit(X1_train, y1_train)

            ########## TRAINING PART 2 (REINFORCEMENT)
            # Predict the held-out half in one batch; keep the true label where
            # the first classifier was right, otherwise mark the flow ambiguous.
            X2_train = [flow for flow, _package in second_half]
            predictions = classifier.predict(X2_train)
            y2_train = [
                package if prediction == package else AMBIGUOUS
                for (_flow, package), prediction in zip(second_half, predictions)
            ]
            print("Step Finished!!!!!!!!!!!")

            classifier2.fit(X2_train, y2_train)

            ########## TESTING
            tmpx_test = [flow for _package, _time, flow in testingflowlist]

            # The second classifier can also output the 'ambiguous' label.
            predictionss = classifier2.predict(tmpx_test)
            prediction_proba = classifier2.predict_proba(tmpx_test)
            print(prediction_proba[0])

            y_test = []
            y_pred = []
            totalflows = 0
            consideredflows = 0

            for (package, _time, _flow), prediction, proba in zip(
                    testingflowlist, predictionss, prediction_proba):
                if prediction == AMBIGUOUS:
                    continue
                totalflows += 1

                # Use THIS flow's class probabilities (not the first flow's)
                # and only score flows whose confidence reaches the threshold.
                if max(proba) >= threshold:
                    consideredflows += 1
                    y_test.append(package)
                    y_pred.append(prediction)

            # Guard against a run in which every flow was ambiguous.
            if totalflows:
                c.append(float(consideredflows) * 100 / totalflows)

            # Metrics are undefined when no flow passed the threshold.
            if y_test:
                p.append(precision_score(y_test, y_pred, average="macro") * 100)
                r.append(recall_score(y_test, y_pred, average="macro") * 100)
                f.append(f1_score(y_test, y_pred, average="macro") * 100)
                a.append(accuracy_score(y_test, y_pred) * 100)

        print('Threshold: ' + str(threshold))
        print(_mean_or_nan(p))
        print(_mean_or_nan(r))
        print(_mean_or_nan(f))
        print(_mean_or_nan(a))
        print(_mean_or_nan(c))
        print('')
    做一枚奔跑的老少年!
  • 相关阅读:
    使用css的类名交集复合选择器 《转》
    在Web.config或App.config中的添加自定义配置 <转>
    [转]给ListView加上ComboBox或是TextBox控件
    WinForm Timer控件,三级联动[省,市,区]
    WinForm用户窗体
    WinForm 进程 ,线程
    WinForm MDI窗体容器
    WinForm 控件TabelControl对TabelPage页的添加,删除操作
    [转]C#重绘TabControl的Tabpage标签,添加图片及关闭按钮
    WinForm sender初级应用
  • 原文地址:https://www.cnblogs.com/xiaoshayu520ly/p/10469420.html
Copyright © 2020-2023  润新知