"""Train and evaluate a random-forest classifier on pickled flow records.

The script loads a list of ``(package, time, flow)`` records from
``PICKLE_NAME``, then performs ``NUM_RUNS`` independent experiments.  Each
experiment shuffles the records, trains on the first ``TRAINTESTBOUNDARY``
fraction, predicts the remainder and prints macro-averaged precision,
recall, F1 and plain accuracy.  After all runs the per-run scores and their
means are printed.

The ``random`` module is not seeded here, so the splits (and therefore the
scores) differ from one invocation to the next.
"""
import _pickle as pickle
import random

import numpy as np
from sklearn import svm, ensemble
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix

##########
##########

# Fraction of the shuffled records used for training; the rest is the test set.
TRAINTESTBOUNDARY = 0.75
# Number of independent shuffle/train/test repetitions to average over.
NUM_RUNS = 5
# Alternative dataset: 'lg-new-new-65-withnoise-statistical.p'
PICKLE_NAME = 'trunc-dataset1-noisefree-statistical.p'

print(f'Loading {PICKLE_NAME}...')
# SECURITY: unpickling executes arbitrary code embedded in the file, so
# PICKLE_NAME must only ever point at a trusted, locally produced dataset.
# NOTE(review): encoding='iso-8859-1' suggests the file was written by
# Python 2 -- confirm against the tool that produced it.
with open(PICKLE_NAME, 'rb') as pickle_file:
    flowlist = pickle.load(pickle_file, encoding='iso-8859-1')
print('Done...')
print('')

print(f'Flows loaded: {len(flowlist)}')

# Each record is (package, time, flow): the flow is the feature vector and the
# package is the class label; the time field is not used.  The pairs do not
# change between runs, so they are built once.
labelled_flows = [(flow, package) for package, _time, flow in flowlist]

# Per-run scores: precision, recall, F1 and accuracy.
p = []
r = []
f = []
a = []

for _ in range(NUM_RUNS):
    ########## PREPARE STUFF
    # Alternative classifier: svm.SVC(gamma=0.001, C=100, probability=True)
    classifier = ensemble.RandomForestClassifier()

    ########## GET FLOWS
    # Start every run from the same file order so only the shuffle differs.
    examples = list(labelled_flows)
    print('')

    ########## SHUFFLE DATA so the split does not depend on the file order
    random.shuffle(examples)

    split = int(TRAINTESTBOUNDARY * len(examples))

    ########## TRAINING
    trainingexamples = examples[:split]
    X_train = [flow for flow, _package in trainingexamples]
    y_train = [package for _flow, package in trainingexamples]

    print('Fitting classifier...')
    classifier.fit(X_train, y_train)
    print('Classifier fitted!')
    print('')

    ########## TESTING
    testingexamples = examples[split:]
    X_test = [flow for flow, _package in testingexamples]
    y_test = [package for _flow, package in testingexamples]

    y_pred = classifier.predict(X_test)

    # Compute each metric once and reuse it for both the report and the log.
    precision = precision_score(y_test, y_pred, average="macro")
    recall = recall_score(y_test, y_pred, average="macro")
    f1 = f1_score(y_test, y_pred, average="macro")
    accuracy = accuracy_score(y_test, y_pred)

    print("########################")
    print(precision)
    print(recall)
    print(f1)
    print(accuracy)
    print('')

    p.append(precision)
    r.append(recall)
    f.append(f1)
    a.append(accuracy)

# Per-run scores followed by their means across all runs.
print(p)
print(r)
print(f)
print(a)
print('')
print(np.mean(p))
print(np.mean(r))
print(np.mean(f))
print(np.mean(a))