Python 统计频率
import csv
import numpy as np

# Column index of the class label l in every row.
LABEL_COLUMN = 5
# Number of attribute columns a0..a4 that precede the label column.
ATTRIBUTE_COUNT = 5


# the method for parse data into a matrix
def input_data_to_matrix(path: str = 'binary_data.csv'):
    """Read a comma-separated file of integers into a list of rows.

    Every non-blank line becomes one row; every cell is converted with
    ``int``. The parsed matrix is also printed as a NumPy array.

    Args:
        path: CSV file to read. Defaults to ``'binary_data.csv'`` in the
            current working directory.

    Returns:
        A ``(Matrix, N)`` tuple: the list of integer rows and the number
        of rows it contains.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a cell is not a valid integer literal.
    """
    # The context manager closes the file even if parsing fails;
    # newline='' is the mode the csv module documents for its readers.
    with open(path, newline='') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        # csv.reader yields [] for blank lines; skip them so later column
        # lookups do not fail with IndexError.
        Matrix = [[int(cell) for cell in row] for row in csv_reader if row]
    N = len(Matrix)
    print(" Showing the matrix: ")
    print(np.array(Matrix))
    return Matrix, N


# the method to compute p(ai=?|l=?)
def p(i: int, ai: int, l: int, Matrix: list):
    """Print and return the empirical conditional probability p(a[i]=ai | l).

    Args:
        i: column index of the attribute.
        ai: attribute value to count.
        l: label value to condition on (compared against LABEL_COLUMN).
        Matrix: list of rows, each indexable at ``i`` and LABEL_COLUMN.

    Returns:
        The fraction of rows labelled ``l`` whose column ``i`` equals
        ``ai``, or NaN when no row carries label ``l`` (the probability
        is undefined in that case).
    """
    labelled = [row for row in Matrix if row[LABEL_COLUMN] == l]
    N = len(labelled)
    n = sum(1 for row in labelled if row[i] == ai)
    # Avoid ZeroDivisionError when the conditioning label never occurs.
    prob = n / N if N else float('nan')
    print("p(a[{}]={}|l={})={}".format(i, ai, l, prob))
    return prob


def main():
    """Run the three parts: load the data, print priors, print conditionals."""
    # part 1
    # Reading data and parse its content into a matrix
    # (20 scores)
    Matrix, N = input_data_to_matrix()

    # part 2
    # Compute the prior probabilities p(l = 0) and p(l = 1)
    # (20 scores)
    print(" Result of p(l=0) and p(l=1) ")
    n = [0, 0]
    for a in Matrix:
        # Any label other than 0 is counted as class 1.
        if a[LABEL_COLUMN] == 0:
            n[0] += 1
        else:
            n[1] += 1
    for i in range(2):
        # An empty data set has no defined prior; report NaN, not a crash.
        prior = n[i] / N if N else float('nan')
        print("p(l={})={}".format(i, prior))

    # part 3
    # Compute the conditional probabilities
    # p(ai=0|l=0),i=0,1,2,3,4 and p(ai=1|l=0),i=0,1,2,3,4,
    # p(ai=0|l=1),i=0,1,2,3,4 and p(ai=1|l=1),i=0,1,2,3,4
    # (60 scores)
    No = 1
    for l in range(2):
        for ai in range(2):
            print(" Result Group_{} of: p(ai={}|l={}),i=0,1,2,3,4 ".format(No, ai, l))
            No += 1
            for i in range(ATTRIBUTE_COUNT):
                p(i, ai, l, Matrix)


if __name__ == '__main__':
    main()