I typed this out by following code I found online, changed a few small things, and it deepened my understanding a bit.
I still haven't worked through the EM algorithm for training the HMM; I'll continue with that next time.
Reference link: http://www.cnblogs.com/hanahimi/p/4011765.html
# -*- coding: utf-8 -*-

'''
function: learning to implement an HMM based on code found online: the forward and
          backward algorithms for the observation probability, the Viterbi algorithm
          for the hidden state sequence, and (later) learning the model parameters
date: 2017.8.9
'''

import numpy as np

class HMM(object):
    """A discrete HMM defined by the transition matrix Ann, the emission matrix Bnm,
    and the initial state distribution piln."""
    def __init__(self, Ann, Bnm, piln):
        self.A = Ann
        self.B = Bnm
        self.pi = piln
        self.N = self.A.shape[0] # number of hidden states
        self.M = self.B.shape[1] # number of distinct observation symbols

    # print the HMM parameters
    def printHmm(self):
        print("=======================================")
        print('hmm content N = ', self.N, ' M = ', self.M)
        for i in range(self.N):
            if i == 0:
                print('hmm.A ', self.A[i,:], 'hmm.B', self.B[i,:])
            else:
                print('      ', self.A[i,:], '     ', self.B[i,:])
        print('hmm.pi', self.pi)
        print('========================================')

    '''
    function: Viterbi algorithm
    input: A, B, pi, O
    output: the hidden state path that maximizes the joint probability P(O, I | lambda),
            together with that probability
    '''
    def viterbi(self, O):
        T = len(O) # length of the observation sequence

        # initialization; time indices run from 0 to T-1
        # delta has T rows and N columns: one row per time step, one column per state
        delta = np.zeros((T, self.N), float) # delta[t,i]: highest probability of any path ending in state i at time t
        phi = np.zeros((T, self.N), int)     # phi[t,i]: predecessor state on that best path
        I = np.zeros(T, int)                 # decoded state sequence (np.zeros defaults to float, so give int explicitly)
        for i in range(self.N):
            delta[0,i] = self.pi[i] * self.B[i,O[0]] # probability of starting in state i and emitting O[0]
            phi[0,i] = 0 # at t = 0 there is no predecessor

        # recursion
        for t in range(1, T): # start from t = 1
            for i in range(self.N):
                delta[t,i] = self.B[i,O[t]] * np.array([delta[t-1,j]*self.A[j,i]
                                                        for j in range(self.N)]).max()
                phi[t,i] = np.array([delta[t-1,j]*self.A[j,i] for j in range(self.N)]).argmax()
        # termination
        prob = delta[T-1,:].max()      # probability of the single best path
        I[T-1] = delta[T-1,:].argmax() # the best path ends in this state; backtrack from here
        # recover the state sequence by backtracking
        for t in range(T-2, -1, -1): # from T-2 down to 0, step -1
            I[t] = phi[t+1, I[t+1]]
        return I, prob

    '''
    function: forward algorithm, computes the probability of the observation sequence O
    input: A, B, pi, O
    output: prob
    '''
    def forward(self, O):
        T = len(O)
        alpha = np.zeros((T, self.N), float) # alpha[t,i]: probability of observing O[0..t] and being in state i at time t

        # initialization
        for i in range(self.N):
            alpha[0,i] = self.pi[i] * self.B[i, O[0]]

        # recursion
        for t in range(T-1):
            for i in range(self.N): # B[i,O[t+1]] could also be factored out of the inner sum
                alpha[t+1,i] = np.array([alpha[t,j]*self.A[j,i]*self.B[i,O[t+1]]
                                         for j in range(self.N)]).sum()

        # termination
        prob = np.array([alpha[T-1,j] for j in range(self.N)]).sum()
        return prob

    '''
    function: backward algorithm, also computes the probability of the observation sequence O
    '''
    def backward(self, O):
        T = len(O)
        beta = np.zeros((T, self.N), float) # beta[t,i]: probability of observing O[t+1..T-1] given state i at time t

        # initialization: start from the last time step and work backwards
        for i in range(self.N):
            beta[T-1, i] = 1
        # recursion
        for t in range(T-2, -1, -1):
            for i in range(self.N):
                beta[t,i] = np.array([self.A[i,j] * self.B[j,O[t+1]] * beta[t+1,j]
                                      for j in range(self.N)]).sum()
        # termination
        prob = np.array([self.pi[j] * self.B[j,O[0]] * beta[0,j] for j in range(self.N)]).sum()

        return prob

if __name__ == '__main__':

    print('python my HMM')

    # HMM model parameters (each row of A and B sums to 1)
    A = np.array([[0.8125, 0.1875], [0.2, 0.8]])
    B = np.array([[0.875, 0.125], [0.25, 0.75]])
    pi = [0.5, 0.5]
    O = [[1,0,0,1,1,0,0,0,0],
         [1,1,0,1,0,0,1,1,0],
         [0,0,1,1,0,0,1,1,1]]

    hmm = HMM(A, B, pi) # build the HMM

    # forward algorithm: probability of producing the observation sequence O
    prob = hmm.forward(O[0])
    print('probability of O from the forward algorithm: ' + str(prob))

    # backward algorithm: the same probability computed from the other direction
    prob = hmm.backward(O[0])
    print('probability of O from the backward algorithm: ' + str(prob))
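    # Extra sanity check added here (not in the original post): the forward and
    # backward algorithms both compute P(O | lambda), so their results should
    # agree up to floating point rounding error.
    assert abs(hmm.forward(O[0]) - hmm.backward(O[0])) < 1e-10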
    # Viterbi algorithm: the most likely hidden state path
    path, prob2 = hmm.viterbi(O[0])
    print('most likely state path for O: ' + str(path))
    print('probability of that path: ' + str(prob2))

    hmm.printHmm()
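As an extra check of my own (this is not in the referenced post), the snippet below brute-forces the same quantities by enumerating every possible hidden state sequence, which is only feasible for toy sequences this short. It assumes the HMM class and the hmm / O variables defined above are in scope, and the function name brute_force_check is just something I made up for illustration; the total should match forward()/backward(), and the best path should match viterbi().

import itertools

def brute_force_check(hmm, O):
    # Enumerate all N**T hidden state sequences (exponential, toy-sized input only).
    T = len(O)
    total = 0.0                       # accumulates P(O | lambda)
    best_prob, best_path = 0.0, None  # the single most likely state sequence
    for path in itertools.product(range(hmm.N), repeat=T):
        # joint probability P(O, I | lambda) of this particular state sequence I
        p = hmm.pi[path[0]] * hmm.B[path[0], O[0]]
        for t in range(1, T):
            p *= hmm.A[path[t-1], path[t]] * hmm.B[path[t], O[t]]
        total += p
        if p > best_prob:
            best_prob, best_path = p, path
    return total, best_prob, list(best_path)

total, best_prob, best_path = brute_force_check(hmm, O[0])
print('brute force P(O|lambda): ' + str(total))
print('brute force best path: ' + str(best_path) + ' with probability ' + str(best_prob))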