In the previous section we covered the probability-computation (evaluation) problem for hidden Markov models; in this section we turn to the learning problem. Before introducing it, let us first implement a few of the key concepts in Python.
Note that the code below follows the original formulas in Li Hang's Statistical Learning Methods (《统计学习方法》), without taking logarithms. Because the forward and backward variables are raw products of probabilities, generating a sequence longer than about 300 observations produces obvious numerical underflow, so this code is only a toy. We present it first, and give the log-domain version afterwards.
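To see why, note that each forward step multiplies in one more probability, so alpha_t(i) decays roughly geometrically with t, while float64 cannot represent positive values much below np.finfo(float).tiny (about 2.2e-308). A minimal illustration of the decay, assuming a per-step factor of 0.1 (my own snippet, not from the book):

import numpy as np

# each forward step multiplies in a probability < 1, so the forward
# variable decays geometrically and eventually rounds to exactly 0.0
alpha = 1.0
for t in range(400):
    alpha *= 0.1
    if alpha == 0.0:
        print("alpha underflowed to 0.0 at step", t + 1)
        break
print("float64 smallest normal value:", np.finfo(float).tiny)

With that caveat in mind, here is the toy implementation: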
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
@author: XiangguoSun
@contact: sunxiangguodut@qq.com
@file: HMM.py
@time: 2017/3/27 12:40
@software: PyCharm
"""
import numpy as np
def simulate(model, T):
    """Sample an observation sequence of length T from the HMM (A, B, pi)."""
    A = model[0]
    B = model[1]
    pi = model[2]

    def draw_from(probs):
        # draw a single index according to the discrete distribution probs
        return np.where(np.random.multinomial(1, probs) == 1)[0][0]

    observations = np.zeros(T, dtype=int)
    states = np.zeros(T, dtype=int)
    states[0] = draw_from(pi)
    observations[0] = draw_from(B[states[0], :])
    for t in range(1, T):
        states[t] = draw_from(A[states[t - 1], :])
        observations[t] = draw_from(B[states[t], :])
    # return the distinct state labels that occurred; the backward pass
    # below only needs their count, not the full state path
    pp = np.unique(states)
    return observations, pp
def forward_prob(model, Observe):
    """HMM forward algorithm:
    (1) alpha_1(i) = pi_i * b_i(o_1)
    (2) alpha_{t+1}(i) = [sum_j alpha_t(j) * a_{ji}] * b_i(o_{t+1})
    (3) P(O|lambda) = sum_i alpha_T(i)
    """
    A, B, pi = model
    T = Observe.size
    alpha = pi * B[:, Observe[0]]                # (1) initial values alpha_1(i)
    alpha_all = np.copy(alpha).reshape((1, -1))
    for t in range(0, T - 1):                    # (2) recursion
        alpha = alpha.dot(A) * B[:, Observe[t + 1]]
        alpha_all = np.append(alpha_all, alpha.reshape((1, -1)), 0)
    return alpha.sum(), alpha_all                # (3) termination: P(O|lambda)
def backward_prob(model, Observe, States):
    """HMM backward algorithm:
    (1) beta_T(i) = 1
    (2) beta_t(i) = sum_j a_{ij} * b_j(o_{t+1}) * beta_{t+1}(j)
    (3) P(O|lambda) = sum_i pi_i * b_i(o_1) * beta_1(i)
    """
    A, B, pi = model
    M = States.size
    T = Observe.size
    beta = np.ones((M,))                         # (1) initial values beta_T(i)
    beta_all = np.copy(beta).reshape((1, -1))
    for t in range(T - 2, -1, -1):               # (2) recursion, t = T-2, ..., 0
        beta = A.dot(B[:, Observe[t + 1]] * beta)
        beta_all = np.append(beta_all, beta.reshape((1, -1)), 0)
    beta_all = beta_all[::-1, :]                 # reverse so row t holds beta_{t+1}
    prob = pi.dot(beta * B[:, Observe[0]])       # (3) termination: P(O|lambda)
    return prob, beta_all
def getPar(model, Observe, States):
    """Compute gamma, xi, and the expected counts needed by Baum-Welch."""
    A, B, pi = model
    T = Observe.size
    # get alpha_all and beta_all from the forward and backward passes
    prob_1, alpha_all = forward_prob(model, Observe)
    prob_2, beta_all = backward_prob(model, Observe, States)

    # gamma_t(i) = alpha_t(i) * beta_t(i) / sum_j alpha_t(j) * beta_t(j)
    denominator = (alpha_all * beta_all).sum(1)
    gamma = alpha_all * beta_all / denominator.reshape((-1, 1))
    # rows of gamma index the time t, columns index the state i

    # xi_t(i,j) proportional to alpha_t(i) * a_{ij} * b_j(o_{t+1}) * beta_{t+1}(j)
    item_t = []
    for t in range(0, T - 1):
        item_t.append(((alpha_all[t] * A.T).T) * (B[:, Observe[t + 1]] * beta_all[t + 1]))
    ProOut_t = np.array(item_t)
    denomin = ProOut_t.sum(1).sum(1)
    xi = ProOut_t / denomin.reshape(-1, 1, 1)
    # axis 0 of xi indexes the time t, axes 1 and 2 index the state pair (i, j)

    # expected counts used in the re-estimation formulas
    iTga = gamma.sum(0)             # sum_{t=1..T}   gamma_t(i)
    iT_1ga = gamma[0:-1, :].sum(0)  # sum_{t=1..T-1} gamma_t(i)
    ijT_1xi = xi.sum(0)             # sum_{t=1..T-1} xi_t(i, j)
    return gamma, xi, iTga, iT_1ga, ijT_1xi
def baum_welch(Observe, States, modell, epsilon=0.001):
    """Baum-Welch re-estimation, iterated until the parameters stop moving:
    a_{ij} <- sum_t xi_t(i,j) / sum_t gamma_t(i)
    b_j(k) <- sum_{t: o_t = k} gamma_t(j) / sum_t gamma_t(j)
    pi_i   <- gamma_1(i)
    """
    model = modell
    A, B, pi = model
    M = B.shape[1]
    done = False
    while not done:
        gamma, xi, iTga, iT_1ga, ijT_1xi = getPar(model, Observe, States)
        # normalize each row i of the expected transition counts
        new_A = ijT_1xi / iT_1ga.reshape((-1, 1))
        bb = []
        for k in range(0, M):
            I = np.where(k == Observe, 1, 0)   # indicator of o_t == k
            gamma_temp = (gamma.T * I).T
            bb.append(gamma_temp.sum(0) / iTga)
        new_B = np.array(bb).T
        new_pi = gamma[0]
        if (np.max(abs(new_A - A)) < epsilon and
                np.max(abs(new_B - B)) < epsilon and
                np.max(abs(new_pi - pi)) < epsilon):
            done = True
        A = new_A
        B = new_B
        pi = new_pi
        model = (A, B, pi)
    return model
if __name__ == '__main__':
    # the true model parameters:
    A = np.array([[0.5, 0.2, 0.3],
                  [0.3, 0.5, 0.2],
                  [0.2, 0.3, 0.5]])
    B = np.array([[0.5, 0.5],
                  [0.4, 0.6],
                  [0.7, 0.3]])
    pi = np.array([0.2, 0.4, 0.4])
    model = (A, B, pi)

    # generate test data from the true model
    Observe, States = simulate(model, 100)
    print("Observe\n", Observe)
    print("States\n", States)

    # initialize the model to be estimated
    iniA = np.array([[0.3, 0.3, 0.3],
                     [0.3, 0.3, 0.3],
                     [0.3, 0.3, 0.3]])
    iniB = np.array([[0.5, 0.5],
                     [0.5, 0.5],
                     [0.5, 0.5]])
    inipi = np.array([0.3, 0.3, 0.3])
    inimodel = (iniA, iniB, inipi)
    model = baum_welch(Observe, States, inimodel, 0.001)
    print("A: ")
    print(model[0])
    print("B: ")
    print(model[1])
    print("pi: ")
    print(model[2])
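A quick sanity check worth appending to the __main__ block (my addition, not part of the original script): the forward and backward passes compute the same quantity P(O|lambda), so their returned probabilities should agree up to floating-point error.

    # forward and backward must agree on P(O|lambda)
    p_f, _ = forward_prob(model, Observe)
    p_b, _ = backward_prob(model, Observe, States)
    print("forward  P(O|lambda):", p_f)
    print("backward P(O|lambda):", p_b)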
Judging from the experimental results, the estimate of A is the most accurate, B is somewhat worse, and pi is the least accurate. For HMM parameter estimation, the unsupervised Baum-Welch method is not especially strong; it needs considerably more sample data.
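The promised log-domain code comes later. In the meantime, here is a minimal sketch of the standard alternative, per-step scaling, which renormalizes alpha at every step and accumulates the log-likelihood as a running sum instead of a raw product. The function name forward_prob_scaled and its interface are my own, assuming the same (A, B, pi) model tuple used above:

import numpy as np

def forward_prob_scaled(model, Observe):
    """Scaled forward pass: returns log P(O|lambda) without underflow."""
    A, B, pi = model
    T = Observe.size
    alpha = pi * B[:, Observe[0]]
    log_prob = 0.0
    for t in range(T):
        c = alpha.sum()          # scaling factor c_t
        log_prob += np.log(c)    # log P(O|lambda) = sum_t log c_t
        alpha = alpha / c        # keep alpha in a safe numeric range
        if t < T - 1:
            alpha = alpha.dot(A) * B[:, Observe[t + 1]]
    return log_prob

With this version a sequence of length 300, or 30,000, still returns a finite log P(O|lambda), whereas forward_prob above would silently underflow to 0.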