欲知更多,请关注公众号:音频探险记
LPC
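线性预测系数的基本思想:由于语音样点之间存在着相关性,那么当前点/未来点可以用过去的p个样本点进行预测,即
$$\hat{x}(n) = \sum_{k=1}^{p} a_k\, x(n-k)$$
其中 $a_k$($k = 1, \dots, p$)就是要求的LPC,$p$ 表示预测阶数。注意:下面代码中的 lpc() 返回的是预测误差滤波器 $A(z) = 1 - \sum_{k=1}^{p} a_k z^{-k}$ 的系数,即 $[1, -a_1, \dots, -a_p]$。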
好处:提供了一种估计声道模型及其模型参数的方法,广泛用于语音识别以及语音合成中。
import librosa
import python_speech_features
import soundfile as sf
import numpy as np
def lpc(y, order):
    """Estimate linear-prediction coefficients with Burg's method.

    Parameters
    ----------
    y : array_like, 1-D
        Signal samples. Integer or boolean input is promoted to float64
        (otherwise the coefficients would be truncated to integers);
        floating-point input keeps its own precision.
    order : int
        Prediction order ``p``.

    Returns
    -------
    numpy.ndarray
        ``order + 1`` coefficients ``[1, a_1, ..., a_p]`` of the
        prediction-error filter, i.e. ``e[n] = y[n] + sum_k a_k * y[n - k]``.

    Raises
    ------
    FloatingPointError
        If the error-energy denominator becomes non-positive (for example
        an all-zero signal).
    """
    y = np.asarray(y)
    if y.dtype.kind in "biu":
        # Integer buffers would silently truncate every coefficient.
        y = y.astype(np.float64)
    dtype = y.dtype.type

    # Two buffers that are swapped each iteration: the current order's
    # coefficients are built from the previous order's.
    ar_coeffs = np.zeros(order + 1, dtype=dtype)
    ar_coeffs[0] = dtype(1)
    ar_coeffs_prev = np.zeros(order + 1, dtype=dtype)
    ar_coeffs_prev[0] = dtype(1)

    # Forward and backward prediction errors of the order-0 predictor.
    fwd_pred_error = y[1:]
    bwd_pred_error = y[:-1]
    den = (np.dot(fwd_pred_error, fwd_pred_error)
           + np.dot(bwd_pred_error, bwd_pred_error))

    for i in range(order):
        if den <= 0:
            raise FloatingPointError("numerical error, input ill-conditioned?")

        # Reflection coefficient minimising the summed forward and
        # backward error energy.
        reflect_coeff = dtype(-2) * np.dot(bwd_pred_error, fwd_pred_error) / dtype(den)

        # Levinson-style update: a_j <- a_j + k * a_{i + 1 - j}, j = 1..i+1.
        ar_coeffs_prev, ar_coeffs = ar_coeffs, ar_coeffs_prev
        ar_coeffs[1:i + 2] = (ar_coeffs_prev[1:i + 2]
                              + reflect_coeff * ar_coeffs_prev[i::-1])

        # Update both error sequences from their pre-update values.
        fwd_pred_error_tmp = fwd_pred_error
        fwd_pred_error = fwd_pred_error + reflect_coeff * bwd_pred_error
        bwd_pred_error = bwd_pred_error + reflect_coeff * fwd_pred_error_tmp

        # Recursive denominator update; the two edge samples leave the
        # analysis window when the error sequences are shortened below.
        q = dtype(1) - reflect_coeff ** 2
        den = q * den - bwd_pred_error[-1] ** 2 - fwd_pred_error[0] ** 2

        fwd_pred_error = fwd_pred_error[1:]
        bwd_pred_error = bwd_pred_error[:-1]

    return ar_coeffs
# Load the example recording (samples and sample rate).
y, sr = sf.read('q1.wav')
# Frame length in samples and the number of whole frames in the signal.
# Neither value is used by the call below, which analyses the full signal.
frame_size = 160
num_frames = len(y) // frame_size
# Print the order-32 LPC vector (33 values, the first one being 1).
print(lpc(y, 32))
'''
[ 1.00000000e+00 -3.95327600e+00 8.29868847e+00 -1.27752183e+01
1.60320420e+01 -1.71512784e+01 1.59802135e+01 -1.29520778e+01
8.83717438e+00 -4.58646820e+00 8.91619704e-01 1.80827086e+00
-3.30606685e+00 3.68847432e+00 -3.13823922e+00 2.08868507e+00
-1.04485702e+00 1.47865339e-01 6.35567557e-01 -1.15391128e+00
1.35048967e+00 -1.27918423e+00 9.65718801e-01 -5.09474786e-01
5.94380366e-03 4.28867366e-01 -7.08129489e-01 8.19126446e-01
-7.55779509e-01 5.73570390e-01 -3.63595930e-01 1.78320700e-01
-4.54597679e-02]
'''
LPCC
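LPCC全称线性预测倒谱系数(linear predictive cepstral coefficient, LPCC),在计算得到LPC后,可以按照如下的递推公式计算得到LPCC:
$$c_1 = a_1$$
$$c_n = a_n + \sum_{k=1}^{n-1} \frac{k}{n}\, c_k\, a_{n-k}, \quad 1 < n \le p$$
$$c_n = \sum_{k=n-p}^{n-1} \frac{k}{n}\, c_k\, a_{n-k}, \quad n > p$$
其中 $a_k$ 为预测系数(即 $\hat{x}(n) = \sum_{k=1}^{p} a_k x(n-k)$ 中的 $a_k$),$p$ 为LPC阶数,$c_n$ 为第 $n$ 个LPCC。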
LPCC是LPC系数在倒谱域的表示,计算量小易于实现,对元音的描述能力较好,对辅音的描述能力较差,抗噪性能差[1]
[1] 数字语音处理及MATLAB仿真
相应代码如下
import librosa
import python_speech_features
import soundfile as sf
import numpy as np
def lpc(y, order):
    """Estimate linear-prediction coefficients with Burg's method.

    Parameters
    ----------
    y : array_like, 1-D
        Signal samples. Integer or boolean input is promoted to float64
        (otherwise the coefficients would be truncated to integers);
        floating-point input keeps its own precision.
    order : int
        Prediction order ``p``.

    Returns
    -------
    numpy.ndarray
        ``order + 1`` coefficients ``[1, a_1, ..., a_p]`` of the
        prediction-error filter, i.e. ``e[n] = y[n] + sum_k a_k * y[n - k]``.

    Raises
    ------
    FloatingPointError
        If the error-energy denominator becomes non-positive (for example
        an all-zero signal).
    """
    y = np.asarray(y)
    if y.dtype.kind in "biu":
        # Integer buffers would silently truncate every coefficient.
        y = y.astype(np.float64)
    dtype = y.dtype.type

    # Two buffers that are swapped each iteration: the current order's
    # coefficients are built from the previous order's.
    ar_coeffs = np.zeros(order + 1, dtype=dtype)
    ar_coeffs[0] = dtype(1)
    ar_coeffs_prev = np.zeros(order + 1, dtype=dtype)
    ar_coeffs_prev[0] = dtype(1)

    # Forward and backward prediction errors of the order-0 predictor.
    fwd_pred_error = y[1:]
    bwd_pred_error = y[:-1]
    den = (np.dot(fwd_pred_error, fwd_pred_error)
           + np.dot(bwd_pred_error, bwd_pred_error))

    for i in range(order):
        if den <= 0:
            raise FloatingPointError("numerical error, input ill-conditioned?")

        # Reflection coefficient minimising the summed forward and
        # backward error energy.
        reflect_coeff = dtype(-2) * np.dot(bwd_pred_error, fwd_pred_error) / dtype(den)

        # Levinson-style update: a_j <- a_j + k * a_{i + 1 - j}, j = 1..i+1.
        ar_coeffs_prev, ar_coeffs = ar_coeffs, ar_coeffs_prev
        ar_coeffs[1:i + 2] = (ar_coeffs_prev[1:i + 2]
                              + reflect_coeff * ar_coeffs_prev[i::-1])

        # Update both error sequences from their pre-update values.
        fwd_pred_error_tmp = fwd_pred_error
        fwd_pred_error = fwd_pred_error + reflect_coeff * bwd_pred_error
        bwd_pred_error = bwd_pred_error + reflect_coeff * fwd_pred_error_tmp

        # Recursive denominator update; the two edge samples leave the
        # analysis window when the error sequences are shortened below.
        q = dtype(1) - reflect_coeff ** 2
        den = q * den - bwd_pred_error[-1] ** 2 - fwd_pred_error[0] ** 2

        fwd_pred_error = fwd_pred_error[1:]
        bwd_pred_error = bwd_pred_error[:-1]

    return ar_coeffs
# Load the example recording (samples and sample rate).
y, sr = sf.read('q1.wav')

# LPC analysis order p and the number of cepstral coefficients to produce.
lpc_order = 32
lpcc_order = 48

# lpc() returns the prediction-error filter [1, a_1, ..., a_p], i.e.
# A(z) = 1 + sum_k a_k z^-k.
lpc_coeff = lpc(y, lpc_order)

# The cepstral recursion is stated for the predictor coefficients of the
# all-pole model 1 / (1 - sum_k alpha_k z^-k), so alpha_k = -a_k.
pred_coeff = -lpc_coeff

lpcc_coeff = np.zeros(lpcc_order)
# c_0 is kept at the value the original script used; it never enters the
# recursion because its weight k / m is zero.
# NOTE(review): conventionally c_0 carries the log gain - confirm intent.
lpcc_coeff[0] = lpc_coeff[0]

# c_m = alpha_m + sum_{k=1}^{m-1} (k/m) c_k alpha_{m-k}      for 1 <= m <= p
# c_m =           sum_{k=m-p}^{m-1} (k/m) c_k alpha_{m-k}    for m > p
for m in range(1, lpcc_order):
    # The direct term alpha_m only exists up to and including m = p.
    acc = pred_coeff[m] if m <= lpc_order else 0.0
    # Accumulate over every valid k; alpha_{m-k} requires m - k <= p.
    for k in range(max(1, m - lpc_order), m):
        acc += (k / m) * lpcc_coeff[k] * pred_coeff[m - k]
    lpcc_coeff[m] = acc

# NOTE: the sample output recorded after this script was produced by the
# earlier, non-accumulating recursion and will differ from what this prints.
print(lpcc_coeff)
print(lpc_coeff)
'''
[ 1.00000000e+00 -3.95327600e+00 1.61128841e+01 -5.52410036e+01
1.79819243e+02 -5.85851356e+02 1.94600697e+03 -6.60704007e+03
2.28633585e+04 -8.03469568e+04 2.85871218e+05 -1.02738713e+06
3.72307949e+06 -1.35861755e+07 4.98734770e+07 -1.84019377e+08
6.82011923e+08 -2.53758247e+09 9.47444366e+09 -3.54837702e+10
1.33263280e+11 -5.01739551e+11 1.89335516e+12 -7.15952267e+12
2.71242538e+13 -1.02940475e+14 3.91300106e+14 -1.48962409e+15
5.67857747e+15 -2.16748811e+16 8.28305613e+16 -3.16889100e+17
2.21650898e+18 -1.23415673e+19 6.99397095e+19 -3.97121614e+20
2.25665287e+21 -1.28331886e+22 7.30333576e+22 -4.15919083e+23
2.37018398e+24 -1.35153342e+25 7.71134177e+25 -4.40229745e+26
2.51457017e+27 -1.43705211e+28 8.21666908e+28 -4.70028689e+29]
'''