• mfcc的特征提取python 代码实现和解析


     1 #!/usr/bin/python
     2 # -*- coding: UTF-8 -*-
     3 
     4 import numpy
     5 import scipy.io.wavfile
     6 from matplotlib import pyplot as plt
     7 from scipy.fftpack import dct
     8 
     9 sample_rate,signal=scipy.io.wavfile.read('stop.wav')
    10 
    11 print(sample_rate,len(signal))
    12 #读取前3.5s 的数据
    13 signal=signal[0:int(3.5*sample_rate)]
    14 print(signal)
    15 
    16 
    17 
    18 #预先处理
    19 pre_emphasis = 0.97
    20 emphasized_signal = numpy.append(signal[0], signal[1:] - pre_emphasis * signal[:-1])
    21 
    22 
    23 frame_size=0.025
    24 frame_stride=0.1
    25 frame_length,frame_step=frame_size*sample_rate,frame_stride*sample_rate
    26 signal_length=len(emphasized_signal)
    27 frame_length=int(round(frame_length))
    28 frame_step=int(round(frame_step))
    29 num_frames=int(numpy.ceil(float(numpy.abs(signal_length-frame_length))/frame_step))
    30 
    31 
    32 pad_signal_length=num_frames*frame_step+frame_length
    33 z=numpy.zeros((pad_signal_length-signal_length))
    34 pad_signal=numpy.append(emphasized_signal,z)
    35 
    36 
    37 indices = numpy.tile(numpy.arange(0, frame_length), (num_frames, 1)) + numpy.tile(numpy.arange(0, num_frames * frame_step, frame_step), (frame_length, 1)).T
    38 
    39 frames = pad_signal[numpy.mat(indices).astype(numpy.int32, copy=False)]
    40 
    41 #加上汉明窗
    42 frames *= numpy.hamming(frame_length)
    43 # frames *= 0.54 - 0.46 * numpy.cos((2 * numpy.pi * n) / (frame_length - 1))  # Explicit Implementation **
    44 
    45 #傅立叶变换和功率谱
    46 NFFT = 512
    47 mag_frames = numpy.absolute(numpy.fft.rfft(frames, NFFT))  # Magnitude of the FFT
    48 #print(mag_frames.shape)
    49 pow_frames = ((1.0 / NFFT) * ((mag_frames) ** 2))  # Power Spectrum
    50 
    51 
    52 
    53 low_freq_mel = 0
    54 #将频率转换为Mel
    55 nfilt = 40
    56 high_freq_mel = (2595 * numpy.log10(1 + (sample_rate / 2) / 700))
    57 mel_points = numpy.linspace(low_freq_mel, high_freq_mel, nfilt + 2)  # Equally spaced in Mel scale
    58 hz_points = (700 * (10**(mel_points / 2595) - 1))  # Convert Mel to Hz
    59 
    60 bin = numpy.floor((NFFT + 1) * hz_points / sample_rate)
    61 
    62 fbank = numpy.zeros((nfilt, int(numpy.floor(NFFT / 2 + 1))))
    63 
    64 for m in range(1, nfilt + 1):
    65     f_m_minus = int(bin[m - 1])   # left
    66     f_m = int(bin[m])             # center
    67     f_m_plus = int(bin[m + 1])    # right
    68     for k in range(f_m_minus, f_m):
    69         fbank[m - 1, k] = (k - bin[m - 1]) / (bin[m] - bin[m - 1])
    70     for k in range(f_m, f_m_plus):
    71         fbank[m - 1, k] = (bin[m + 1] - k) / (bin[m + 1] - bin[m])
    72 filter_banks = numpy.dot(pow_frames, fbank.T)
    73 filter_banks = numpy.where(filter_banks == 0, numpy.finfo(float).eps, filter_banks)  # Numerical Stability
    74 filter_banks = 20 * numpy.log10(filter_banks)  # dB
    75 
    76 num_ceps = 12
    77 mfcc = dct(filter_banks, type=2, axis=1, norm='ortho')[:, 1 : (num_ceps + 1)]
    78 (nframes, ncoeff) = mfcc.shape
    79 
    80 n = numpy.arange(ncoeff)
    81 cep_lifter =22
    82 lift = 1 + (cep_lifter / 2) * numpy.sin(numpy.pi * n / cep_lifter)
    83 mfcc *= lift  #*
    84 
    85 #filter_banks -= (numpy.mean(filter_banks, axis=0) + 1e-8)
    86 mfcc -= (numpy.mean(mfcc, axis=0) + 1e-8)
    87 
    88 print(mfcc.shape)
    89 plt.plot(filter_banks)
    90 
    91 plt.show()

    测试结果:

  • 相关阅读:
    UITabbarItem只显示图标
    [转]translatesAutoresizingMaskIntoConstraints详解
    [转载]podfile语法
    获取数组NSArray元素的className
    HTTP的FormData和Payload的传输格式
    WCDB错误"No matching constructor for initialization of 'WCTColumnBinding'"
    UIStakView的添加与移除
    为什么说Python采用的是基于值的内存管理模式?
    PostgreSQL数据库
    标准库 time
  • 原文地址:https://www.cnblogs.com/dylancao/p/9790707.html
Copyright © 2020-2023  润新知