话不多说,直接怼代码,有不懂的,可以留言
简单的实现,前后端的语音交互。
import os from uuid import uuid4 from aip import AipSpeech from aip import AipNlp import settings """ 你的 APPID AK SK """ APP_ID = '11617876' API_KEY = 'KqqpO9GclBimrcSNrSANPhUQ' SECRET_KEY = 'xc7IFW4w6DVtuNQlMkBX05Ulhx5Mm5zh' client = AipSpeech(APP_ID, API_KEY, SECRET_KEY) nlp_client = AipNlp(APP_ID, API_KEY, SECRET_KEY) # 音频转为文本 def audio2text(file_name): file_path = os.path.join(settings.AUDIO_PCM_DIR, file_name) cmd_str = f'ffmpeg -y -i {file_path} -acodec pcm_s16le -f s16le -ac 1 -ar 16000 {file_path}.pcm' os.system(cmd_str) with open(f'{file_path}.pcm', 'rb') as f: audio_context = f.read() res = client.asr(audio_context, 'pcm', 16000, { "dev_pid": 1537 }) print("res",res) if res.get('err_no'): return res return res.get('result')[0] # 文本转为音频 def text2audio(text): file_name = f"{uuid4()}.mp3" file_path = os.path.join(settings.AUDIO_DIR, file_name) res = client.synthesis(text, 'zh', 1, { "vol": 5, 'pit': 7, "spd": 4, "per": 4 }) if isinstance(res, dict): return res with open(file_path, 'wb') as f: f.write(res) return file_name # 词法的匹配分析 def my_nlp(text): print("text", nlp_client.simnet('你今年几岁了', text)) if nlp_client.simnet('你今年几岁了', text).get('score') >= 0.72: return '我今年73了,不然84也行' elif nlp_client.simnet('你叫什么名字', text).get('score') >= 0.72: return '我的名字叫傻逼' elif nlp_client.simnet('你在哪儿工作',text).get('score') >= 0.72: return '京东' else: return '不知道你在说什么'
html页面
<!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <title>Title</title> </head> <body> <audio autoplay="autoplay" controls id="play_mp3" src="http://127.0.0.1:5000/getfile/ad74226f-3646-40d5-9548-7e817f1cb405.mp3"> 浏览器不支持 </audio> <button onclick="start_reco()">开始录音</button> <button onclick="stop_reco()">结束录音</button> </body> <script type="text/javascript" src="/static/Recorder.js"></script> <script type="text/javascript"> //建立 websocket 连接 var ws = new WebSocket('ws://127.0.0.1:5000/upload'); // ws.onopen = function () { // ws.send("hello") // }; // 如何录音 var reco = null; // 创建一个录音audiocontext对象 var audio_context = new AudioContext(); // 解决浏览器的兼容问题 navigator.getUserMedia = (navigator.getUserMedia || navigator.webkitGetUserMedia || navigator.mozGetUserMedia || navigator.msGetUserMedia); navigator.getUserMedia({audio:true},create_stream,function (err) { console.log(err) }); function create_stream(stream) { var stream_input = audio_context.createMediaStreamSource(stream); reco = new Recorder(stream_input) } // 开始录音 function start_reco() { reco.record(); } // 结束录音 function stop_reco() { reco.stop(); get_audio(); reco.clear(); } // 向后端发送录制的音频 function get_audio() { reco.exportWAV(function (wav_file) { ws.send(wav_file) }) } // 接收后端发送的信息,触发回调函数 ws.onmessage = function (ev) { // 从ev.data中获取参数 var data = JSON.parse(ev.data); document.getElementById("play_mp3").src = "http://127.0.0.1:5000/getfile/"+data.filename; } </script> </html>
后端flask 实现逻辑
import os import json from uuid import uuid4 from geventwebsocket.handler import WebSocketHandler from geventwebsocket.websocket import WebSocket from gevent.pywsgi import WSGIServer from flask import Flask,request,render_template,send_file import baidu_aip import settings app = Flask(__name__) # type:Flask @app.route('/index') def index(): return render_template('index.html') @app.route('/getfile/<filename>') def getfile(filename): file_path = os.path.join(settings.AUDIO_DIR,filename) return send_file(file_path) @app.route('/upload') def upload(): ws = request.environ.get('wsgi.websocket') if not ws: return '请使用websocket连接' while True: message = ws.receive() if isinstance(message,bytearray): # if message: file_name = f"{uuid4()}.wav" file_path = os.path.join(settings.AUDIO_PCM_DIR,file_name) with open(file_path,'wb') as f: f.write(message) asr_str = baidu_aip.audio2text(file_name) answer = baidu_aip.my_nlp(asr_str) file_mp3_name = baidu_aip.text2audio(answer) send_dic = json.dumps({ "filename":file_mp3_name, "play_type":'audio', "sendtime":111 }) ws.send(send_dic) else: ws.close() if __name__ == '__main__': http_server = WSGIServer(('127.0.0.1',5000),app,handler_class=WebSocketHandler) http_server.serve_forever()
附上Recorder.js源码
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.Recorder = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){ "use strict"; module.exports = require("./recorder").Recorder; },{"./recorder":2}],2:[function(require,module,exports){ 'use strict'; var _createClass = (function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i];descriptor.enumerable = descriptor.enumerable || false;descriptor.configurable = true;if ("value" in descriptor) descriptor.writable = true;Object.defineProperty(target, descriptor.key, descriptor); } }return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps);if (staticProps) defineProperties(Constructor, staticProps);return Constructor; }; })(); Object.defineProperty(exports, "__esModule", { value: true }); exports.Recorder = undefined; var _inlineWorker = require('inline-worker'); var _inlineWorker2 = _interopRequireDefault(_inlineWorker); function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } var Recorder = exports.Recorder = (function () { function Recorder(source, cfg) { var _this = this; _classCallCheck(this, Recorder); this.config = { bufferLen: 4096, numChannels: 2, mimeType: 'audio_pcm/wav' }; this.recording = false; this.callbacks = { getBuffer: [], exportWAV: [] }; Object.assign(this.config, cfg); this.context = source.context; this.node = (this.context.createScriptProcessor || this.context.createJavaScriptNode).call(this.context, this.config.bufferLen, this.config.numChannels, this.config.numChannels); this.node.onaudioprocess = function (e) { if (!_this.recording) return; var buffer = []; for (var channel = 0; channel < _this.config.numChannels; channel++) { buffer.push(e.inputBuffer.getChannelData(channel)); } _this.worker.postMessage({ command: 'record', buffer: buffer }); }; source.connect(this.node); this.node.connect(this.context.destination); //this should not be necessary var self = {}; this.worker = new _inlineWorker2.default(function () { var recLength = 0, recBuffers = [], sampleRate = undefined, numChannels = undefined; self.onmessage = function (e) { switch (e.data.command) { case 'init': init(e.data.config); break; case 'record': record(e.data.buffer); break; case 'exportWAV': exportWAV(e.data.type); break; case 'getBuffer': getBuffer(); break; case 'clear': clear(); break; } }; function init(config) { sampleRate = config.sampleRate; numChannels = config.numChannels; initBuffers(); } function record(inputBuffer) { for (var channel = 0; channel < numChannels; channel++) { recBuffers[channel].push(inputBuffer[channel]); } recLength += inputBuffer[0].length; } function exportWAV(type) { var buffers = []; for (var channel = 0; channel < numChannels; channel++) { buffers.push(mergeBuffers(recBuffers[channel], recLength)); } var interleaved = undefined; if (numChannels === 2) { interleaved = interleave(buffers[0], buffers[1]); } else { interleaved = buffers[0]; } var dataview = encodeWAV(interleaved); var audioBlob = new Blob([dataview], { type: type }); self.postMessage({ command: 'exportWAV', data: audioBlob }); } function getBuffer() { var buffers = []; for (var channel = 0; channel < numChannels; channel++) { buffers.push(mergeBuffers(recBuffers[channel], recLength)); } self.postMessage({ command: 'getBuffer', data: buffers }); } function clear() { recLength = 0; recBuffers = []; initBuffers(); } function initBuffers() { for (var channel = 0; channel < numChannels; channel++) { recBuffers[channel] = []; } } function mergeBuffers(recBuffers, recLength) { var result = new Float32Array(recLength); var offset = 0; for (var i = 0; i < recBuffers.length; i++) { result.set(recBuffers[i], offset); offset += recBuffers[i].length; } return result; } function interleave(inputL, inputR) { var length = inputL.length + inputR.length; var result = new Float32Array(length); var index = 0, inputIndex = 0; while (index < length) { result[index++] = inputL[inputIndex]; result[index++] = inputR[inputIndex]; inputIndex++; } return result; } function floatTo16BitPCM(output, offset, input) { for (var i = 0; i < input.length; i++, offset += 2) { var s = Math.max(-1, Math.min(1, input[i])); output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true); } } function writeString(view, offset, string) { for (var i = 0; i < string.length; i++) { view.setUint8(offset + i, string.charCodeAt(i)); } } function encodeWAV(samples) { var buffer = new ArrayBuffer(44 + samples.length * 2); var view = new DataView(buffer); /* RIFF identifier */ writeString(view, 0, 'RIFF'); /* RIFF chunk length */ view.setUint32(4, 36 + samples.length * 2, true); /* RIFF type */ writeString(view, 8, 'WAVE'); /* format chunk identifier */ writeString(view, 12, 'fmt '); /* format chunk length */ view.setUint32(16, 16, true); /* sample format (raw) */ view.setUint16(20, 1, true); /* channel count */ view.setUint16(22, numChannels, true); /* sample rate */ view.setUint32(24, sampleRate, true); /* byte rate (sample rate * block align) */ view.setUint32(28, sampleRate * 4, true); /* block align (channel count * bytes per sample) */ view.setUint16(32, numChannels * 2, true); /* bits per sample */ view.setUint16(34, 16, true); /* data chunk identifier */ writeString(view, 36, 'data'); /* data chunk length */ view.setUint32(40, samples.length * 2, true); floatTo16BitPCM(view, 44, samples); return view; } }, self); this.worker.postMessage({ command: 'init', config: { sampleRate: this.context.sampleRate, numChannels: this.config.numChannels } }); this.worker.onmessage = function (e) { var cb = _this.callbacks[e.data.command].pop(); if (typeof cb == 'function') { cb(e.data.data); } }; } _createClass(Recorder, [{ key: 'record', value: function record() { this.recording = true; } }, { key: 'stop', value: function stop() { this.recording = false; } }, { key: 'clear', value: function clear() { this.worker.postMessage({ command: 'clear' }); } }, { key: 'getBuffer', value: function getBuffer(cb) { cb = cb || this.config.callback; if (!cb) throw new Error('Callback not set'); this.callbacks.getBuffer.push(cb); this.worker.postMessage({ command: 'getBuffer' }); } }, { key: 'exportWAV', value: function exportWAV(cb, mimeType) { mimeType = mimeType || this.config.mimeType; cb = cb || this.config.callback; if (!cb) throw new Error('Callback not set'); this.callbacks.exportWAV.push(cb); this.worker.postMessage({ command: 'exportWAV', type: mimeType }); } }], [{ key: 'forceDownload', value: function forceDownload(blob, filename) { var url = (window.URL || window.webkitURL).createObjectURL(blob); var link = window.document.createElement('a'); link.href = url; link.download = filename || 'output.wav'; var click = document.createEvent("Event"); click.initEvent("click", true, true); link.dispatchEvent(click); } }]); return Recorder; })(); exports.default = Recorder; },{"inline-worker":3}],3:[function(require,module,exports){ "use strict"; module.exports = require("./inline-worker"); },{"./inline-worker":4}],4:[function(require,module,exports){ (function (global){ "use strict"; var _createClass = (function () { function defineProperties(target, props) { for (var key in props) { var prop = props[key]; prop.configurable = true; if (prop.value) prop.writable = true; } Object.defineProperties(target, props); } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; })(); var _classCallCheck = function (instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } }; var WORKER_ENABLED = !!(global === global.window && global.URL && global.Blob && global.Worker); var InlineWorker = (function () { function InlineWorker(func, self) { var _this = this; _classCallCheck(this, InlineWorker); if (WORKER_ENABLED) { var functionBody = func.toString().trim().match(/^functions*w*s*([ws,]*)s*{([wW]*?)}$/)[1]; var url = global.URL.createObjectURL(new global.Blob([functionBody], { type: "text/javascript" })); return new global.Worker(url); } this.self = self; this.self.postMessage = function (data) { setTimeout(function () { _this.onmessage({ data: data }); }, 0); }; setTimeout(function () { func.call(self); }, 0); } _createClass(InlineWorker, { postMessage: { value: function postMessage(data) { var _this = this; setTimeout(function () { _this.self.onmessage({ data: data }); }, 0); } } }); return InlineWorker; })(); module.exports = InlineWorker; }).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) },{}]},{},[1])(1) });