• 关于语音合成和识别


    最近研究了下语音合成和语音识别,分别看了一些文章,也下载 SDK 写了些代码测试了下。

    发现对于语音合成,就中文来说,百度语音和科大讯飞基本都差不多。

    英文的话,百度合成出来的效果不佳,科大讯飞稍好点,但是总体都没有国外的语音合成好,比如 iSpeech、FreeTTS,可能是因为国外的母语大多是英语的缘故吧。

    百度日调用额度比较多,据说有2万额度。讯飞每天就500,有点少。iSpeech 是要收费的。FreeTTS 可以离线使用。

    百度识别和合成代码:

    public class SoundAPI
    {
    	private static final Logger logger = LoggerFactory.getLogger(SoundAPI.class);
    	/** Base folder for generated audio files, read from the "download.folder" configuration key. */
    	final static String FILE_PATH = Config.getString("download.folder");
    	// Baidu credentials (APP ID / API key / secret key); the literals below are placeholders.
    	private static final String APP_ID = "你的APP ID";
    	private static final String API_KEY = "你的key";
    	private static final String SECRET_KEY = "你的秘钥";
    	/** Shared AipSpeech client, created on first use by iniAPI(). */
    	private static AipSpeech client = null;
    	/** Wall-clock time (ms) at which the client was last created; 0 means never. */
    	private static long iniTime = 0L;
    	/**
    	 * Thirty days expressed in milliseconds. The leading {@code 30L} forces long
    	 * arithmetic: in int arithmetic 30 * 24 * 60 * 60 * 1000 (2,592,000,000)
    	 * exceeds Integer.MAX_VALUE and wraps to a negative number, which would make
    	 * every "older than a month" comparison true.
    	 */
    	private static final long MONTH_TIME = 30L * 24 * 60 * 60 * 1000;
    	/** Shared Base64 codec used to derive file names. */
    	private static final Base64 base64 = new Base64();
    
    	/**
    	 * Makes sure the shared AipSpeech client exists and is not older than
    	 * MONTH_TIME milliseconds; otherwise a new client is created and the
    	 * creation timestamp is refreshed.
    	 */
    	private static void iniAPI()
    	{
    		long now = System.currentTimeMillis();
    		boolean expired = now - iniTime > MONTH_TIME;
    		if (client != null && !expired)
    		{
    			// Existing client is still fresh enough; nothing to do.
    			return;
    		}
    		client = new AipSpeech(APP_ID, API_KEY, SECRET_KEY);
    		// Connection timeout of 2000 ms.
    		client.setConnectionTimeoutInMillis(2000);
    		iniTime = System.currentTimeMillis();
    	}
    
    	/**
    	 * Synthesizes {@code text} through the Baidu client and stores the returned
    	 * audio as an mp3 file below FILE_PATH.
    	 *
    	 * @param text         text to synthesize; an empty value returns "" immediately
    	 * @param fileName     logical name used to derive the stored file name (Base64 of its UTF-8 bytes)
    	 * @param questionType selects the language: ENGLISH_WORD uses "en", everything else "zh"
    	 * @return the relative file name ("zh/xxx.mp3" or "en/xxx.mp3"), or "" when
    	 *         synthesis returned no audio or an exception occurred
    	 */
    	public static String getSoundMp3(String text, String fileName, QuestionTypeEnum questionType)
    	{
    		if (StringUtils.isEmpty(text))
    		{
    			return "";
    		}

    		String rtnfileName = "";
    		try
    		{
    			iniAPI();

    			// Chinese is the default; only English-word questions switch to English.
    			String type = "zh";
    			if (QuestionTypeEnum.ENGLISH_WORD.getType().equals(questionType.getType()))
    			{
    				type = "en";
    			}

    			TtsResponse res = client.synthesis(text, type, 1, null);
    			byte[] data = res.getData();
    			if (data != null)
    			{
    				// Encode with an explicit charset so the derived file name does not
    				// depend on the platform default encoding.
    				// NOTE(review): standard Base64 output can contain '/' and '+'; a '/'
    				// would add an extra path segment here - confirm whether that matters.
    				String encodedName = base64
    						.encodeToString(fileName.getBytes(java.nio.charset.StandardCharsets.UTF_8));
    				rtnfileName = type + "/" + encodedName.replace("=", "") + ".mp3";
    				String path = FILE_PATH + rtnfileName;
    				File file = new File(path);
    				// An already existing file is reused and not overwritten.
    				if (!file.exists())
    				{
    					Util.writeBytesToFileSystem(data, path);
    				}
    			} else
    			{
    				// The "{}" placeholder is required; without it SLF4J discards the argument.
    				JSONObject jsonObj = res.getResult();
    				logger.error("invoke baidu synthesis API error: {}", jsonObj);
    			}
    		} catch (Exception e)
    		{
    			rtnfileName = "";
    			logger.error("invoke baidu synthesis API error:", e);
    		}

    		return rtnfileName;
    	}
    
    	/**
    	 * Sends the audio file at {@code filePath} to the Baidu recognition API
    	 * (format "pcm", rate 16000) and returns the recognized text.
    	 *
    	 * @param filePath     path of the audio file; an empty value returns "" immediately
    	 * @param questionType ENGLISH_WORD adds the option dev_pid=1737; other types pass no options
    	 * @return the text extracted by getResult, or "" when an exception occurred
    	 */
    	public static String recognizeSound(String filePath, QuestionTypeEnum questionType)
    	{
    		if (StringUtils.isEmpty(filePath))
    		{
    			return "";
    		}

    		String recognized = "";
    		try
    		{
    			iniAPI();

    			// Options stay null unless the question is an English word.
    			HashMap<String, Object> asrOptions = null;
    			boolean english = QuestionTypeEnum.ENGLISH_WORD.getType().equals(questionType.getType());
    			if (english)
    			{
    				asrOptions = new HashMap<>();
    				// presumably 1737 selects Baidu's English model - confirm against the Baidu docs
    				asrOptions.put("dev_pid", 1737);
    			}

    			JSONObject response = client.asr(filePath, "pcm", 16000, asrOptions);
    			recognized = getResult(response);
    		} catch (Exception e)
    		{
    			logger.error("invoke baidu asr API error:", e);
    		}

    		return recognized;
    	}
    
    	/**
    	 * Extracts the recognized text from a Baidu recognition response.
    	 *
    	 * When "err_no" is 0, the entries of the "result" array are joined with ';'
    	 * (empty entries after the first are skipped) and every "," is removed.
    	 * Otherwise the response is logged and "" is returned.
    	 *
    	 * @param asrRes response object returned by the recognition call
    	 * @return the joined text, or "" on an error response
    	 */
    	private static String getResult(JSONObject asrRes)
    	{
    		if (asrRes.getInt("err_no") != 0)
    		{
    			// The "{}" placeholder is required; without it SLF4J drops the response
    			// and only the bare message is logged.
    			logger.error("invoke baidu asr API error: {}", asrRes);
    			return "";
    		}

    		JSONArray arrayResult = asrRes.getJSONArray("result");
    		StringBuilder sbResult = new StringBuilder();
    		for (int i = 0; i < arrayResult.length(); i++)
    		{
    			String candidate = arrayResult.get(i).toString();
    			if (i == 0)
    			{
    				sbResult.append(candidate);
    			} else if (!StringUtils.isEmpty(candidate))
    			{
    				sbResult.append(';').append(candidate);
    			}
    		}

    		// Literal replacement; no regular expression is needed for a single character.
    		// NOTE(review): confirm whether the character to strip should be the
    		// full-width comma instead of the ASCII one.
    		return sbResult.toString().replace(",", "");
    	}
    

      科大讯飞的语音识别及合成

    public class IatAPI
    {
    	private static final Logger logger = LoggerFactory.getLogger(IatAPI.class);
    	/**
    	 * iFlytek speech recognition (IAT) web API; written with reference to
    	 * https://github.com/IflytekAIUI/DemoCode/blob/master/webapi/java/Iat.java
    	 */
    	// Application id sent in the X-Appid header (placeholder value).
    	final static String APPID = "你的APPID";
    	// API key mixed into the X-CheckSum digest (placeholder value).
    	final static String APPKEY_IAT = "你的秘钥";
    	// Endpoint the recognition request is posted to.
    	final static String URL_IAT = "http://api.xfyun.cn/v1/service/v1/iat";
    	// Value sent in the X-Real-Ip header (placeholder value).
    	final static String IP = "服务器IP地址";
    
    	/**
    	 * Sends an audio file to the recognition endpoint and returns the raw
    	 * response body.
    	 *
    	 * The file is read, Base64-encoded and posted as the form field
    	 * {@code audio}. The Base64 text is URL-encoded because the request is
    	 * declared as application/x-www-form-urlencoded and Base64 output may
    	 * contain '+', '/' and '=', which are not safe in such a body.
    	 *
    	 * @param filePath path of the audio file to recognize
    	 * @return the response returned by HttpUtil.doPost
    	 * @throws Exception if reading the file, encoding or the HTTP call fails
    	 */
    	public static String process(String filePath) throws Exception
    	{
    		Map<String, String> header = getHeader("raw", "sms16k");
    		// Read the audio file into a byte array, then Base64-encode it.
    		byte[] audioByteArray = FileUtil.read2ByteArray(filePath);
    		String audioBase64 = new String(Base64.encodeBase64(audioByteArray), "UTF-8");
    		String bodyParam = "audio=" + java.net.URLEncoder.encode(audioBase64, "UTF-8");

    		return HttpUtil.doPost(URL_IAT, header, bodyParam);
    	}
    
    	/**
    	 * 组装http请求头
    	 * 
    	 * @param aue
    	 * @param resultLevel
    	 * @param language
    	 * @param category
    	 * @return
    	 * @throws UnsupportedEncodingException
    	 */
    	private static Map<String, String> getHeader(String aue, String engineType) throws UnsupportedEncodingException
    	{
    		// 系统当前时间戳
    		String X_CurTime = System.currentTimeMillis() / 1000L + "";
    		// 业务参数
    		String param = "{"aue":"" + aue + """ + ","engine_type":"" + engineType + ""}";
    		String X_Param = new String(Base64.encodeBase64(param.getBytes("UTF-8")));
    		// 接口密钥
    		String apiKey = APPKEY_IAT;
    		// 讯飞开放平台应用ID
    		String X_Appid = APPID;
    		// 生成令牌
    		String X_CheckSum = DigestUtils.md5Hex(apiKey + X_CurTime + X_Param);
    
    		// 组装请求头
    		Map<String, String> header = new HashMap<String, String>();
    		header.put("Content-Type", "application/x-www-form-urlencoded; charset=utf-8");
    		header.put("X-Param", X_Param);
    		header.put("X-CurTime", X_CurTime);
    		header.put("X-CheckSum", X_CheckSum);
    		header.put("X-Appid", X_Appid);
    		header.put("X-Real-Ip", IP);
    		return header;
    
    	}
    

      

    public class TtsAPI
    {
    	private static final Logger logger = LoggerFactory.getLogger(TtsAPI.class);
    	/**
    	 * iFlytek web API reference demo this code was written from:
    	 * https://github.com/IflytekAIUI/DemoCode/blob/master/webapi/java/Iat.java
    	 * NOTE(review): the link points at the recognition (Iat) demo, while this
    	 * class posts to the TTS endpoint below - confirm the intended reference.
    	 */
    	// Application id sent in the X-Appid header (placeholder value).
    	final static String APPID = "你的APP id";
    	// API key mixed into the X-CheckSum digest (placeholder value).
    	final static String APPKEY_TTS = "你的秘钥";
    	// Endpoint the synthesis request is posted to.
    	final static String URL_TTS = "http://api.xfyun.cn/v1/service/v1/tts";
    	// Value sent in the X-Real-Ip header (placeholder value).
    	final static String IP = "服务器地址";
    	// Folder the synthesized mp3 files are saved to, read from the "download.folder" configuration key.
    	final static String FILE_PATH = Config.getString("download.folder");
    
    	/**
    	 * Sends text to the synthesis endpoint and saves the returned audio as
    	 * "&lt;sid&gt;.mp3" under FILE_PATH.
    	 *
    	 * The text is URL-encoded before it is placed in the form body, so that
    	 * characters such as '&amp;', '=', '+', '%' and non-ASCII text are
    	 * transmitted intact. Failures are logged and not rethrown.
    	 *
    	 * @param text text to synthesize; a null or empty value is not sent to the service
    	 * @return the saved file name, or {@code null} when the text is empty or synthesis failed
    	 * @throws Exception declared for interface compatibility; failures are logged instead
    	 */
    	public static String process(String text) throws Exception
    	{
    		if (text == null || text.isEmpty())
    		{
    			// Nothing to synthesize; avoid posting the literal string "null".
    			return null;
    		}

    		String result = null;
    		long startTime = System.currentTimeMillis();
    		try
    		{
    			Map<String, String> header = getHeader("audio/L16;rate=16000", "lame", "xiaoyan", "50", "50", "", "text",
    					"50");
    			String body = "text=" + java.net.URLEncoder.encode(text, "UTF-8");
    			Map<String, Object> resultMap = HttpUtil.doMultiPost(URL_TTS, header, body);
    			if ("audio/mpeg".equals(resultMap.get("Content-Type")))
    			{
    				// Synthesis succeeded: the body holds the audio bytes.
    				String mp3Name = resultMap.get("sid") + ".mp3";
    				FileUtil.save(FILE_PATH, mp3Name, (byte[]) resultMap.get("body"));
    				result = mp3Name;
    			} else
    			{
    				// Synthesis failed: log whatever the service returned.
    				logger.error(resultMap.get("body").toString());
    			}
    		} catch (Exception e)
    		{
    			logger.error("there is error:", e);
    		}

    		long endTime = System.currentTimeMillis();
    		logger.info("finish get voice:{}", endTime - startTime);

    		return result;
    	}
    
    	/**
    	 * 组装http请求头
    	 * 
    	 * @param aue
    	 * @param resultLevel
    	 * @param language
    	 * @param category
    	 * @return
    	 * @throws UnsupportedEncodingException
    	 */
    	private static Map<String, String> getHeader(String auf, String aue, String voiceName, String speed, String volume,
    			String engineType, String textType, String pitch) throws UnsupportedEncodingException
    	{
    		String curTime = System.currentTimeMillis() / 1000L + "";
    		StringBuilder param = new StringBuilder("{"auf":"" + auf + """);
    		if (!StringUtil.isNullOrEmpty(aue))
    		{
    			param.append(","aue":"" + aue + """);
    		}
    		if (!StringUtil.isNullOrEmpty(voiceName))
    		{
    			param.append(","voice_name":"" + voiceName + """);
    		}
    		if (!StringUtil.isNullOrEmpty(speed))
    		{
    			param.append(","speed":"" + speed + """);
    		}
    		if (!StringUtil.isNullOrEmpty(volume))
    		{
    			param.append(","volume":"" + volume + """);
    		}
    		if (!StringUtil.isNullOrEmpty(pitch))
    		{
    			param.append(","pitch":"" + pitch + """);
    		}
    		if (!StringUtil.isNullOrEmpty(engineType))
    		{
    			param.append(","engine_type":"" + engineType + """);
    		}
    		if (!StringUtil.isNullOrEmpty(textType))
    		{
    			param.append(","text_type":"" + textType + """);
    		}
    		param.append("}");
    
    		String paramBase64 = new String(Base64.encodeBase64(param.toString().getBytes("UTF-8")));
    		String checkSum = DigestUtils.md5Hex(APPKEY_TTS + curTime + paramBase64);
    		Map<String, String> header = new HashMap<String, String>();
    		header.put("Content-Type", "application/x-www-form-urlencoded; charset=utf-8");
    		header.put("X-Param", paramBase64);
    		header.put("X-CurTime", curTime);
    		header.put("X-CheckSum", checkSum);
    		header.put("X-Real-Ip", IP);
    		header.put("X-Appid", APPID);
    		// logger.info(JSON.toJSONString(header));
    		return header;
    	}
    

      

  • 相关阅读:
    JAVA类型之间的转换
    Mysql语句
    Tomcat 优化
    JVM原理及调优
    static
    指针与引用
    sizeof
    遇到问题:c++ 直接cout输出char类型变量地址乱码
    编程过程中全面考虑问题的能力
    表、栈和队列
  • 原文地址:https://www.cnblogs.com/liguoyi/p/9231607.html
Copyright © 2020-2023  润新知