# Text-to-speech batch script (variant 1): read a UTF-8 text file, cut the text
# into fixed-size chunks and synthesize every chunk to its own MP3 file with
# Baidu's AipSpeech client.
import math
import os
import sys

# Sentence-delimiter characters; adjust as needed.
# NOTE(review): the first two entries are plain spaces in this copy of the
# file; they may originally have been escape sequences such as '\n' that were
# lost in transcription -- confirm. The multi-character entries ('...',
# '、、、') can never equal a single character, so Cut() never splits on them.
cutlist = [' ', ' ', '。', ';', '?', '.', ';', '?', '...', '、、、', ':']


def FindToken(cutlist, char):
    """Return True if ``char`` is one of the delimiters in ``cutlist``."""
    return char in cutlist


def Cut(cutlist, lines):
    """Split ``lines`` into sentences, each ending with its delimiter.

    Args:
        cutlist: collection of delimiter characters.
        lines: the text to split, iterated one character at a time.

    Returns:
        A list of sentence strings. Text after the last delimiter is not
        returned (this matches the original behaviour).
    """
    sentences = []  # finished sentences, delimiter included
    pending = []    # characters collected since the previous delimiter
    for ch in lines:
        pending.append(ch)
        if FindToken(cutlist, ch):
            sentences.append(''.join(pending))
            pending = []
    return sentences


# Filled only by an earlier sentence-by-sentence pass that has since been
# removed; kept so the module-level name still exists.
r_s = []

# NOTE(security): API credentials are hard-coded in source. They are left
# untouched here, but should be moved to configuration/environment and rotated.
bd_k_l = ['11059852', '5Kk01GtG2fjCwpzEkwdn0mjw', 'bp6Wyx377Elq7RsCQZzTBgGUFzLm8G2A']
APP_ID, API_KEY, SECRET_KEY = bd_k_l

# Number of characters sent to the service per synthesis request.
bd_str_per_limit = 1024

# Backslashes are doubled on purpose: written with single backslashes this
# literal is invalid ('\U' starts a unicode escape and the final '\' would
# escape the closing quote).
mp3_dir = 'C:\\Users\\sas\\PycharmProjects\\produce_video\\result_liukeyun\\'
uid = 'liukeyuanCAKE_whole_para'


def collect_text(lines):
    """Concatenate ``lines``, skipping any that are empty once spaces are removed.

    NOTE(review): the ' ' literal is kept exactly as it appears in this copy of
    the file; it may originally have been '\n' (i.e. "skip blank lines") --
    confirm.
    """
    return ''.join(line for line in lines if line.replace(' ', ''))


def split_into_chunks(text, limit):
    """Return ``text`` cut into consecutive pieces of at most ``limit`` characters.

    Raises:
        ValueError: if ``limit`` is not a positive number.
    """
    if limit <= 0:
        raise ValueError('limit must be positive')
    pieces = math.ceil(len(text) / limit)
    return [text[k * limit:(k + 1) * limit] for k in range(pieces)]


def build_mp3_path(index):
    """Return the output MP3 path for the chunk numbered ``index``."""
    return '{}{}{}{}{}{}{}'.format(
        mp3_dir, 'g3db', uid, 'g3uid', 'bd_str_per_limit', index, '.mp3')


def main():
    """Read the source text and write one MP3 file per chunk."""
    # Imported here so the helpers above can be imported without the Baidu SDK.
    from aip import AipSpeech

    with open('mybaidu.parp.b.txt', 'r', encoding='utf-8') as fr:
        str_ = collect_text(fr)

    # One client is enough for all requests.
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    for i, cut_str in enumerate(split_into_chunks(str_, bd_str_per_limit)):
        # Send only the current chunk; the original passed the whole text on
        # every iteration and never used the chunk it had just computed.
        result = client.synthesis(cut_str, 'zh', 1, {'vol': 5})
        # Per the original author's note, success yields the audio bytes and
        # failure yields a dict describing the error.
        if isinstance(result, dict):
            print('synthesis failed for chunk {}: {}'.format(i, result),
                  file=sys.stderr)
            continue
        with open(build_mp3_path(i), 'wb') as f:
            f.write(result)


if __name__ == '__main__':
    main()
    # Hard exit with status 2, as in the original script.
    # NOTE(review): presumably there to stop any code that follows in this
    # file from running -- confirm. The flattened original does not show
    # whether this call sat inside the loop; it is placed after it so that
    # every chunk gets written.
    os._exit(2)
# Effect of newline characters (换行符影响)
# Text-to-speech batch script (variant 2): read a UTF-8 text file, remove the
# ' ' literal from every line, cut the text into fixed-size chunks and
# synthesize every chunk to its own MP3 file with Baidu's AipSpeech client.
import math
import os
import sys

# Sentence-delimiter characters; adjust as needed.
# NOTE(review): the first two entries are plain spaces in this copy of the
# file; they may originally have been escape sequences such as '\n' that were
# lost in transcription -- confirm. The multi-character entries ('...',
# '、、、') can never equal a single character, so Cut() never splits on them.
cutlist = [' ', ' ', '。', ';', '?', '.', ';', '?', '...', '、、、', ':']


def FindToken(cutlist, char):
    """Return True if ``char`` is one of the delimiters in ``cutlist``."""
    return char in cutlist


def Cut(cutlist, lines):
    """Split ``lines`` into sentences, each ending with its delimiter.

    Args:
        cutlist: collection of delimiter characters.
        lines: the text to split, iterated one character at a time.

    Returns:
        A list of sentence strings. Text after the last delimiter is not
        returned (this matches the original behaviour).
    """
    sentences = []  # finished sentences, delimiter included
    pending = []    # characters collected since the previous delimiter
    for ch in lines:
        pending.append(ch)
        if FindToken(cutlist, ch):
            sentences.append(''.join(pending))
            pending = []
    return sentences


# Filled only by an earlier sentence-by-sentence pass that has since been
# removed; kept so the module-level name still exists.
r_s = []

# NOTE(security): API credentials are hard-coded in source. They are left
# untouched here, but should be moved to configuration/environment and rotated.
bd_k_l = ['11059852', '5Kk01GtG2fjCwpzEkwdn0mjw', 'bp6Wyx377Elq7RsCQZzTBgGUFzLm8G2A']
APP_ID, API_KEY, SECRET_KEY = bd_k_l

# Number of characters sent to the service per synthesis request.
bd_str_per_limit = 1024

# Backslashes are doubled on purpose: written with single backslashes this
# literal is invalid ('\U' starts a unicode escape and the final '\' would
# escape the closing quote).
mp3_dir = 'C:\\Users\\sas\\PycharmProjects\\produce_video\\result_liukeyun\\'
uid = 'liukeyuanCAKE_whole_para'


def collect_text(lines):
    """Concatenate ``lines`` with every ' ' removed, skipping lines left empty.

    NOTE(review): the ' ' literal is kept exactly as it appears in this copy of
    the file. The '_NO_trN_' marker in the output file name suggests it may
    originally have been a newline escape -- confirm.
    """
    stripped = (line.replace(' ', '') for line in lines)
    return ''.join(piece for piece in stripped if piece)


def split_into_chunks(text, limit):
    """Return ``text`` cut into consecutive pieces of at most ``limit`` characters.

    Raises:
        ValueError: if ``limit`` is not a positive number.
    """
    if limit <= 0:
        raise ValueError('limit must be positive')
    pieces = math.ceil(len(text) / limit)
    return [text[k * limit:(k + 1) * limit] for k in range(pieces)]


def build_mp3_path(index):
    """Return the output MP3 path for the chunk numbered ``index``."""
    return '{}{}{}{}{}{}{}{}'.format(
        mp3_dir, 'g3db', uid, 'g3uid', 'bd_str_per_limit', '_NO_trN_', index, '.mp3')


def main():
    """Read the source text, echo each chunk and write one MP3 file per chunk."""
    # Imported here so the helpers above can be imported without the Baidu SDK.
    from aip import AipSpeech

    with open('mybaidu.parp.b.txt', 'r', encoding='utf-8') as fr:
        str_ = collect_text(fr)

    # One client is enough for all requests.
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    for i, cut_str in enumerate(split_into_chunks(str_, bd_str_per_limit)):
        print(cut_str)
        # Flushed because the script ends with os._exit(), which does not
        # flush stdio buffers.
        print('----------------------------------', flush=True)
        result = client.synthesis(cut_str, 'zh', 1, {'vol': 5})
        # Per the original author's note, success yields the audio bytes and
        # failure yields a dict describing the error.
        if isinstance(result, dict):
            print('synthesis failed for chunk {}: {}'.format(i, result),
                  file=sys.stderr)
            continue
        with open(build_mp3_path(i), 'wb') as f:
            f.write(result)


if __name__ == '__main__':
    main()
    # Hard exit with status 2, as in the original script.
    # NOTE(review): presumably there to stop any code that follows in this
    # file from running -- confirm. The flattened original does not show
    # whether this call sat inside the loop; it is placed after it so that
    # every chunk gets written.
    os._exit(2)
# Text-to-speech batch script (variant 3): read a UTF-8 text file, remove the
# ' ' literal from every line, cut the text into 300-character chunks and
# synthesize every chunk to its own MP3 file with Baidu's AipSpeech client.
import math
import os
import sys

# Sentence-delimiter characters; adjust as needed.
# NOTE(review): the first two entries are plain spaces in this copy of the
# file; they may originally have been escape sequences such as '\n' that were
# lost in transcription -- confirm. The multi-character entries ('...',
# '、、、') can never equal a single character, so Cut() never splits on them.
cutlist = [' ', ' ', '。', ';', '?', '.', ';', '?', '...', '、、、', ':']


def FindToken(cutlist, char):
    """Return True if ``char`` is one of the delimiters in ``cutlist``."""
    return char in cutlist


def Cut(cutlist, lines):
    """Split ``lines`` into sentences, each ending with its delimiter.

    Args:
        cutlist: collection of delimiter characters.
        lines: the text to split, iterated one character at a time.

    Returns:
        A list of sentence strings. Text after the last delimiter is not
        returned (this matches the original behaviour).
    """
    sentences = []  # finished sentences, delimiter included
    pending = []    # characters collected since the previous delimiter
    for ch in lines:
        pending.append(ch)
        if FindToken(cutlist, ch):
            sentences.append(''.join(pending))
            pending = []
    return sentences


# Filled only by an earlier sentence-by-sentence pass that has since been
# removed; kept so the module-level name still exists.
r_s = []

# NOTE(security): API credentials are hard-coded in source. They are left
# untouched here, but should be moved to configuration/environment and rotated.
bd_k_l = ['11059852', '5Kk01GtG2fjCwpzEkwdn0mjw', 'bp6Wyx377Elq7RsCQZzTBgGUFzLm8G2A']
APP_ID, API_KEY, SECRET_KEY = bd_k_l

# Number of characters sent to the service per synthesis request
# (an earlier setting of 1024 was replaced by 300).
bd_str_per_limit = 300

# Backslashes are doubled on purpose: written with single backslashes this
# literal is invalid ('\U' starts a unicode escape and the final '\' would
# escape the closing quote).
mp3_dir = 'C:\\Users\\sas\\PycharmProjects\\produce_video\\result_liukeyun\\'
uid = 'CAKE'


def collect_text(lines):
    """Concatenate ``lines`` with every ' ' removed, skipping lines left empty.

    NOTE(review): this copy of the file applied the same ' ' replacement twice
    in the emptiness check, which is redundant as written; the two literals
    were probably different escape sequences (the 'noBRBlankLine' file marker
    hints at line breaks) before being lost in transcription -- confirm. The
    visible behaviour is kept unchanged.
    """
    stripped = (line.replace(' ', '') for line in lines)
    return ''.join(piece for piece in stripped if piece)


def split_into_chunks(text, limit):
    """Return ``text`` cut into consecutive pieces of at most ``limit`` characters.

    Raises:
        ValueError: if ``limit`` is not a positive number.
    """
    if limit <= 0:
        raise ValueError('limit must be positive')
    pieces = math.ceil(len(text) / limit)
    return [text[k * limit:(k + 1) * limit] for k in range(pieces)]


def build_mp3_path(index):
    """Return the output MP3 path for the chunk numbered ``index``."""
    return '{}{}{}{}{}{}{}{}'.format(
        mp3_dir, 'g3db', uid, 'g3uid', 'noBRBlankLine', '', index, '.mp3')


def main():
    """Read the source text, echo each chunk and write one MP3 file per chunk."""
    # Imported here so the helpers above can be imported without the Baidu SDK.
    from aip import AipSpeech

    with open('mybaidu.parp.b.txt', 'r', encoding='utf-8') as fr:
        str_ = collect_text(fr)

    # One client is enough for all requests.
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    for i, cut_str in enumerate(split_into_chunks(str_, bd_str_per_limit)):
        print(cut_str)
        # Flushed because the script ends with os._exit(), which does not
        # flush stdio buffers.
        print('----------------------------------', flush=True)
        result = client.synthesis(cut_str, 'zh', 1, {'vol': 5})
        # Per the original author's note, success yields the audio bytes and
        # failure yields a dict describing the error.
        if isinstance(result, dict):
            print('synthesis failed for chunk {}: {}'.format(i, result),
                  file=sys.stderr)
            continue
        with open(build_mp3_path(i), 'wb') as f:
            f.write(result)


if __name__ == '__main__':
    main()
    # Hard exit with status 2, as in the original script.
    # NOTE(review): presumably there to stop any code that follows in this
    # file from running -- confirm. The flattened original does not show
    # whether this call sat inside the loop; it is placed after it so that
    # every chunk gets written.
    os._exit(2)