• 语音切割


    # 设置分句的标志符号;可以根据实际需要进行修改
    # cutlist = "。!?".decode('utf-8')
    
    cutlist = ['
    ', '	', '。', ';', '?', '.', ';', '?', '...', '、、、', ':']
    
    
    # cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',':',',']
    # cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',',','、']
    
    
    # 检查某字符是否分句标志符号的函数;如果是,返回True,否则返回False
    def FindToken(cutlist, char):
        if char in cutlist:
            return True
        else:
            return False
    
    
    # 进行分句的核心函数
    def Cut(cutlist, lines):  # 参数1:引用分句标志符;参数2:被分句的文本,为一行中文字符
        l = []  # 句子列表,用于存储单个分句成功后的整句内容,为函数的返回值
        line = []  # 临时列表,用于存储捕获到分句标志符之前的每个字符,一旦发现分句符号后,就会将其内容全部赋给l,然后就会被清空
    
        for i in lines:  # 对函数参数2中的每一字符逐个进行检查 (本函数中,如果将if和else对换一下位置,会更好懂)
            if FindToken(cutlist, i):  # 如果当前字符是分句符号
                line.append(i)  # 将此字符放入临时列表中
                l.append(''.join(line))  # 并把当前临时列表的内容加入到句子列表中
                line = []  # 将符号列表清空,以便下次分句使用
            else:  # 如果当前字符不是分句符号,则将该字符直接放入临时列表中
                line.append(i)
        return l
    
    
    r_s = []
    # 以下为调用上述函数实现从文本文件中读取内容并进行分句。
    # with open('mybaidu.parp.b.txt','r',encoding='utf-8') as fr :
    #     for lines in fr:
    #         l = Cut(list(cutlist), list(lines))
    #         for line in l:
    #             if len(line.replace(' ', '')) == 0:
    #                 continue
    #             if line.strip() != "":
    #                 line=line.strip()
    #                 r_s.append(line)
    #
    #                 # li = line.strip().split()
    #                 # for sentence in li:
    #                 #     r_s.append(sentence)
    str_ = ''
    
    # cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',':',',','
    ']
    
    with open('mybaidu.parp.b.txt', 'r', encoding='utf-8') as fr:
        for lines in fr:
            if len(lines.replace(' ', '')) == 0:
                continue
            # str_='{}{}'.format(str_,lines.replace('
    ',''))
            # if len(lines.replace(' ','').replace('
    ',''))==0:
            #     continue
            str_ = '{}{}'.format(str_, lines)
            # l = Cut(list(cutlist), list(lines))
            # for line in l:
            #     if line.strip() != "":
            #         line=line.strip()
    
    
    from aip import AipSpeech
    
    bd_k_l = ['11059852', '5Kk01GtG2fjCwpzEkwdn0mjw', 'bp6Wyx377Elq7RsCQZzTBgGUFzLm8G2A']
    APP_ID, API_KEY, SECRET_KEY = bd_k_l
    
    
    import math
    bd_str_per_limit=1024
    rep_times=math.ceil(len(str_)/bd_str_per_limit)
    
    for i in range(rep_times):
        cut_str=str_[i*bd_str_per_limit:i*bd_str_per_limit+bd_str_per_limit]
    
        mp3_dir = 'C:\Users\sas\PycharmProjects\produce_video\result_liukeyun\'
    
        client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
        result = client.synthesis(str_, 'zh', 1, {
            'vol': 5,
        })
        uid = 'liukeyuanCAKE_whole_para'
        # 识别正确返回语音二进制 错误则返回dict 参照下面错误码
        f_w = '{}{}{}{}{}'.format(mp3_dir, 'g3db', uid, 'g3uid', '.mp3')
    
        if not isinstance(result, dict):
            # f_w = '{}{}{}{}'.format(mp3_dir, 'g3uid', uid, '.mp3')
            f_w = '{}{}{}{}{}{}{}'.format(mp3_dir, 'g3db', uid, 'g3uid','bd_str_per_limit',i, '.mp3')
            # ,'g3db',uid,'g3uid'
            #  with open('auido.b.mp3', 'wb') as f:
            with open(f_w, 'wb') as f:
                f.write(result)
    
    import os
    os._exit(2)
    

      

    换行符影响 

    # 设置分句的标志符号;可以根据实际需要进行修改
    # cutlist = "。!?".decode('utf-8')
    
    cutlist = ['
    ', '	', '。', ';', '?', '.', ';', '?', '...', '、、、', ':']
    
    
    # cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',':',',']
    # cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',',','、']
    
    
    # 检查某字符是否分句标志符号的函数;如果是,返回True,否则返回False
    def FindToken(cutlist, char):
        if char in cutlist:
            return True
        else:
            return False
    
    
    # 进行分句的核心函数
    def Cut(cutlist, lines):  # 参数1:引用分句标志符;参数2:被分句的文本,为一行中文字符
        l = []  # 句子列表,用于存储单个分句成功后的整句内容,为函数的返回值
        line = []  # 临时列表,用于存储捕获到分句标志符之前的每个字符,一旦发现分句符号后,就会将其内容全部赋给l,然后就会被清空
    
        for i in lines:  # 对函数参数2中的每一字符逐个进行检查 (本函数中,如果将if和else对换一下位置,会更好懂)
            if FindToken(cutlist, i):  # 如果当前字符是分句符号
                line.append(i)  # 将此字符放入临时列表中
                l.append(''.join(line))  # 并把当前临时列表的内容加入到句子列表中
                line = []  # 将符号列表清空,以便下次分句使用
            else:  # 如果当前字符不是分句符号,则将该字符直接放入临时列表中
                line.append(i)
        return l
    
    
    r_s = []
    # 以下为调用上述函数实现从文本文件中读取内容并进行分句。
    # with open('mybaidu.parp.b.txt','r',encoding='utf-8') as fr :
    #     for lines in fr:
    #         l = Cut(list(cutlist), list(lines))
    #         for line in l:
    #             if len(line.replace(' ', '')) == 0:
    #                 continue
    #             if line.strip() != "":
    #                 line=line.strip()
    #                 r_s.append(line)
    #
    #                 # li = line.strip().split()
    #                 # for sentence in li:
    #                 #     r_s.append(sentence)
    str_ = ''
    
    # cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',':',',','
    ']
    
    with open('mybaidu.parp.b.txt', 'r', encoding='utf-8') as fr:
        for lines in fr:
            if len(lines.replace(' ', '')) == 0:
                continue
            # str_='{}{}'.format(str_,lines.replace('
    ',''))
            # if len(lines.replace(' ','').replace('
    ',''))==0:
            #     continue
            str_ = '{}{}'.format(str_, lines.replace('
    ',''))
            # l = Cut(list(cutlist), list(lines))
            # for line in l:
            #     if line.strip() != "":
            #         line=line.strip()
    
    
    from aip import AipSpeech
    
    bd_k_l = ['11059852', '5Kk01GtG2fjCwpzEkwdn0mjw', 'bp6Wyx377Elq7RsCQZzTBgGUFzLm8G2A']
    APP_ID, API_KEY, SECRET_KEY = bd_k_l
    
    
    import math
    bd_str_per_limit=1024
    rep_times=math.ceil(len(str_)/bd_str_per_limit)
    
    for i in range(rep_times):
        cut_str=str_[i*bd_str_per_limit:i*bd_str_per_limit+bd_str_per_limit]
        print(cut_str)
        print('----------------------------------')
        mp3_dir = 'C:\Users\sas\PycharmProjects\produce_video\result_liukeyun\'
    
        client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
        result = client.synthesis(cut_str, 'zh', 1, {
            'vol': 5,
        })
        uid = 'liukeyuanCAKE_whole_para'
        # 识别正确返回语音二进制 错误则返回dict 参照下面错误码
        f_w = '{}{}{}{}{}'.format(mp3_dir, 'g3db', uid, 'g3uid', '.mp3')
    
        if not isinstance(result, dict):
            # f_w = '{}{}{}{}'.format(mp3_dir, 'g3uid', uid, '.mp3')
            f_w = '{}{}{}{}{}{}{}{}'.format(mp3_dir, 'g3db', uid, 'g3uid','bd_str_per_limit','_NO_trN_',i, '.mp3')
            # ,'g3db',uid,'g3uid'
            #  with open('auido.b.mp3', 'wb') as f:
            with open(f_w, 'wb') as f:
                f.write(result)
    
    import os
    os._exit(2)
    

      

    # 设置分句的标志符号;可以根据实际需要进行修改
    # cutlist = "。!?".decode('utf-8')
    
    cutlist = ['
    ', '	', '。', ';', '?', '.', ';', '?', '...', '、、、', ':']
    
    
    # cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',':',',']
    # cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',',','、']
    
    
    # 检查某字符是否分句标志符号的函数;如果是,返回True,否则返回False
    def FindToken(cutlist, char):
        if char in cutlist:
            return True
        else:
            return False
    
    
    # 进行分句的核心函数
    def Cut(cutlist, lines):  # 参数1:引用分句标志符;参数2:被分句的文本,为一行中文字符
        l = []  # 句子列表,用于存储单个分句成功后的整句内容,为函数的返回值
        line = []  # 临时列表,用于存储捕获到分句标志符之前的每个字符,一旦发现分句符号后,就会将其内容全部赋给l,然后就会被清空
    
        for i in lines:  # 对函数参数2中的每一字符逐个进行检查 (本函数中,如果将if和else对换一下位置,会更好懂)
            if FindToken(cutlist, i):  # 如果当前字符是分句符号
                line.append(i)  # 将此字符放入临时列表中
                l.append(''.join(line))  # 并把当前临时列表的内容加入到句子列表中
                line = []  # 将符号列表清空,以便下次分句使用
            else:  # 如果当前字符不是分句符号,则将该字符直接放入临时列表中
                line.append(i)
        return l
    
    
    r_s = []
    # 以下为调用上述函数实现从文本文件中读取内容并进行分句。
    # with open('mybaidu.parp.b.txt','r',encoding='utf-8') as fr :
    #     for lines in fr:
    #         l = Cut(list(cutlist), list(lines))
    #         for line in l:
    #             if len(line.replace(' ', '')) == 0:
    #                 continue
    #             if line.strip() != "":
    #                 line=line.strip()
    #                 r_s.append(line)
    #
    #                 # li = line.strip().split()
    #                 # for sentence in li:
    #                 #     r_s.append(sentence)
    str_ = ''
    
    # cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',':',',','
    ']
    
    with open('mybaidu.parp.b.txt', 'r', encoding='utf-8') as fr:
        for lines in fr:
            # if len(lines.replace(' ', '')) == 0:
            #     continue
            # str_='{}{}'.format(str_,lines.replace('
    ',''))
            if len(lines.replace(' ','').replace('
    ',''))==0:
                continue
            str_ = '{}{}'.format(str_, lines.replace('
    ',''))
            # l = Cut(list(cutlist), list(lines))
            # for line in l:
            #     if line.strip() != "":
            #         line=line.strip()
    
    
    from aip import AipSpeech
    
    bd_k_l = ['11059852', '5Kk01GtG2fjCwpzEkwdn0mjw', 'bp6Wyx377Elq7RsCQZzTBgGUFzLm8G2A']
    APP_ID, API_KEY, SECRET_KEY = bd_k_l
    
    
    import math
    #bd_str_per_limit=1024
    bd_str_per_limit=300
    rep_times=math.ceil(len(str_)/bd_str_per_limit)
    
    for i in range(rep_times):
        cut_str=str_[i*bd_str_per_limit:i*bd_str_per_limit+bd_str_per_limit]
        print(cut_str)
        print('----------------------------------')
        mp3_dir = 'C:\Users\sas\PycharmProjects\produce_video\result_liukeyun\'
    
        client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
        result = client.synthesis(cut_str, 'zh', 1, {
            'vol': 5,
        })
        uid = 'CAKE'
        # 识别正确返回语音二进制 错误则返回dict 参照下面错误码
        f_w = '{}{}{}{}{}'.format(mp3_dir, 'g3db', uid, 'g3uid', '.mp3')
    
        if not isinstance(result, dict):
            # f_w = '{}{}{}{}'.format(mp3_dir, 'g3uid', uid, '.mp3')
            f_w = '{}{}{}{}{}{}{}{}'.format(mp3_dir, 'g3db', uid, 'g3uid','noBRBlankLine','',i, '.mp3')
            # ,'g3db',uid,'g3uid'
            #  with open('auido.b.mp3', 'wb') as f:
            with open(f_w, 'wb') as f:
                f.write(result)
    
    import os
    os._exit(2)
    

      

  • 相关阅读:
    python(4):Python应用场景
    python(3):python的优缺点
    MySQL(17):用户登录实例
    MySQL(16):参数化、封装
    MySQL(15):与python交互的增删改查
    MySQL(14):与python交互的类型
    MySQL(13):索引
    c++ --程序编译
    c++ -- 顺序容器操作
    c++ --string操作
  • 原文地址:https://www.cnblogs.com/rsapaper/p/8795540.html
Copyright © 2020-2023  润新知