• 写了一个自动用google翻译文档的工具


    写了一个自动用google翻译文档的工具

    features:

    • [x] 支持 Word(.docx)文档
    • [x] 每一个段落下面放上对照的翻译
    from googletrans import Translator
    import sys
    import docx
    
    # Translate a .docx file paragraph by paragraph (English -> Simplified
    # Chinese) and write a copy in which every paragraph is followed by its
    # translation. The input path is the first command-line argument, or the
    # hard-coded default below when no argument is given.
    # NOTE(review): the default path looks like a Windows path whose
    # backslashes were lost -- confirm and restore before relying on it.
    fname = sys.argv[1] if len(
        sys.argv) > 1 else r'F:GoogleDriveSync3jobrelatedThe Fast Forward MBA in Project Management ( PDFDrive.com ).full.docx'
    
    
    translator = Translator()
    foname = fname + '-cn.docx'
    # The file is opened twice: `doc` is only read, `docdes` receives the
    # translated runs and is what gets saved.
    doc = docx.Document(fname)
    docdes = docx.Document(fname)
    
    # Fetch the paragraph lists once instead of on every loop iteration.
    # add_run() edits paragraphs in place and never adds or removes paragraphs,
    # so the two lists stay index-aligned.
    src_paragraphs = doc.paragraphs
    dst_paragraphs = docdes.paragraphs
    
    N = len(src_paragraphs)
    for i in range(N):
        print(1.0*i/N,)
        subCont = src_paragraphs[i].text
        try:
            s = translator.translate(subCont, src='en', dest='zh-cn')
            # Put the translation on its own line(s) after the original text.
            dst_paragraphs[i].add_run('\n' + str(s.text) + '\n')
        except Exception as e:
            # Best effort: a paragraph that fails to translate is reported and
            # left untranslated; the rest of the document is still processed.
            print('except:', e)
    
    docdes.save(foname)
    
    
    from googletrans import Translator
    import sys, os
    import docx
    
    # Path to process: the first command-line argument when one is supplied,
    # otherwise the hard-coded default document.
    if len(sys.argv) > 1:
        fname = sys.argv[1]
    else:
        fname = r'D:UserscutepDownloadsThrow-Away-the-First-90-Days.docx'
    
    def trans(fname):
        """Write a bilingual copy of the .docx file at ``fname``.

        Paragraph texts are joined into batches with a separator line, each
        batch is translated from English to Simplified Chinese in a single
        request, and the result is split on the separator so that every
        paragraph gets its own translation appended as a new run.

        Output files:
          * ``fname + '-cn.docx'`` -- the final result.
          * ``'<fname>-<target>-cn.docx'`` -- intermediate snapshots, saved each
            time progress passes the next 10% mark.

        Raises:
            AssertionError: if the translated batch does not split into as many
                pieces as paragraphs were sent (for example because the
                separator was altered by the translation).
        """
        translator = Translator()
        foname = fname + '-cn.docx'
        # The file is opened twice: `doc` is only read, `docdes` receives the
        # translated runs and is what gets saved.
        doc = docx.Document(fname)
        docdes = docx.Document(fname)
    
        # Loop-invariant separators. `spacer` is inserted between paragraphs
        # before translating; the result is split on `spacer_short` (no
        # newlines) so the split does not depend on the surrounding line breaks
        # coming back unchanged.
        spacer = '\n========================\n'
        spacer_short = '========================'
    
        # Fetch the paragraph lists once instead of on every access. add_run()
        # edits paragraphs in place and never adds or removes paragraphs, so
        # the two lists stay index-aligned.
        src_paragraphs = doc.paragraphs
        dst_paragraphs = docdes.paragraphs
    
        N = len(src_paragraphs)
        NextTarget = 0.1
        i = 0
        while i < N:
            percentage = 1.0*i/N
            if i % 10 == 0:
                print(percentage)
            if percentage > NextTarget:
                # Save a snapshot so partial work survives a later failure.
                outputfile = '%s-%.2f-cn.docx' % (fname, NextTarget)
                print(outputfile)
                docdes.save(outputfile)
                NextTarget = NextTarget + 0.1
    
            # Build a batch covering paragraphs [i, j). The length is tested
            # before each append, so a batch may exceed 4500 characters by up
            # to one paragraph plus a separator.
            subCont = src_paragraphs[i].text
            j = i+1
            while len(subCont) < 4500 and j < N:
                subCont = subCont + spacer + src_paragraphs[j].text
                j = j+1
            print(i, j)
            if subCont.strip():
                s = translator.translate(subCont, src='en', dest='zh-cn')
                ss = s.text.split(spacer_short)
                # Explicit raise rather than `assert`, so the check still runs
                # under `python -O`; the exception type is unchanged.
                if len(ss) != j-i:
                    raise AssertionError('%d, %d' % (len(ss), j-i))
                for k in range(j-i):
                    dst_paragraphs[k+i].add_run('\n' + ss[k] + '\n')
            i = j
    
        docdes.save(foname)
    
    if __name__ == '__main__':
        if os.path.isfile(fname):
            # A single document was given: translate it in this process.
            trans(fname)
        else:
            # Otherwise treat fname as a directory and translate every .docx
            # file in it, one worker process per file.
            from multiprocessing import Process
    
            ps = []
            for filename in os.listdir(fname):
                lowered = filename.lower()
                if not lowered.endswith('.docx'):
                    continue
                if lowered.endswith('-cn.docx'):
                    # trans() names its results and snapshots '...-cn.docx';
                    # skip them so a re-run does not translate earlier output.
                    continue
                # os.path.join supplies the platform's path separator.
                p = Process(target=trans, args=(os.path.join(fname, filename),))
                p.start()
                ps.append(p)
    
            # Wait for all workers to finish before exiting.
            for p in ps:
                p.join()
    
  • 相关阅读:
    Jquery 表单批量验证
    学习总结 本科学习生涯
    学习总结 大学英语四六级
    学习总结 普通话等级考试
    学习总结 NCRE二级和三级
    oracle程序块
    正则表达式
    redis常用命令
    FaaS 基于多租户技术 SaaS平台设计
    量化交易之网格情缘
  • 原文地址:https://www.cnblogs.com/cutepig/p/13771965.html
Copyright © 2020-2023  润新知